1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
16 #define TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
17
18 #include <new>
19 #include <string>
20 #include <utility>
21
22 #include "absl/strings/string_view.h"
23 #include "tensorflow/core/platform/logging.h"
24 #include "tensorflow/core/platform/macros.h"
25 #include "tensorflow/core/platform/platform.h"
26 #include "tensorflow/core/platform/types.h"
27 #include "tensorflow/core/profiler/lib/traceme_encode.h" // IWYU pragma: export
28
29 #if !defined(IS_MOBILE_PLATFORM)
30 #include "tensorflow/core/profiler/backends/cpu/traceme_recorder.h"
31 #include "tensorflow/core/profiler/utils/time_utils.h"
32 #endif
33
34 namespace tensorflow {
35 namespace profiler {
36
37 // NOTE: Borrowed from boost C++ libraries. When TF embrace C++17 this should
38 // be replaced with std::is_invocable;
39 template <typename F, typename... Args>
40 struct is_invocable
41 : std::is_constructible<
42 std::function<void(Args...)>,
43 std::reference_wrapper<typename std::remove_reference<F>::type> > {};
44
45 // Predefined levels:
46 // - Level 1 (kCritical) is the default and used only for user instrumentation.
47 // - Level 2 (kInfo) is used by profiler for instrumenting high level program
48 // execution details (expensive TF ops, XLA ops, etc).
49 // - Level 3 (kVerbose) is also used by profiler to instrument more verbose
50 // (low-level) program execution details (cheap TF ops, etc).
51 enum TraceMeLevel {
52 kCritical = 1,
53 kInfo = 2,
54 kVerbose = 3,
55 };
56
57 // This is specifically used for instrumenting Tensorflow ops.
58 // Takes input as whether a TF op is expensive or not and returns the TraceMe
59 // level to be assigned to trace that particular op. Assigns level 2 for
60 // expensive ops (these are high-level details and shown by default in profiler
61 // UI). Assigns level 3 for cheap ops (low-level details not shown by default).
GetTFTraceMeLevel(bool is_expensive)62 inline int GetTFTraceMeLevel(bool is_expensive) {
63 return is_expensive ? kInfo : kVerbose;
64 }
65
66 // This class permits user-specified (CPU) tracing activities. A trace activity
67 // is started when an object of this class is created and stopped when the
68 // object is destroyed.
69 //
70 // CPU tracing can be useful when trying to understand what parts of GPU
71 // computation (e.g., kernels and memcpy) correspond to higher level activities
72 // in the overall program. For instance, a collection of kernels maybe
73 // performing one "step" of a program that is better visualized together than
74 // interspersed with kernels from other "steps". Therefore, a TraceMe object
75 // can be created at each "step".
76 //
77 // Two APIs are provided:
78 // (1) Scoped object: a TraceMe object starts tracing on construction, and
79 // stops tracing when it goes out of scope.
80 // {
81 // TraceMe trace("step");
82 // ... do some work ...
83 // }
84 // TraceMe objects can be members of a class, or allocated on the heap.
85 // (2) Static methods: ActivityStart and ActivityEnd may be called in pairs.
86 // auto id = ActivityStart("step");
87 // ... do some work ...
88 // ActivityEnd(id);
89 // The two static methods should be called within the same thread.
90 class TraceMe {
91 public:
92 // Constructor that traces a user-defined activity labeled with name
93 // in the UI. Level defines the trace priority, used for filtering TraceMe
94 // events. By default, traces with TraceMe level <= 2 are recorded. Levels:
95 // - Must be a positive integer.
96 // - Can be a value in enum TraceMeLevel.
97 // Users are welcome to use level > 3 in their code, if they wish to filter
98 // out their host traces based on verbosity.
99 explicit TraceMe(absl::string_view name, int level = 1) {
100 DCHECK_GE(level, 1);
101 #if !defined(IS_MOBILE_PLATFORM)
102 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
103 new (&no_init_.name) std::string(name);
104 start_time_ = GetCurrentTimeNanos();
105 }
106 #endif
107 }
108
109 // Do not allow passing a temporary string as the overhead of generating that
110 // string should only be incurred when tracing is enabled. Wrap the temporary
111 // string generation (e.g., StrCat) in a lambda and use the name_generator
112 // template instead.
113 explicit TraceMe(std::string&& name, int level = 1) = delete;
114
115 // Do not allow passing strings by reference or value since the caller
116 // may unintentionally maintain ownership of the name.
117 // Explicitly wrap the name in a string_view if you really wish to maintain
118 // ownership of a string already generated for other purposes. For temporary
119 // strings (e.g., result of StrCat) use the name_generator template.
120 explicit TraceMe(const std::string& name, int level = 1) = delete;
121
122 // This overload is necessary to make TraceMe's with string literals work.
123 // Otherwise, the name_generator template would be used.
124 explicit TraceMe(const char* raw, int level = 1)
TraceMe(absl::string_view (raw),level)125 : TraceMe(absl::string_view(raw), level) {}
126
127 // This overload only generates the name (and possibly metadata) if tracing is
128 // enabled. Useful for avoiding expensive operations (e.g., string
129 // concatenation) when tracing is disabled.
130 // name_generator may be a lambda or functor that returns a type that the
131 // string() constructor can take, e.g., the result of TraceMeEncode.
132 // name_generator is templated, rather than a std::function to avoid
133 // allocations std::function might make even if never called.
134 // Example Usage:
135 // TraceMe trace_me([&]() {
136 // return StrCat("my_trace", id);
137 // }
138 // TraceMe op_trace_me([&]() {
139 // return TraceMeOp(op_name, op_type);
140 // }
141 // TraceMe trace_me_with_metadata([&value1]() {
142 // return TraceMeEncode("my_trace", {{"key1", value1}, {"key2", 42}});
143 // });
144 template <typename NameGeneratorT,
145 std::enable_if_t<is_invocable<NameGeneratorT>::value, bool> = true>
146 explicit TraceMe(NameGeneratorT&& name_generator, int level = 1) {
147 DCHECK_GE(level, 1);
148 #if !defined(IS_MOBILE_PLATFORM)
149 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
150 new (&no_init_.name)
151 std::string(std::forward<NameGeneratorT>(name_generator)());
152 start_time_ = GetCurrentTimeNanos();
153 }
154 #endif
155 }
156
157 // Movable.
TraceMe(TraceMe && other)158 TraceMe(TraceMe&& other) { *this = std::move(other); }
159 TraceMe& operator=(TraceMe&& other) {
160 #if !defined(IS_MOBILE_PLATFORM)
161 if (TF_PREDICT_FALSE(other.start_time_ != kUntracedActivity)) {
162 new (&no_init_.name) std::string(std::move(other.no_init_.name));
163 other.no_init_.name.~string();
164 start_time_ = std::exchange(other.start_time_, kUntracedActivity);
165 }
166 #endif
167 return *this;
168 }
169
~TraceMe()170 ~TraceMe() { Stop(); }
171
172 // Stop tracing the activity. Called by the destructor, but exposed to allow
173 // stopping tracing before the object goes out of scope. Only has an effect
174 // the first time it is called.
Stop()175 void Stop() {
176 // We do not need to check the trace level again here.
177 // - If tracing wasn't active to start with, we have kUntracedActivity.
178 // - If tracing was active and was stopped, we have
179 // TraceMeRecorder::Active().
180 // - If tracing was active and was restarted at a lower level, we may
181 // spuriously record the event. This is extremely rare, and acceptable as
182 // event will be discarded when its start timestamp fall outside of the
183 // start/stop session timestamp.
184 #if !defined(IS_MOBILE_PLATFORM)
185 if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) {
186 if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
187 TraceMeRecorder::Record(
188 {std::move(no_init_.name), start_time_, GetCurrentTimeNanos()});
189 }
190 no_init_.name.~string();
191 start_time_ = kUntracedActivity;
192 }
193 #endif
194 }
195
196 // Appends new_metadata to the TraceMe name passed to the constructor.
197 // metadata_generator may be a lambda or functor that returns a type that the
198 // string() constructor can take, e.g., the result of TraceMeEncode.
199 // metadata_generator is only evaluated when tracing is enabled.
200 // metadata_generator is templated, rather than a std::function to avoid
201 // allocations std::function might make even if never called.
202 // Example Usage:
203 // trace_me.AppendMetadata([&value1]() {
204 // return TraceMeEncode({{"key1", value1}, {"key2", 42}});
205 // });
206 template <
207 typename MetadataGeneratorT,
208 std::enable_if_t<is_invocable<MetadataGeneratorT>::value, bool> = true>
AppendMetadata(MetadataGeneratorT && metadata_generator)209 void AppendMetadata(MetadataGeneratorT&& metadata_generator) {
210 #if !defined(IS_MOBILE_PLATFORM)
211 if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) {
212 if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
213 traceme_internal::AppendMetadata(
214 &no_init_.name,
215 std::forward<MetadataGeneratorT>(metadata_generator)());
216 }
217 }
218 #endif
219 }
220
221 // Static API, for use when scoped objects are inconvenient.
222
223 // Record the start time of an activity.
224 // Returns the activity ID, which is used to stop the activity.
225 // Calls `name_generator` to get the name for activity.
226 template <typename NameGeneratorT,
227 std::enable_if_t<is_invocable<NameGeneratorT>::value, bool> = true>
228 static int64_t ActivityStart(NameGeneratorT&& name_generator, int level = 1) {
229 #if !defined(IS_MOBILE_PLATFORM)
230 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
231 int64_t activity_id = TraceMeRecorder::NewActivityId();
232 TraceMeRecorder::Record({std::forward<NameGeneratorT>(name_generator)(),
233 GetCurrentTimeNanos(), -activity_id});
234 return activity_id;
235 }
236 #endif
237 return kUntracedActivity;
238 }
239
240 // Record the start time of an activity.
241 // Returns the activity ID, which is used to stop the activity.
242 static int64_t ActivityStart(absl::string_view name, int level = 1) {
243 #if !defined(IS_MOBILE_PLATFORM)
244 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
245 int64_t activity_id = TraceMeRecorder::NewActivityId();
246 TraceMeRecorder::Record(
247 {std::string(name), GetCurrentTimeNanos(), -activity_id});
248 return activity_id;
249 }
250 #endif
251 return kUntracedActivity;
252 }
253
254 // Same as ActivityStart above, an overload for "const std::string&"
255 static int64_t ActivityStart(const std::string& name, int level = 1) {
256 return ActivityStart(absl::string_view(name), level);
257 }
258
259 // Same as ActivityStart above, an overload for "const char*"
260 static int64_t ActivityStart(const char* name, int level = 1) {
261 return ActivityStart(absl::string_view(name), level);
262 }
263
264 // Record the end time of an activity started by ActivityStart().
ActivityEnd(int64_t activity_id)265 static void ActivityEnd(int64_t activity_id) {
266 #if !defined(IS_MOBILE_PLATFORM)
267 // We don't check the level again (see TraceMe::Stop()).
268 if (TF_PREDICT_FALSE(activity_id != kUntracedActivity)) {
269 if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
270 TraceMeRecorder::Record(
271 {std::string(), -activity_id, GetCurrentTimeNanos()});
272 }
273 }
274 #endif
275 }
276
277 // Records the time of an instant activity.
278 template <typename NameGeneratorT,
279 std::enable_if_t<is_invocable<NameGeneratorT>::value, bool> = true>
280 static void InstantActivity(NameGeneratorT&& name_generator, int level = 1) {
281 #if !defined(IS_MOBILE_PLATFORM)
282 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
283 int64_t now = GetCurrentTimeNanos();
284 TraceMeRecorder::Record({std::forward<NameGeneratorT>(name_generator)(),
285 /*start_time=*/now, /*end_time=*/now});
286 }
287 #endif
288 }
289
290 static bool Active(int level = 1) {
291 #if !defined(IS_MOBILE_PLATFORM)
292 return TraceMeRecorder::Active(level);
293 #else
294 return false;
295 #endif
296 }
297
NewActivityId()298 static int64_t NewActivityId() {
299 #if !defined(IS_MOBILE_PLATFORM)
300 return TraceMeRecorder::NewActivityId();
301 #else
302 return 0;
303 #endif
304 }
305
306 private:
307 // Start time used when tracing is disabled.
308 constexpr static int64_t kUntracedActivity = 0;
309
310 TF_DISALLOW_COPY_AND_ASSIGN(TraceMe);
311
312 // Wrap the name into a union so that we can avoid the cost of string
313 // initialization when tracing is disabled.
314 union NoInit {
NoInit()315 NoInit() {}
~NoInit()316 ~NoInit() {}
317 std::string name;
318 } no_init_;
319
320 int64_t start_time_ = kUntracedActivity;
321 };
322
323 // Whether OpKernel::TraceString will populate additional information for
324 // profiler, such as tensor shapes.
TfOpDetailsEnabled()325 inline bool TfOpDetailsEnabled() {
326 return TraceMe::Active(TraceMeLevel::kVerbose);
327 }
328
329 } // namespace profiler
330 } // namespace tensorflow
331
332 #endif // TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
333