1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
16 #define TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
17 
18 #include <new>
19 #include <string>
20 #include <utility>
21 
22 #include "absl/strings/string_view.h"
23 #include "tensorflow/core/platform/logging.h"
24 #include "tensorflow/core/platform/macros.h"
25 #include "tensorflow/core/platform/platform.h"
26 #include "tensorflow/core/platform/types.h"
27 #include "tensorflow/core/profiler/lib/traceme_encode.h"  // IWYU pragma: export
28 
29 #if !defined(IS_MOBILE_PLATFORM)
30 #include "tensorflow/core/profiler/backends/cpu/traceme_recorder.h"
31 #include "tensorflow/core/profiler/utils/time_utils.h"
32 #endif
33 
34 namespace tensorflow {
35 namespace profiler {
36 
37 // NOTE: Borrowed from boost C++ libraries. When TF embrace C++17 this should
38 // be replaced with std::is_invocable;
39 template <typename F, typename... Args>
40 struct is_invocable
41     : std::is_constructible<
42           std::function<void(Args...)>,
43           std::reference_wrapper<typename std::remove_reference<F>::type> > {};
44 
45 // Predefined levels:
46 // - Level 1 (kCritical) is the default and used only for user instrumentation.
47 // - Level 2 (kInfo) is used by profiler for instrumenting high level program
48 //   execution details (expensive TF ops, XLA ops, etc).
49 // - Level 3 (kVerbose) is also used by profiler to instrument more verbose
50 //   (low-level) program execution details (cheap TF ops, etc).
51 enum TraceMeLevel {
52   kCritical = 1,
53   kInfo = 2,
54   kVerbose = 3,
55 };
56 
57 // This is specifically used for instrumenting Tensorflow ops.
58 // Takes input as whether a TF op is expensive or not and returns the TraceMe
59 // level to be assigned to trace that particular op. Assigns level 2 for
60 // expensive ops (these are high-level details and shown by default in profiler
61 // UI). Assigns level 3 for cheap ops (low-level details not shown by default).
GetTFTraceMeLevel(bool is_expensive)62 inline int GetTFTraceMeLevel(bool is_expensive) {
63   return is_expensive ? kInfo : kVerbose;
64 }
65 
66 // This class permits user-specified (CPU) tracing activities. A trace activity
67 // is started when an object of this class is created and stopped when the
68 // object is destroyed.
69 //
70 // CPU tracing can be useful when trying to understand what parts of GPU
71 // computation (e.g., kernels and memcpy) correspond to higher level activities
72 // in the overall program. For instance, a collection of kernels maybe
73 // performing one "step" of a program that is better visualized together than
74 // interspersed with kernels from other "steps". Therefore, a TraceMe object
75 // can be created at each "step".
76 //
77 // Two APIs are provided:
78 //   (1) Scoped object: a TraceMe object starts tracing on construction, and
79 //       stops tracing when it goes out of scope.
80 //          {
81 //            TraceMe trace("step");
82 //            ... do some work ...
83 //          }
84 //       TraceMe objects can be members of a class, or allocated on the heap.
85 //   (2) Static methods: ActivityStart and ActivityEnd may be called in pairs.
86 //          auto id = ActivityStart("step");
87 //          ... do some work ...
88 //          ActivityEnd(id);
89 //       The two static methods should be called within the same thread.
90 class TraceMe {
91  public:
92   // Constructor that traces a user-defined activity labeled with name
93   // in the UI. Level defines the trace priority, used for filtering TraceMe
94   // events. By default, traces with TraceMe level <= 2 are recorded. Levels:
95   // - Must be a positive integer.
96   // - Can be a value in enum TraceMeLevel.
97   // Users are welcome to use level > 3 in their code, if they wish to filter
98   // out their host traces based on verbosity.
99   explicit TraceMe(absl::string_view name, int level = 1) {
100     DCHECK_GE(level, 1);
101 #if !defined(IS_MOBILE_PLATFORM)
102     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
103       new (&no_init_.name) std::string(name);
104       start_time_ = GetCurrentTimeNanos();
105     }
106 #endif
107   }
108 
109   // Do not allow passing a temporary string as the overhead of generating that
110   // string should only be incurred when tracing is enabled. Wrap the temporary
111   // string generation (e.g., StrCat) in a lambda and use the name_generator
112   // template instead.
113   explicit TraceMe(std::string&& name, int level = 1) = delete;
114 
115   // Do not allow passing strings by reference or value since the caller
116   // may unintentionally maintain ownership of the name.
117   // Explicitly wrap the name in a string_view if you really wish to maintain
118   // ownership of a string already generated for other purposes. For temporary
119   // strings (e.g., result of StrCat) use the name_generator template.
120   explicit TraceMe(const std::string& name, int level = 1) = delete;
121 
122   // This overload is necessary to make TraceMe's with string literals work.
123   // Otherwise, the name_generator template would be used.
124   explicit TraceMe(const char* raw, int level = 1)
TraceMe(absl::string_view (raw),level)125       : TraceMe(absl::string_view(raw), level) {}
126 
127   // This overload only generates the name (and possibly metadata) if tracing is
128   // enabled. Useful for avoiding expensive operations (e.g., string
129   // concatenation) when tracing is disabled.
130   // name_generator may be a lambda or functor that returns a type that the
131   // string() constructor can take, e.g., the result of TraceMeEncode.
132   // name_generator is templated, rather than a std::function to avoid
133   // allocations std::function might make even if never called.
134   // Example Usage:
135   //   TraceMe trace_me([&]() {
136   //     return StrCat("my_trace", id);
137   //   }
138   //   TraceMe op_trace_me([&]() {
139   //     return TraceMeOp(op_name, op_type);
140   //   }
141   //   TraceMe trace_me_with_metadata([&value1]() {
142   //     return TraceMeEncode("my_trace", {{"key1", value1}, {"key2", 42}});
143   //   });
144   template <typename NameGeneratorT,
145             std::enable_if_t<is_invocable<NameGeneratorT>::value, bool> = true>
146   explicit TraceMe(NameGeneratorT&& name_generator, int level = 1) {
147     DCHECK_GE(level, 1);
148 #if !defined(IS_MOBILE_PLATFORM)
149     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
150       new (&no_init_.name)
151           std::string(std::forward<NameGeneratorT>(name_generator)());
152       start_time_ = GetCurrentTimeNanos();
153     }
154 #endif
155   }
156 
157   // Movable.
TraceMe(TraceMe && other)158   TraceMe(TraceMe&& other) { *this = std::move(other); }
159   TraceMe& operator=(TraceMe&& other) {
160 #if !defined(IS_MOBILE_PLATFORM)
161     if (TF_PREDICT_FALSE(other.start_time_ != kUntracedActivity)) {
162       new (&no_init_.name) std::string(std::move(other.no_init_.name));
163       other.no_init_.name.~string();
164       start_time_ = std::exchange(other.start_time_, kUntracedActivity);
165     }
166 #endif
167     return *this;
168   }
169 
~TraceMe()170   ~TraceMe() { Stop(); }
171 
172   // Stop tracing the activity. Called by the destructor, but exposed to allow
173   // stopping tracing before the object goes out of scope. Only has an effect
174   // the first time it is called.
Stop()175   void Stop() {
176     // We do not need to check the trace level again here.
177     // - If tracing wasn't active to start with, we have kUntracedActivity.
178     // - If tracing was active and was stopped, we have
179     //   TraceMeRecorder::Active().
180     // - If tracing was active and was restarted at a lower level, we may
181     //   spuriously record the event. This is extremely rare, and acceptable as
182     //   event will be discarded when its start timestamp fall outside of the
183     //   start/stop session timestamp.
184 #if !defined(IS_MOBILE_PLATFORM)
185     if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) {
186       if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
187         TraceMeRecorder::Record(
188             {std::move(no_init_.name), start_time_, GetCurrentTimeNanos()});
189       }
190       no_init_.name.~string();
191       start_time_ = kUntracedActivity;
192     }
193 #endif
194   }
195 
196   // Appends new_metadata to the TraceMe name passed to the constructor.
197   // metadata_generator may be a lambda or functor that returns a type that the
198   // string() constructor can take, e.g., the result of TraceMeEncode.
199   // metadata_generator is only evaluated when tracing is enabled.
200   // metadata_generator is templated, rather than a std::function to avoid
201   // allocations std::function might make even if never called.
202   // Example Usage:
203   //   trace_me.AppendMetadata([&value1]() {
204   //     return TraceMeEncode({{"key1", value1}, {"key2", 42}});
205   //   });
206   template <
207       typename MetadataGeneratorT,
208       std::enable_if_t<is_invocable<MetadataGeneratorT>::value, bool> = true>
AppendMetadata(MetadataGeneratorT && metadata_generator)209   void AppendMetadata(MetadataGeneratorT&& metadata_generator) {
210 #if !defined(IS_MOBILE_PLATFORM)
211     if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) {
212       if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
213         traceme_internal::AppendMetadata(
214             &no_init_.name,
215             std::forward<MetadataGeneratorT>(metadata_generator)());
216       }
217     }
218 #endif
219   }
220 
221   // Static API, for use when scoped objects are inconvenient.
222 
223   // Record the start time of an activity.
224   // Returns the activity ID, which is used to stop the activity.
225   // Calls `name_generator` to get the name for activity.
226   template <typename NameGeneratorT,
227             std::enable_if_t<is_invocable<NameGeneratorT>::value, bool> = true>
228   static int64_t ActivityStart(NameGeneratorT&& name_generator, int level = 1) {
229 #if !defined(IS_MOBILE_PLATFORM)
230     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
231       int64_t activity_id = TraceMeRecorder::NewActivityId();
232       TraceMeRecorder::Record({std::forward<NameGeneratorT>(name_generator)(),
233                                GetCurrentTimeNanos(), -activity_id});
234       return activity_id;
235     }
236 #endif
237     return kUntracedActivity;
238   }
239 
240   // Record the start time of an activity.
241   // Returns the activity ID, which is used to stop the activity.
242   static int64_t ActivityStart(absl::string_view name, int level = 1) {
243 #if !defined(IS_MOBILE_PLATFORM)
244     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
245       int64_t activity_id = TraceMeRecorder::NewActivityId();
246       TraceMeRecorder::Record(
247           {std::string(name), GetCurrentTimeNanos(), -activity_id});
248       return activity_id;
249     }
250 #endif
251     return kUntracedActivity;
252   }
253 
254   // Same as ActivityStart above, an overload for "const std::string&"
255   static int64_t ActivityStart(const std::string& name, int level = 1) {
256     return ActivityStart(absl::string_view(name), level);
257   }
258 
259   // Same as ActivityStart above, an overload for "const char*"
260   static int64_t ActivityStart(const char* name, int level = 1) {
261     return ActivityStart(absl::string_view(name), level);
262   }
263 
264   // Record the end time of an activity started by ActivityStart().
ActivityEnd(int64_t activity_id)265   static void ActivityEnd(int64_t activity_id) {
266 #if !defined(IS_MOBILE_PLATFORM)
267     // We don't check the level again (see TraceMe::Stop()).
268     if (TF_PREDICT_FALSE(activity_id != kUntracedActivity)) {
269       if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
270         TraceMeRecorder::Record(
271             {std::string(), -activity_id, GetCurrentTimeNanos()});
272       }
273     }
274 #endif
275   }
276 
277   // Records the time of an instant activity.
278   template <typename NameGeneratorT,
279             std::enable_if_t<is_invocable<NameGeneratorT>::value, bool> = true>
280   static void InstantActivity(NameGeneratorT&& name_generator, int level = 1) {
281 #if !defined(IS_MOBILE_PLATFORM)
282     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
283       int64_t now = GetCurrentTimeNanos();
284       TraceMeRecorder::Record({std::forward<NameGeneratorT>(name_generator)(),
285                                /*start_time=*/now, /*end_time=*/now});
286     }
287 #endif
288   }
289 
290   static bool Active(int level = 1) {
291 #if !defined(IS_MOBILE_PLATFORM)
292     return TraceMeRecorder::Active(level);
293 #else
294     return false;
295 #endif
296   }
297 
NewActivityId()298   static int64_t NewActivityId() {
299 #if !defined(IS_MOBILE_PLATFORM)
300     return TraceMeRecorder::NewActivityId();
301 #else
302     return 0;
303 #endif
304   }
305 
306  private:
307   // Start time used when tracing is disabled.
308   constexpr static int64_t kUntracedActivity = 0;
309 
310   TF_DISALLOW_COPY_AND_ASSIGN(TraceMe);
311 
312   // Wrap the name into a union so that we can avoid the cost of string
313   // initialization when tracing is disabled.
314   union NoInit {
NoInit()315     NoInit() {}
~NoInit()316     ~NoInit() {}
317     std::string name;
318   } no_init_;
319 
320   int64_t start_time_ = kUntracedActivity;
321 };
322 
323 // Whether OpKernel::TraceString will populate additional information for
324 // profiler, such as tensor shapes.
TfOpDetailsEnabled()325 inline bool TfOpDetailsEnabled() {
326   return TraceMe::Active(TraceMeLevel::kVerbose);
327 }
328 
329 }  // namespace profiler
330 }  // namespace tensorflow
331 
332 #endif  // TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
333