1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
16 #define TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
17
18 #include <new>
19 #include <string>
20 #include <utility>
21
22 #include "absl/strings/string_view.h"
23 #include "tensorflow/core/platform/logging.h"
24 #include "tensorflow/core/platform/macros.h"
25 #include "tensorflow/core/platform/platform.h"
26 #include "tensorflow/core/platform/types.h"
27 #include "tensorflow/core/profiler/lib/traceme_encode.h" // IWYU pragma: export
28
29 #if !defined(IS_MOBILE_PLATFORM)
30 #include "tensorflow/core/profiler/internal/cpu/traceme_recorder.h"
31 #include "tensorflow/core/profiler/utils/time_utils.h"
32 #endif
33
34 namespace tensorflow {
35 namespace profiler {
36
37 // Predefined levels:
38 // - Level 1 (kCritical) is the default and used only for user instrumentation.
39 // - Level 2 (kInfo) is used by profiler for instrumenting high level program
40 // execution details (expensive TF ops, XLA ops, etc).
41 // - Level 3 (kVerbose) is also used by profiler to instrument more verbose
42 // (low-level) program execution details (cheap TF ops, etc).
43 enum TraceMeLevel {
44 kCritical = 1,
45 kInfo = 2,
46 kVerbose = 3,
47 };
48
49 // This is specifically used for instrumenting Tensorflow ops.
50 // Takes input as whether a TF op is expensive or not and returns the TraceMe
51 // level to be assigned to trace that particular op. Assigns level 2 for
52 // expensive ops (these are high-level details and shown by default in profiler
53 // UI). Assigns level 3 for cheap ops (low-level details not shown by default).
GetTFTraceMeLevel(bool is_expensive)54 inline int GetTFTraceMeLevel(bool is_expensive) {
55 return is_expensive ? kInfo : kVerbose;
56 }
57
58 // This class permits user-specified (CPU) tracing activities. A trace activity
59 // is started when an object of this class is created and stopped when the
60 // object is destroyed.
61 //
62 // CPU tracing can be useful when trying to understand what parts of GPU
63 // computation (e.g., kernels and memcpy) correspond to higher level activities
64 // in the overall program. For instance, a collection of kernels maybe
65 // performing one "step" of a program that is better visualized together than
66 // interspersed with kernels from other "steps". Therefore, a TraceMe object
67 // can be created at each "step".
68 //
69 // Two APIs are provided:
70 // (1) Scoped object: a TraceMe object starts tracing on construction, and
71 // stops tracing when it goes out of scope.
72 // {
73 // TraceMe trace("step");
74 // ... do some work ...
75 // }
76 // TraceMe objects can be members of a class, or allocated on the heap.
77 // (2) Static methods: ActivityStart and ActivityEnd may be called in pairs.
78 // auto id = ActivityStart("step");
79 // ... do some work ...
80 // ActivityEnd(id);
81 // The two static methods should be called within the same thread.
82 class TraceMe {
83 public:
84 // Constructor that traces a user-defined activity labeled with name
85 // in the UI. Level defines the trace priority, used for filtering TraceMe
86 // events. By default, traces with TraceMe level <= 2 are recorded. Levels:
87 // - Must be a positive integer.
88 // - Can be a value in enum TraceMeLevel.
89 // Users are welcome to use level > 3 in their code, if they wish to filter
90 // out their host traces based on verbosity.
91 explicit TraceMe(absl::string_view name, int level = 1) {
92 DCHECK_GE(level, 1);
93 #if !defined(IS_MOBILE_PLATFORM)
94 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
95 new (&no_init_.name) std::string(name);
96 start_time_ = GetCurrentTimeNanos();
97 }
98 #endif
99 }
100
101 // Do not allow passing a temporary string as the overhead of generating that
102 // string should only be incurred when tracing is enabled. Wrap the temporary
103 // string generation (e.g., StrCat) in a lambda and use the name_generator
104 // template instead.
105 explicit TraceMe(std::string&& name, int level = 1) = delete;
106
107 // Do not allow passing strings by reference or value since the caller
108 // may unintentionally maintain ownership of the name.
109 // Explicitly wrap the name in a string_view if you really wish to maintain
110 // ownership of a string already generated for other purposes. For temporary
111 // strings (e.g., result of StrCat) use the name_generator template.
112 explicit TraceMe(const std::string& name, int level = 1) = delete;
113
114 // This overload is necessary to make TraceMe's with string literals work.
115 // Otherwise, the name_generator template would be used.
116 explicit TraceMe(const char* raw, int level = 1)
TraceMe(absl::string_view (raw),level)117 : TraceMe(absl::string_view(raw), level) {}
118
119 // This overload only generates the name (and possibly metadata) if tracing is
120 // enabled. Useful for avoiding expensive operations (e.g., string
121 // concatenation) when tracing is disabled.
122 // name_generator may be a lambda or functor that returns a type that the
123 // string() constructor can take, e.g., the result of TraceMeEncode.
124 // name_generator is templated, rather than a std::function to avoid
125 // allocations std::function might make even if never called.
126 // Example Usage:
127 // TraceMe trace_me([&]() {
128 // return StrCat("my_trace", id);
129 // }
130 // TraceMe op_trace_me([&]() {
131 // return TraceMeOp(op_name, op_type);
132 // }
133 // TraceMe trace_me_with_metadata([&value1]() {
134 // return TraceMeEncode("my_trace", {{"key1", value1}, {"key2", 42}});
135 // });
136 template <typename NameGeneratorT>
137 explicit TraceMe(NameGeneratorT name_generator, int level = 1) {
138 DCHECK_GE(level, 1);
139 #if !defined(IS_MOBILE_PLATFORM)
140 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
141 new (&no_init_.name) std::string(name_generator());
142 start_time_ = GetCurrentTimeNanos();
143 }
144 #endif
145 }
146
147 // Movable.
TraceMe(TraceMe && other)148 TraceMe(TraceMe&& other) { *this = std::move(other); }
149 TraceMe& operator=(TraceMe&& other) {
150 #if !defined(IS_MOBILE_PLATFORM)
151 if (TF_PREDICT_FALSE(other.start_time_ != kUntracedActivity)) {
152 new (&no_init_.name) std::string(std::move(other.no_init_.name));
153 other.no_init_.name.~string();
154 start_time_ = std::exchange(other.start_time_, kUntracedActivity);
155 }
156 #endif
157 return *this;
158 }
159
~TraceMe()160 ~TraceMe() { Stop(); }
161
162 // Stop tracing the activity. Called by the destructor, but exposed to allow
163 // stopping tracing before the object goes out of scope. Only has an effect
164 // the first time it is called.
Stop()165 void Stop() {
166 // We do not need to check the trace level again here.
167 // - If tracing wasn't active to start with, we have kUntracedActivity.
168 // - If tracing was active and was stopped, we have
169 // TraceMeRecorder::Active().
170 // - If tracing was active and was restarted at a lower level, we may
171 // spuriously record the event. This is extremely rare, and acceptable as
172 // event will be discarded when its start timestamp fall outside of the
173 // start/stop session timestamp.
174 #if !defined(IS_MOBILE_PLATFORM)
175 if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) {
176 if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
177 TraceMeRecorder::Record(
178 {std::move(no_init_.name), start_time_, GetCurrentTimeNanos()});
179 }
180 no_init_.name.~string();
181 start_time_ = kUntracedActivity;
182 }
183 #endif
184 }
185
186 // Appends new_metadata to the TraceMe name passed to the constructor.
187 // metadata_generator may be a lambda or functor that returns a type that the
188 // string() constructor can take, e.g., the result of TraceMeEncode.
189 // metadata_generator is only evaluated when tracing is enabled.
190 // metadata_generator is templated, rather than a std::function to avoid
191 // allocations std::function might make even if never called.
192 // Example Usage:
193 // trace_me.AppendMetadata([&value1]() {
194 // return TraceMeEncode({{"key1", value1}, {"key2", 42}});
195 // });
196 template <typename MetadataGeneratorT>
AppendMetadata(MetadataGeneratorT metadata_generator)197 void AppendMetadata(MetadataGeneratorT metadata_generator) {
198 #if !defined(IS_MOBILE_PLATFORM)
199 if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) {
200 if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
201 traceme_internal::AppendMetadata(&no_init_.name, metadata_generator());
202 }
203 }
204 #endif
205 }
206
207 // Static API, for use when scoped objects are inconvenient.
208
209 // Record the start time of an activity.
210 // Returns the activity ID, which is used to stop the activity.
211 // Calls `name_generator` to get the name for activity.
212 template <typename NameGeneratorT>
213 static int64 ActivityStart(NameGeneratorT name_generator, int level = 1) {
214 #if !defined(IS_MOBILE_PLATFORM)
215 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
216 int64 activity_id = TraceMeRecorder::NewActivityId();
217 TraceMeRecorder::Record(
218 {name_generator(), GetCurrentTimeNanos(), -activity_id});
219 return activity_id;
220 }
221 #endif
222 return kUntracedActivity;
223 }
224
225 // Record the start time of an activity.
226 // Returns the activity ID, which is used to stop the activity.
227 static int64 ActivityStart(absl::string_view name, int level = 1) {
228 #if !defined(IS_MOBILE_PLATFORM)
229 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
230 int64 activity_id = TraceMeRecorder::NewActivityId();
231 TraceMeRecorder::Record(
232 {std::string(name), GetCurrentTimeNanos(), -activity_id});
233 return activity_id;
234 }
235 #endif
236 return kUntracedActivity;
237 }
238
239 // Same as ActivityStart above, an overload for "const std::string&"
240 static int64 ActivityStart(const std::string& name, int level = 1) {
241 return ActivityStart(absl::string_view(name), level);
242 }
243
244 // Same as ActivityStart above, an overload for "const char*"
245 static int64 ActivityStart(const char* name, int level = 1) {
246 return ActivityStart(absl::string_view(name), level);
247 }
248
249 // Record the end time of an activity started by ActivityStart().
ActivityEnd(int64 activity_id)250 static void ActivityEnd(int64 activity_id) {
251 #if !defined(IS_MOBILE_PLATFORM)
252 // We don't check the level again (see TraceMe::Stop()).
253 if (TF_PREDICT_FALSE(activity_id != kUntracedActivity)) {
254 if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
255 TraceMeRecorder::Record(
256 {std::string(), -activity_id, GetCurrentTimeNanos()});
257 }
258 }
259 #endif
260 }
261
262 // Records the time of an instant activity.
263 template <typename NameGeneratorT>
264 static void InstantActivity(NameGeneratorT name_generator, int level = 1) {
265 #if !defined(IS_MOBILE_PLATFORM)
266 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
267 int64 now = GetCurrentTimeNanos();
268 TraceMeRecorder::Record(
269 {name_generator(), /*start_time=*/now, /*end_time=*/now});
270 }
271 #endif
272 }
273
274 static bool Active(int level = 1) {
275 #if !defined(IS_MOBILE_PLATFORM)
276 return TraceMeRecorder::Active(level);
277 #else
278 return false;
279 #endif
280 }
281
NewActivityId()282 static int64 NewActivityId() {
283 #if !defined(IS_MOBILE_PLATFORM)
284 return TraceMeRecorder::NewActivityId();
285 #else
286 return 0;
287 #endif
288 }
289
290 private:
291 // Start time used when tracing is disabled.
292 constexpr static int64 kUntracedActivity = 0;
293
294 TF_DISALLOW_COPY_AND_ASSIGN(TraceMe);
295
296 // Wrap the name into a union so that we can avoid the cost of string
297 // initialization when tracing is disabled.
298 union NoInit {
NoInit()299 NoInit() {}
~NoInit()300 ~NoInit() {}
301 std::string name;
302 } no_init_;
303
304 int64 start_time_ = kUntracedActivity;
305 };
306
307 // Whether OpKernel::TraceString will populate additional information for
308 // profiler, such as tensor shapes.
TfOpDetailsEnabled()309 inline bool TfOpDetailsEnabled() {
310 return TraceMe::Active(TraceMeLevel::kVerbose);
311 }
312
313 } // namespace profiler
314 } // namespace tensorflow
315
316 #endif // TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
317