1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
16 #define TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
17
18 #include "absl/strings/string_view.h"
19 #include "tensorflow/core/platform/env_time.h"
20 #include "tensorflow/core/platform/logging.h"
21 #include "tensorflow/core/platform/macros.h"
22 #include "tensorflow/core/platform/platform.h"
23 #include "tensorflow/core/platform/types.h"
24 #if !defined(IS_MOBILE_PLATFORM)
25 #include "tensorflow/core/profiler/internal/traceme_recorder.h"
26 #endif
27
28 namespace tensorflow {
29 namespace profiler {
30
// Predefined TraceMe levels. A level is a positive integer; events are
// filtered by level when recording.
enum TraceMeLevel {
  // The default level. Used only for user instrumentation.
  kCritical = 1,
  // Used by the profiler for instrumenting high level program execution
  // details (expensive TF ops, XLA ops, etc).
  kInfo = 2,
  // Also used by the profiler, to instrument more verbose (low-level) program
  // execution details (cheap TF ops, etc).
  kVerbose = 3,
};
42
43 // This is specifically used for instrumenting Tensorflow ops.
44 // Takes input as whether a TF op is expensive or not and returns the TraceMe
45 // level to be assigned to trace that particular op. Assigns level 2 for
46 // expensive ops (these are high-level details and shown by default in profiler
47 // UI). Assigns level 3 for cheap ops (low-level details not shown by default).
GetTFTraceMeLevel(bool is_expensive)48 inline int GetTFTraceMeLevel(bool is_expensive) {
49 return is_expensive ? kInfo : kVerbose;
50 }
51
52 // This class permits user-specified (CPU) tracing activities. A trace activity
53 // is started when an object of this class is created and stopped when the
54 // object is destroyed.
55 //
56 // CPU tracing can be useful when trying to understand what parts of GPU
57 // computation (e.g., kernels and memcpy) correspond to higher level activities
58 // in the overall program. For instance, a collection of kernels maybe
59 // performing one "step" of a program that is better visualized together than
60 // interspersed with kernels from other "steps". Therefore, a TraceMe object
61 // can be created at each "step".
62 //
63 // Two APIs are provided:
64 // (1) Scoped object: a TraceMe object starts tracing on construction, and
65 // stops tracing when it goes out of scope.
66 // {
67 // TraceMe trace("step");
68 // ... do some work ...
69 // }
70 // TraceMe objects can be members of a class, or allocated on the heap.
71 // (2) Static methods: ActivityStart and ActivityEnd may be called in pairs.
72 // auto id = ActivityStart("step");
73 // ... do some work ...
74 // ActivityEnd(id);
75 class TraceMe {
76 public:
77 // Constructor that traces a user-defined activity labeled with activity_name
78 // in the UI. Level defines the trace priority, used for filtering TraceMe
79 // events. By default, traces with TraceMe level <= 2 are recorded. Levels:
80 // - Must be a positive integer.
81 // - Can be a value in enum TraceMeLevel.
82 // Users are welcome to use level > 3 in their code, if they wish to filter
83 // out their host traces based on verbosity.
84 explicit TraceMe(absl::string_view activity_name, int level = 1) {
85 DCHECK_GE(level, 1);
86 #if !defined(IS_MOBILE_PLATFORM)
87 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
88 new (&no_init_.name) string(activity_name);
89 start_time_ = EnvTime::NowNanos();
90 }
91 #endif
92 }
93
94 // string&& constructor to prevent an unnecessary string copy, e.g. when a
95 // TraceMe is constructed based on the result of a StrCat operation.
96 // Note: We can't take the string by value because a) it would make the
97 // overloads ambiguous, and b) we want lvalue strings to use the string_view
98 // constructor so we avoid copying them when tracing is disabled.
99 explicit TraceMe(string &&activity_name, int level = 1) {
100 DCHECK_GE(level, 1);
101 #if !defined(IS_MOBILE_PLATFORM)
102 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
103 new (&no_init_.name) string(std::move(activity_name));
104 start_time_ = EnvTime::NowNanos();
105 }
106 #endif
107 }
108
109 // Do not allow passing strings by reference or value since the caller
110 // may unintentionally maintain ownership of the activity_name.
111 // Explicitly std::move the activity_name or wrap it in a string_view if
112 // you really wish to maintain ownership.
113 explicit TraceMe(const string &activity_name, int level = 1) = delete;
114
115 // This overload is necessary to make TraceMe's with string literals work.
116 // Otherwise, the string&& and the string_view constructor would be equally
117 // good overload candidates.
118 explicit TraceMe(const char *raw, int level = 1)
TraceMe(absl::string_view (raw),level)119 : TraceMe(absl::string_view(raw), level) {}
120
121 // This overload only generates the activity name if tracing is enabled.
122 // Useful for avoiding things like string concatenation when tracing is
123 // disabled. The |name_generator| may be a lambda or functor that returns a
124 // type that the string() constructor can take.
125 // name_generator is templated, rather than a std::function to avoid
126 // allocations std::function might make even if never called.
127 // Usage: profiler::TraceMe([&]{ return StrCat(prefix, ":", postfix); });
128 template <typename NameGeneratorT>
129 explicit TraceMe(NameGeneratorT name_generator, int level = 1) {
130 DCHECK_GE(level, 1);
131 #if !defined(IS_MOBILE_PLATFORM)
132 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
133 new (&no_init_.name) string(name_generator());
134 start_time_ = EnvTime::NowNanos();
135 }
136 #endif
137 }
138
139 // Stop tracing the activity. Called by the destructor, but exposed to allow
140 // stopping tracing before the object goes out of scope. Only has an effect
141 // the first time it is called.
Stop()142 void Stop() {
143 // We do not need to check the trace level again here.
144 // - If tracing wasn't active to start with, we have kUntracedActivity.
145 // - If tracing was active and was stopped, we have
146 // TraceMeRecorder::Active().
147 // - If tracing was active and was restarted at a lower level, we may
148 // spuriously record the event. This is extremely rare, and acceptable as
149 // event will be discarded when its start timestamp fall outside of the
150 // start/stop session timestamp.
151 #if !defined(IS_MOBILE_PLATFORM)
152 if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) {
153 if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
154 TraceMeRecorder::Record({kCompleteActivity, std::move(no_init_.name),
155 start_time_, EnvTime::NowNanos()});
156 }
157 no_init_.name.~string();
158 start_time_ = kUntracedActivity;
159 }
160 #endif
161 }
162
~TraceMe()163 ~TraceMe() { Stop(); }
164
165 // Static API, for use when scoped objects are inconvenient.
166
167 // Record the start time of an activity.
168 // Returns the activity ID, which is used to stop the activity.
169 static uint64 ActivityStart(absl::string_view name, int level = 1) {
170 #if !defined(IS_MOBILE_PLATFORM)
171 if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
172 uint64 activity_id = TraceMeRecorder::NewActivityId();
173 TraceMeRecorder::Record({activity_id, string(name),
174 /*start_time=*/EnvTime::NowNanos(),
175 /*end_time=*/0});
176 return activity_id;
177 }
178 #endif
179 return kUntracedActivity;
180 }
181
182 // Record the end time of an activity started by ActivityStart().
ActivityEnd(uint64 activity_id)183 static void ActivityEnd(uint64 activity_id) {
184 #if !defined(IS_MOBILE_PLATFORM)
185 // We don't check the level again (see TraceMe::Stop()).
186 if (TF_PREDICT_FALSE(activity_id != kUntracedActivity)) {
187 if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
188 TraceMeRecorder::Record({activity_id, /*name=*/"", /*start_time=*/0,
189 /*end_time=*/EnvTime::NowNanos()});
190 }
191 }
192 #endif
193 }
194
195 static bool Active(int level = 1) {
196 #if !defined(IS_MOBILE_PLATFORM)
197 return TraceMeRecorder::Active(level);
198 #else
199 return false;
200 #endif
201 }
202
203 private:
204 // Activity ID or start time used when tracing is disabled.
205 constexpr static uint64 kUntracedActivity = 0;
206 // Activity ID used as a placeholder when both start and end are present.
207 constexpr static uint64 kCompleteActivity = 1;
208
209 TF_DISALLOW_COPY_AND_ASSIGN(TraceMe);
210
211 // Wrap the name into a union so that we can avoid the cost of string
212 // initialization when tracing is disabled.
213 union NoInit {
NoInit()214 NoInit() {}
~NoInit()215 ~NoInit() {}
216 string name;
217 } no_init_;
218
219 uint64 start_time_ = kUntracedActivity;
220 };
221
222 // Whether OpKernel::TraceString will populate additional information for
223 // profiler, such as tensor shapes.
TfOpDetailsEnabled()224 inline bool TfOpDetailsEnabled() {
225 return TraceMe::Active(TraceMeLevel::kVerbose);
226 }
227
228 } // namespace profiler
229 } // namespace tensorflow
230
231 #endif // TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
232