1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
16 #define TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
17
18 #include <string>
19
20 #include "absl/strings/string_view.h"
21 #include "absl/types/optional.h"
22 #include "tensorflow/core/platform/env.h"
23 #include "tensorflow/core/platform/logging.h"
24 #include "tensorflow/core/platform/types.h"
25 #include "tensorflow/core/profiler/internal/traceme_recorder.h"
26
27 namespace tensorflow {
28 namespace profiler {
29
30 // This is specifically used in xprof_bridge for instrumenting Tensorflow ops.
31 // Takes input as whether a TF op is expensive or not and returns the TraceMe
32 // level to be assigned to trace that particular op. Assigns level 2 for
33 // expensive ops (these are high-level details and shown by default in xprof
34 // UI). Assigns level 3 for cheap ops (low-level details not shown by default).
GetTFTraceMeLevel(bool is_expensive)35 inline int GetTFTraceMeLevel(bool is_expensive) { return is_expensive ? 2 : 3; }
36
37 // This class permits user-specified (CPU) tracing activities. A trace activity
38 // is started when an object of this class is created and stopped when the
39 // object is destroyed.
40 //
41 // CPU tracing can be useful when trying to understand what parts of GPU
42 // computation (e.g., kernels and memcpy) correspond to higher level activities
43 // in the overall program. For instance, a collection of kernels maybe
44 // performing one "step" of a program that is better visualized together than
45 // interspersed with kernels from other "steps". Therefore, a TraceMe object
46 // can be created at each "step".
47 //
48 // Two APIs are provided:
49 // (1) Scoped object: a TraceMe object starts tracing on construction, and
50 // stops tracing when it goes out of scope.
51 // {
52 // TraceMe trace("step");
53 // ... do some work ...
54 // }
55 // TraceMe objects can be members of a class, or allocated on the heap.
56 // (2) Static methods: ActivityStart and ActivityEnd may be called in pairs.
57 // auto id = ActivityStart("step");
58 // ... do some work ...
59 // ActivityEnd(id);
60 class TraceMe {
61 public:
62 // Constructor that traces a user-defined activity labeled with activity_name
63 // in the UI. Level defines the trace priority, used for filtering TraceMe
64 // events. By default, traces with TraceMe level <= 2 are recorded. Levels:
65 // - Must be a positive integer.
66 // - Level 1 is the default and used only for user instrumentation.
67 // - Level 2 is used by xprof for instrumenting high level program execution
68 // details (expensive TF ops, XLA ops, etc).
69 // - Level 3 is also used by xprof to instrument more verbose (low-level)
70 // program execution details (cheap TF ops, etc).
71 // Users are welcome to use level >= 2 in their code, if they wish to filter
72 // out their host traces based on verbosity.
73 explicit TraceMe(absl::string_view activity_name, int level = 1) {
74 DCHECK_GE(level, 1);
75 if (TraceMeRecorder::Active(level)) {
76 new (&no_init_.name) string(activity_name);
77 start_time_ = Env::Default()->NowNanos();
78 } else {
79 start_time_ = kUntracedActivity;
80 }
81 }
82
83 // string&& constructor to prevent an unnecessary string copy, e.g. when a
84 // TraceMe is constructed based on the result of a StrCat operation.
85 // Note: We can't take the string by value because a) it would make the
86 // overloads ambiguous, and b) we want lvalue strings to use the string_view
87 // constructor so we avoid copying them when tracing is disabled.
88 explicit TraceMe(string &&activity_name, int level = 1) {
89 DCHECK_GE(level, 1);
90 if (TraceMeRecorder::Active(level)) {
91 new (&no_init_.name) string(std::move(activity_name));
92 start_time_ = Env::Default()->NowNanos();
93 } else {
94 start_time_ = kUntracedActivity;
95 }
96 }
97
98 // Do not allow passing strings by reference or value since the caller
99 // may unintentionally maintain ownership of the activity_name.
100 // Explicitly std::move the activity_name or wrap it in a string_view if
101 // you really wish to maintain ownership.
102 explicit TraceMe(const string &activity_name, int level = 1) = delete;
103
104 // This overload is necessary to make TraceMe's with string literals work.
105 // Otherwise, the string&& and the string_view constructor would be equally
106 // good overload candidates.
107 explicit TraceMe(const char *raw, int level = 1)
TraceMe(absl::string_view (raw),level)108 : TraceMe(absl::string_view(raw), level) {}
109
110 // This overload only generates the activity name if tracing is enabled.
111 // Useful for avoiding things like string concatenation when tracing is
112 // disabled. The |name_generator| may be a lambda or functor that returns a
113 // type that the string() constructor can take.
114 // name_generator is templated, rather than a std::function to avoid
115 // allocations std::function might make even if never called.
116 // Usage: xprof::TraceMe([&]{ return StrCat(prefix, ":", postfix); });
117 template <typename NameGeneratorT>
118 explicit TraceMe(NameGeneratorT name_generator, int level = 1) {
119 DCHECK_GE(level, 1);
120 if (TraceMeRecorder::Active(level)) {
121 new (&no_init_.name) string(name_generator());
122 start_time_ = Env::Default()->NowNanos();
123 } else {
124 start_time_ = kUntracedActivity;
125 }
126 }
127
~TraceMe()128 ~TraceMe() {
129 // We do not need to check the trace level again here.
130 // - If tracing wasn't active to start with, we have kUntracedActivity.
131 // - If tracing was active and was stopped, we have
132 // TraceMeRecorder::Active().
133 // - If tracing was active and was restarted at a lower level, we may
134 // spuriously record the event. This is extremely rare, and acceptable as
135 // event will be discarded when its start timestamp fall outside of the
136 // start/stop session timestamp (recorded in XprofResponse).
137 if (start_time_ != kUntracedActivity) {
138 if (TraceMeRecorder::Active()) {
139 TraceMeRecorder::Record({kCompleteActivity, std::move(no_init_.name),
140 start_time_, Env::Default()->NowNanos()});
141 }
142 no_init_.name.~string();
143 }
144 }
145
146 // TraceMe is not movable or copyable.
147 TraceMe(const TraceMe &) = delete;
148 TraceMe &operator=(const TraceMe &) = delete;
149
150 // Static API, for use when scoped objects are inconvenient.
151
152 // Record the start time of an activity.
153 // Returns the activity ID, which is used to stop the activity.
154 static uint64 ActivityStart(absl::string_view name, int level = 1) {
155 return TraceMeRecorder::Active(level) ? ActivityStartImpl(name)
156 : kUntracedActivity;
157 }
158
159 // Record the end time of an activity started by ActivityStart().
ActivityEnd(uint64 activity_id)160 static void ActivityEnd(uint64 activity_id) {
161 // We don't check the level again (see ~TraceMe()).
162 if (activity_id != kUntracedActivity) {
163 if (TraceMeRecorder::Active()) {
164 ActivityEndImpl(activity_id);
165 }
166 }
167 }
168
169 private:
170 // Activity ID or start time used when tracing is disabled.
171 constexpr static uint64 kUntracedActivity = 0;
172 // Activity ID used as a placeholder when both start and end are present.
173 constexpr static uint64 kCompleteActivity = 1;
174
175 static uint64 ActivityStartImpl(absl::string_view activity_name);
176 static void ActivityEndImpl(uint64 activity_id);
177
178 // Wrap the name into a union so that we can avoid the cost of string
179 // initialization when tracing is disabled.
180 union NoInit {
NoInit()181 NoInit() {}
~NoInit()182 ~NoInit() {}
183 string name;
184 } no_init_;
185
186 uint64 start_time_;
187 };
188
189 } // namespace profiler
190 } // namespace tensorflow
191
192 #endif // TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
193