1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h"
17
18 #include <algorithm>
19 #include <memory>
20 #include <vector>
21
22 #include "absl/algorithm/container.h"
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/strings/str_cat.h"
25 #include "absl/strings/string_view.h"
26 #include "absl/types/optional.h"
27 #include "tensorflow/core/lib/gtl/map_util.h"
28 #include "tensorflow/core/platform/logging.h"
29 #include "tensorflow/core/platform/types.h"
30 #include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h"
31 #include "tensorflow/core/profiler/convert/op_stack.h"
32 #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
33 #include "tensorflow/core/profiler/protobuf/xplane.pb.h"
34 #include "tensorflow/core/profiler/utils/cost_utils.h"
35 #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
36 #include "tensorflow/core/profiler/utils/op_utils.h"
37 #include "tensorflow/core/profiler/utils/tf_op_utils.h"
38 #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
39 #include "tensorflow/core/profiler/utils/timespan.h"
40 #include "tensorflow/core/profiler/utils/trace_utils.h"
41 #include "tensorflow/core/profiler/utils/xplane_schema.h"
42 #include "tensorflow/core/profiler/utils/xplane_visitor.h"
43
44 namespace tensorflow {
45 namespace profiler {
46 namespace {
47
// Kind of instant activity recorded for a host-side TensorFlow Op: an Op is
// either being started or being finished.
enum TfActivityType {
  kTfOpBegin,  // The Op begins.
  kTfOpEnd,    // The Op ends.
};
50
// Instant activity representing the begin or end of a host-side TF Op.
//
// NOTE: this struct is created with positional brace initialization in
// CollectTfActivities, in the order (timestamp_ps, tf_op_id, activity_type,
// tf_op, is_eager). Keep the field order in sync with that call site.
struct TfActivity {
  // The timestamp in picoseconds when this activity happened.
  uint64 timestamp_ps;
  // The ID of this Op. The begin and end activities generated from the same
  // event carry the same ID, which is how they are matched on the op stack.
  uint32 tf_op_id;
  // Type of this activity (begin or end).
  TfActivityType activity_type;
  // Full TF op name and type of this activity (backed by XEvent::name).
  TfOp tf_op;
  // Whether it is eagerly executed.
  bool is_eager;
};
64
65 // TF Op metrics stored as element in OpStack.
66 struct TfOpInfo {
TfOpInfotensorflow::profiler::__anon9a5635520111::TfOpInfo67 explicit TfOpInfo(uint64 ts) : start_timestamp_ps(ts) {}
68
69 // Start timestamp in picoseconds.
70 uint64 start_timestamp_ps;
71 // Children duration in picoseconds.
72 uint64 children_duration_ps = 0;
73 };
74
75 // Processes a TF-activity on particular core.
ProcessOneTfActivity(const TfActivity & activity,OpStack<TfOpInfo> * tf_op_stack,TfMetricsDbData * tf_metrics_data)76 void ProcessOneTfActivity(const TfActivity& activity,
77 OpStack<TfOpInfo>* tf_op_stack,
78 TfMetricsDbData* tf_metrics_data) {
79 uint32 tf_op_id = activity.tf_op_id;
80 switch (activity.activity_type) {
81 case kTfOpBegin: {
82 tf_op_stack->Push(tf_op_id,
83 absl::make_unique<TfOpInfo>(activity.timestamp_ps));
84 break;
85 }
86 case kTfOpEnd: {
87 std::unique_ptr<TfOpInfo> info = tf_op_stack->Pop(tf_op_id);
88 if (info == nullptr) {
89 // This happens if TraceMes overlap.
90 VLOG(1) << "No begin event found for TF activity id=" << tf_op_id
91 << " name=" << activity.tf_op.name
92 << " type=" << activity.tf_op.type;
93 break;
94 }
95 Timespan tf_op_span =
96 PicoSpan(info->start_timestamp_ps, activity.timestamp_ps);
97 tf_metrics_data->tf_metrics_db_builder.EnterOp(
98 activity.tf_op.name, activity.tf_op.type, activity.is_eager,
99 tf_op_span.duration_ps(), info->children_duration_ps);
100 TfOpInfo* parent_info = tf_op_stack->Top();
101 if (parent_info != nullptr) {
102 parent_info->children_duration_ps += tf_op_span.duration_ps();
103 }
104 if (IsInfeedEnqueueOp(activity.tf_op.type)) {
105 tf_metrics_data->tf_metrics_db_builder.EnterHostInfeedEnqueue(
106 tf_op_span);
107 }
108 break;
109 }
110 }
111 }
112
113 // Processes all TF-activities on the given core.
ProcessTfActivities(std::vector<TfActivity> * tf_activities,TfMetricsDbData * tf_metrics_db_data)114 void ProcessTfActivities(std::vector<TfActivity>* tf_activities,
115 TfMetricsDbData* tf_metrics_db_data) {
116 if (tf_activities->empty()) return;
117 absl::c_stable_sort(*tf_activities,
118 [](const TfActivity& a, const TfActivity& b) {
119 return a.timestamp_ps < b.timestamp_ps;
120 });
121 OpStack<TfOpInfo> tf_op_stack;
122 for (const auto& tf_activity : *tf_activities) {
123 ProcessOneTfActivity(tf_activity, &tf_op_stack, tf_metrics_db_data);
124 }
125 SetTotalTimePs(
126 tf_metrics_db_data->tf_metrics_db,
127 tf_activities->back().timestamp_ps - tf_activities->front().timestamp_ps);
128 }
129
CollectTfActivities(const XLineVisitor & line,const absl::flat_hash_map<int64,TfOp> & tf_ops,std::vector<TfActivity> * tf_activities)130 void CollectTfActivities(const XLineVisitor& line,
131 const absl::flat_hash_map<int64, TfOp>& tf_ops,
132 std::vector<TfActivity>* tf_activities) {
133 uint32 tf_op_id = 0;
134 tf_activities->reserve(line.NumEvents() * 2);
135 line.ForEachEvent([&tf_ops, &tf_op_id,
136 &tf_activities](const XEventVisitor& event) {
137 const TfOp* tf_op = gtl::FindOrNull(tf_ops, event.Id());
138 if (tf_op != nullptr) {
139 ++tf_op_id;
140 bool is_eager = false;
141 if (absl::optional<XStatVisitor> stat =
142 event.GetStat(StatType::kIsEager)) {
143 is_eager = stat->IntValue();
144 }
145 Timespan span = event.GetTimespan();
146 tf_activities->push_back(
147 {span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op, is_eager});
148 tf_activities->push_back(
149 {span.end_ps(), tf_op_id, kTfOpEnd, *tf_op, is_eager});
150 }
151 });
152 }
153
154 } // namespace
155
CollectTfOpsFromHostThreadsXPlane(const XPlane & host_trace)156 absl::flat_hash_map<int64, TfOp> CollectTfOpsFromHostThreadsXPlane(
157 const XPlane& host_trace) {
158 absl::flat_hash_map<int64, TfOp> tf_ops;
159 for (const auto& id_metadata : host_trace.event_metadata()) {
160 const XEventMetadata& metadata = id_metadata.second;
161 // On the host, we have added some user-specified TraceMe's in addition to
162 // the TraceMe's added to every TensorFlow op by the system. These
163 // user-inserted TraceMe's have "unknown" type. We don't count them in
164 // Tf-stats.
165 TfOp tf_op = ParseTfOpFullname(metadata.name());
166 if (tf_op.category != Category::kUnknown) {
167 tf_ops.try_emplace(metadata.id(), tf_op);
168 }
169 }
170 return tf_ops;
171 }
172
ConvertHostThreadsXLineToTfMetricsDbData(const XLineVisitor & line,const absl::flat_hash_map<int64,TfOp> & tf_ops)173 TfMetricsDbData ConvertHostThreadsXLineToTfMetricsDbData(
174 const XLineVisitor& line, const absl::flat_hash_map<int64, TfOp>& tf_ops) {
175 TfMetricsDbData tf_metrics_db_data;
176 if (!tf_ops.empty()) {
177 std::vector<TfActivity> tf_activities;
178 CollectTfActivities(line, tf_ops, &tf_activities);
179 ProcessTfActivities(&tf_activities, &tf_metrics_db_data);
180 }
181 return tf_metrics_db_data;
182 }
183
ConsumeTfMetricsDbData(TfMetricsDbData src,OpMetricsDbCombiner * dst)184 void ConsumeTfMetricsDbData(TfMetricsDbData src, OpMetricsDbCombiner* dst) {
185 AddIdleOp(src.tf_metrics_db);
186 dst->Combine(src.tf_metrics_db);
187 src.tf_metrics_db.Clear();
188 }
189
ConvertHostThreadsXPlaneToOpMetricsDb(const XPlane & host_trace)190 OpMetricsDb ConvertHostThreadsXPlaneToOpMetricsDb(const XPlane& host_trace) {
191 absl::flat_hash_map<int64, TfOp> tf_ops =
192 CollectTfOpsFromHostThreadsXPlane(host_trace);
193 OpMetricsDb result;
194 OpMetricsDbCombiner combiner(&result);
195 XPlaneVisitor plane = CreateTfXPlaneVisitor(&host_trace);
196 plane.ForEachLine([&tf_ops, &combiner](const XLineVisitor& line) {
197 ConsumeTfMetricsDbData(
198 ConvertHostThreadsXLineToTfMetricsDbData(line, tf_ops), &combiner);
199 });
200 return result;
201 }
202
ConvertDeviceTraceXPlaneToOpMetricsDb(const XPlane & device_trace)203 OpMetricsDb ConvertDeviceTraceXPlaneToOpMetricsDb(const XPlane& device_trace) {
204 OpMetricsDb result;
205 DeviceOpMetricsDbBuilder device_op_metrics_db_builder(&result);
206
207 int64_t first_op_offset_ps = kint64max;
208 int64_t last_op_offset_ps = 0;
209
210 TfOpRoofLineCostEstimator op_level_cost_estimator;
211 XPlaneVisitor plane = CreateTfXPlaneVisitor(&device_trace);
212 plane.ForEachLine([&](const XLineVisitor& line) {
213 if (IsDerivedThreadId(line.Id())) return;
214 line.ForEachEvent([&](const XEventVisitor& event) {
215 first_op_offset_ps = std::min(first_op_offset_ps, event.OffsetPs());
216 last_op_offset_ps = std::max(last_op_offset_ps, event.EndOffsetPs());
217
218 absl::string_view tf_op_full_name;
219 bool is_eager;
220 event.ForEachStat([&](const XStatVisitor& stat) {
221 if (stat.Type() == StatType::kLevel0 || // old way to deliver tf_op.
222 stat.Type() == StatType::kTfOp) {
223 tf_op_full_name = stat.StrOrRefValue();
224 } else if (stat.Type() == StatType::kIsEager) {
225 is_eager = stat.IntValue();
226 }
227 });
228 if (tf_op_full_name.empty()) return;
229 TfOp tf_op = ParseTfOpFullname(tf_op_full_name);
230 TfOpRoofLineCostEstimator::OpRoofLineStats costs;
231 if (tf_op.category != Category::kUnknown) {
232 costs = op_level_cost_estimator.Predict(event);
233 }
234 device_op_metrics_db_builder.EnterOp(
235 /*program_id=*/0, absl::StrCat(tf_op.name, "/", event.Name()),
236 tf_op.type, tf_op_full_name, is_eager,
237 /*occurrences=*/1, event.DurationPs(),
238 /*children_time_ps=*/0, costs.flops, costs.bytes_accessed);
239 });
240 });
241 SetTotalTimePs(
242 result, last_op_offset_ps ? last_op_offset_ps - first_op_offset_ps : 0);
243 AddIdleOp(result);
244 return result;
245 }
246
247 } // namespace profiler
248 } // namespace tensorflow
249