• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h"
17 
18 #include <algorithm>
19 #include <memory>
20 #include <vector>
21 
22 #include "absl/algorithm/container.h"
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/strings/str_cat.h"
25 #include "absl/strings/string_view.h"
26 #include "absl/types/optional.h"
27 #include "tensorflow/core/lib/gtl/map_util.h"
28 #include "tensorflow/core/platform/logging.h"
29 #include "tensorflow/core/platform/types.h"
30 #include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h"
31 #include "tensorflow/core/profiler/convert/op_stack.h"
32 #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
33 #include "tensorflow/core/profiler/protobuf/xplane.pb.h"
34 #include "tensorflow/core/profiler/utils/cost_utils.h"
35 #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
36 #include "tensorflow/core/profiler/utils/op_utils.h"
37 #include "tensorflow/core/profiler/utils/tf_op_utils.h"
38 #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
39 #include "tensorflow/core/profiler/utils/timespan.h"
40 #include "tensorflow/core/profiler/utils/trace_utils.h"
41 #include "tensorflow/core/profiler/utils/xplane_schema.h"
42 #include "tensorflow/core/profiler/utils/xplane_visitor.h"
43 
44 namespace tensorflow {
45 namespace profiler {
46 namespace {
47 
// Kind of a TensorFlow Op activity: it marks either the beginning or the end
// of an Op.
enum TfActivityType {
  kTfOpBegin,
  kTfOpEnd,
};
50 
51 // Instant activity representing the begin or end of a host-side TF Op.
52 struct TfActivity {
53   // The timestamp in picoseconds when this activity happened.
54   uint64 timestamp_ps;
55   // The ID of this Op.
56   uint32 tf_op_id;
57   // Type of this activity.
58   TfActivityType activity_type;
59   // Full TF op name and type of this activity (backed by XEvent::name).
60   TfOp tf_op;
61   // Whether it is eagerly executed.
62   bool is_eager;
63 };
64 
65 // TF Op metrics stored as element in OpStack.
66 struct TfOpInfo {
TfOpInfotensorflow::profiler::__anon9a5635520111::TfOpInfo67   explicit TfOpInfo(uint64 ts) : start_timestamp_ps(ts) {}
68 
69   // Start timestamp in picoseconds.
70   uint64 start_timestamp_ps;
71   // Children duration in picoseconds.
72   uint64 children_duration_ps = 0;
73 };
74 
75 // Processes a TF-activity on particular core.
ProcessOneTfActivity(const TfActivity & activity,OpStack<TfOpInfo> * tf_op_stack,TfMetricsDbData * tf_metrics_data)76 void ProcessOneTfActivity(const TfActivity& activity,
77                           OpStack<TfOpInfo>* tf_op_stack,
78                           TfMetricsDbData* tf_metrics_data) {
79   uint32 tf_op_id = activity.tf_op_id;
80   switch (activity.activity_type) {
81     case kTfOpBegin: {
82       tf_op_stack->Push(tf_op_id,
83                         absl::make_unique<TfOpInfo>(activity.timestamp_ps));
84       break;
85     }
86     case kTfOpEnd: {
87       std::unique_ptr<TfOpInfo> info = tf_op_stack->Pop(tf_op_id);
88       if (info == nullptr) {
89         // This happens if TraceMes overlap.
90         VLOG(1) << "No begin event found for TF activity id=" << tf_op_id
91                 << " name=" << activity.tf_op.name
92                 << " type=" << activity.tf_op.type;
93         break;
94       }
95       Timespan tf_op_span =
96           PicoSpan(info->start_timestamp_ps, activity.timestamp_ps);
97       tf_metrics_data->tf_metrics_db_builder.EnterOp(
98           activity.tf_op.name, activity.tf_op.type, activity.is_eager,
99           tf_op_span.duration_ps(), info->children_duration_ps);
100       TfOpInfo* parent_info = tf_op_stack->Top();
101       if (parent_info != nullptr) {
102         parent_info->children_duration_ps += tf_op_span.duration_ps();
103       }
104       if (IsInfeedEnqueueOp(activity.tf_op.type)) {
105         tf_metrics_data->tf_metrics_db_builder.EnterHostInfeedEnqueue(
106             tf_op_span);
107       }
108       break;
109     }
110   }
111 }
112 
113 // Processes all TF-activities on the given core.
ProcessTfActivities(std::vector<TfActivity> * tf_activities,TfMetricsDbData * tf_metrics_db_data)114 void ProcessTfActivities(std::vector<TfActivity>* tf_activities,
115                          TfMetricsDbData* tf_metrics_db_data) {
116   if (tf_activities->empty()) return;
117   absl::c_stable_sort(*tf_activities,
118                       [](const TfActivity& a, const TfActivity& b) {
119                         return a.timestamp_ps < b.timestamp_ps;
120                       });
121   OpStack<TfOpInfo> tf_op_stack;
122   for (const auto& tf_activity : *tf_activities) {
123     ProcessOneTfActivity(tf_activity, &tf_op_stack, tf_metrics_db_data);
124   }
125   SetTotalTimePs(
126       tf_metrics_db_data->tf_metrics_db,
127       tf_activities->back().timestamp_ps - tf_activities->front().timestamp_ps);
128 }
129 
CollectTfActivities(const XLineVisitor & line,const absl::flat_hash_map<int64,TfOp> & tf_ops,std::vector<TfActivity> * tf_activities)130 void CollectTfActivities(const XLineVisitor& line,
131                          const absl::flat_hash_map<int64, TfOp>& tf_ops,
132                          std::vector<TfActivity>* tf_activities) {
133   uint32 tf_op_id = 0;
134   tf_activities->reserve(line.NumEvents() * 2);
135   line.ForEachEvent([&tf_ops, &tf_op_id,
136                      &tf_activities](const XEventVisitor& event) {
137     const TfOp* tf_op = gtl::FindOrNull(tf_ops, event.Id());
138     if (tf_op != nullptr) {
139       ++tf_op_id;
140       bool is_eager = false;
141       if (absl::optional<XStatVisitor> stat =
142               event.GetStat(StatType::kIsEager)) {
143         is_eager = stat->IntValue();
144       }
145       Timespan span = event.GetTimespan();
146       tf_activities->push_back(
147           {span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op, is_eager});
148       tf_activities->push_back(
149           {span.end_ps(), tf_op_id, kTfOpEnd, *tf_op, is_eager});
150     }
151   });
152 }
153 
154 }  // namespace
155 
CollectTfOpsFromHostThreadsXPlane(const XPlane & host_trace)156 absl::flat_hash_map<int64, TfOp> CollectTfOpsFromHostThreadsXPlane(
157     const XPlane& host_trace) {
158   absl::flat_hash_map<int64, TfOp> tf_ops;
159   for (const auto& id_metadata : host_trace.event_metadata()) {
160     const XEventMetadata& metadata = id_metadata.second;
161     // On the host, we have added some user-specified TraceMe's in addition to
162     // the TraceMe's added to every TensorFlow op by the system. These
163     // user-inserted TraceMe's have "unknown" type. We don't count them in
164     // Tf-stats.
165     TfOp tf_op = ParseTfOpFullname(metadata.name());
166     if (tf_op.category != Category::kUnknown) {
167       tf_ops.try_emplace(metadata.id(), tf_op);
168     }
169   }
170   return tf_ops;
171 }
172 
ConvertHostThreadsXLineToTfMetricsDbData(const XLineVisitor & line,const absl::flat_hash_map<int64,TfOp> & tf_ops)173 TfMetricsDbData ConvertHostThreadsXLineToTfMetricsDbData(
174     const XLineVisitor& line, const absl::flat_hash_map<int64, TfOp>& tf_ops) {
175   TfMetricsDbData tf_metrics_db_data;
176   if (!tf_ops.empty()) {
177     std::vector<TfActivity> tf_activities;
178     CollectTfActivities(line, tf_ops, &tf_activities);
179     ProcessTfActivities(&tf_activities, &tf_metrics_db_data);
180   }
181   return tf_metrics_db_data;
182 }
183 
ConsumeTfMetricsDbData(TfMetricsDbData src,OpMetricsDbCombiner * dst)184 void ConsumeTfMetricsDbData(TfMetricsDbData src, OpMetricsDbCombiner* dst) {
185   AddIdleOp(src.tf_metrics_db);
186   dst->Combine(src.tf_metrics_db);
187   src.tf_metrics_db.Clear();
188 }
189 
ConvertHostThreadsXPlaneToOpMetricsDb(const XPlane & host_trace)190 OpMetricsDb ConvertHostThreadsXPlaneToOpMetricsDb(const XPlane& host_trace) {
191   absl::flat_hash_map<int64, TfOp> tf_ops =
192       CollectTfOpsFromHostThreadsXPlane(host_trace);
193   OpMetricsDb result;
194   OpMetricsDbCombiner combiner(&result);
195   XPlaneVisitor plane = CreateTfXPlaneVisitor(&host_trace);
196   plane.ForEachLine([&tf_ops, &combiner](const XLineVisitor& line) {
197     ConsumeTfMetricsDbData(
198         ConvertHostThreadsXLineToTfMetricsDbData(line, tf_ops), &combiner);
199   });
200   return result;
201 }
202 
ConvertDeviceTraceXPlaneToOpMetricsDb(const XPlane & device_trace)203 OpMetricsDb ConvertDeviceTraceXPlaneToOpMetricsDb(const XPlane& device_trace) {
204   OpMetricsDb result;
205   DeviceOpMetricsDbBuilder device_op_metrics_db_builder(&result);
206 
207   int64_t first_op_offset_ps = kint64max;
208   int64_t last_op_offset_ps = 0;
209 
210   TfOpRoofLineCostEstimator op_level_cost_estimator;
211   XPlaneVisitor plane = CreateTfXPlaneVisitor(&device_trace);
212   plane.ForEachLine([&](const XLineVisitor& line) {
213     if (IsDerivedThreadId(line.Id())) return;
214     line.ForEachEvent([&](const XEventVisitor& event) {
215       first_op_offset_ps = std::min(first_op_offset_ps, event.OffsetPs());
216       last_op_offset_ps = std::max(last_op_offset_ps, event.EndOffsetPs());
217 
218       absl::string_view tf_op_full_name;
219       bool is_eager;
220       event.ForEachStat([&](const XStatVisitor& stat) {
221         if (stat.Type() == StatType::kLevel0 ||  // old way to deliver tf_op.
222             stat.Type() == StatType::kTfOp) {
223           tf_op_full_name = stat.StrOrRefValue();
224         } else if (stat.Type() == StatType::kIsEager) {
225           is_eager = stat.IntValue();
226         }
227       });
228       if (tf_op_full_name.empty()) return;
229       TfOp tf_op = ParseTfOpFullname(tf_op_full_name);
230       TfOpRoofLineCostEstimator::OpRoofLineStats costs;
231       if (tf_op.category != Category::kUnknown) {
232         costs = op_level_cost_estimator.Predict(event);
233       }
234       device_op_metrics_db_builder.EnterOp(
235           /*program_id=*/0, absl::StrCat(tf_op.name, "/", event.Name()),
236           tf_op.type, tf_op_full_name, is_eager,
237           /*occurrences=*/1, event.DurationPs(),
238           /*children_time_ps=*/0, costs.flops, costs.bytes_accessed);
239     });
240   });
241   SetTotalTimePs(
242       result, last_op_offset_ps ? last_op_offset_ps - first_op_offset_ps : 0);
243   AddIdleOp(result);
244   return result;
245 }
246 
247 }  // namespace profiler
248 }  // namespace tensorflow
249