/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/profiler/utils/xplane_schema.h"

#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/utils/tf_op_utils.h"

namespace tensorflow {
namespace profiler {

const absl::string_view kHostThreadsPlaneName = "/host:CPU";
const absl::string_view kGpuPlanePrefix = "/device:GPU:";
const absl::string_view kTpuPlanePrefix = "/device:TPU:";
// TODO(b/195582092): change it to /device:custom once all literals are
// migrated.
const absl::string_view kCustomPlanePrefix = "/device:CUSTOM:";

const absl::string_view kTpuRuntimePlaneName = "/host:TPU-runtime";
const absl::string_view kCuptiDriverApiPlaneName = "/host:CUPTI";
const absl::string_view kRoctracerApiPlaneName = "/host:ROCTRACER";
const absl::string_view kMetadataPlaneName = "/host:metadata";
const absl::string_view kTFStreamzPlaneName = "/host:tfstreamz";
const absl::string_view kPythonTracerPlaneName = "/host:python-tracer";

const absl::string_view kStepLineName = "Steps";
const absl::string_view kTensorFlowNameScopeLineName = "TensorFlow Name Scope";
const absl::string_view kTensorFlowOpLineName = "TensorFlow Ops";
const absl::string_view kXlaModuleLineName = "XLA Modules";
const absl::string_view kXlaOpLineName = "XLA Ops";
const absl::string_view kKernelLaunchLineName = "Launch Stats";
const absl::string_view kSourceLineName = "Source code";

namespace {

constexpr int kNumHostEventTypes =
    HostEventType::kLastHostEventType - HostEventType::kFirstHostEventType + 1;

constexpr int kNumStatTypes =
    StatType::kLastStatType - StatType::kFirstStatType + 1;

using HostEventTypeMap = absl::flat_hash_map<absl::string_view, HostEventType>;
using HostEventTypeStrMap =
    absl::flat_hash_map<HostEventType, absl::string_view>;
using StatTypeMap = absl::flat_hash_map<absl::string_view, StatType>;
using StatTypeStrMap = absl::flat_hash_map<StatType, absl::string_view>;

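// Lazily-initialized, intentionally leaked singleton mapping host event names
// to HostEventType enum values.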
const HostEventTypeMap& GetHostEventTypeMap() {
  static auto* host_event_type_map = new HostEventTypeMap({
      {"UnknownHostEventType", kUnknownHostEventType},
      {"TraceContext", kTraceContext},
      {"SessionRun", kSessionRun},
      {"FunctionRun", kFunctionRun},
      {"RunGraph", kRunGraph},
      {"RunGraphDone", kRunGraphDone},
      {"TfOpRun", kTfOpRun},
      {"EagerKernelExecute", kEagerKernelExecute},
      {"ExecutorState::Process", kExecutorStateProcess},
      {"ExecutorDoneCallback", kExecutorDoneCallback},
      {"MemoryAllocation", kMemoryAllocation},
      {"MemoryDeallocation", kMemoryDeallocation},
      // Performance counter related.
      {"RemotePerfCounter", kRemotePerf},
      // tf.data captured function events.
      {"InstantiatedCapturedFunction::Run", kTfDataCapturedFunctionRun},
      {"InstantiatedCapturedFunction::RunWithBorrowedArgs",
       kTfDataCapturedFunctionRunWithBorrowedArgs},
      {"InstantiatedCapturedFunction::RunInstantiated",
       kTfDataCapturedFunctionRunInstantiated},
      {"InstantiatedCapturedFunction::RunAsync",
       kTfDataCapturedFunctionRunAsync},
      // Functional ops.
      {"CallOp", kCallOp},
      {"ParallelForOp", kParallelForOp},
      {"ForeverOp", kForeverOp},
      {"NumericalGradientOp-EvalRight", kNumericalGradientOpEvalRight},
      {"NumericalGradientOp-EvalLeft", kNumericalGradientOpEvalLeft},
      {"SymbolicGradientOp", kSymbolicGradientOp},
      {"RemoteCallOp", kRemoteCallOp},
      {"IfOp", kIfOp},
      {"CaseOp", kCaseOp},
      {"WhileOp-EvalCond", kWhileOpEvalCond},
      {"WhileOp-StartBody", kWhileOpStartBody},
      {"ForOp", kForOp},
      {"PartitionedCallOp", kPartitionedCallOp},
      // tf.data related.
      {"IteratorGetNextOp::DoCompute", kIteratorGetNextOp},
      {"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp},
      {"Iterator", kIterator},
      {"Iterator::Prefetch::Generator", kDeviceInputPipelineSecondIterator},
      {"PrefetchProduce", kPrefetchProduce},
      {"PrefetchConsume", kPrefetchConsume},
      {"ParallelInterleaveProduce", kParallelInterleaveProduce},
      {"ParallelInterleaveConsume", kParallelInterleaveConsume},
      {"ParallelInterleaveInitializeInput",
       kParallelInterleaveInitializedInput},
      {"ParallelMapProduce", kParallelMapProduce},
      {"ParallelMapConsume", kParallelMapConsume},
      {"MapAndBatchProduce", kMapAndBatchProduce},
      {"MapAndBatchConsume", kMapAndBatchConsume},
      {"ParseExampleProduce", kParseExampleProduce},
      {"ParseExampleConsume", kParseExampleConsume},
      {"ParallelBatchProduce", kParallelBatchProduce},
      {"ParallelBatchConsume", kParallelBatchConsume},
      // Batching related.
      {"BatchingSessionRun", kBatchingSessionRun},
      {"ProcessBatch", kProcessBatch},
      {"ConcatInputTensors", kConcatInputTensors},
      {"MergeInputTensors", kMergeInputTensors},
      {"ScheduleWithoutSplit", kScheduleWithoutSplit},
      {"ScheduleWithSplit", kScheduleWithSplit},
      {"ASBSQueue::Schedule", kASBSQueueSchedule},
      // TFRT related.
      {"TfrtModelRun", kTfrtModelRun},
      // JAX related.
      {"LocalExecutable::ExecuteOnLocalDevices", kExecuteOnLocalDevices},
      // GPU related.
      {"KernelLaunch", kKernelLaunch},
      {"KernelExecute", kKernelExecute},
  });
  DCHECK_EQ(host_event_type_map->size(), kNumHostEventTypes);
  return *host_event_type_map;
}

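// Lazily-initialized, intentionally leaked singleton mapping stat names to
// StatType enum values.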
const StatTypeMap& GetStatTypeMap() {
  static auto* stat_type_map = new StatTypeMap({
      {"UnknownStatType", kUnknownStatType},
      // TraceMe arguments.
      {"id", kStepId},
      {"parent_step_id", kParentStepId},
      {"function_step_id", kFunctionStepId},
      {"device_ordinal", kDeviceOrdinal},
      {"chip_ordinal", kChipOrdinal},
      {"node_ordinal", kNodeOrdinal},
      {"model_id", kModelId},
      {"queue_addr", kQueueAddr},
      {"request_id", kRequestId},
      {"run_id", kRunId},
      {"graph_type", kGraphType},
      {"step_num", kStepNum},
      {"iter_num", kIterNum},
      {"index_on_host", kIndexOnHost},
      {"allocator_name", kAllocatorName},
      {"bytes_reserved", kBytesReserved},
      {"bytes_allocated", kBytesAllocated},
      {"bytes_available", kBytesAvailable},
      {"fragmentation", kFragmentation},
      {"peak_bytes_in_use", kPeakBytesInUse},
      {"requested_bytes", kRequestedBytes},
      {"allocation_bytes", kAllocationBytes},
      {"addr", kAddress},
      {"region_type", kRegionType},
      {"data_type", kDataType},
      {"shape", kTensorShapes},
      {"layout", kTensorLayout},
      {"kpi_name", kKpiName},
      {"kpi_value", kKpiValue},
      {"element_id", kElementId},
      {"parent_id", kParentId},
      // XPlane semantics related.
      {"_pt", kProducerType},
      {"_ct", kConsumerType},
      {"_p", kProducerId},
      {"_c", kConsumerId},
      {"_r", kIsRoot},
      {"_a", kIsAsync},
      // Device trace arguments.
      {"device_id", kDeviceId},
      {"context_id", kContextId},
      {"correlation_id", kCorrelationId},
      {"memcpy_details", kMemcpyDetails},
      {"memalloc_details", kMemallocDetails},
      {"MemFree_details", kMemFreeDetails},
      {"Memset_details", kMemsetDetails},
      {"MemoryResidency_details", kMemoryResidencyDetails},
      {"kernel_details", kKernelDetails},
      {"annotation", kKernelAnnotation},
      {"nvtx_range", kNVTXRange},
      {"stream", kStream},
      // Stats added when processing traces.
      {"group_id", kGroupId},
      {"flow", kFlow},
      {"step_name", kStepName},
      {"level 0", kLevel0},
      {"tf_op", kTfOp},
      {"hlo_op", kHloOp},
      {"hlo_module", kHloModule},
      {"equation", kEquation},
      {"is_eager", kIsEager},
      {"tf_function_call", kTfFunctionCall},
      {"tracing_count", kTfFunctionTracingCount},
      {"flops", kFlops},
      {"bytes_accessed", kBytesAccessed},
      {"selected_group_ids", kSelectedGroupIds},
      {"source", kSourceInfo},
      {"model_name", kModelName},
      {"model_version", kModelVersion},
      // Performance counter related.
      {"Raw Value", kRawValue},
      {"Scaled Value", kScaledValue},
      {"Thread Id", kThreadId},
      // XLA metadata map related.
      {"SELF_DURATION_PS", kSelfDurationPs},
      {"MIN_DURATION_PS", kMinDurationPs},
      {"Hlo Proto", kHloProto},
      // Device capability related.
      {"clock_rate", kDevCapClockRateKHz},
      {"core_count", kDevCapCoreCount},
      {"memory_bandwidth", kDevCapMemoryBandwidth},
      {"memory_size", kDevCapMemorySize},
      {"compute_cap_major", kDevCapComputeCapMajor},
      {"compute_cap_minor", kDevCapComputeCapMinor},
      // Batching related.
      {"batch_size_after_padding", kBatchSizeAfterPadding},
      {"padding_amount", kPaddingAmount},
      {"batching_input_task_size", kBatchingInputTaskSize},
      // GPU related metrics.
      {"theoretical_occupancy_pct", kTheoreticalOccupancyPct},
      {"occupancy_min_grid_size", kOccupancyMinGridSize},
      {"occupancy_suggested_block_size", kOccupancySuggestedBlockSize},
  });
  DCHECK_EQ(stat_type_map->size(), kNumStatTypes);
  return *stat_type_map;
}

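// Reverse of GetHostEventTypeMap(): HostEventType enum value -> event name.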
const HostEventTypeStrMap& GetHostEventTypeStrMap() {
  static auto* host_event_type_str_map = new HostEventTypeStrMap(
      gtl::ReverseMap<HostEventTypeStrMap>(GetHostEventTypeMap()));
  return *host_event_type_str_map;
}

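// Reverse of GetStatTypeMap(): StatType enum value -> stat name.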
const StatTypeStrMap& GetStatTypeStrMap() {
  static auto* stat_type_str_map =
      new StatTypeStrMap(gtl::ReverseMap<StatTypeStrMap>(GetStatTypeMap()));
  return *stat_type_str_map;
}

}  // namespace

absl::string_view GetHostEventTypeStr(HostEventType event_type) {
  return GetHostEventTypeStrMap().at(event_type);
}

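// Returns the HostEventType registered for |event_name|, or nullopt if the
// name is not in GetHostEventTypeMap().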
absl::optional<int64> FindHostEventType(absl::string_view event_name) {
  if (auto event_type = gtl::FindOrNull(GetHostEventTypeMap(), event_name)) {
    return *event_type;
  }
  return absl::nullopt;
}

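// Classifies a TF op fullname: TensorFlow ops map to kTfOpRun, tf.data ops to
// kIterator, and anything else to nullopt.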
absl::optional<int64> FindTfOpEventType(absl::string_view event_name) {
  // TF op names.
  Category category = ParseTfOpFullname(event_name).category;
  switch (category) {
    case Category::kTensorFlow:
      return HostEventType::kTfOpRun;
    case Category::kTfData:
      return HostEventType::kIterator;
    default:
      return absl::nullopt;
  }
}

absl::string_view GetStatTypeStr(StatType stat_type) {
  return GetStatTypeStrMap().at(stat_type);
}

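// Returns the StatType registered for |stat_name|, or nullopt if the name is
// not in GetStatTypeMap().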
absl::optional<int64> FindStatType(absl::string_view stat_name) {
  if (auto stat_type = gtl::FindOrNull(GetStatTypeMap(), stat_name)) {
    return *stat_type;
  }
  return absl::nullopt;
}

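// Returns true for event types used internally by the profiler (memory
// allocation/deallocation and tf.data produce/consume events).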
bool IsInternalEvent(absl::optional<int64> event_type) {
  // TODO(b/162102421): Introduce a prefix for internal event names.
  if (!event_type.has_value()) return false;
  switch (*event_type) {
    case HostEventType::kMemoryAllocation:
    case HostEventType::kMemoryDeallocation:
    case HostEventType::kPrefetchProduce:
    case HostEventType::kPrefetchConsume:
    case HostEventType::kParallelInterleaveProduce:
    case HostEventType::kParallelInterleaveConsume:
    case HostEventType::kParallelInterleaveInitializedInput:
    case HostEventType::kParallelMapProduce:
    case HostEventType::kParallelMapConsume:
    case HostEventType::kMapAndBatchProduce:
    case HostEventType::kMapAndBatchConsume:
    case HostEventType::kParseExampleProduce:
    case HostEventType::kParseExampleConsume:
      return true;
    default:
      return false;
  }
}

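// Returns true for stat types treated as internal profiler metadata: kernel
// details, level 0, the XPlane semantics stats, flops, and bytes accessed.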
bool IsInternalStat(absl::optional<int64> stat_type) {
  // TODO(b/162102421): Introduce a prefix for internal stat names.
  if (!stat_type.has_value()) return false;
  switch (*stat_type) {
    case StatType::kKernelDetails:
    case StatType::kLevel0:
    case StatType::kProducerType:
    case StatType::kProducerId:
    case StatType::kConsumerType:
    case StatType::kConsumerId:
    case StatType::kIsRoot:
    case StatType::kIsAsync:
    case StatType::kFlops:
    case StatType::kBytesAccessed:
      return true;
    default:
      return false;
  }
}

}  // namespace profiler
}  // namespace tensorflow