1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/profiler/utils/xplane_schema.h"
17
18 #include "absl/container/flat_hash_map.h"
19 #include "absl/strings/string_view.h"
20 #include "absl/types/optional.h"
21 #include "tensorflow/core/lib/gtl/map_util.h"
22 #include "tensorflow/core/platform/logging.h"
23 #include "tensorflow/core/platform/types.h"
24 #include "tensorflow/core/profiler/utils/tf_op_utils.h"
25
26 namespace tensorflow {
27 namespace profiler {
28
// Name of the XPlane that holds host (CPU) thread events.
const absl::string_view kHostThreadsPlaneName = "/host:CPU";
// Per-device plane name prefixes; a device ordinal follows the trailing ':'.
const absl::string_view kGpuPlanePrefix = "/device:GPU:";
const absl::string_view kTpuPlanePrefix = "/device:TPU:";
// TODO(b/195582092): change it to /device:custom once all literals are
// migrated.
const absl::string_view kCustomPlanePrefix = "/device:CUSTOM:";

// Host-side planes produced by the various tracer backends.
const absl::string_view kTpuRuntimePlaneName = "/host:TPU-runtime";
const absl::string_view kCuptiDriverApiPlaneName = "/host:CUPTI";
const absl::string_view kRoctracerApiPlaneName = "/host:ROCTRACER";
const absl::string_view kMetadataPlaneName = "/host:metadata";
const absl::string_view kTFStreamzPlaneName = "/host:tfstreamz";
const absl::string_view kPythonTracerPlaneName = "/host:python-tracer";

// Names of XLines that post-processing tools create/recognize within a plane.
const absl::string_view kStepLineName = "Steps";
const absl::string_view kTensorFlowNameScopeLineName = "TensorFlow Name Scope";
const absl::string_view kTensorFlowOpLineName = "TensorFlow Ops";
const absl::string_view kXlaModuleLineName = "XLA Modules";
const absl::string_view kXlaOpLineName = "XLA Ops";
const absl::string_view kKernelLaunchLineName = "Launch Stats";
const absl::string_view kSourceLineName = "Source code";
50
51 namespace {
52
// Number of HostEventType enumerators, derived from the first/last sentinels
// so the DCHECKs below can verify that the name maps are complete.
constexpr int kNumHostEventTypes =
    HostEventType::kLastHostEventType - HostEventType::kFirstHostEventType + 1;

// Number of StatType enumerators, computed the same way.
constexpr int kNumStatTypes =
    StatType::kLastStatType - StatType::kFirstStatType + 1;

// Forward maps (name -> enum) and reverse maps (enum -> name).
using HostEventTypeMap = absl::flat_hash_map<absl::string_view, HostEventType>;
using HostEventTypeStrMap =
    absl::flat_hash_map<HostEventType, absl::string_view>;
using StatTypeMap = absl::flat_hash_map<absl::string_view, StatType>;
using StatTypeStrMap = absl::flat_hash_map<StatType, absl::string_view>;
64
// Returns the singleton map from host event name (as recorded in traces) to
// its HostEventType enum value. Allocated once on first use and intentionally
// never freed, so it stays valid during program shutdown.
const HostEventTypeMap& GetHostEventTypeMap() {
  static auto* host_event_type_map = new HostEventTypeMap({
      {"UnknownHostEventType", kUnknownHostEventType},
      {"TraceContext", kTraceContext},
      {"SessionRun", kSessionRun},
      {"FunctionRun", kFunctionRun},
      {"RunGraph", kRunGraph},
      {"RunGraphDone", kRunGraphDone},
      {"TfOpRun", kTfOpRun},
      {"EagerKernelExecute", kEagerKernelExecute},
      {"ExecutorState::Process", kExecutorStateProcess},
      {"ExecutorDoneCallback", kExecutorDoneCallback},
      {"MemoryAllocation", kMemoryAllocation},
      {"MemoryDeallocation", kMemoryDeallocation},
      // Performance counter related.
      {"RemotePerfCounter", kRemotePerf},
      // tf data captured function events.
      {"InstantiatedCapturedFunction::Run", kTfDataCapturedFunctionRun},
      {"InstantiatedCapturedFunction::RunWithBorrowedArgs",
       kTfDataCapturedFunctionRunWithBorrowedArgs},
      {"InstantiatedCapturedFunction::RunInstantiated",
       kTfDataCapturedFunctionRunInstantiated},
      {"InstantiatedCapturedFunction::RunAsync",
       kTfDataCapturedFunctionRunAsync},
      // Functional ops.
      {"CallOp", kCallOp},
      {"ParallelForOp", kParallelForOp},
      {"ForeverOp", kForeverOp},
      {"NumericalGradientOp-EvalRight", kNumericalGradientOpEvalRight},
      {"NumericalGradientOp-EvalLeft", kNumericalGradientOpEvalLeft},
      {"SymbolicGradientOp", kSymbolicGradientOp},
      {"RemoteCallOp", kRemoteCallOp},
      {"IfOp", kIfOp},
      {"CaseOp", kCaseOp},
      {"WhileOp-EvalCond", kWhileOpEvalCond},
      {"WhileOp-StartBody", kWhileOpStartBody},
      {"ForOp", kForOp},
      {"PartitionedCallOp", kPartitionedCallOp},
      // tf.data related.
      {"IteratorGetNextOp::DoCompute", kIteratorGetNextOp},
      {"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp},
      {"Iterator", kIterator},
      {"Iterator::Prefetch::Generator", kDeviceInputPipelineSecondIterator},
      {"PrefetchProduce", kPrefetchProduce},
      {"PrefetchConsume", kPrefetchConsume},
      {"ParallelInterleaveProduce", kParallelInterleaveProduce},
      {"ParallelInterleaveConsume", kParallelInterleaveConsume},
      {"ParallelInterleaveInitializeInput",
       kParallelInterleaveInitializedInput},
      {"ParallelMapProduce", kParallelMapProduce},
      {"ParallelMapConsume", kParallelMapConsume},
      {"MapAndBatchProduce", kMapAndBatchProduce},
      {"MapAndBatchConsume", kMapAndBatchConsume},
      {"ParseExampleProduce", kParseExampleProduce},
      {"ParseExampleConsume", kParseExampleConsume},
      {"ParallelBatchProduce", kParallelBatchProduce},
      {"ParallelBatchConsume", kParallelBatchConsume},
      // Batching related.
      {"BatchingSessionRun", kBatchingSessionRun},
      {"ProcessBatch", kProcessBatch},
      {"ConcatInputTensors", kConcatInputTensors},
      {"MergeInputTensors", kMergeInputTensors},
      {"ScheduleWithoutSplit", kScheduleWithoutSplit},
      {"ScheduleWithSplit", kScheduleWithSplit},
      {"ASBSQueue::Schedule", kASBSQueueSchedule},
      // TFRT related.
      {"TfrtModelRun", kTfrtModelRun},
      // JAX related.
      {"LocalExecutable::ExecuteOnLocalDevices", kExecuteOnLocalDevices},
      // GPU related.
      {"KernelLaunch", kKernelLaunch},
      {"KernelExecute", kKernelExecute},
  });
  // Every HostEventType enumerator must have exactly one name entry.
  DCHECK_EQ(host_event_type_map->size(), kNumHostEventTypes);
  return *host_event_type_map;
}
141
// Returns the singleton map from stat name (as recorded in traces) to its
// StatType enum value. Allocated once on first use and intentionally never
// freed, so it stays valid during program shutdown.
const StatTypeMap& GetStatTypeMap() {
  static auto* stat_type_map = new StatTypeMap({
      {"UnknownStatType", kUnknownStatType},
      // TraceMe arguments.
      {"id", kStepId},
      {"parent_step_id", kParentStepId},
      {"function_step_id", kFunctionStepId},
      {"device_ordinal", kDeviceOrdinal},
      {"chip_ordinal", kChipOrdinal},
      {"node_ordinal", kNodeOrdinal},
      {"model_id", kModelId},
      {"queue_addr", kQueueAddr},
      {"request_id", kRequestId},
      {"run_id", kRunId},
      {"graph_type", kGraphType},
      {"step_num", kStepNum},
      {"iter_num", kIterNum},
      {"index_on_host", kIndexOnHost},
      {"allocator_name", kAllocatorName},
      {"bytes_reserved", kBytesReserved},
      {"bytes_allocated", kBytesAllocated},
      {"bytes_available", kBytesAvailable},
      {"fragmentation", kFragmentation},
      {"peak_bytes_in_use", kPeakBytesInUse},
      {"requested_bytes", kRequestedBytes},
      {"allocation_bytes", kAllocationBytes},
      {"addr", kAddress},
      {"region_type", kRegionType},
      {"data_type", kDataType},
      {"shape", kTensorShapes},
      {"layout", kTensorLayout},
      {"kpi_name", kKpiName},
      {"kpi_value", kKpiValue},
      {"element_id", kElementId},
      {"parent_id", kParentId},
      // XPlane semantics related. Short names keep trace files small.
      {"_pt", kProducerType},
      {"_ct", kConsumerType},
      {"_p", kProducerId},
      {"_c", kConsumerId},
      {"_r", kIsRoot},
      {"_a", kIsAsync},
      // Device trace arguments.
      {"device_id", kDeviceId},
      {"context_id", kContextId},
      {"correlation_id", kCorrelationId},
      {"memcpy_details", kMemcpyDetails},
      {"memalloc_details", kMemallocDetails},
      {"MemFree_details", kMemFreeDetails},
      {"Memset_details", kMemsetDetails},
      {"MemoryResidency_details", kMemoryResidencyDetails},
      {"kernel_details", kKernelDetails},
      {"annotation", kKernelAnnotation},
      {"nvtx_range", kNVTXRange},
      {"stream", kStream},
      // Stats added when processing traces.
      {"group_id", kGroupId},
      {"flow", kFlow},
      {"step_name", kStepName},
      {"level 0", kLevel0},
      {"tf_op", kTfOp},
      {"hlo_op", kHloOp},
      {"hlo_module", kHloModule},
      {"equation", kEquation},
      {"is_eager", kIsEager},
      {"tf_function_call", kTfFunctionCall},
      {"tracing_count", kTfFunctionTracingCount},
      {"flops", kFlops},
      {"bytes_accessed", kBytesAccessed},
      {"selected_group_ids", kSelectedGroupIds},
      {"source", kSourceInfo},
      {"model_name", kModelName},
      {"model_version", kModelVersion},
      // Performance counter related.
      {"Raw Value", kRawValue},
      {"Scaled Value", kScaledValue},
      {"Thread Id", kThreadId},
      // XLA metadata map related.
      {"SELF_DURATION_PS", kSelfDurationPs},
      {"MIN_DURATION_PS", kMinDurationPs},
      {"Hlo Proto", kHloProto},
      // Device capability related.
      {"clock_rate", kDevCapClockRateKHz},
      {"core_count", kDevCapCoreCount},
      {"memory_bandwidth", kDevCapMemoryBandwidth},
      {"memory_size", kDevCapMemorySize},
      {"compute_cap_major", kDevCapComputeCapMajor},
      {"compute_cap_minor", kDevCapComputeCapMinor},
      // Batching related.
      {"batch_size_after_padding", kBatchSizeAfterPadding},
      {"padding_amount", kPaddingAmount},
      {"batching_input_task_size", kBatchingInputTaskSize},
      // GPU related metrics.
      {"theoretical_occupancy_pct", kTheoreticalOccupancyPct},
      {"occupancy_min_grid_size", kOccupancyMinGridSize},
      {"occupancy_suggested_block_size", kOccupancySuggestedBlockSize},
  });
  // Every StatType enumerator must have exactly one name entry.
  DCHECK_EQ(stat_type_map->size(), kNumStatTypes);
  return *stat_type_map;
}
242
GetHostEventTypeStrMap()243 const HostEventTypeStrMap& GetHostEventTypeStrMap() {
244 static auto* host_event_type_str_map = new HostEventTypeStrMap(
245 gtl::ReverseMap<HostEventTypeStrMap>(GetHostEventTypeMap()));
246 return *host_event_type_str_map;
247 }
248
GetStatTypeStrMap()249 const StatTypeStrMap& GetStatTypeStrMap() {
250 static auto* stat_type_str_map =
251 new StatTypeStrMap(gtl::ReverseMap<StatTypeStrMap>(GetStatTypeMap()));
252 return *stat_type_str_map;
253 }
254
255 } // namespace
256
GetHostEventTypeStr(HostEventType event_type)257 absl::string_view GetHostEventTypeStr(HostEventType event_type) {
258 return GetHostEventTypeStrMap().at(event_type);
259 }
260
FindHostEventType(absl::string_view event_name)261 absl::optional<int64> FindHostEventType(absl::string_view event_name) {
262 if (auto event_type = gtl::FindOrNull(GetHostEventTypeMap(), event_name)) {
263 return *event_type;
264 }
265 return absl::nullopt;
266 }
267
FindTfOpEventType(absl::string_view event_name)268 absl::optional<int64> FindTfOpEventType(absl::string_view event_name) {
269 // TF op names.
270 Category category = ParseTfOpFullname(event_name).category;
271 switch (category) {
272 case Category::kTensorFlow:
273 return HostEventType::kTfOpRun;
274 case Category::kTfData:
275 return HostEventType::kIterator;
276 default:
277 return absl::nullopt;
278 }
279 }
280
GetStatTypeStr(StatType stat_type)281 absl::string_view GetStatTypeStr(StatType stat_type) {
282 return GetStatTypeStrMap().at(stat_type);
283 }
284
FindStatType(absl::string_view stat_name)285 absl::optional<int64> FindStatType(absl::string_view stat_name) {
286 if (auto stat_type = gtl::FindOrNull(GetStatTypeMap(), stat_name)) {
287 return *stat_type;
288 }
289 return absl::nullopt;
290 }
291
IsInternalEvent(absl::optional<int64> event_type)292 bool IsInternalEvent(absl::optional<int64> event_type) {
293 // TODO(b/162102421): Introduce a prefix for internal event names.
294 if (!event_type.has_value()) return false;
295 switch (*event_type) {
296 case HostEventType::kMemoryAllocation:
297 case HostEventType::kMemoryDeallocation:
298 case HostEventType::kPrefetchProduce:
299 case HostEventType::kPrefetchConsume:
300 case HostEventType::kParallelInterleaveProduce:
301 case HostEventType::kParallelInterleaveConsume:
302 case HostEventType::kParallelInterleaveInitializedInput:
303 case HostEventType::kParallelMapProduce:
304 case HostEventType::kParallelMapConsume:
305 case HostEventType::kMapAndBatchProduce:
306 case HostEventType::kMapAndBatchConsume:
307 case HostEventType::kParseExampleProduce:
308 case HostEventType::kParseExampleConsume:
309 return true;
310 default:
311 return false;
312 }
313 }
314
IsInternalStat(absl::optional<int64> stat_type)315 bool IsInternalStat(absl::optional<int64> stat_type) {
316 // TODO(b/162102421): Introduce a prefix for internal stat names.
317 if (!stat_type.has_value()) return false;
318 switch (*stat_type) {
319 case StatType::kKernelDetails:
320 case StatType::kLevel0:
321 case StatType::kProducerType:
322 case StatType::kProducerId:
323 case StatType::kConsumerType:
324 case StatType::kConsumerId:
325 case StatType::kIsRoot:
326 case StatType::kIsAsync:
327 case StatType::kFlops:
328 case StatType::kBytesAccessed:
329 return true;
330 default:
331 return false;
332 }
333 }
334
335 } // namespace profiler
336 } // namespace tensorflow
337