/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/utils/xplane_schema.h"

#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/utils/tf_op_utils.h"

namespace tensorflow {
namespace profiler {

const absl::string_view kHostThreadsPlaneName = "/host:CPU";
const absl::string_view kGpuPlanePrefix = "/device:GPU:";
const absl::string_view kTpuPlanePrefix = "/device:TPU:";
const absl::string_view kCuptiDriverApiPlaneName = "/host:CUPTI";
const absl::string_view kMetadataPlaneName = "/host:metadata";
const absl::string_view kTFStreamzPlaneName = "/host:tfstreamz";
const absl::string_view kPythonTracerPlaneName = "/host:python-tracer";

const absl::string_view kStepLineName = "Steps";
const absl::string_view kTensorFlowNameScopeLineName = "TensorFlow Name Scope";
const absl::string_view kTensorFlowOpLineName = "TensorFlow Ops";
const absl::string_view kXlaModuleLineName = "XLA Modules";
const absl::string_view kXlaOpLineName = "XLA Ops";
const absl::string_view kKernelLaunchLineName = "Launch Stats";

namespace {

constexpr int kNumHostEventTypes =
    HostEventType::kLastHostEventType - HostEventType::kFirstHostEventType + 1;

constexpr int kNumStatTypes =
    StatType::kLastStatType - StatType::kFirstStatType + 1;

using HostEventTypeMap = absl::flat_hash_map<absl::string_view, HostEventType>;
using HostEventTypeStrMap =
    absl::flat_hash_map<HostEventType, absl::string_view>;
using StatTypeMap = absl::flat_hash_map<absl::string_view, StatType>;
using StatTypeStrMap = absl::flat_hash_map<StatType, absl::string_view>;
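// The lookup tables below are built lazily on first use and are never
// deleted; leaking the heap-allocated singletons avoids any dependence on
// static destruction order at process shutdown.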
const HostEventTypeMap& GetHostEventTypeMap() {
  static auto* host_event_type_map = new HostEventTypeMap({
      {"UnknownHostEventType", kUnknownHostEventType},
      {"TraceContext", kTraceContext},
      {"SessionRun", kSessionRun},
      {"FunctionRun", kFunctionRun},
      {"RunGraph", kRunGraph},
      {"RunGraphDone", kRunGraphDone},
      {"TfOpRun", kTfOpRun},
      {"EagerKernelExecute", kEagerKernelExecute},
      {"ExecutorState::Process", kExecutorStateProcess},
      {"ExecutorDoneCallback", kExecutorDoneCallback},
      {"MemoryAllocation", kMemoryAllocation},
      {"MemoryDeallocation", kMemoryDeallocation},
      // Performance counter related.
      {"RemotePerfCounter", kRemotePerf},
      // tf.data captured function events.
      {"InstantiatedCapturedFunction::Run", kTfDataCapturedFunctionRun},
      {"InstantiatedCapturedFunction::RunWithBorrowedArgs",
       kTfDataCapturedFunctionRunWithBorrowedArgs},
      {"InstantiatedCapturedFunction::RunInstantiated",
       kTfDataCapturedFunctionRunInstantiated},
      {"InstantiatedCapturedFunction::RunAsync",
       kTfDataCapturedFunctionRunAsync},
      // Functional ops.
      {"CallOp", kCallOp},
      {"ParallelForOp", kParallelForOp},
      {"ForeverOp", kForeverOp},
      {"NumericalGradientOp-EvalRight", kNumericalGradientOpEvalRight},
      {"NumericalGradientOp-EvalLeft", kNumericalGradientOpEvalLeft},
      {"SymbolicGradientOp", kSymbolicGradientOp},
      {"RemoteCallOp", kRemoteCallOp},
      {"IfOp", kIfOp},
      {"CaseOp", kCaseOp},
      {"WhileOp-EvalCond", kWhileOpEvalCond},
      {"WhileOp-StartBody", kWhileOpStartBody},
      {"ForOp", kForOp},
      {"PartitionedCallOp", kPartitionedCallOp},
      // tf.data related.
      {"IteratorGetNextOp::DoCompute", kIteratorGetNextOp},
      {"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp},
      {"Iterator", kIterator},
      {"Iterator::Prefetch::Generator", kDeviceInputPipelineSecondIterator},
      {"PrefetchProduce", kPrefetchProduce},
      {"PrefetchConsume", kPrefetchConsume},
      {"ParallelInterleaveProduce", kParallelInterleaveProduce},
      {"ParallelInterleaveConsume", kParallelInterleaveConsume},
      {"ParallelInterleaveInitializeInput",
       kParallelInterleaveInitializedInput},
      {"ParallelMapProduce", kParallelMapProduce},
      {"ParallelMapConsume", kParallelMapConsume},
      {"MapAndBatchProduce", kMapAndBatchProduce},
      {"MapAndBatchConsume", kMapAndBatchConsume},
      {"ParseExampleProduce", kParseExampleProduce},
      {"ParseExampleConsume", kParseExampleConsume},
      // Batching related.
      {"BatchingSessionRun", kBatchingSessionRun},
      {"ProcessBatch", kProcessBatch},
      {"ConcatInputTensors", kConcatInputTensors},
      {"MergeInputTensors", kMergeInputTensors},
      {"ScheduleWithoutSplit", kScheduleWithoutSplit},
      {"ScheduleWithSplit", kScheduleWithSplit},
      {"ASBSQueue::Schedule", kASBSQueueSchedule},
      // JAX related.
      {"LocalExecutable::ExecuteOnLocalDevices", kExecuteOnLocalDevices},
      // GPU related.
      {"KernelLaunch", kKernelLaunch},
      {"KernelExecute", kKernelExecute},
  });
  DCHECK_EQ(host_event_type_map->size(), kNumHostEventTypes);
  return *host_event_type_map;
}
const StatTypeMap& GetStatTypeMap() {
  static auto* stat_type_map = new StatTypeMap({
      {"UnknownStatType", kUnknownStatType},
      // TraceMe arguments.
      {"id", kStepId},
      {"parent_step_id", kParentStepId},
      {"function_step_id", kFunctionStepId},
      {"device_ordinal", kDeviceOrdinal},
      {"chip_ordinal", kChipOrdinal},
      {"node_ordinal", kNodeOrdinal},
      {"model_id", kModelId},
      {"queue_addr", kQueueAddr},
      {"request_id", kRequestId},
      {"run_id", kRunId},
      {"graph_type", kGraphType},
      {"step_num", kStepNum},
      {"iter_num", kIterNum},
      {"index_on_host", kIndexOnHost},
      {"allocator_name", kAllocatorName},
      {"bytes_reserved", kBytesReserved},
      {"bytes_allocated", kBytesAllocated},
      {"bytes_available", kBytesAvailable},
      {"fragmentation", kFragmentation},
      {"peak_bytes_in_use", kPeakBytesInUse},
      {"requested_bytes", kRequestedBytes},
      {"allocation_bytes", kAllocationBytes},
      {"addr", kAddress},
      {"region_type", kRegionType},
      {"data_type", kDataType},
      {"shape", kTensorShapes},
      {"layout", kTensorLayout},
      {"kpi_name", kKpiName},
      {"kpi_value", kKpiValue},
      {"element_id", kElementId},
      {"parent_id", kParentId},
      // XPlane semantics related.
      {"_pt", kProducerType},
      {"_ct", kConsumerType},
      {"_p", kProducerId},
      {"_c", kConsumerId},
      {"_r", kIsRoot},
      {"_a", kIsAsync},
      // Device trace arguments.
      {"device_id", kDeviceId},
      {"context_id", kContextId},
      {"correlation_id", kCorrelationId},
      {"memcpy_details", kMemcpyDetails},
      {"memalloc_details", kMemallocDetails},
      {"MemFree_details", kMemFreeDetails},
      {"Memset_details", kMemsetDetails},
      {"MemoryResidency_details", kMemoryResidencyDetails},
      {"kernel_details", kKernelDetails},
      {"annotation", kKernelAnnotation},
      {"nvtx_range", kNVTXRange},
      {"stream", kStream},
      // Stats added when processing traces.
      {"group_id", kGroupId},
      {"flow", kFlow},
      {"step_name", kStepName},
      {"level 0", kLevel0},
      {"tf_op", kTfOp},
      {"hlo_op", kHloOp},
      {"hlo_module", kHloModule},
      {"equation", kEquation},
      {"is_eager", kIsEager},
      {"tf_function_call", kTfFunctionCall},
      {"tracing_count", kTfFunctionTracingCount},
      {"flops", kFlops},
      {"bytes_accessed", kBytesAccessed},
      {"selected_group_ids", kSelectedGroupIds},
      // Performance counter related.
      {"Raw Value", kRawValue},
      {"Scaled Value", kScaledValue},
      {"Thread Id", kThreadId},
      // XLA metadata map related.
      {"SELF_DURATION_PS", kSelfDurationPs},
      {"MIN_DURATION_PS", kMinDurationPs},
      {"Hlo Proto", kHloProto},
      // Device capability related.
      {"clock_rate", kDevCapClockRateKHz},
      {"core_count", kDevCapCoreCount},
      {"memory_bandwidth", kDevCapMemoryBandwidth},
      {"memory_size", kDevCapMemorySize},
      {"compute_cap_major", kDevCapComputeCapMajor},
      {"compute_cap_minor", kDevCapComputeCapMinor},
      // Batching related.
      {"batch_size_after_padding", kBatchSizeAfterPadding},
      {"padding_amount", kPaddingAmount},
      {"batching_input_task_size", kBatchingInputTaskSize},
      // GPU related metrics.
      {"theoretical_occupancy_pct", kTheoreticalOccupancyPct},
      {"occupancy_min_grid_size", kOccupancyMinGridSize},
      {"occupancy_suggested_block_size", kOccupancySuggestedBlockSize},
  });
  DCHECK_EQ(stat_type_map->size(), kNumStatTypes);
  return *stat_type_map;
}
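// Enum-to-string maps, derived by inverting the name-to-enum maps above; each
// name/enum pair is assumed to be unique in both directions.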
const HostEventTypeStrMap& GetHostEventTypeStrMap() {
  static auto* host_event_type_str_map = new HostEventTypeStrMap(
      gtl::ReverseMap<HostEventTypeStrMap>(GetHostEventTypeMap()));
  return *host_event_type_str_map;
}

const StatTypeStrMap& GetStatTypeStrMap() {
  static auto* stat_type_str_map =
      new StatTypeStrMap(gtl::ReverseMap<StatTypeStrMap>(GetStatTypeMap()));
  return *stat_type_str_map;
}

}  // namespace
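// Returns the canonical name of a host event type. The lookup uses
// flat_hash_map::at(), so event_type must be a value registered in
// GetHostEventTypeMap().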
absl::string_view GetHostEventTypeStr(HostEventType event_type) {
  return GetHostEventTypeStrMap().at(event_type);
}
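// Looks up the HostEventType for an event name, e.g.
// FindHostEventType("SessionRun") yields HostEventType::kSessionRun; returns
// absl::nullopt for names that are not part of the schema.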
absl::optional<int64> FindHostEventType(absl::string_view event_name) {
  if (auto event_type = gtl::FindOrNull(GetHostEventTypeMap(), event_name)) {
    return *event_type;
  }
  return absl::nullopt;
}
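// Classifies a TF op fullname by its ParseTfOpFullname() category: regular
// TensorFlow ops map to kTfOpRun, tf.data ops map to kIterator, and any other
// category has no corresponding host event type.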
absl::optional<int64> FindTfOpEventType(absl::string_view event_name) {
  // TF op names.
  Category category = ParseTfOpFullname(event_name).category;
  switch (category) {
    case Category::kTensorFlow:
      return HostEventType::kTfOpRun;
    case Category::kTfData:
      return HostEventType::kIterator;
    default:
      return absl::nullopt;
  }
}
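// Returns the canonical name of a stat type; as with GetHostEventTypeStr(),
// the at() lookup assumes stat_type is a value registered in GetStatTypeMap().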
absl::string_view GetStatTypeStr(StatType stat_type) {
  return GetStatTypeStrMap().at(stat_type);
}
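// Looks up the StatType for a stat name, e.g. FindStatType("group_id") yields
// StatType::kGroupId; returns absl::nullopt for unknown names.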
absl::optional<int64> FindStatType(absl::string_view stat_name) {
  if (auto stat_type = gtl::FindOrNull(GetStatTypeMap(), stat_name)) {
    return *stat_type;
  }
  return absl::nullopt;
}
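// Host events treated as internal bookkeeping (memory allocation/deallocation
// and tf.data produce/consume events) rather than user-visible activity.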
bool IsInternalEvent(absl::optional<int64> event_type) {
  // TODO(b/162102421): Introduce a prefix for internal event names.
  if (!event_type.has_value()) return false;
  switch (*event_type) {
    case HostEventType::kMemoryAllocation:
    case HostEventType::kMemoryDeallocation:
    case HostEventType::kPrefetchProduce:
    case HostEventType::kPrefetchConsume:
    case HostEventType::kParallelInterleaveProduce:
    case HostEventType::kParallelInterleaveConsume:
    case HostEventType::kParallelInterleaveInitializedInput:
    case HostEventType::kParallelMapProduce:
    case HostEventType::kParallelMapConsume:
    case HostEventType::kMapAndBatchProduce:
    case HostEventType::kMapAndBatchConsume:
    case HostEventType::kParseExampleProduce:
    case HostEventType::kParseExampleConsume:
      return true;
    default:
      return false;
  }
}
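// Stats treated as internal: XPlane grouping/flow metadata (producer/consumer
// types and ids, root/async markers, level 0) and low-level details such as
// kernel details, flops, and bytes accessed.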
bool IsInternalStat(absl::optional<int64> stat_type) {
  // TODO(b/162102421): Introduce a prefix for internal stat names.
  if (!stat_type.has_value()) return false;
  switch (*stat_type) {
    case StatType::kKernelDetails:
    case StatType::kLevel0:
    case StatType::kProducerType:
    case StatType::kProducerId:
    case StatType::kConsumerType:
    case StatType::kConsumerId:
    case StatType::kIsRoot:
    case StatType::kIsAsync:
    case StatType::kFlops:
    case StatType::kBytesAccessed:
      return true;
    default:
      return false;
  }
}

}  // namespace profiler
}  // namespace tensorflow