1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_SCHEMA_H_
17 #define TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_SCHEMA_H_
18
19 #include "absl/strings/match.h"
20 #include "absl/strings/str_cat.h"
21 #include "absl/strings/string_view.h"
22 #include "absl/types/optional.h"
23 #include "tensorflow/core/platform/logging.h"
24 #include "tensorflow/core/platform/macros.h"
25 #include "tensorflow/core/platform/types.h"
26
27 namespace tensorflow {
28 namespace profiler {
29
30 // Name of XPlane that contains TraceMe events.
31 TF_CONST_INIT extern const absl::string_view kHostThreadsPlaneName;
32 // Name prefix of XPlane that contains GPU events.
33 TF_CONST_INIT extern const absl::string_view kGpuPlanePrefix;
34 // Name prefix of XPlane that contains TPU events.
35 TF_CONST_INIT extern const absl::string_view kTpuPlanePrefix;
36 // Name prefix of XPlane that contains custom device events.
37 TF_CONST_INIT extern const absl::string_view kCustomPlanePrefix;
// Name of XPlane that contains TPU runtime events.
39 TF_CONST_INIT extern const absl::string_view kTpuRuntimePlaneName;
40 // Name of XPlane that contains CUPTI driver API generated events.
41 TF_CONST_INIT extern const absl::string_view kCuptiDriverApiPlaneName;
42 // Name of XPlane that contains Roctracer API generated events.
43 TF_CONST_INIT extern const absl::string_view kRoctracerApiPlaneName;
44 // Name of XPlane that contains profile metadata such as XLA debug info.
45 TF_CONST_INIT extern const absl::string_view kMetadataPlaneName;
// Name of XPlane that contains KPI-related metrics.
47 TF_CONST_INIT extern const absl::string_view kTFStreamzPlaneName;
48 // Name of XPlane that contains events from python tracer.
49 TF_CONST_INIT extern const absl::string_view kPythonTracerPlaneName;
50
51 // Names of XLines that contain ML-level events.
52 TF_CONST_INIT extern const absl::string_view kStepLineName;
53 TF_CONST_INIT extern const absl::string_view kTensorFlowNameScopeLineName;
54 TF_CONST_INIT extern const absl::string_view kTensorFlowOpLineName;
55 TF_CONST_INIT extern const absl::string_view kXlaModuleLineName;
56 TF_CONST_INIT extern const absl::string_view kXlaOpLineName;
57 TF_CONST_INIT extern const absl::string_view kKernelLaunchLineName;
58 TF_CONST_INIT extern const absl::string_view kSourceLineName;
59
// Host event types of interest (i.e., TraceMe names).
//
// Enumerators are numbered consecutively starting at zero, so every entry lies
// in the inclusive range [kFirstHostEventType, kLastHostEventType].
// kLastHostEventType aliases the final enumerator and has to be updated
// whenever a new entry is appended after it.
enum HostEventType {
  // Value zero doubles as the "unknown" type and as the start of the range.
  kUnknownHostEventType = 0,
  kFirstHostEventType = kUnknownHostEventType,
  kTraceContext,
  kSessionRun,
  kFunctionRun,
  kRunGraph,
  kRunGraphDone,
  kTfOpRun,
  kEagerKernelExecute,
  kExecutorStateProcess,
  kExecutorDoneCallback,
  kMemoryAllocation,
  kMemoryDeallocation,

  // Performance counters.
  kRemotePerf,

  // Captured-function events from tf.data.
  kTfDataCapturedFunctionRun,
  kTfDataCapturedFunctionRunWithBorrowedArgs,
  kTfDataCapturedFunctionRunInstantiated,
  kTfDataCapturedFunctionRunAsync,

  // Functional ops.
  kCallOp,
  kParallelForOp,
  kForeverOp,
  kNumericalGradientOpEvalRight,
  kNumericalGradientOpEvalLeft,
  kSymbolicGradientOp,
  kRemoteCallOp,
  kIfOp,
  kCaseOp,
  kWhileOpEvalCond,
  kWhileOpStartBody,
  kForOp,
  kPartitionedCallOp,

  // Other tf.data events.
  kIteratorGetNextOp,
  kIteratorGetNextAsOptionalOp,
  kIterator,
  kDeviceInputPipelineSecondIterator,
  kPrefetchProduce,
  kPrefetchConsume,
  kParallelInterleaveProduce,
  kParallelInterleaveConsume,
  kParallelInterleaveInitializedInput,
  kParallelMapProduce,
  kParallelMapConsume,
  kMapAndBatchProduce,
  kMapAndBatchConsume,
  kParseExampleProduce,
  kParseExampleConsume,
  kParallelBatchProduce,
  kParallelBatchConsume,

  // Batching.
  kBatchingSessionRun,
  kProcessBatch,
  kConcatInputTensors,
  kMergeInputTensors,
  kScheduleWithoutSplit,
  kScheduleWithSplit,
  kASBSQueueSchedule,

  // TFRT.
  kTfrtModelRun,

  // JAX.
  kExecuteOnLocalDevices,

  // GPU.
  kKernelLaunch,
  kKernelExecute,

  // End of the range (inclusive).
  kLastHostEventType = kKernelExecute,
};
131
// Stat types known to the schema.
//
// Enumerators are numbered consecutively starting at zero, so every entry lies
// in the inclusive range [kFirstStatType, kLastStatType]. kLastStatType aliases
// the final enumerator and has to be updated whenever a new entry is appended
// after it.
enum StatType {
  // Value zero doubles as the "unknown" type and as the start of the range.
  kUnknownStatType = 0,
  kFirstStatType = kUnknownStatType,

  // Arguments passed to TraceMe.
  kStepId,
  kParentStepId,
  kFunctionStepId,
  kDeviceOrdinal,
  kChipOrdinal,
  kNodeOrdinal,
  kModelId,
  kQueueAddr,
  kRequestId,
  kRunId,
  kGraphType,
  kStepNum,
  kIterNum,
  kIndexOnHost,
  kAllocatorName,
  kBytesReserved,
  kBytesAllocated,
  kBytesAvailable,
  kFragmentation,
  kPeakBytesInUse,
  kRequestedBytes,
  kAllocationBytes,
  kAddress,
  kRegionType,
  kDataType,
  kTensorShapes,
  kTensorLayout,
  kKpiName,
  kKpiValue,
  kElementId,
  kParentId,

  // XPlane semantics.
  kProducerType,
  kConsumerType,
  kProducerId,
  kConsumerId,
  kIsRoot,
  kIsAsync,

  // Arguments of device traces.
  kDeviceId,
  kContextId,
  kCorrelationId,
  // TODO(b/176137043): These "details" should differentiate between activity
  // and API event sources.
  kMemcpyDetails,
  kMemallocDetails,
  kMemFreeDetails,
  kMemsetDetails,
  kMemoryResidencyDetails,
  kKernelAnnotation,
  kNVTXRange,
  kKernelDetails,
  kStream,

  // Stats attached while traces are being processed.
  kGroupId,
  kFlow,
  kStepName,
  kLevel0,
  kTfOp,
  kHloOp,
  kHloModule,
  kEquation,
  kIsEager,
  kTfFunctionCall,
  kTfFunctionTracingCount,
  kFlops,
  kBytesAccessed,
  kSelectedGroupIds,
  kSourceInfo,
  kModelName,
  kModelVersion,

  // Performance counters.
  kRawValue,
  kScaledValue,
  kThreadId,

  // XLA metadata map.
  kSelfDurationPs,
  kMinDurationPs,
  kHloProto,

  // Device capabilities.
  kDevCapClockRateKHz,
  kDevCapCoreCount,
  kDevCapMemoryBandwidth,
  kDevCapMemorySize,
  kDevCapComputeCapMajor,
  kDevCapComputeCapMinor,

  // Batching.
  kBatchSizeAfterPadding,
  kPaddingAmount,
  kBatchingInputTaskSize,

  // GPU occupancy metrics.
  kTheoreticalOccupancyPct,
  kOccupancyMinGridSize,
  kOccupancySuggestedBlockSize,

  // End of the range (inclusive).
  kLastStatType = kOccupancySuggestedBlockSize,
};
232
GpuPlaneName(int32_t device_ordinal)233 inline std::string GpuPlaneName(int32_t device_ordinal) {
234 return absl::StrCat(kGpuPlanePrefix, device_ordinal);
235 }
236
237 absl::string_view GetHostEventTypeStr(HostEventType event_type);
238
239 bool IsHostEventType(HostEventType event_type, absl::string_view event_name);
240
IsHostEventType(HostEventType event_type,absl::string_view event_name)241 inline bool IsHostEventType(HostEventType event_type,
242 absl::string_view event_name) {
243 return GetHostEventTypeStr(event_type) == event_name;
244 }
245
246 absl::optional<int64> FindHostEventType(absl::string_view event_name);
247
248 absl::optional<int64> FindTfOpEventType(absl::string_view event_name);
249
250 absl::string_view GetStatTypeStr(StatType stat_type);
251
252 bool IsStatType(StatType stat_type, absl::string_view stat_name);
253
IsStatType(StatType stat_type,absl::string_view stat_name)254 inline bool IsStatType(StatType stat_type, absl::string_view stat_name) {
255 return GetStatTypeStr(stat_type) == stat_name;
256 }
257
258 absl::optional<int64> FindStatType(absl::string_view stat_name);
259
260 // Returns true if the given event shouldn't be shown in the trace viewer.
261 bool IsInternalEvent(absl::optional<int64> event_type);
262
263 // Returns true if the given stat shouldn't be shown in the trace viewer.
264 bool IsInternalStat(absl::optional<int64> stat_type);
265
266 // Support for flow events:
267 // This class enables encoding/decoding the flow id and direction, stored as
268 // XStat value.
269 class XFlow {
270 public:
271 enum FlowDirection {
272 kFlowUnspecified = 0x0,
273 kFlowIn = 0x1,
274 kFlowOut = 0x2,
275 kFlowInOut = 0x3,
276 };
277
XFlow(uint64 flow_id,FlowDirection direction)278 XFlow(uint64 flow_id, FlowDirection direction)
279 : encoded_((flow_id << 2) | (direction & 0x3)) {
280 DCHECK_NE(Direction(), kFlowUnspecified);
281 }
282
283 // Encoding
ToStatValue()284 uint64 ToStatValue() const { return encoded_; }
285
286 // Decoding
FromStatValue(uint64 encoded)287 static XFlow FromStatValue(uint64 encoded) { return XFlow(encoded); }
288
Id()289 uint64 Id() const { return (encoded_ >> 2); }
Direction()290 FlowDirection Direction() const { return FlowDirection(encoded_ & 0x3); }
291
292 private:
XFlow(uint64 encoded)293 explicit XFlow(uint64 encoded) : encoded_(encoded) {}
294
295 uint64 encoded_;
296 };
297
298 } // namespace profiler
299 } // namespace tensorflow
300
301 #endif // TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_SCHEMA_H_
302