1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_SCHEMA_H_
17 #define TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_SCHEMA_H_
18
19 #include "absl/strings/match.h"
20 #include "absl/strings/str_cat.h"
21 #include "absl/strings/string_view.h"
22 #include "absl/types/optional.h"
23 #include "tensorflow/core/platform/logging.h"
24 #include "tensorflow/core/platform/macros.h"
25 #include "tensorflow/core/platform/types.h"
26
27 namespace tensorflow {
28 namespace profiler {
29
30 // Name of XPlane that contains TraceMe events.
31 TF_CONST_INIT extern const absl::string_view kHostThreadsPlaneName;
32 // Name prefix of XPlane that contains GPU events.
33 TF_CONST_INIT extern const absl::string_view kGpuPlanePrefix;
34 // Name prefix of XPlane that contains TPU events.
35 TF_CONST_INIT extern const absl::string_view kTpuPlanePrefix;
36 // Name of XPlane that contains CUPTI driver API generated events.
37 TF_CONST_INIT extern const absl::string_view kCuptiDriverApiPlaneName;
38 // Name of XPlane that contains profile metadata such as XLA debug info.
39 TF_CONST_INIT extern const absl::string_view kMetadataPlaneName;
40 // Name of XPlane that contains kpi related metrics.
41 TF_CONST_INIT extern const absl::string_view kTFStreamzPlaneName;
42 // Name of XPlane that contains events from python tracer.
43 TF_CONST_INIT extern const absl::string_view kPythonTracerPlaneName;
44
45 // Names of XLines that contain ML-level events.
46 TF_CONST_INIT extern const absl::string_view kStepLineName;
47 TF_CONST_INIT extern const absl::string_view kTensorFlowNameScopeLineName;
48 TF_CONST_INIT extern const absl::string_view kTensorFlowOpLineName;
49 TF_CONST_INIT extern const absl::string_view kXlaModuleLineName;
50 TF_CONST_INIT extern const absl::string_view kXlaOpLineName;
51 TF_CONST_INIT extern const absl::string_view kKernelLaunchLineName;
52
// Host event types of interest; each enumerator stands for a TraceMe name.
//
// Values are assigned implicitly, counting up from kFirstHostEventType (0).
// kFirstHostEventType / kLastHostEventType alias the first and the last
// enumerator respectively, so [kFirstHostEventType, kLastHostEventType] spans
// every value declared here.
enum HostEventType {
  kFirstHostEventType = 0,
  kUnknownHostEventType = kFirstHostEventType,
  kTraceContext,
  kSessionRun,
  kFunctionRun,
  kRunGraph,
  kRunGraphDone,
  kTfOpRun,
  kEagerKernelExecute,
  kExecutorStateProcess,
  kExecutorDoneCallback,
  kMemoryAllocation,
  kMemoryDeallocation,

  // ---- Performance counters ----
  kRemotePerf,

  // ---- tf.data captured function events ----
  kTfDataCapturedFunctionRun,
  kTfDataCapturedFunctionRunWithBorrowedArgs,
  kTfDataCapturedFunctionRunInstantiated,
  kTfDataCapturedFunctionRunAsync,

  // ---- Functional ops ----
  kCallOp,
  kParallelForOp,
  kForeverOp,
  kNumericalGradientOpEvalRight,
  kNumericalGradientOpEvalLeft,
  kSymbolicGradientOp,
  kRemoteCallOp,
  kIfOp,
  kCaseOp,
  kWhileOpEvalCond,
  kWhileOpStartBody,
  kForOp,
  kPartitionedCallOp,

  // ---- tf.data ----
  kIteratorGetNextOp,
  kIteratorGetNextAsOptionalOp,
  kIterator,
  kDeviceInputPipelineSecondIterator,
  kPrefetchProduce,
  kPrefetchConsume,
  kParallelInterleaveProduce,
  kParallelInterleaveConsume,
  kParallelInterleaveInitializedInput,
  kParallelMapProduce,
  kParallelMapConsume,
  kMapAndBatchProduce,
  kMapAndBatchConsume,
  kParseExampleProduce,
  kParseExampleConsume,

  // ---- Batching ----
  kBatchingSessionRun,
  kProcessBatch,
  kConcatInputTensors,
  kMergeInputTensors,
  kScheduleWithoutSplit,
  kScheduleWithSplit,
  kASBSQueueSchedule,

  // ---- JAX ----
  kExecuteOnLocalDevices,

  // ---- GPU ----
  kKernelLaunch,
  kKernelExecute,

  // Alias of the final enumerator above; keep it pointing at the last entry.
  kLastHostEventType = kKernelExecute,
};
120
// Known stat types.
//
// Values are assigned implicitly, counting up from kFirstStatType (0).
// kFirstStatType / kLastStatType alias the first and the last enumerator
// respectively, so [kFirstStatType, kLastStatType] spans every value declared
// here.
enum StatType {
  kFirstStatType = 0,
  kUnknownStatType = kFirstStatType,

  // ---- TraceMe arguments ----
  kStepId,
  kParentStepId,
  kFunctionStepId,
  kDeviceOrdinal,
  kChipOrdinal,
  kNodeOrdinal,
  kModelId,
  kQueueAddr,
  kRequestId,
  kRunId,
  kGraphType,
  kStepNum,
  kIterNum,
  kIndexOnHost,
  kAllocatorName,
  kBytesReserved,
  kBytesAllocated,
  kBytesAvailable,
  kFragmentation,
  kPeakBytesInUse,
  kRequestedBytes,
  kAllocationBytes,
  kAddress,
  kRegionType,
  kDataType,
  kTensorShapes,
  kTensorLayout,
  kKpiName,
  kKpiValue,
  kElementId,
  kParentId,

  // ---- XPlane semantics ----
  kProducerType,
  kConsumerType,
  kProducerId,
  kConsumerId,
  kIsRoot,
  kIsAsync,

  // ---- Device trace arguments ----
  kDeviceId,
  kContextId,
  kCorrelationId,
  // TODO(b/176137043): These "details" should differentiate between activity
  // and API event sources.
  kMemcpyDetails,
  kMemallocDetails,
  kMemFreeDetails,
  kMemsetDetails,
  kMemoryResidencyDetails,
  kKernelAnnotation,
  kNVTXRange,
  kKernelDetails,
  kStream,

  // ---- Stats added while processing traces ----
  kGroupId,
  kFlow,
  kStepName,
  kLevel0,
  kTfOp,
  kHloOp,
  kHloModule,
  kEquation,
  kIsEager,
  kTfFunctionCall,
  kTfFunctionTracingCount,
  kFlops,
  kBytesAccessed,
  kSelectedGroupIds,

  // ---- Performance counters ----
  kRawValue,
  kScaledValue,
  kThreadId,

  // ---- XLA metadata map ----
  kSelfDurationPs,
  kMinDurationPs,
  kHloProto,

  // ---- Device capabilities ----
  kDevCapClockRateKHz,
  kDevCapCoreCount,
  kDevCapMemoryBandwidth,
  kDevCapMemorySize,
  kDevCapComputeCapMajor,
  kDevCapComputeCapMinor,

  // ---- Batching ----
  kBatchSizeAfterPadding,
  kPaddingAmount,
  kBatchingInputTaskSize,

  // ---- GPU occupancy metrics ----
  kTheoreticalOccupancyPct,
  kOccupancyMinGridSize,
  kOccupancySuggestedBlockSize,

  // Alias of the final enumerator above; keep it pointing at the last entry.
  kLastStatType = kOccupancySuggestedBlockSize,
};
218
GpuPlaneName(int32 device_ordinal)219 inline std::string GpuPlaneName(int32 device_ordinal) {
220 return absl::StrCat(kGpuPlanePrefix, device_ordinal);
221 }
222
223 absl::string_view GetHostEventTypeStr(HostEventType event_type);
224
225 bool IsHostEventType(HostEventType event_type, absl::string_view event_name);
226
IsHostEventType(HostEventType event_type,absl::string_view event_name)227 inline bool IsHostEventType(HostEventType event_type,
228 absl::string_view event_name) {
229 return GetHostEventTypeStr(event_type) == event_name;
230 }
231
232 absl::optional<int64> FindHostEventType(absl::string_view event_name);
233
234 absl::optional<int64> FindTfOpEventType(absl::string_view event_name);
235
236 absl::string_view GetStatTypeStr(StatType stat_type);
237
238 bool IsStatType(StatType stat_type, absl::string_view stat_name);
239
IsStatType(StatType stat_type,absl::string_view stat_name)240 inline bool IsStatType(StatType stat_type, absl::string_view stat_name) {
241 return GetStatTypeStr(stat_type) == stat_name;
242 }
243
244 absl::optional<int64> FindStatType(absl::string_view stat_name);
245
246 // Returns true if the given event shouldn't be shown in the trace viewer.
247 bool IsInternalEvent(absl::optional<int64> event_type);
248
249 // Returns true if the given stat shouldn't be shown in the trace viewer.
250 bool IsInternalStat(absl::optional<int64> stat_type);
251
252 // Support for flow events:
253 // This class enables encoding/decoding the flow id and direction, stored as
254 // XStat value.
255 class XFlow {
256 public:
257 enum FlowDirection {
258 kFlowUnspecified = 0x0,
259 kFlowIn = 0x1,
260 kFlowOut = 0x2,
261 kFlowInOut = 0x3,
262 };
263
XFlow(uint64 flow_id,FlowDirection direction)264 XFlow(uint64 flow_id, FlowDirection direction)
265 : encoded_((flow_id << 2) | (direction & 0x3)) {
266 DCHECK_NE(Direction(), kFlowUnspecified);
267 }
268
269 // Encoding
ToStatValue()270 uint64 ToStatValue() const { return encoded_; }
271
272 // Decoding
FromStatValue(uint64 encoded)273 static XFlow FromStatValue(uint64 encoded) { return XFlow(encoded); }
274
Id()275 uint64 Id() const { return (encoded_ >> 2); }
Direction()276 FlowDirection Direction() const { return FlowDirection(encoded_ & 0x3); }
277
278 private:
XFlow(uint64 encoded)279 explicit XFlow(uint64 encoded) : encoded_(encoded) {}
280
281 uint64 encoded_;
282 };
283
284 } // namespace profiler
285 } // namespace tensorflow
286
287 #endif // TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_SCHEMA_H_
288