• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1syntax = "proto3";
2
3package tensorflow.profiler;
4
5import "google/protobuf/any.proto";
6import "tensorflow/core/profiler/protobuf/op_metrics.proto";
7
// Step-time breakdown for generic hardware. The components are mutually
// exclusive, so summing them yields the step time. When several event types
// occur within the same execution time interval, the interval is attributed
// to exactly one of those event types.
message GenericStepBreakdown {
  // Accumulated duration, in picoseconds, keyed by event type.
  map<int32, uint64> type_ps = 1;
}
17
// Describes memory transfers to or from device memory.
message DeviceMemoryTransfer {
  // NOTE(review): presumably the number of transfers observed -- confirm
  // against the code that populates this message.
  uint64 occurrence = 1;

  // Transfer time; the `_us` suffix suggests microseconds.
  double time_us = 2;

  // Number of bytes transferred.
  uint64 bytes_transferred = 3;
}
24
// Result proto for StepInfo.
// Next ID: 6
message StepInfoResult {
  // Step number.
  uint32 step_num = 1;

  // Step name.
  string step_name = 5;

  // Duration of the step, in picoseconds.
  uint64 duration_ps = 2;

  // Start time of the step, in picoseconds.
  uint64 begin_ps = 3;

  // Step-time breakdown; can be unpacked into a GenericStepBreakdown.
  google.protobuf.Any step_breakdown = 4;
}
39
// Metrics recorded for a single flow event.
message FlowEventInfo {
  // Identifier unique to each send/recv pair.
  uint64 flow_id = 1;

  // Channel id produced by the XLA compiler; statically unique within an
  // HloModule.
  int64 channel_id = 2;

  // Name of the HLO op.
  string name = 3;

  // Category of the HLO op.
  string category = 4;

  // Start time of the op event, in picoseconds.
  uint64 start_time_ps = 5;

  // End time of the op event, in picoseconds.
  uint64 end_time_ps = 6;

  // Size of the op, in bytes.
  uint64 byte_size = 7;

  // Replica id of the program running the flow event.
  uint32 replica_id = 8;
}
60
// Result database holding core-to-core flow events.
message FlowDbResult {
  // Flow events stored in this database.
  repeated FlowEventInfo flow_info = 1;
}
65
// Result proto for all-reduce ops.
message AllReduceInfo {
  // Identifier unique to each all-reduce op.
  uint64 id = 1;

  // Name of the HLO op.
  string name = 2;

  // All-reduce nodes from different modules that share the same all_reduce_id
  // are all-reduced together. If empty, AllReduce is not applied across
  // modules.
  uint64 all_reduce_id = 3;

  // Start time of the op event, in picoseconds.
  uint64 start_time_ps = 4;

  // End time of the op event, in picoseconds.
  uint64 end_time_ps = 5;

  // Size of the op, in bytes.
  uint64 byte_size = 6;
}
83
// Result database holding all-reduce ops.
message AllReduceDbResult {
  // All-reduce ops stored in this database.
  repeated AllReduceInfo all_reduce_info = 1;
}
88
// Information about one step, collected across all cores.
message PerCoreStepInfo {
  // Step number.
  uint32 step_num = 1;

  // StepInfo for each core, keyed by core_id.
  map<uint32, StepInfoResult> step_info_per_core = 2;

  // Per-step HLO-metric database.
  OpMetricsDb hlo_metrics_db = 3;

  // Send and recv flows. NOTE(review): the key is presumably the core id, as
  // the field name suggests -- confirm against the producer.
  map<uint32, FlowDbResult> flow_db_per_core = 4;

  // Program replica id for each core ID. The mapping may change during a
  // profile session, but should stay stable within a step.
  map<uint32, uint32> core_id_to_replica_id_map = 5;

  // All-reduce ops for each core, keyed by core_id.
  map<uint32, AllReduceDbResult> all_reduce_db_per_core = 6;

  // Device memory transfers, categorized by source and destination. Entries
  // are ordered by the following categories:
  //   1. HostToDevice
  //   2. DeviceToHost
  //   3. DeviceToDevice
  repeated DeviceMemoryTransfer device_memory_transfers = 7;
}
111
// Result proto for a StepDatabase.
message StepDatabaseResult {
  // Sequence of PerCoreStepInfo entries.
  repeated PerCoreStepInfo step_sequence = 1;

  // Whether the step db uses incomplete step information. Set to true when:
  //   1) no step marker or annotation is present, or
  //   2) the profiling duration is too short to cover a full step.
  // When false, the profile is grouped and broken down by complete steps only,
  // and incomplete steps are ignored. When true, the total profile is simply
  // aggregated and broken down as a single step.
  bool use_incomplete_step = 2;

  // Number of steps dropped during post processing.
  uint32 num_steps_dropped = 3;

  // Distinguishes the two reasons step_sequence can be empty:
  //   * false: no step was profiled on any host.
  //   * true: steps were profiled on some host, but the intersection of steps
  //     over all hosts is empty.
  bool empty_intersect = 4;
}
133