syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/op_metrics.proto";

// Breakdown of step-time on generic hardware. Note that these components are
// mutually exclusive, so adding them together equals the step time. If an
// execution time interval has multiple types of events happening, we need to
// pick one of the event types to attribute the time interval to.
message GenericStepBreakdown {
  // Map from event type to the accumulated duration, in picoseconds, of that
  // type.
  map<int32, uint64> type_ps = 1;
}

// Information about memory transfer to/from device memory.
message DeviceMemoryTransfer {
  // NOTE(review): presumably the number of transfers observed -- confirm
  // against the producer of this message.
  uint64 occurrence = 1;
  // Transfer time. The `_us` suffix suggests microseconds; whether this is a
  // total or an average is not stated here -- TODO confirm.
  double time_us = 2;
  // Number of bytes transferred.
  uint64 bytes_transferred = 3;
}

// Result proto for StepInfo.
// Next ID: 5
message StepInfoResult {
  // The step number.
  uint32 step_num = 1;
  // The step duration in picoseconds.
  uint64 duration_ps = 2;
  // The start time of this step in picoseconds.
  uint64 begin_ps = 3;
  // Breakdown of the step-time. Can be unpacked into a GenericStepBreakdown.
  google.protobuf.Any step_breakdown = 4;
}

// Result proto for metrics on flow events.
message FlowEventInfo {
  // Unique id for each send and recv pair.
  uint64 flow_id = 1;
  // Channel id generated by the XLA compiler; it is statically unique within
  // an HloModule.
  int64 channel_id = 2;
  // The name of the hlo op.
  string name = 3;
  // Category of the hlo op.
  string category = 4;
  // The start time in picoseconds of the op event.
  uint64 start_time_ps = 5;
  // The end time in picoseconds of the op event.
  uint64 end_time_ps = 6;
  // The size of the op in bytes.
  uint64 byte_size = 7;
  // The replica id of the program running the flow event.
  uint32 replica_id = 8;
}

// Result database for core to core flow events.
message FlowDbResult {
  // The flow events recorded in this database.
  repeated FlowEventInfo flow_info = 1;
}

// Result proto for all-reduce ops.
message AllReduceInfo {
  // Unique id for all-reduce ops.
  uint64 id = 1;
  // The name of the hlo op.
  string name = 2;
  // All-reduce nodes from different modules that have the same all_reduce_id
  // will be all-reduced together. If empty, AllReduce will not be applied
  // across modules.
  // NOTE(review): for a uint64 field "empty" presumably means 0 -- confirm.
  uint64 all_reduce_id = 3;
  // The start time in picoseconds of the op event.
  uint64 start_time_ps = 4;
  // The end time in picoseconds of the op event.
  uint64 end_time_ps = 5;
  // The size of the op in bytes.
  uint64 byte_size = 6;
}

// Result database for all-reduce ops.
message AllReduceDbResult {
  // The all-reduce ops recorded in this database.
  repeated AllReduceInfo all_reduce_info = 1;
}

// Result proto for information in a step across all cores.
message PerCoreStepInfo {
  // The step number.
  uint32 step_num = 1;
  // A map from core_id to StepInfo.
  map<uint32, StepInfoResult> step_info_per_core = 2;
  // The result for the per-step HLO-metric database.
  OpMetricsDb hlo_metrics_db = 3;
  // The result for send and recv flows.
  map<uint32, FlowDbResult> flow_db_per_core = 4;
  // A map from core ID to program replica id. The replica id map could change
  // during a profile session, but should stay stable within a step.
  map<uint32, uint32> core_id_to_replica_id_map = 5;
  // A map from core_id to all-reduce ops.
  map<uint32, AllReduceDbResult> all_reduce_db_per_core = 6;
  // Information about device memory transfers, categorized by source and
  // destination. Ordered by the following categories:
  // 1. HostToDevice
  // 2. DeviceToHost
  // 3. DeviceToDevice
  repeated DeviceMemoryTransfer device_memory_transfers = 7;
}

// Result proto for a StepDatabase.
message StepDatabaseResult {
  // A sequence of PerCoreStepInfo.
  repeated PerCoreStepInfo step_sequence = 1;
  // Whether the step db uses incomplete step information.
  // This flag is set to true when:
  // 1) no step marker or annotation is present.
  // 2) the profiling duration is too short to cover a full step.
  // If this flag is false, we will group and break down the profile by
  // complete steps only and ignore incomplete steps.
  // If this flag is true, we will simply aggregate and break down over the
  // total profile as a single step.
  bool use_incomplete_step = 2;
  // Number of steps dropped during post processing.
  uint32 num_steps_dropped = 3;
}