// android-12.0.0_r34/s

syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/op_metrics.proto";

// Breakdown of step time on generic hardware. The components are mutually
// exclusive, so adding them together equals the step time. If several types of
// event occur during the same execution time interval, exactly one of those
// event types is picked and the whole interval is attributed to it.
message GenericStepBreakdown {
  // Maps an event type to the accumulated duration, in picoseconds, attributed
  // to that type.
  // NOTE(review): the definition of the int32 event-type keys is not visible
  // in this file -- confirm which enum/constants producers use.
  map<int32, uint64> type_ps = 1;
}

// Information about memory transfer to/from device memory.
message DeviceMemoryTransfer {
  // Presumably the number of transfers that occurred -- TODO confirm against
  // the producer.
  uint64 occurrence = 1;
  // Time spent on the transfers. Presumably microseconds, per the `_us`
  // suffix; note that other timing fields in this file use picoseconds.
  double time_us = 2;
  // Number of bytes transferred.
  uint64 bytes_transferred = 3;
}

// Next ID: 5
// Result proto for StepInfo: the timing of a single step.
message StepInfoResult {
  // The step number.
  uint32 step_num = 1;
  // The step duration in picoseconds.
  uint64 duration_ps = 2;
  // The start time of this step in picoseconds.
  uint64 begin_ps = 3;
  // Breakdown of the step time, packed as an Any. Can be unpacked into a
  // GenericStepBreakdown.
  // NOTE(review): whether other breakdown message types may also be packed
  // here is not visible in this file -- readers should check the type URL
  // before unpacking.
  google.protobuf.Any step_breakdown = 4;
}

// Result proto for metrics on a single flow event.
message FlowEventInfo {
  // Unique id shared by each send and recv pair.
  uint64 flow_id = 1;
  // Channel id generated by the XLA compiler. It is statically unique within
  // an HloModule.
  int64 channel_id = 2;
  // The name of the HLO op.
  string name = 3;
  // Category of the HLO op.
  string category = 4;
  // The start time of the op event, in picoseconds.
  uint64 start_time_ps = 5;
  // The end time of the op event, in picoseconds.
  uint64 end_time_ps = 6;
  // The size of the op in bytes.
  uint64 byte_size = 7;
  // The replica id of the program running the flow event.
  uint32 replica_id = 8;
}

// Result database for core-to-core flow events.
message FlowDbResult {
  // The flow events held in this database, one entry per FlowEventInfo.
  repeated FlowEventInfo flow_info = 1;
}

// Result proto for all-reduce ops.
message AllReduceInfo {
  // Unique id for all-reduce ops.
  uint64 id = 1;
  // The name of the HLO op.
  string name = 2;
  // All-reduce nodes from different modules that have the same all_reduce_id
  // are all-reduced together. If empty, all-reduce is not applied across
  // modules.
  // NOTE(review): for a proto3 uint64, "empty" presumably means unset, which
  // is indistinguishable from 0 -- confirm that 0 is never a valid id.
  uint64 all_reduce_id = 3;
  // The start time of the op event, in picoseconds.
  uint64 start_time_ps = 4;
  // The end time of the op event, in picoseconds.
  uint64 end_time_ps = 5;
  // The size of the op in bytes.
  uint64 byte_size = 6;
}

// Result database for all-reduce ops.
message AllReduceDbResult {
  // The all-reduce ops held in this database, one entry per AllReduceInfo.
  repeated AllReduceInfo all_reduce_info = 1;
}

// Result proto for information about a single step across all cores.
message PerCoreStepInfo {
  // The step number.
  uint32 step_num = 1;
  // A map from core_id to the StepInfoResult of that core for this step.
  map<uint32, StepInfoResult> step_info_per_core = 2;
  // The result for the per-step HLO-metric database.
  OpMetricsDb hlo_metrics_db = 3;
  // The result for send and recv flows. Presumably keyed by core_id, per the
  // field name -- TODO confirm.
  map<uint32, FlowDbResult> flow_db_per_core = 4;
  // A map from core id to program replica id. The replica id map could change
  // during a profile session, but should stay stable within a step.
  map<uint32, uint32> core_id_to_replica_id_map = 5;
  // A map from core_id to all-reduce ops.
  map<uint32, AllReduceDbResult> all_reduce_db_per_core = 6;
  // Information about device memory transfers, categorized by source and
  // destination. Entries are ordered by the following categories:
  // 1. HostToDevice
  // 2. DeviceToHost
  // 3. DeviceToDevice
  // NOTE(review): presumably one entry per category, in exactly this order
  // (so the list position identifies the category) -- confirm against the
  // producer.
  repeated DeviceMemoryTransfer device_memory_transfers = 7;
}

// Result proto for a StepDatabase.
message StepDatabaseResult {
  // A sequence of PerCoreStepInfo, one entry per step.
  repeated PerCoreStepInfo step_sequence = 1;
  // Whether the step db uses incomplete step information.
  // This flag is set to true when either:
  //   1) no step marker or annotation is present, or
  //   2) the profiling duration is too short to cover a full step.
  // If this flag is false, the profile is grouped and broken down by complete
  // steps only, and incomplete steps are ignored.
  // If this flag is true, the total profile is simply aggregated and broken
  // down as a single step.
  bool use_incomplete_step = 2;
  // Number of steps dropped during post processing.
  uint32 num_steps_dropped = 3;
}