syntax = "proto3";

package xrt;

import "tensorflow/compiler/tf2xla/host_compute_metadata.proto";
import "tensorflow/compiler/xla/service/hlo.proto";
import "tensorflow/compiler/xla/xla.proto";
import "tensorflow/compiler/xla/xla_data.proto";

// Assignment of the replicated computation instances of a compiled program
// to physical devices.
message DeviceAssignment {
  // The set of devices one (model-parallel) computation runs on, one entry
  // per replica.
  message ComputationDevice {
    // Identifies a single device within the device mesh.
    message DeviceMeshCoordinates {
      // The mesh coordinates for the device. Usually (X, Y, Core), in the
      // order in which they are returned in the TopologyProto.
      //   X    = value(0)
      //   Y    = value(1)
      //   Core = value(2)
      repeated int32 value = 1;
    }
    // As many replicas as there are in the replicated computation.
    repeated DeviceMeshCoordinates replica_devices = 1;
  }
  // As many ComputationDevices as there are computations (number of
  // cores per replica).
  repeated ComputationDevice computation_devices = 1;
}

// Options for an XLA compilation.
message XLAComputationConfig {
  // The number of replicas the computation will be run on. If this is
  // default (0) it is interpreted as 1.
  int32 num_replicas = 1;
  // The number of "model-parallel" cores per replica. If this is
  // default (0) it is interpreted as 1.
  int32 num_cores_per_replica = 2;
  // Optional metadata about host sends and recvs.
  tensorflow.tf2xla.HostComputeMetadata host_compute_metadata = 3;

  // The arg/result shapes for the whole computation.
  xla.ProgramShapeProto program_shape = 4;
  // The arg/result shapes for each core of a model-parallel
  // computation. per_core_args_and_result_shapes is optional for a
  // single-core computation.
  repeated xla.ProgramShapeProto per_core_program_shape = 5;
  // Describes how replicated computation instances should be assigned to
  // devices. There are num_cores_per_replica computations, and each one is
  // executed on the set of replica device numbers described in the
  // DeviceAssignment proto.
  DeviceAssignment device_assignment = 6;
  // The debugging options to be passed to the XLA compilation process.
  xla.DebugOptions debug_options = 7;

  // Everything inside Experimental is subject to change and is not subject
  // to API stability guarantees in
  // https://www.tensorflow.org/guide/version_compat.
  message Experimental {
    // Marks one computation input as a "stateful" quantity, and records
    // whether the computation updates it.
    message UpdateIndexPair {
      // Index of the input argument this entry refers to.
      int32 index = 1;
      // True if the value at this input index is updated by the computation.
      bool updated = 2;
    }

    // stateful_input_indices is only useful when using XRT-compiled
    // programs together with standard TensorFlow TPU execution ops, so should
    // be ignored by most clients.
    //
    // Optionally the client can pass information about which inputs
    // to the computation are updates to "stateful" quantities. Each
    // element of stateful_input_indices includes an index indicating
    // which input argument it corresponds to, and a bool indicating
    // whether the value is updated or not. If the XRT computation is
    // going to be used with a TensorFlow TPU execution op then an
    // input index must be present for each input that will correspond
    // to a resource variable in the execution op, and may not be
    // present for any other input.
    repeated UpdateIndexPair stateful_input_indices = 1;
  }

  // Experimental options; see the Experimental message comment above.
  Experimental experimental = 8;
}

// Options and XLA computation for a compilation.
message XLAComputation {
  // The compilation options (replica/core counts, shapes, debug options).
  XLAComputationConfig config = 1;
  // The HLO snapshot holding the computation to compile.
  xla.HloSnapshot hlo_snapshot = 2;
}

// Literal to allocate space for, and transfer to, device memory.
message XLAAllocation {
  reserved 1;
  // The literal value to place in the allocation.
  xla.LiteralProto value = 2;
}

// Node in a tree describing a tuple constructed from input handles. A
// node is an internal node if tuples is non-empty, in which case
// input_index and release_input_handle are ignored. Otherwise a node
// is a leaf node. Each leaf XLATupleNode is the index of an input
// which corresponds to a handle that will be grafted onto the output
// tuple at that location. If release_input_handle is true that input
// handle will be released and become invalid. Inputs may be repeated
// in which case leaves of the output tuple will alias. If an input is
// repeated, release_input_handle must be false for every leaf where
// that input appears.
//
// For example, if input 0 has shape {} and input 1 has shape {2,3}
// then the XLATupleNode with structure {1,{0,1}} corresponds to a
// tuple with shape {{2,3},{{},{2,3}}}.
message XLATupleNode {
  // Leaf only: index of the input whose handle is grafted here.
  int32 input_index = 1;
  // Leaf only: if true, the input handle is released and becomes invalid.
  bool release_input_handle = 2;
  // Non-empty makes this an internal node holding the child nodes.
  repeated XLATupleNode tuples = 3;
}

// Options for an XLA execution.
message XRTExecutionConfig {
  // Local device to run on. This is present because the execute Op
  // may be placed on a device such as CPU or TPU_SYSTEM that
  // logically manages multiple cores.
  int32 device_ordinal = 1;
  // Which model-parallel computation to run from the compiled bundle.
  int32 core_index_in_replica = 2;
  // Optional key to disambiguate between executions. This is only
  // needed if multiple host send/recvs may be outstanding
  // concurrently with executions.
  string execution_instance_key = 3;
  // If non-zero, rng_seed to reset the core with.
  uint32 rng_seed = 4;
  // If true, release allocation handles on the inputs after running.
  bool release_input_handles = 5;
  // If true, release the handle to the computation after running.
  bool release_compilation_handle = 6;
  // If set to true, and the result shape is a tuple, then instead of returning
  // a single tuple allocation the execution will return a vector of
  // allocations, one for each of the first-level elements of the result tuple.
  bool return_exploded_tuple = 7;
}

// Options for an XRTChainedExecute operation (see XRTChainedExecutePlan).
message XRTChainedExecuteConfig {
  // If non-zero, rng_seed to reset the core with.
  uint32 rng_seed = 1;
  // Which model-parallel computation to run from the compiled bundle.
  int32 core_index_in_replica = 2;
  // Optional key to disambiguate between executions. This is only needed if
  // multiple host send/recvs may be outstanding concurrently with executions.
  string execution_instance_key = 3;
}

// A single chained execute operation. An operation can either be a device data
// load, or an existing (as in, previously compiled and accessible via its int64
// handle) XLA computation execution.
message XRTChainedExecuteOp {
  // Represents an input for this operation.
  message Input {
    // The index within the XRTChainedExecutePlan.ops post-order of the source
    // operation for this input.
    int64 op_index = 1;
    // The output index of the value generated by the operation at op_index.
    // Zero (default value) means no index ({}) while if an indexing is
    // required, output_index needs to be set to index+1.
    // Thanks proto3!
    int64 output_index = 2;
  }
  // Represents an output of the XRTChainedExecute operation which originates
  // from the output of this operation.
  message Output {
    // The index in the value generated by this operation, which should be
    // forwarded as XRTChainedExecute output. If output_index is zero (default
    // value) the whole output will be used as result. This means that if the
    // output shape is a tuple, the result will be the full tuple. Otherwise the
    // real sub-tuple index will be output_index - 1.
    int64 output_index = 1;
    // The index in the vector of the results returned by the XRTChainedExecute
    // operation, where this output should be forwarded.
    int64 result_index = 2;
  }

  oneof op_oneof {
    // The handle to an existing XRT device data.
    int64 data_handle = 1;
    // The handle to an existing XRT compiled computation.
    int64 computation_handle = 2;
  }
  // The outputs of this XRTChainedExecuteOp operation.
  repeated Output outputs = 3;
  // The inputs of this XRTChainedExecuteOp operation. If data_handle is set,
  // there are no inputs.
  repeated Input inputs = 4;
}

// Execution plan for the XRTChainedExecute operation.
message XRTChainedExecutePlan {
  // The post order with the XRT computations to be executed.
  repeated XRTChainedExecuteOp ops = 1;
}

// The message used to encode the options for the XRTMetricsCollect operation.
message XRTMetricsCollect {
  // A list of regular expressions to match the metric names. Empty means to
  // return all the metrics reported by the collection registry.
  repeated string metrics_regex = 1;
}

// Statistics computed over the samples buffer of a single metric.
message Percentiles {
  // A single (percentile, value) sample point.
  message Point {
    // In the [0, 100] range.
    double percentile = 1;
    // The metric value observed at this percentile.
    double value = 2;
  }

  // The time (in nanoseconds) of the first sample within the samples buffer.
  uint64 start_nstime = 1;
  // The time (in nanoseconds) of the last sample within the samples buffer.
  uint64 end_nstime = 2;
  // The minimum value of the samples within the samples buffer.
  double min_value = 3;
  // The maximum value of the samples within the samples buffer.
  double max_value = 4;
  // The mean value of the samples within the samples buffer.
  double mean = 5;
  // The standard deviation of the samples within the samples buffer.
  double stddev = 6;
  // The number of samples within the samples buffer.
  uint64 num_samples = 7;
  // The total number of times a value has been posted to this metric.
  uint64 total_samples = 8;
  // The sum of all the posted values.
  double accumulator = 9;
  // The percentile points reported by the metric.
  repeated Point points = 10;
}

// A single named metric value: either percentile statistics or a plain
// int64 value.
message MetricValues {
  // The metric name.
  string name = 1;

  oneof values_oneof {
    // Set when the metric reports percentile statistics.
    Percentiles percentiles_value = 2;
    // Set when the metric reports a single int64 value.
    int64 int64_value = 3;
  }
}

// The set of metric values returned by an XRTMetricsCollect operation.
message MetricsReport {
  repeated MetricValues metrics = 1;
}