1syntax = "proto3"; 2 3package xrt; 4 5import "tensorflow/compiler/tf2xla/host_compute_metadata.proto"; 6import "tensorflow/compiler/xla/xla.proto"; 7import "tensorflow/compiler/xla/xla_data.proto"; 8import "tensorflow/compiler/xla/service/hlo.proto"; 9 10message DeviceAssignment { 11 message ComputationDevice { 12 message DeviceMeshCoordinates { 13 // The mesh coordinates for the device. Usually (X, Y, Core), in the order 14 // in which they are returned in the TopologyProto. 15 // X = value(0) 16 // Y = value(1) 17 // Core = value(2) 18 repeated int32 value = 1; 19 } 20 // As many replicas as there are in the replicated computation. 21 repeated DeviceMeshCoordinates replica_devices = 1; 22 } 23 // As many ComputationDevice as many there are computations (number 24 // of cores per replica). 25 repeated ComputationDevice computation_devices = 1; 26} 27 28// Options for an XLA compilation. 29message XLAComputationConfig { 30 // The number of replicas the computation will be run on. If this is 31 // default (0) it is interpreted as 1. 32 int32 num_replicas = 1; 33 // The number of "model-parallel" cores per replica. If this is 34 // default (0) it is interpreted as 1. 35 int32 num_cores_per_replica = 2; 36 // Optional metadata about host sends and recvs. 37 tensorflow.tf2xla.HostComputeMetadata host_compute_metadata = 3; 38 39 // The arg/result shapes for the whole computation. 40 xla.ProgramShapeProto program_shape = 4; 41 // The arg/result shapes for each core of a model-parallel 42 // computation. per_core_args_and_result_shapes is optional for a 43 // single-core computation. 44 repeated xla.ProgramShapeProto per_core_program_shape = 5; 45 // Describes how replicated computation instances should be assigned to 46 // devices. There are num_cores_per_replica computations, and each one will be 47 // sent and executed to the set of replica device numbers described in the 48 // DeviceAssignment proto. 49 DeviceAssignment device_assignment = 6; 50 // The debugging options to be passed to the XLA compilation process. 51 xla.DebugOptions debug_options = 7; 52} 53 54// Options and XLA computation for a compilation. 55message XLAComputation { 56 XLAComputationConfig config = 1; 57 xla.HloSnapshot hlo_snapshot = 2; 58} 59 60// Literal to allocate space for, and transfer to, device memory. 61message XLAAllocation { 62 reserved 1; 63 xla.LiteralProto value = 2; 64} 65 66// Node in a tree describing a tuple constructed from input handles. A 67// node is an internal node if tuples is non-empty, in which case 68// input_index and release_input_handle are ignored. Otherwise a node 69// is a leaf node. Each leaf XLATupleNode is the index of an input 70// which corresponds to a handle that will be grafted onto the output 71// tuple at that location. If release_input_handle is true that input 72// handle will be released and become invalid. Inputs may be repeated 73// in which case leaves of the output tuple will alias. If an input is 74// repeated, release_input_handle must be false for every leaf where 75// that input appears. 76// 77// For example, if input 0 has shape {} and input 1 has shape {2,3} 78// then the XLATupleNode with structure {1,{0,1}} corresponds to a 79// tuple with shape {{2,3},{{},{2,3}}}. 80message XLATupleNode { 81 int32 input_index = 1; 82 bool release_input_handle = 2; 83 repeated XLATupleNode tuples = 3; 84} 85 86// Options for an XLA execution. 87message XRTExecutionConfig { 88 // Local device to run on. 

// Options for an XLA execution.
message XRTExecutionConfig {
  // Local device to run on. This is present because the execute Op
  // may be placed on a device such as CPU or TPU_SYSTEM that
  // logically manages multiple cores.
  int32 device_ordinal = 1;
  // Which model-parallel computation to run from the compiled bundle.
  int32 core_index_in_replica = 2;
  // Optional key to disambiguate between executions. This is only
  // needed if multiple host send/recvs may be outstanding
  // concurrently with executions.
  string execution_instance_key = 3;
  // If non-zero, rng_seed to reset the core with.
  uint32 rng_seed = 4;
  // If true, release allocation handles on the inputs after running.
  bool release_input_handles = 5;
  // If true, release the handle to the computation after running.
  bool release_compilation_handle = 6;
  // If set to true, and the result shape is a tuple, then instead of
  // returning a single tuple allocation the execution will return a
  // vector of allocations, one for each of the first-level elements of
  // the result tuple.
  bool return_exploded_tuple = 7;
}
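
// A hedged end-to-end sketch (textproto, illustrative only) of an
// XLAComputationConfig for two replicas of a single-core computation;
// the mesh coordinates are made-up placeholders, not a real topology:
//
//   num_replicas: 2
//   num_cores_per_replica: 1
//   device_assignment {
//     computation_devices {
//       replica_devices { value: 0 value: 0 value: 0 }  # replica 0 at (X=0, Y=0, Core=0)
//       replica_devices { value: 1 value: 0 value: 0 }  # replica 1 at (X=1, Y=0, Core=0)
//     }
//   }
//
// There is one computation_devices entry because num_cores_per_replica
// is 1, and two replica_devices entries because num_replicas is 2.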