1syntax = "proto3"; 2 3package tensorflow.data; 4 5option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; 6 7// Next tag: 2 8message ProcessingModeDef { 9 // Specifies how data is sharded among tf.data service workers. 10 enum ShardingPolicy { 11 // No sharding will be performed. Each worker produces the entire dataset 12 // without any sharding. With this mode, the best practice is to shuffle the 13 // dataset nondeterministically so that workers process the dataset in 14 // different orders. 15 OFF = 0; 16 17 // The input dataset is dynamically split among workers at runtime. Each 18 // worker gets the next split when it reads data from the dispatcher. There 19 // is no fixed sharding with this mode. 20 DYNAMIC = 1; 21 22 // The following are static sharding policies. The semantics are similar to 23 // `tf.data.experimental.AutoShardPolicy`. These policies require: 24 // * The tf.data service cluster has a fixed size, and you need to specify 25 // the workers in DispatcherConfig. 26 // * Each client only reads from the local tf.data service worker. 27 // 28 // Shards by input files (each worker will get a set of files to process). 29 // When this option is selected, make sure that there is at least as many 30 // files as workers. If there are fewer input files than workers, a runtime 31 // error will be raised. 32 FILE = 2; 33 34 // Shards by elements produced by the dataset. Each worker will process the 35 // whole dataset and discard the portion that is not for itself. Note that 36 // for this mode to correctly partitions the dataset elements, the dataset 37 // needs to produce elements in a deterministic order. 38 DATA = 3; 39 40 // Attempts FILE-based sharding, falling back to DATA-based sharding on 41 // failures. 42 FILE_OR_DATA = 4; 43 44 // Looks for the presence of `shard(SHARD_HINT, ...)` which is treated as a 45 // placeholder to replace with `shard(num_workers, worker_index)`. 46 HINT = 5; 47 } 48 ShardingPolicy sharding_policy = 1; 49} 50 51// tf.data service deployment mode. 52enum DeploymentMode { 53 DEPLOYMENT_MODE_UNSPECIFIED = 0; 54 // tf.data service workers colocate with TF workers. 55 DEPLOYMENT_MODE_COLOCATED = 1; 56 // tf.data service workers run in dedicated tf.data hosts. 57 DEPLOYMENT_MODE_REMOTE = 2; 58 // tf.data service workers run in colocated TF hosts and dedicated tf.data 59 // hosts. 60 DEPLOYMENT_MODE_HYBRID = 3; 61} 62 63// Metadata related to tf.data service datasets. 64// Next tag: 4 65message DataServiceMetadata { 66 oneof optional_element_spec { 67 // Serialized element spec. 68 bytes element_spec = 1; 69 } 70 71 enum Compression { 72 COMPRESSION_UNSPECIFIED = 0; 73 // No compression. 74 COMPRESSION_OFF = 1; 75 // Snappy compression as defined in tensorflow/core/platform/snappy.h. 76 COMPRESSION_SNAPPY = 2; 77 } 78 Compression compression = 2; 79 80 // Cardinality of the dataset. 81 int64 cardinality = 3; 82} 83 84message CrossTrainerCacheOptions { 85 string trainer_id = 1; 86} 87 88// Data service config available to the client through GetDataServiceConfig RPC. 89// Next tag: 2 90message DataServiceConfig { 91 DeploymentMode deployment_mode = 1; 92} 93