syntax = "proto3";

package tensorflow.data;

option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";

// Next tag: 2
message ProcessingModeDef {
  // Specifies how data is sharded among tf.data service workers.
  enum ShardingPolicy {
    // No sharding will be performed. Each worker produces the entire dataset
    // without any sharding. With this mode, the best practice is to shuffle the
    // dataset nondeterministically so that workers process the dataset in
    // different orders.
    OFF = 0;

    // The input dataset is dynamically split among workers at runtime. Each
    // worker gets the next split when it reads data from the dispatcher. There
    // is no fixed sharding with this mode.
    DYNAMIC = 1;

    // The following are static sharding policies. The semantics are similar to
    // `tf.data.experimental.AutoShardPolicy`. These policies require:
    // * The tf.data service cluster has a fixed size, and you need to specify
    //   the workers in DispatcherConfig.
    // * Each client only reads from the local tf.data service worker.
    //
    // Shards by input files (each worker will get a set of files to process).
    // When this option is selected, make sure that there are at least as many
    // files as workers. If there are fewer input files than workers, a runtime
    // error will be raised.
    FILE = 2;

    // Shards by elements produced by the dataset. Each worker will process the
    // whole dataset and discard the portion that is not for itself. Note that
    // for this mode to correctly partition the dataset elements, the dataset
    // needs to produce elements in a deterministic order.
    DATA = 3;

    // Attempts FILE-based sharding, falling back to DATA-based sharding on
    // failures.
    FILE_OR_DATA = 4;

    // Looks for the presence of `shard(SHARD_HINT, ...)` which is treated as a
    // placeholder to replace with `shard(num_workers, worker_index)`.
    HINT = 5;
  }
  ShardingPolicy sharding_policy = 1;
}

// tf.data service deployment mode.
enum DeploymentMode {
  DEPLOYMENT_MODE_UNSPECIFIED = 0;
  // tf.data service workers colocate with TF workers.
  DEPLOYMENT_MODE_COLOCATED = 1;
  // tf.data service workers run in dedicated tf.data hosts.
  DEPLOYMENT_MODE_REMOTE = 2;
  // tf.data service workers run in colocated TF hosts and dedicated tf.data
  // hosts.
  DEPLOYMENT_MODE_HYBRID = 3;
}

// Metadata related to tf.data service datasets.
// Next tag: 4
message DataServiceMetadata {
  oneof optional_element_spec {
    // Serialized element spec.
    bytes element_spec = 1;
  }

  enum Compression {
    COMPRESSION_UNSPECIFIED = 0;
    // No compression.
    COMPRESSION_OFF = 1;
    // Snappy compression as defined in tensorflow/core/platform/snappy.h.
    COMPRESSION_SNAPPY = 2;
  }
  Compression compression = 2;

  // Cardinality of the dataset.
  int64 cardinality = 3;
}

// Options for the tf.data service cross-trainer cache.
message CrossTrainerCacheOptions {
  // Identifies the trainer reading from the cross-trainer cache.
  // NOTE(review): semantics inferred from the name — confirm against the
  // tf.data service client that sets this field.
  string trainer_id = 1;
}

// Data service config available to the client through GetDataServiceConfig RPC.
// Next tag: 2
message DataServiceConfig {
  DeploymentMode deployment_mode = 1;
}