syntax = "proto3";

package tensorflow.data;

// Represents the type of auto-sharding we enable.
// NOTE: Value names/numbers are frozen for wire and codegen compatibility
// (including the legacy unprefixed names and the negative OFF value).
enum AutoShardPolicy {
  // AUTO: Attempt FILE-based sharding, falling back to DATA-based sharding.
  AUTO = 0;
  // FILE: Shard by input files (each worker will get a set of files to
  // process).
  FILE = 1;
  // DATA: Shard by elements produced by the dataset.
  DATA = 2;
  // OFF: No sharding; each worker receives a copy of the full dataset.
  OFF = -1;
}

// Options for distributing the dataset across multiple workers/devices.
message DistributeOptions {
  // The type of sharding that auto-shard should attempt. If this is set to
  // FILE, then we will attempt to shard by files (each worker will get a set
  // of files to process). If we cannot find a set of files to shard for at
  // least one file per worker, we will error out. When this option is
  // selected, make sure that you have enough files so that each worker gets
  // at least one file. There will be a runtime error thrown if there are
  // insufficient files. If this is set to DATA, then we will shard by
  // elements produced by the dataset, and each worker will process the whole
  // dataset and discard the portion that is not for itself. If this is set to
  // OFF, then we will not autoshard, and each worker will receive a copy of
  // the full dataset. This option is set to AUTO by default, AUTO will
  // attempt to first shard by FILE, and fall back to sharding by DATA if we
  // cannot find a set of files to shard.
  AutoShardPolicy auto_shard_policy = 1;
  // The number of devices attached to this input pipeline.
  // Wrapped in a oneof to distinguish "unset" from an explicit 0 (proto3
  // scalars otherwise have implicit presence).
  oneof optional_num_devices {
    int32 num_devices = 2;
  }
}

// Options controlling the experimental map-vectorization optimization.
message MapVectorization {
  // Whether to vectorize map transformations.
  oneof optional_enabled {
    bool enabled = 1;
  }
  // Whether to use ChooseFastestBranchDataset with this transformation. If
  // True, the pipeline picks between the vectorized and original segment at
  // runtime based on their iterations speed.
  oneof optional_use_choose_fastest {
    bool use_choose_fastest = 2;
  }
}

// Graph-rewrite and autotuning options for a tf.data pipeline. Every scalar
// is wrapped in a single-field oneof so that "explicitly set to the default
// value" can be distinguished from "not set" (explicit field presence).
message OptimizationOptions {
  // Whether to apply default graph optimizations. If False, only graph
  // optimizations that have been explicitly enabled will be applied.
  oneof optional_apply_default_optimizations {
    bool apply_default_optimizations = 1;
  }
  // Whether to automatically tune performance knobs.
  oneof optional_autotune {
    bool autotune = 2;
  }
  // When autotuning is enabled (through autotune), determines whether to also
  // autotune buffer sizes for datasets with parallelism.
  oneof optional_autotune_buffers {
    bool autotune_buffers = 3;
  }
  // When autotuning is enabled (through autotune), determines the CPU budget
  // to use. Values greater than the number of schedulable CPU cores are
  // allowed but may result in CPU contention.
  oneof optional_autotune_cpu_budget {
    int32 autotune_cpu_budget = 4;
  }
  // When autotuning is enabled (through autotune), determines the RAM budget
  // to use. Values greater than the available RAM in bytes may result in OOM.
  // If 0, defaults to half of the available RAM in bytes.
  // int64 because the unit is bytes: an int32 would cap the budget at ~2 GiB,
  // below the RAM of typical hosts. (Widening int32 -> int64 is
  // wire-compatible: both are varint-encoded.)
  oneof optional_autotune_ram_budget {
    int64 autotune_ram_budget = 5;
  }
  // Whether to fuse filter transformations.
  oneof optional_filter_fusion {
    bool filter_fusion = 6;
  }
  // Whether to fuse filter dataset that predicts random_uniform < rate into a
  // sampling dataset.
  oneof optional_filter_with_random_uniform_fusion {
    bool filter_with_random_uniform_fusion = 7;
  }
  // Whether to hoist tf.random_uniform() ops out of map transformations.
  oneof optional_hoist_random_uniform {
    bool hoist_random_uniform = 8;
  }
  // Whether to fuse map and batch transformations.
  oneof optional_map_and_batch_fusion {
    bool map_and_batch_fusion = 9;
  }
  // Whether to fuse map and filter transformations.
  oneof optional_map_and_filter_fusion {
    bool map_and_filter_fusion = 10;
  }
  // Whether to fuse map transformations.
  oneof optional_map_fusion {
    bool map_fusion = 11;
  }
  // Whether to parallelize stateless map transformations.
  oneof optional_map_parallelization {
    bool map_parallelization = 12;
  }
  // The map vectorization options associated with the dataset.
  MapVectorization map_vectorization = 13;
  // Whether to eliminate no-op transformations.
  oneof optional_noop_elimination {
    bool noop_elimination = 14;
  }
  // Whether to parallelize copying of batch elements. This optimization is
  // highly experimental and can cause performance degradation (e.g. when the
  // parallelization overhead exceeds the benefits of performing the data
  // copies in parallel). You should only enable this optimization if a) your
  // input pipeline is bottlenecked on batching and b) you have validated that
  // this optimization improves performance.
  oneof optional_parallel_batch {
    bool parallel_batch = 15;
  }
  // Whether to reorder ops that will discard data to the front of unary
  // cardinality preserving transformations, e.g. dataset.map(...).take(3)
  // will be optimized to dataset.take(3).map(...). For now this optimization
  // will move `skip`, `shard` and `take` to the front of `map` and
  // `prefetch`. This optimization is only for performance; it will not affect
  // the output of the dataset.
  oneof optional_reorder_data_discarding_ops {
    bool reorder_data_discarding_ops = 16;
  }
  // Whether to fuse shuffle and repeat transformations.
  oneof optional_shuffle_and_repeat_fusion {
    bool shuffle_and_repeat_fusion = 17;
  }
}

// Options controlling threading for a tf.data pipeline.
message ThreadingOptions {
  // If set, it overrides the maximum degree of intra-op parallelism.
  oneof optional_max_intra_op_parallelism {
    int32 max_intra_op_parallelism = 1;
  }
  // If set, the dataset will use a private threadpool of the given size.
  oneof optional_private_threadpool_size {
    int32 private_threadpool_size = 2;
  }
}

// Represents how to handle external state during serialization.
// NOTE: WARN = 0 is the wire default by design (value numbers are frozen for
// compatibility), so an unset field behaves as WARN.
enum ExternalStatePolicy {
  // External state is ignored and a warning is logged.
  WARN = 0;
  // External state is ignored without a warning.
  IGNORE = 1;
  // External state results in an error.
  FAIL = 2;
}

// Message stored with Dataset objects to control how datasets are processed
// and optimized.
message Options {
  // Whether the outputs need to be produced in deterministic order.
  oneof optional_deterministic {
    bool deterministic = 1;
  }
  // The distribution strategy options associated with the dataset.
  DistributeOptions distribute_options = 2;
  // The optimization options associated with the dataset.
  OptimizationOptions optimization_options = 3;
  // Whether to introduce 'slack' in the last `prefetch` of the input
  // pipeline, if it exists. This may reduce CPU contention with accelerator
  // host-side activity at the start of a step. The slack frequency is
  // determined by the number of devices attached to this input pipeline.
  oneof optional_slack {
    bool slack = 4;
  }
  // The threading options associated with the dataset.
  ThreadingOptions threading_options = 5;
  // This option can be used to override the default policy for how to handle
  // external state when serializing a dataset or checkpointing its iterator.
  // There are three settings available - IGNORE: External state is ignored
  // without a warning; WARN: External state is ignored and a warning is
  // logged; FAIL: External state results in an error.
  oneof optional_external_state_policy {
    ExternalStatePolicy external_state_policy = 6;
  }
}