1syntax = "proto3"; 2 3package tensorflow.data; 4 5import "tensorflow/core/framework/model.proto"; 6 7option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/dataset_options_go_proto"; 8 9// Represents the type of auto-sharding we enable. 10enum AutoShardPolicy { 11 // AUTO: Attempts FILE-based sharding, falling back to DATA-based sharding. 12 AUTO = 0; 13 // FILE: Shards by input files (i.e. each worker will get a set of files to 14 // process). When this option is selected, make sure that there is at least as 15 // many files as workers. If there are fewer input files than workers, a 16 // runtime error will be raised. 17 FILE = 1; 18 // DATA: Shards by elements produced by the dataset. Each worker will process 19 // the whole dataset and discard the portion that is not for itself. Note that 20 // for this mode to correctly partitions the dataset elements, the dataset 21 // needs to produce elements in a deterministic order. 22 DATA = 2; 23 // HINT: Looks for the presence of `shard(SHARD_HINT, ...)` which is treated 24 // as a placeholder to replace with `shard(num_workers, worker_index)`. 25 HINT = 3; 26 // OFF: No sharding will be performed. 27 OFF = -1; 28} 29 30// next: 5 31message AutotuneOptions { 32 // Whether to automatically tune performance knobs. 33 oneof optional_enabled { 34 bool enabled = 1; 35 } 36 // When autotuning is enabled (through autotune), determines the CPU budget to 37 // use. Values greater than the number of schedulable CPU cores are allowed 38 // but may result in CPU contention. 39 oneof optional_cpu_budget { 40 int32 cpu_budget = 2; 41 } 42 // When autotuning is enabled (through autotune), determines the RAM budget to 43 // use. Values greater than the available RAM in bytes may result in OOM. If 44 // 0, defaults to half of the available RAM in bytes. 45 oneof optional_ram_budget { 46 int64 ram_budget = 3; 47 } 48 49 // When autotuning is enabled (through autotune), determines the algorithm to 50 // use. If not explicitly set by user, autotuning will follow HILL_CLIMB 51 // algorithm but has more flexibility to tune parameters more aggressively, 52 // in which case the behavior is implementation specific and may change over 53 // time. 54 oneof optional_autotune_algorithm { 55 model.AutotuneAlgorithm autotune_algorithm = 4; 56 } 57} 58 59// next: 2 60message CardinalityOptions { 61 enum ComputeLevel { 62 CARDINALITY_COMPUTE_UNSPECIFIED = 0; 63 // Cardinality will only be computed if it can be determined in a cheap 64 // manner (ie. without reading from file sources). If the cardinality would 65 // be nontrivial to compute, Cardinality() will return UNKNOWN_CARDINALITY. 66 CARDINALITY_COMPUTE_LOW = 1; 67 // Moderate effort will be made to determine cardinality, such as reading 68 // index data from source files. If significant work is needed to compute 69 // cardinality (e.g. reading entire source file contents or executing user 70 // defined functions), Cardinality() will return UNKNOWN_CARDINALITY. 71 CARDINALITY_COMPUTE_MODERATE = 2; 72 } 73 ComputeLevel compute_level = 1; 74} 75 76// next: 3 77message DistributeOptions { 78 AutoShardPolicy auto_shard_policy = 1; 79 // The number of devices attached to this input pipeline. 80 oneof optional_num_devices { 81 int32 num_devices = 2; 82 } 83} 84 85// next: 20 86message OptimizationOptions { 87 // Whether to apply default graph optimizations. If False, only graph 88 // optimizations that have been explicitly enabled will be applied. 
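// Illustrative example (not part of the schema): a DistributeOptions
// textproto that shards by input files across a two-device pipeline. The
// values are arbitrary sample settings, not recommended defaults.
//
//   auto_shard_policy: FILE
//   num_devices: 2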
// next: 20
message OptimizationOptions {
  // Whether to apply default graph optimizations. If False, only graph
  // optimizations that have been explicitly enabled will be applied.
  oneof optional_apply_default_optimizations {
    bool apply_default_optimizations = 1;
  }
  reserved 2;
  reserved 3;
  reserved 4;
  reserved 5;
  // Whether to fuse filter transformations.
  oneof optional_filter_fusion {
    bool filter_fusion = 6;
  }
  // NOTE: field id 7 deleted in June 2021.
  reserved 7;
  // NOTE: field id 8 deleted in June 2021.
  reserved 8;
  // Whether to fuse map and batch transformations.
  oneof optional_map_and_batch_fusion {
    bool map_and_batch_fusion = 9;
  }
  // Whether to fuse map and filter transformations.
  oneof optional_map_and_filter_fusion {
    bool map_and_filter_fusion = 10;
  }
  // Whether to fuse map transformations.
  oneof optional_map_fusion {
    bool map_fusion = 11;
  }
  // Whether to parallelize stateless map transformations.
  oneof optional_map_parallelization {
    bool map_parallelization = 12;
  }

  // NOTE: field id 13 deleted in June 2021.
  reserved 13;

  // Whether to eliminate no-op transformations.
  oneof optional_noop_elimination {
    bool noop_elimination = 14;
  }
  // Whether to parallelize copying of batch elements. This optimization is
  // highly experimental and can cause performance degradation (e.g. when the
  // parallelization overhead exceeds the benefits of performing the data
  // copies in parallel). You should only enable this optimization if a) your
  // input pipeline is bottlenecked on batching and b) you have validated that
  // this optimization improves performance.
  oneof optional_parallel_batch {
    bool parallel_batch = 15;
  }
  // NOTE: field id 16 deleted in June 2021.
  reserved 16;
  // Whether to fuse shuffle and repeat transformations.
  oneof optional_shuffle_and_repeat_fusion {
    bool shuffle_and_repeat_fusion = 17;
  }
  // Whether to parallelize stateless filter transformations.
  oneof optional_filter_parallelization {
    bool filter_parallelization = 18;
  }
  // Whether to inject 'Prefetch' as the last transformation. Only takes
  // effect if the last transformation is synchronous; otherwise does nothing.
  oneof optional_inject_prefetch {
    bool inject_prefetch = 19;
  }
}

// next: 3
message ThreadingOptions {
  // If set, it overrides the maximum degree of intra-op parallelism.
  oneof optional_max_intra_op_parallelism {
    int32 max_intra_op_parallelism = 1;
  }
  // If set, the dataset will use a private threadpool of the given size.
  oneof optional_private_threadpool_size {
    int32 private_threadpool_size = 2;
  }
}

// Represents how to handle external state during serialization.
enum ExternalStatePolicy {
  // External state is ignored and a warning is logged.
  POLICY_WARN = 0;
  // External state is ignored without a warning.
  POLICY_IGNORE = 1;
  // External state results in an error.
  POLICY_FAIL = 2;
}
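// Illustrative note (not part of the schema): each scalar knob above is
// wrapped in a single-field oneof so that "unset" can be distinguished from
// an explicit false/0, since plain proto3 scalar fields carry no presence
// information. In textproto the wrapper is invisible and the inner field is
// set directly, e.g. for OptimizationOptions (sample values):
//
//   map_and_batch_fusion: true
//   noop_elimination: true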
// Message stored with Dataset objects to control how datasets are processed
// and optimized.
//
// next: 8
message Options {
  // Whether the outputs need to be produced in deterministic order.
  oneof optional_deterministic {
    bool deterministic = 1;
  }
  // The autotuning options associated with the dataset.
  AutotuneOptions autotune_options = 7;
  // The distribution strategy options associated with the dataset.
  DistributeOptions distribute_options = 2;
  // The optimization options associated with the dataset.
  OptimizationOptions optimization_options = 3;
  // Whether to introduce 'slack' in the last `prefetch` of the input
  // pipeline, if it exists. This may reduce CPU contention with accelerator
  // host-side activity at the start of a step. The slack frequency is
  // determined by the number of devices attached to this input pipeline.
  oneof optional_slack {
    bool slack = 4;
  }
  // The threading options associated with the dataset.
  ThreadingOptions threading_options = 5;
  // This option can be used to override the default policy for how to handle
  // external state when serializing a dataset or checkpointing its iterator.
  // There are three settings available - IGNORE: External state is ignored
  // without a warning; WARN: External state is ignored and a warning is
  // logged; FAIL: External state results in an error.
  oneof optional_external_state_policy {
    ExternalStatePolicy external_state_policy = 6;
  }
}
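// Illustrative example (not part of the schema): a complete Options textproto
// combining the messages defined above. All values are arbitrary sample
// settings, not recommended defaults.
//
//   deterministic: true
//   autotune_options { enabled: true cpu_budget: 8 ram_budget: 0 }
//   distribute_options { auto_shard_policy: AUTO }
//   optimization_options { apply_default_optimizations: true }
//   slack: false
//   threading_options { private_threadpool_size: 16 }
//   external_state_policy: POLICY_WARN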