syntax = "proto3";

package tensorflow.tpu;

import "tensorflow/core/protobuf/tpu/optimization_parameters.proto";
import "tensorflow/core/protobuf/tpu/tpu_embedding_output_layout.proto";

message TPUEmbeddingConfiguration {
  // Description of the various embedding tables.
  message TableDescriptor {
    // Name of the table.
    string name = 1;
    // Size of the vocabulary (i.e., number of rows) in the table.
    int32 vocabulary_size = 2;
    // The embedding dimension (i.e., the width of the embedding table).
    int32 dimension = 3;
    // Number of features mapped to this table.
    int32 num_features = 4;
    // Details of the learning algorithm used to update the embedding
    // parameters.
    OptimizationParameters optimization_parameters = 5;
  }
  repeated TableDescriptor table_descriptor = 1;

  // Mode. Should the embedding layer program be run for inference (just the
  // forward pass), training (both forward and backward passes), or just the
  // backward pass.
  enum Mode {
    UNSPECIFIED = 0;
    INFERENCE = 1;
    TRAINING = 2;
    BACKWARD_PASS_ONLY = 3;
  }
  Mode mode = 2;

  // Number of samples in each batch of embedding layer activations sent to
  // the TensorCore.
  int32 batch_size_per_tensor_core = 3;

  // Number of TPU hosts used for inference/training.
  int32 num_hosts = 4;

  // Number of TensorCores used for inference/training.
  int32 num_tensor_cores = 5;

  // Sharding strategy of the embedding tables among the hosts.
  // If the sharding_strategy is "mod", each id is assigned to host
  // "id % num_hosts". For instance, 13 ids are split across 5 hosts as:
  // [[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]].
  // If the sharding_strategy is "div", ids are assigned to hosts in a
  // contiguous manner. In this case, 13 ids are split across 5 hosts as:
  // [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]].
  // In both strategies, if the id space does not evenly divide the number
  // of hosts, each of the first "table_descriptor.vocabulary_size % num_hosts"
  // hosts will be assigned one more id.
  // This partitioning strategy exactly follows that of the embedding_lookup
  // TensorFlow function at tensorflow/python/ops/embedding_ops.py.
  enum ShardingStrategy {
    DIV_DEFAULT = 0;
    MOD = 1;
  }
  ShardingStrategy sharding_strategy = 6;

  // This parameter determines whether the execution of the sparse core is
  // pipelined with that of the TensorCore. It only affects results when
  // mode=TRAINING. If mode=INFERENCE or BACKWARD_PASS_ONLY, this parameter
  // does not affect execution and is hence a don't-care value.
  //
  // false: The execution of the sparse core is not pipelined with that of the
  // TensorCore. The forward pass of every step on the sparse core is executed
  // only after the backward pass of the previous step is complete, and the
  // backward pass on the sparse core is executed only after the embedding
  // gradients have been computed on the TensorCore on every step. This ensures
  // that the activations on every step observe the gradient updates from the
  // previous step on both the sparse core and the TensorCore.
  //
  // true: The execution of the sparse core is pipelined with that of the
  // TensorCore. The forward pass of every step on the sparse core can be
  // executed after the forward pass of the previous step is complete without
  // waiting for the backward pass. This improves the utilization of the sparse
  // core, allowing it to process step N+1 while the embedding gradients for
  // step N are computed on the TensorCore. The backward pass of every step on
  // the sparse core is executed directly after the forward pass for the next
  // step is complete. The drawback is that embedding activations for step N+1
  // do not observe the embedding gradient updates from step N. This could
  // affect model quality if steps N and N+1 involve the same set of embedding
  // IDs. However, since the embedding updates are sparse, this is generally
  // not considered a problem.
  bool pipeline_execution_with_tensor_core = 7;

  // Extended output layout information; if not provided, a compatibility mode
  // will use defaults that match the old layout. Providing a value for this
  // field is EXPERIMENTAL and most ways of filling it will probably break. Do
  // not set it unless you know what you are doing.
  TPUEmbeddingOutputLayout output_layout = 8;
}
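
// The sharding described for sharding_strategy above can be illustrated with
// the following sketch (kept in comments so this file remains a valid .proto;
// plain Python used only for documentation, and the helper name assign_host
// is hypothetical, not part of any TensorFlow API):
//
//   def assign_host(row_id, vocabulary_size, num_hosts, strategy):
//       # "mod": row i lives on host i % num_hosts.
//       if strategy == "MOD":
//           return row_id % num_hosts
//       # "div": rows are split into contiguous blocks; the first
//       # (vocabulary_size % num_hosts) hosts each get one extra row.
//       ids_per_host = vocabulary_size // num_hosts
//       extras = vocabulary_size % num_hosts
//       threshold = extras * (ids_per_host + 1)
//       if row_id < threshold:
//           return row_id // (ids_per_host + 1)
//       return extras + (row_id - threshold) // ids_per_host
//
//   # Example: 13 ids across 5 hosts reproduces the partitions shown above.
//   # MOD -> [[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]]
//   # DIV -> [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]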