/external/tensorflow/tensorflow/python/distribute/ |
D | tpu_strategy_model_parallelism_test.py | 65 num_replicas = resolver.get_tpu_system_metadata().num_cores // 2 67 topology, num_replicas=num_replicas, computation_shape=[1, 1, 1, 2]) 72 return strategy, num_replicas 82 strategy, num_replicas = get_tpu_strategy() 88 self.assertLen(strategy.experimental_local_results(v), num_replicas) 89 self.assertLen(strategy.experimental_local_results(w), num_replicas) 115 self.assertEqual(30. * num_replicas, 147 strategy, num_replicas = get_tpu_strategy() 161 self.assertEqual(20. * num_replicas, 170 self.assertEqual(30. * num_replicas, [all …]
|
D | strategy_combinations_test.py | 50 num_replicas = strategy.reduce( 52 self.assertEqual(self.evaluate(num_replicas), 2.) 66 num_replicas = strategy.reduce( 68 self.assertEqual(self.evaluate(num_replicas), 4.) 79 num_replicas = distribution.reduce( 81 self.assertEqual(2, self.evaluate(num_replicas))
|
/external/tensorflow/tensorflow/c/eager/parallel_device/ |
D | parallel_device_testlib.h | 84 template <std::size_t num_replicas> 87 std::array<TensorHandlePtr, num_replicas>* components, TF_Status* status); 90 template <std::size_t num_replicas> 93 const std::array<TFE_TensorHandle*, num_replicas>& components, 116 template <std::size_t num_replicas> 119 const std::array<TFE_TensorHandle*, num_replicas>& components, in CreatePerDeviceValues() 124 TFE_OpSetAttrInt(op.get(), "N", num_replicas); in CreatePerDeviceValues() 125 for (int i = 0; i < num_replicas; ++i) { in CreatePerDeviceValues()
|
/external/tensorflow/tensorflow/compiler/xla/service/ |
D | all_reduce_folder.cc | 38 int64_t num_replicas = 0; in FoldReplicaGroups() local 41 num_replicas = std::max(num_replicas, id); in FoldReplicaGroups() 44 num_replicas++; in FoldReplicaGroups() 51 std::vector<int> replica_group_no(num_replicas, -1); in FoldReplicaGroups() 69 std::vector<int64_t> contributing_replicas_set_id(num_replicas, 0); in FoldReplicaGroups() 73 std::vector<bool> contributors(num_replicas, false); in FoldReplicaGroups() 117 for (int64_t replica = 0; replica < num_replicas; ++replica) { in FoldReplicaGroups()
|
D | hlo_runner.cc | 222 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 236 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 267 options.infeed_values.size() == options.num_replicas); in ExecuteReplicatedImpl() 270 num_threads += options.num_replicas; in ExecuteReplicatedImpl() 278 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 298 options.outfeed_values->resize(options.num_replicas); in ExecuteReplicatedImpl() 300 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 330 exec_results.reserve(options.num_replicas); in ExecuteReplicatedImpl() 331 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 357 options.num_replicas); in ExecuteReplicated() [all …]
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | gpu_spmd_partitioner.h | 31 HloComputation* computation, int64_t num_partitions, int64_t num_replicas, in GpuSpmdPartitioningVisitor() argument 35 : spmd::SpmdPartitioningVisitor(computation, num_partitions, num_replicas, in GpuSpmdPartitioningVisitor() 44 GpuSpmdPartitioner(int64_t num_partitions, int64_t num_replicas) in GpuSpmdPartitioner() argument 45 : spmd::SpmdPartitioner(num_partitions, num_replicas, in GpuSpmdPartitioner() 50 HloComputation* computation, int64_t num_partitions, int64_t num_replicas,
|
/external/tensorflow/tensorflow/core/kernels/data/experimental/ |
D | auto_shard_dataset_op.cc | 47 int64_t index, num_workers, auto_shard_policy, num_replicas; in MakeDataset() local 63 num_replicas = num_replicas_; in MakeDataset() 66 num_replicas]() { in MakeDataset() 67 return CreateConfig(num_workers, index, auto_shard_policy, num_replicas); in MakeDataset() 83 int64_t num_replicas) { in CreateConfig() argument 96 {kNumReplicas, num_replicas}}}; in CreateConfig()
|
/external/tensorflow/tensorflow/compiler/xla/service/spmd/ |
D | stateful_rng_spmd_partitioner.h | 32 HloComputation* computation, int64_t num_partitions, int64_t num_replicas, in StatefulRngSpmdPartitioningVisitor() argument 36 : spmd::SpmdPartitioningVisitor(computation, num_partitions, num_replicas, in StatefulRngSpmdPartitioningVisitor() 45 StatefulRngSpmdPartitioner(int64_t num_partitions, int64_t num_replicas) in StatefulRngSpmdPartitioner() argument 46 : spmd::SpmdPartitioner(num_partitions, num_replicas, in StatefulRngSpmdPartitioner() 51 HloComputation* computation, int64_t num_partitions, int64_t num_replicas,
|
/external/tensorflow/tensorflow/python/tpu/ |
D | device_assignment.py | 113 def num_replicas(self) -> int: member in DeviceAssignment 177 num_replicas: int = 1) -> "DeviceAssignment": 179 num_replicas) 336 num_replicas: int = 1, 431 if num_replicas > max_replicas: 435 num_replicas, max_replicas, computation_shape, computation_stride, 443 if num_replicas > 0: 444 remaining_replicas = num_replicas 473 replica_offsets = np.full([num_replicas, topology_rank], -1, dtype=np.int32) 479 and num_replicas == max_replicas) # Full replication. [all …]
|
/external/tensorflow/tensorflow/python/data/experimental/ops/ |
D | distribute.py | 70 def __init__(self, input_dataset, num_workers, index, num_replicas=None): argument 80 num_replicas=num_replicas, 89 def _AutoShardDatasetV1(input_dataset, num_workers, index, num_replicas=None): # pylint: disable=i… argument 91 _AutoShardDataset(input_dataset, num_workers, index, num_replicas)) 258 def __init__(self, input_dataset, num_replicas): argument 285 if output_dims[0] is not None and output_dims[0] % num_replicas == 0: 286 return output_dims[0] // num_replicas 307 num_replicas=num_replicas,
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/utils/ |
D | tpu_rewrite_device_util.cc | 184 int num_replicas, int num_cores_per_replica, in GetFullMeshTPUExecutionDeviceAssignment() argument 190 if (num_replicas != 1 && num_replicas != num_tpu_devices) in GetFullMeshTPUExecutionDeviceAssignment() 192 num_tpu_devices, ", got ", num_replicas); in GetFullMeshTPUExecutionDeviceAssignment() 200 devices_and_hosts.reserve(num_replicas); in GetFullMeshTPUExecutionDeviceAssignment() 201 for (int i = 0; i < num_replicas; ++i) { in GetFullMeshTPUExecutionDeviceAssignment() 343 int num_replicas, int num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment() argument 354 num_replicas * num_cores_per_replica * kTPUTopologyRank; in GetGeneralTPUExecutionDeviceAssignment() 360 kTPUTopologyRank, " (", num_replicas, " * ", num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment() 377 num_replicas, llvm::SmallVector<TPUDeviceAndHost, 8>( in GetGeneralTPUExecutionDeviceAssignment() 379 xla::DeviceAssignment device_assignment(num_replicas, num_cores_per_replica); in GetGeneralTPUExecutionDeviceAssignment() [all …]
|
/external/tensorflow/tensorflow/compiler/xrt/kernels/ |
D | xrt_compile_ops.cc | 55 const xrt::DeviceAssignment& xrt_device_assignment, int num_replicas, in GenerateXlaDeviceAssignment() argument 69 if (num_replicas != computation_devices.replica_devices_size()) { in GenerateXlaDeviceAssignment() 73 num_replicas, in GenerateXlaDeviceAssignment() 133 int num_replicas = config.num_replicas() ? config.num_replicas() : 1; in Compile() local 163 build_options.set_num_replicas(num_replicas); in Compile() 171 xla::DeviceAssignment device_assignment(num_replicas, in Compile() 174 GenerateXlaDeviceAssignment(config.device_assignment(), num_replicas, in Compile()
|
D | tpu_compile_ops.cc | 93 tensorflow::tpu::TpuMeshStateInterface* mesh_state, int num_replicas, in CompilationCacheKey() argument 98 metadata.set_num_replicas(num_replicas); in CompilationCacheKey() 178 int num_replicas = config.num_replicas() ? config.num_replicas() : 1; in Compute() local 179 CHECK_GT(num_replicas, 0); in Compute() 184 computation_proto, mesh_state, num_replicas, num_cores_per_replica); in Compute()
|
/external/tensorflow/tensorflow/core/tpu/graph_rewrite/ |
D | encapsulate_tpu_computations_pass.cc | 431 int num_replicas; in MoveHeadOutsideCompilationToHost() local 433 GetNodeAttr(xla_node->attrs(), "num_replicas", &num_replicas)); in MoveHeadOutsideCompilationToHost() 435 (input_types.size() - num_distributed_vars) / num_replicas; in MoveHeadOutsideCompilationToHost() 439 for (int replica_id = 0; replica_id < num_replicas; replica_id++) { in MoveHeadOutsideCompilationToHost() 460 : (old_num_per_replica_inputs * num_replicas + in MoveHeadOutsideCompilationToHost() 524 new_input_types.reserve(num_replicas * new_num_per_replica_inputs + in MoveHeadOutsideCompilationToHost() 526 for (int replica_id = 0; replica_id < num_replicas; ++replica_id) { in MoveHeadOutsideCompilationToHost() 547 num_new_per_replica_input_types / num_replicas + num_distributed_vars + in MoveHeadOutsideCompilationToHost() 563 num_replicas * new_num_per_replica_inputs + num_other_inputs); in MoveHeadOutsideCompilationToHost() 565 num_replicas * new_num_per_replica_inputs + num_other_inputs - 1; in MoveHeadOutsideCompilationToHost() [all …]
|
/external/tensorflow/tensorflow/python/eager/benchmarks/resnet50/ |
D | resnet50_test_util.py | 47 num_replicas=1): argument 50 replica_str = '' if num_replicas == 1 else 'replicas_%d_' % num_replicas 53 extras = {'examples_per_sec': (num_replicas * batch_size) / avg_time}
|
/external/tensorflow/tensorflow/python/tpu/tests/ |
D | tpu_embedding_v2_enqueue_mode_test.py | 180 num_replicas = strategy.num_replicas_in_sync 194 update = ([[0.3 * num_replicas], [0.3 * num_replicas * 2]], 195 [[0.3 * num_replicas * 2], [0.3 * num_replicas]], 196 [[0.1 * num_replicas], [0.1 / 3 * num_replicas]])
|
D | tpu_embedding_v2_correctness_sequence_feature_test.py | 47 num_replicas = strategy.num_replicas_in_sync 117 per_row_update = (0.3 * num_replicas, 118 0.3 * num_replicas, 119 0.1 * num_replicas)
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/tests/ |
D | tpu_cluster_formation.mlir | 9 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =… 20 // Test TPUReplicateMetadata ops `name` and `num_replicas` attributes are not 25 …", _replication_info = "replicate", device = "device", name = "name", num_replicas = 1, topology =… 30 // CHECK-NOT: num_replicas = 1 37 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =… 52 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =… 83 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =… 108 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =… 127 …type = "TPU", _replication_info = "replicate_1", device = "device_1", num_replicas = 1, topology =… 132 …type = "TPU", _replication_info = "replicate_0", device = "device_0", num_replicas = 1, topology =… [all …]
|
/external/tensorflow/tensorflow/compiler/xla/client/ |
D | executable_build_options.cc | 63 int num_replicas) { in set_num_replicas() argument 64 num_replicas_ = num_replicas; in set_num_replicas() 115 output.set_num_replicas(num_replicas()); in ToProto() 144 output.set_num_replicas(input.num_replicas()); in ExecutableBuildOptionsFromProto() 195 execution_options.set_num_replicas(build_options.num_replicas()); in CreateExecutionOptions()
|
/external/tensorflow/tensorflow/compiler/xla/python/tpu_driver/client/ |
D | tpu_client_extension.cc | 48 [](PyTpuClient* client, int num_replicas, int num_partitions) in PYBIND11_MODULE() 53 num_replicas, num_partitions)); in PYBIND11_MODULE() 55 result.resize(num_replicas); in PYBIND11_MODULE() 56 for (int r = 0; r < num_replicas; ++r) { in PYBIND11_MODULE() 69 [](PyTpuClient* client, int num_replicas) in PYBIND11_MODULE() argument 73 num_replicas, /*num_partitions=*/1)); in PYBIND11_MODULE() 75 for (int i = 0; i < num_replicas; ++i) { in PYBIND11_MODULE()
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/transforms/ |
D | tpu_variable_runtime_reformatting.cc | 133 int64_t num_replicas = replicate.n(); in AnnotateCompileOpAndGetExecuteArgToWhileArgsMapping() local 155 num_inputs = num_replicas; in AnnotateCompileOpAndGetExecuteArgToWhileArgsMapping() 228 int64_t num_replicas = replicate.n(); in AddInputsToReplicateOp() local 229 assert(new_inputs.size() == num_replicas); in AddInputsToReplicateOp() 237 .size() == num_replicas); in AddInputsToReplicateOp() 246 for (int64_t i = 0; i < num_replicas; ++i) { in AddInputsToReplicateOp() 263 replicate.getLoc(), num_replicas, devices, new_replicated_inputs, in AddInputsToReplicateOp() 337 int64_t num_replicas = replicate.n(); in HandleReplicateOp() local 338 if (num_replicas == 1) return; in HandleReplicateOp() 447 while_op.getLoc(), num_replicas, devices, unformat_replicate_operands, in HandleReplicateOp()
|
D | replicate_to_island.cc | 143 int num_replicas, llvm::SmallVectorImpl<tf_executor::IslandOp>& replicas) { in ExpandReplicateIntoReplicas() argument 144 replicas.reserve(num_replicas); in ExpandReplicateIntoReplicas() 161 for (int i : llvm::seq<int>(0, num_replicas)) { in ExpandReplicateIntoReplicas() 241 const int num_replicas = replicate_op.n(); in CreateIslandsFromReplicate() local 246 replicate_op, num_replicas, replicas))) in CreateIslandsFromReplicate() 255 replicas_outputs[num_replicas * replica_result_and_idx.index() + in CreateIslandsFromReplicate()
|
/external/tensorflow/tensorflow/dtensor/mlir/utils/ |
D | update_tpu_metadata.cc | 87 int num_replicas = mesh_config.num_devices(); in UpdateTPUCompileMetadata() local 88 metadata_proto.set_num_replicas(num_replicas); in UpdateTPUCompileMetadata() 125 device_assignment.set_replica_count(num_replicas); in UpdateTPUCompileMetadata() 130 for (int i = 0; i < num_replicas; ++i) { in UpdateTPUCompileMetadata()
|
/external/tensorflow/tensorflow/core/ops/compat/ops_history_v2/ |
D | TPUReplicateMetadata.pbtxt | 4 name: "num_replicas" 65 name: "num_replicas" 133 name: "num_replicas" 208 name: "num_replicas" 290 name: "num_replicas"
|
/external/tensorflow/tensorflow/core/grappler/optimizers/data/ |
D | auto_shard.cc | 182 int64_t num_replicas, GraphDef* output, 655 Status RewriteRebatchV2ToV1(const NodeDef& sink_node, int64_t num_replicas, in RewriteRebatchV2ToV1() argument 674 if (num_replicas < 1) { in RewriteRebatchV2ToV1() 678 num_replicas, ", but expected to be >= 1."); in RewriteRebatchV2ToV1() 680 auto num_replicas_node = graph_utils::AddScalarConstNode(num_replicas, graph); in RewriteRebatchV2ToV1() 705 int64_t num_replicas, MutableGraphView* graph) { in ShardByData() argument 720 TF_RETURN_IF_ERROR(RewriteRebatchV2ToV1(*shard_before, num_replicas, graph)); in ShardByData() 727 int64_t num_replicas, MutableGraphView* graph) { in ShardByHint() argument 758 int64_t num_replicas, MutableGraphView* graph, in ApplyAutoShard() argument 769 return ShardByData(sink_node, num_workers, index, num_replicas, graph); in ApplyAutoShard() [all …]
|