/external/tensorflow/tensorflow/c/eager/parallel_device/ |
D | parallel_device_testlib.h | 84 template <std::size_t num_replicas> 87 std::array<TensorHandlePtr, num_replicas>* components, TF_Status* status); 90 template <std::size_t num_replicas> 93 const std::array<TFE_TensorHandle*, num_replicas>& components, 116 template <std::size_t num_replicas> 119 const std::array<TFE_TensorHandle*, num_replicas>& components, in CreatePerDeviceValues() 124 TFE_OpSetAttrInt(op.get(), "N", num_replicas); in CreatePerDeviceValues() 125 for (int i = 0; i < num_replicas; ++i) { in CreatePerDeviceValues()
|
/external/tensorflow/tensorflow/core/kernels/data/experimental/ |
D | auto_shard_dataset_op.cc | 47 int64 index, num_workers, auto_shard_policy, num_replicas; in MakeDataset() local 58 num_replicas = num_replicas_; in MakeDataset() 61 num_replicas]() { in MakeDataset() 62 return CreateConfig(num_workers, index, auto_shard_policy, num_replicas); in MakeDataset() 75 int64 num_replicas) { in CreateConfig() argument 88 {kNumReplicas, num_replicas}}}; in CreateConfig()
|
D | rebatch_dataset_op.cc | 44 int64 num_replicas; in MakeDataset() local 46 ParseScalarArgument(ctx, "num_replicas", &num_replicas)); in MakeDataset() 48 ctx, num_replicas > 0, in MakeDataset() 51 new Dataset(ctx, input, num_replicas, output_types_, output_shapes_); in MakeDataset() 58 const int64 num_replicas, const DataTypeVector& output_types, in Dataset() argument 62 num_replicas_(num_replicas), in Dataset() 67 num_replicas))}}) { in Dataset() 104 Node* num_replicas = nullptr; in AsGraphDefInternal() local 105 TF_RETURN_IF_ERROR(b->AddScalar(num_replicas_, &num_replicas)); in AsGraphDefInternal() 107 b->AddDataset(this, {input_graph_node, num_replicas}, output)); in AsGraphDefInternal()
|
/external/tensorflow/tensorflow/python/tpu/ |
D | device_assignment.py | 118 def num_replicas(self) -> int: member in DeviceAssignment 182 num_replicas: int = 1) -> "DeviceAssignment": 184 num_replicas) 341 num_replicas: int = 1, 432 if num_replicas > max_replicas: 436 num_replicas, max_replicas, computation_shape, computation_stride, 444 if num_replicas > 0: 445 remaining_replicas = num_replicas 474 replica_offsets = np.full([num_replicas, topology_rank], -1, dtype=np.int32) 480 and num_replicas == max_replicas) # Full replication. [all …]
|
D | tpu_embedding_v2_correctness_test.py | 287 num_replicas = strategy.num_replicas_in_sync 315 (num_replicas // 2)), 317 (num_replicas // 2))), 318 axis=1).reshape([self.batch_size * num_replicas, 4]) 323 (activation_friends_gold0,) * num_replicas) 325 loss_gold = [loss_gold0] * num_replicas 339 global_batch_size = self.batch_size * num_replicas 487 num_replicas = strategy.num_replicas_in_sync 555 per_row_update = (0.3 * num_replicas, 556 0.3 * num_replicas, [all …]
|
/external/tensorflow/tensorflow/compiler/xla/service/ |
D | hlo_runner.cc | 210 for (int64 i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 224 for (int64 i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 252 int64 num_threads = (options.infeed != nullptr) ? options.num_replicas : 0; in ExecuteReplicatedImpl() 254 num_threads += options.num_replicas; in ExecuteReplicatedImpl() 262 for (int64 i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 280 for (int64 i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 309 for (int64 i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl() 335 options.num_replicas); in ExecuteReplicated() 337 LOG(INFO) << "Creating thread pool for " << options.num_replicas in ExecuteReplicated() 340 tensorflow::Env::Default(), "replicas", options.num_replicas); in ExecuteReplicated() [all …]
|
/external/tensorflow/tensorflow/python/data/experimental/ops/ |
D | distribute.py | 69 def __init__(self, input_dataset, num_workers, index, num_replicas=None): argument 79 num_replicas=num_replicas, 88 def _AutoShardDatasetV1(input_dataset, num_workers, index, num_replicas=None): # pylint: disable=i… argument 90 _AutoShardDataset(input_dataset, num_workers, index, num_replicas)) 244 def __init__(self, input_dataset, num_replicas): argument 270 if output_dims[0] is not None and output_dims[0] % num_replicas == 0: 271 return output_dims[0] // num_replicas 288 num_replicas=num_replicas,
|
/external/tensorflow/tensorflow/python/eager/benchmarks/resnet50/ |
D | resnet50_test_util.py | 51 num_replicas=1): argument 54 replica_str = '' if num_replicas == 1 else 'replicas_%d_' % num_replicas 57 extras = {'examples_per_sec': (num_replicas * batch_size) / avg_time}
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/utils/ |
D | tpu_rewrite_device_util.cc | 188 int num_replicas, int num_cores_per_replica, in GetFullMeshTPUExecutionDeviceAssignment() argument 194 if (num_replicas != 1 && num_replicas != num_tpu_devices) in GetFullMeshTPUExecutionDeviceAssignment() 196 num_tpu_devices, ", got ", num_replicas); in GetFullMeshTPUExecutionDeviceAssignment() 204 devices_and_hosts.reserve(num_replicas); in GetFullMeshTPUExecutionDeviceAssignment() 205 for (int i = 0; i < num_replicas; ++i) { in GetFullMeshTPUExecutionDeviceAssignment() 347 int num_replicas, int num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment() argument 358 num_replicas * num_cores_per_replica * kTPUTopologyRank; in GetGeneralTPUExecutionDeviceAssignment() 364 kTPUTopologyRank, " (", num_replicas, " * ", num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment() 381 num_replicas, llvm::SmallVector<TPUDeviceAndHost, 8>( in GetGeneralTPUExecutionDeviceAssignment() 383 xla::DeviceAssignment device_assignment(num_replicas, num_cores_per_replica); in GetGeneralTPUExecutionDeviceAssignment() [all …]
|
/external/tensorflow/tensorflow/compiler/xrt/kernels/ |
D | xrt_compile_ops.cc | 55 const xrt::DeviceAssignment& xrt_device_assignment, int num_replicas, in GenerateXlaDeviceAssignment() argument 68 if (num_replicas != computation_devices.replica_devices_size()) { in GenerateXlaDeviceAssignment() 72 num_replicas, in GenerateXlaDeviceAssignment() 132 int num_replicas = config.num_replicas() ? config.num_replicas() : 1; in Compile() local 162 build_options.set_num_replicas(num_replicas); in Compile() 170 xla::DeviceAssignment device_assignment(num_replicas, in Compile() 173 GenerateXlaDeviceAssignment(config.device_assignment(), num_replicas, in Compile()
|
D | tpu_compile_ops.cc | 92 tensorflow::tpu::TpuMeshStateInterface* mesh_state, int num_replicas, in CompilationCacheKey() argument 97 metadata.set_num_replicas(num_replicas); in CompilationCacheKey() 177 int num_replicas = config.num_replicas() ? config.num_replicas() : 1; in Compute() local 178 CHECK_GT(num_replicas, 0); in Compute() 183 computation_proto, mesh_state, num_replicas, num_cores_per_replica); in Compute()
|
/external/tensorflow/tensorflow/core/tpu/graph_rewrite/ |
D | encapsulate_tpu_computations_pass.cc | 457 int num_replicas; in MoveHeadOutsideCompilationToHost() local 459 GetNodeAttr(xla_node->attrs(), "num_replicas", &num_replicas)); in MoveHeadOutsideCompilationToHost() 461 (input_types.size() - num_distributed_vars) / num_replicas; in MoveHeadOutsideCompilationToHost() 465 for (int replica_id = 0; replica_id < num_replicas; replica_id++) { in MoveHeadOutsideCompilationToHost() 488 : (old_num_per_replica_inputs * num_replicas + in MoveHeadOutsideCompilationToHost() 552 new_input_types.reserve(num_replicas * new_num_per_replica_inputs + in MoveHeadOutsideCompilationToHost() 554 for (int replica_id = 0; replica_id < num_replicas; ++replica_id) { in MoveHeadOutsideCompilationToHost() 575 num_new_per_replica_input_types / num_replicas + num_distributed_vars + in MoveHeadOutsideCompilationToHost() 591 num_replicas * new_num_per_replica_inputs + num_other_inputs); in MoveHeadOutsideCompilationToHost() 593 num_replicas * new_num_per_replica_inputs + num_other_inputs - 1; in MoveHeadOutsideCompilationToHost() [all …]
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/tests/ |
D | tpu_cluster_formation.mlir | 9 …"tf.TPUReplicateMetadata"() {_tpu_replicate = "replicate", device = "device", num_replicas = 1, to… 20 // Test TPUReplicateMetadata ops `name` and `num_replicas` attributes are not 25 …a"() {_tpu_replicate = "replicate", device = "device", name = "name", num_replicas = 1, topology =… 30 // CHECK-NOT: num_replicas = 1 37 …"tf.TPUReplicateMetadata"() {_tpu_replicate = "replicate", device = "device", num_replicas = 1, to… 52 …"tf.TPUReplicateMetadata"() {_tpu_replicate = "replicate", device = "device", num_replicas = 1, to… 83 …"tf.TPUReplicateMetadata"() {_tpu_replicate = "replicate", device = "device", num_replicas = 1, to… 108 …"tf.TPUReplicateMetadata"() {_tpu_replicate = "replicate", device = "device", num_replicas = 1, to… 127 …"tf.TPUReplicateMetadata"() {_tpu_replicate = "replicate_1", device = "device_1", num_replicas = 1… 132 …"tf.TPUReplicateMetadata"() {_tpu_replicate = "replicate_0", device = "device_0", num_replicas = 1… [all …]
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/transforms/ |
D | tpu_cluster_formation.cc | 342 LogicalResult ReplicateCluster(tf_device::ClusterOp cluster, int num_replicas, in ReplicateCluster() argument 345 if (num_replicas == 1) return success(); in ReplicateCluster() 347 if (num_replicas < 1) in ReplicateCluster() 399 int num_inputs = is_packed ? 1 : num_replicas; in ReplicateCluster() 425 cluster.getLoc(), num_replicas, in ReplicateCluster() 441 std::next(replicate_op.result_begin(), idx * num_replicas), in ReplicateCluster() 442 std::next(replicate_op.result_begin(), (idx + 1) * num_replicas)); in ReplicateCluster() 455 if (def_num_results != num_replicas) in ReplicateCluster() 456 return def->emitOpError() << "requires " << num_replicas << " results"; in ReplicateCluster() 563 auto num_replicas = cluster_metadata->getSecond().get(kNumReplicasAttr); in FormClustersInBlock() local [all …]
|
D | tpu_variable_runtime_reformatting.cc | 180 int64_t num_replicas = replicate.n(); in AnnotateCompileOpAndGetExecuteArgToWhileArgsMapping() local 202 num_inputs = num_replicas; in AnnotateCompileOpAndGetExecuteArgToWhileArgsMapping() 275 int64_t num_replicas = replicate.n(); in AddInputsToReplicateOp() local 276 assert(new_inputs.size() == num_replicas); in AddInputsToReplicateOp() 284 .size() == num_replicas); in AddInputsToReplicateOp() 293 for (int64_t i = 0; i < num_replicas; ++i) { in AddInputsToReplicateOp() 310 replicate.getLoc(), num_replicas, devices, new_replicated_inputs, in AddInputsToReplicateOp() 384 int64_t num_replicas = replicate.n(); in HandleReplicateOp() local 385 if (num_replicas == 1) return; in HandleReplicateOp() 494 while_op.getLoc(), num_replicas, devices, unformat_replicate_operands, in HandleReplicateOp()
|
D | replicate_to_island.cc | 141 int num_replicas, llvm::SmallVectorImpl<tf_executor::IslandOp>& replicas) { in ExpandReplicateIntoReplicas() argument 142 replicas.reserve(num_replicas); in ExpandReplicateIntoReplicas() 159 for (int i : llvm::seq<int>(0, num_replicas)) { in ExpandReplicateIntoReplicas() 239 const int num_replicas = replicate_op.n(); in CreateIslandsFromReplicate() local 244 replicate_op, num_replicas, replicas))) in CreateIslandsFromReplicate() 253 replicas_outputs[num_replicas * replica_result_and_idx.index() + in CreateIslandsFromReplicate()
|
D | tpu_rewrite_pass.cc | 303 tf_device::ClusterFuncOp op, int num_replicas, int num_cores_per_replica, in SetMetadataProtoFromClusterFuncOp() argument 306 metadata->set_num_replicas(num_replicas); in SetMetadataProtoFromClusterFuncOp() 350 tf_device::ClusterFuncOp cluster_func, int num_replicas, in BuildCompileOp() argument 357 cluster_func, num_replicas, num_cores_per_replica, in BuildCompileOp() 418 const int num_replicas = tpu_devices.size(); in AssignDevicesToReplicate() local 424 devices_by_core.reserve(num_replicas); in AssignDevicesToReplicate() 425 for (int replica = 0; replica < num_replicas; ++replica) in AssignDevicesToReplicate() 437 hosts.reserve(num_replicas); in AssignDevicesToReplicate() 438 for (int replica = 0; replica < num_replicas; ++replica) in AssignDevicesToReplicate() 571 int num_replicas = 1; in Rewrite() local [all …]
|
/external/tensorflow/tensorflow/python/distribute/ |
D | strategy_combinations_test.py | 53 num_replicas = strategy.reduce( 55 self.assertEqual(self.evaluate(num_replicas), 2.) 69 num_replicas = strategy.reduce( 71 self.assertEqual(self.evaluate(num_replicas), 4.) 82 num_replicas = distribution.reduce( 84 self.assertEqual(2, self.evaluate(num_replicas))
|
/external/tensorflow/tensorflow/compiler/xla/tests/ |
D | hlo_test_base.cc | 207 int64 num_replicas, bool use_threads, bool run_hlo_passes) { in ExecuteReplicated() argument 209 options.num_replicas = num_replicas; in ExecuteReplicated() 220 int64 num_replicas, DeviceAssignment* device_assignment, in ExecuteReplicated() argument 223 options.num_replicas = num_replicas; in ExecuteReplicated() 237 int64 num_replicas, bool run_hlo_passes) { in ExecuteReplicated() argument 239 options.num_replicas = num_replicas; in ExecuteReplicated() 419 int64 num_replicas, in RunReplicated() argument 422 ParseAndReturnVerifiedModule(hlo_string, num_replicas); in RunReplicated() 445 options.num_replicas = num_replicas; in RunReplicated()
|
/external/tensorflow/tensorflow/compiler/xla/python/tpu_driver/client/ |
D | tpu_client_extension.cc | 38 [](PyTpuClient* client, int num_replicas, int num_partitions) in PYBIND11_MODULE() 43 num_replicas, num_partitions)); in PYBIND11_MODULE() 45 result.resize(num_replicas); in PYBIND11_MODULE() 46 for (int r = 0; r < num_replicas; ++r) { in PYBIND11_MODULE() 59 [](PyTpuClient* client, int num_replicas) in PYBIND11_MODULE() argument 63 num_replicas, /*num_partitions=*/1)); in PYBIND11_MODULE() 65 for (int i = 0; i < num_replicas; ++i) { in PYBIND11_MODULE()
|
D | tpu_client.cc | 134 int num_replicas, int num_partitions) const { in GetDefaultDeviceAssignment() argument 138 if (num_replicas * num_partitions <= local_device_count()) { in GetDefaultDeviceAssignment() 139 DeviceAssignment assignment(num_replicas, num_partitions); in GetDefaultDeviceAssignment() 140 for (int replica = 0; replica < num_replicas; ++replica) { in GetDefaultDeviceAssignment() 150 return placer.AssignDevices(num_replicas, num_partitions); in GetDefaultDeviceAssignment() 518 const int num_replicas = device_assignment_.replica_count(); in PyTpuExecutable() local 521 for (int replica = 0; replica < num_replicas; ++replica) { in PyTpuExecutable() 623 if (num_replicas() != 1) { in Execute() 626 num_replicas()); in Execute() 678 argument_handles.size(), num_local_devices, num_replicas(), in ExecuteOnLocalDevices() [all …]
|
/external/tensorflow/tensorflow/compiler/xla/client/ |
D | executable_build_options.cc | 63 int num_replicas) { in set_num_replicas() argument 64 num_replicas_ = num_replicas; in set_num_replicas() 125 execution_options.set_num_replicas(build_options.num_replicas()); in CreateExecutionOptions()
|
/external/tensorflow/tensorflow/core/ops/compat/ops_history_v2/ |
D | TPUReplicateMetadata.pbtxt | 4 name: "num_replicas" 65 name: "num_replicas" 133 name: "num_replicas" 208 name: "num_replicas"
|
/external/tensorflow/tensorflow/compiler/xla/pjrt/ |
D | tpu_client.cc | 83 int num_replicas, int num_partitions) const override; 101 int num_replicas, int num_partitions) const { in GetDefaultDeviceAssignment() argument 106 if (num_replicas * num_partitions <= num_local_devices) { in GetDefaultDeviceAssignment() 107 return tf_tpu::TpuComputationPlacer::AssignLocalDevices(host, num_replicas, in GetDefaultDeviceAssignment() 111 return PjRtStreamExecutorClient::GetDefaultDeviceAssignment(num_replicas, in GetDefaultDeviceAssignment()
|
/external/tensorflow/tensorflow/core/ops/compat/ops_history_v1/ |
D | TPUReplicatedOutput.pbtxt | 10 number_attr: "num_replicas" 13 name: "num_replicas"
|