Home
last modified time | relevance | path

Searched refs:num_replicas (Results 1 – 25 of 163) sorted by relevance

1234567

/external/tensorflow/tensorflow/python/distribute/
Dtpu_strategy_model_parallelism_test.py65 num_replicas = resolver.get_tpu_system_metadata().num_cores // 2
67 topology, num_replicas=num_replicas, computation_shape=[1, 1, 1, 2])
72 return strategy, num_replicas
82 strategy, num_replicas = get_tpu_strategy()
88 self.assertLen(strategy.experimental_local_results(v), num_replicas)
89 self.assertLen(strategy.experimental_local_results(w), num_replicas)
115 self.assertEqual(30. * num_replicas,
147 strategy, num_replicas = get_tpu_strategy()
161 self.assertEqual(20. * num_replicas,
170 self.assertEqual(30. * num_replicas,
[all …]
Dstrategy_combinations_test.py50 num_replicas = strategy.reduce(
52 self.assertEqual(self.evaluate(num_replicas), 2.)
66 num_replicas = strategy.reduce(
68 self.assertEqual(self.evaluate(num_replicas), 4.)
79 num_replicas = distribution.reduce(
81 self.assertEqual(2, self.evaluate(num_replicas))
/external/tensorflow/tensorflow/c/eager/parallel_device/
Dparallel_device_testlib.h84 template <std::size_t num_replicas>
87 std::array<TensorHandlePtr, num_replicas>* components, TF_Status* status);
90 template <std::size_t num_replicas>
93 const std::array<TFE_TensorHandle*, num_replicas>& components,
116 template <std::size_t num_replicas>
119 const std::array<TFE_TensorHandle*, num_replicas>& components, in CreatePerDeviceValues()
124 TFE_OpSetAttrInt(op.get(), "N", num_replicas); in CreatePerDeviceValues()
125 for (int i = 0; i < num_replicas; ++i) { in CreatePerDeviceValues()
/external/tensorflow/tensorflow/compiler/xla/service/
Dall_reduce_folder.cc38 int64_t num_replicas = 0; in FoldReplicaGroups() local
41 num_replicas = std::max(num_replicas, id); in FoldReplicaGroups()
44 num_replicas++; in FoldReplicaGroups()
51 std::vector<int> replica_group_no(num_replicas, -1); in FoldReplicaGroups()
69 std::vector<int64_t> contributing_replicas_set_id(num_replicas, 0); in FoldReplicaGroups()
73 std::vector<bool> contributors(num_replicas, false); in FoldReplicaGroups()
117 for (int64_t replica = 0; replica < num_replicas; ++replica) { in FoldReplicaGroups()
Dhlo_runner.cc222 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl()
236 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl()
267 options.infeed_values.size() == options.num_replicas); in ExecuteReplicatedImpl()
270 num_threads += options.num_replicas; in ExecuteReplicatedImpl()
278 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl()
298 options.outfeed_values->resize(options.num_replicas); in ExecuteReplicatedImpl()
300 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl()
330 exec_results.reserve(options.num_replicas); in ExecuteReplicatedImpl()
331 for (int64_t i = 0; i < options.num_replicas; ++i) { in ExecuteReplicatedImpl()
357 options.num_replicas); in ExecuteReplicated()
[all …]
/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dgpu_spmd_partitioner.h31 HloComputation* computation, int64_t num_partitions, int64_t num_replicas, in GpuSpmdPartitioningVisitor() argument
35 : spmd::SpmdPartitioningVisitor(computation, num_partitions, num_replicas, in GpuSpmdPartitioningVisitor()
44 GpuSpmdPartitioner(int64_t num_partitions, int64_t num_replicas) in GpuSpmdPartitioner() argument
45 : spmd::SpmdPartitioner(num_partitions, num_replicas, in GpuSpmdPartitioner()
50 HloComputation* computation, int64_t num_partitions, int64_t num_replicas,
/external/tensorflow/tensorflow/core/kernels/data/experimental/
Dauto_shard_dataset_op.cc47 int64_t index, num_workers, auto_shard_policy, num_replicas; in MakeDataset() local
63 num_replicas = num_replicas_; in MakeDataset()
66 num_replicas]() { in MakeDataset()
67 return CreateConfig(num_workers, index, auto_shard_policy, num_replicas); in MakeDataset()
83 int64_t num_replicas) { in CreateConfig() argument
96 {kNumReplicas, num_replicas}}}; in CreateConfig()
/external/tensorflow/tensorflow/compiler/xla/service/spmd/
Dstateful_rng_spmd_partitioner.h32 HloComputation* computation, int64_t num_partitions, int64_t num_replicas, in StatefulRngSpmdPartitioningVisitor() argument
36 : spmd::SpmdPartitioningVisitor(computation, num_partitions, num_replicas, in StatefulRngSpmdPartitioningVisitor()
45 StatefulRngSpmdPartitioner(int64_t num_partitions, int64_t num_replicas) in StatefulRngSpmdPartitioner() argument
46 : spmd::SpmdPartitioner(num_partitions, num_replicas, in StatefulRngSpmdPartitioner()
51 HloComputation* computation, int64_t num_partitions, int64_t num_replicas,
/external/tensorflow/tensorflow/python/tpu/
Ddevice_assignment.py113 def num_replicas(self) -> int: member in DeviceAssignment
177 num_replicas: int = 1) -> "DeviceAssignment":
179 num_replicas)
336 num_replicas: int = 1,
431 if num_replicas > max_replicas:
435 num_replicas, max_replicas, computation_shape, computation_stride,
443 if num_replicas > 0:
444 remaining_replicas = num_replicas
473 replica_offsets = np.full([num_replicas, topology_rank], -1, dtype=np.int32)
479 and num_replicas == max_replicas) # Full replication.
[all …]
/external/tensorflow/tensorflow/python/data/experimental/ops/
Ddistribute.py70 def __init__(self, input_dataset, num_workers, index, num_replicas=None): argument
80 num_replicas=num_replicas,
89 def _AutoShardDatasetV1(input_dataset, num_workers, index, num_replicas=None): # pylint: disable=i… argument
91 _AutoShardDataset(input_dataset, num_workers, index, num_replicas))
258 def __init__(self, input_dataset, num_replicas): argument
285 if output_dims[0] is not None and output_dims[0] % num_replicas == 0:
286 return output_dims[0] // num_replicas
307 num_replicas=num_replicas,
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/utils/
Dtpu_rewrite_device_util.cc184 int num_replicas, int num_cores_per_replica, in GetFullMeshTPUExecutionDeviceAssignment() argument
190 if (num_replicas != 1 && num_replicas != num_tpu_devices) in GetFullMeshTPUExecutionDeviceAssignment()
192 num_tpu_devices, ", got ", num_replicas); in GetFullMeshTPUExecutionDeviceAssignment()
200 devices_and_hosts.reserve(num_replicas); in GetFullMeshTPUExecutionDeviceAssignment()
201 for (int i = 0; i < num_replicas; ++i) { in GetFullMeshTPUExecutionDeviceAssignment()
343 int num_replicas, int num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment() argument
354 num_replicas * num_cores_per_replica * kTPUTopologyRank; in GetGeneralTPUExecutionDeviceAssignment()
360 kTPUTopologyRank, " (", num_replicas, " * ", num_cores_per_replica, in GetGeneralTPUExecutionDeviceAssignment()
377 num_replicas, llvm::SmallVector<TPUDeviceAndHost, 8>( in GetGeneralTPUExecutionDeviceAssignment()
379 xla::DeviceAssignment device_assignment(num_replicas, num_cores_per_replica); in GetGeneralTPUExecutionDeviceAssignment()
[all …]
/external/tensorflow/tensorflow/compiler/xrt/kernels/
Dxrt_compile_ops.cc55 const xrt::DeviceAssignment& xrt_device_assignment, int num_replicas, in GenerateXlaDeviceAssignment() argument
69 if (num_replicas != computation_devices.replica_devices_size()) { in GenerateXlaDeviceAssignment()
73 num_replicas, in GenerateXlaDeviceAssignment()
133 int num_replicas = config.num_replicas() ? config.num_replicas() : 1; in Compile() local
163 build_options.set_num_replicas(num_replicas); in Compile()
171 xla::DeviceAssignment device_assignment(num_replicas, in Compile()
174 GenerateXlaDeviceAssignment(config.device_assignment(), num_replicas, in Compile()
Dtpu_compile_ops.cc93 tensorflow::tpu::TpuMeshStateInterface* mesh_state, int num_replicas, in CompilationCacheKey() argument
98 metadata.set_num_replicas(num_replicas); in CompilationCacheKey()
178 int num_replicas = config.num_replicas() ? config.num_replicas() : 1; in Compute() local
179 CHECK_GT(num_replicas, 0); in Compute()
184 computation_proto, mesh_state, num_replicas, num_cores_per_replica); in Compute()
/external/tensorflow/tensorflow/core/tpu/graph_rewrite/
Dencapsulate_tpu_computations_pass.cc431 int num_replicas; in MoveHeadOutsideCompilationToHost() local
433 GetNodeAttr(xla_node->attrs(), "num_replicas", &num_replicas)); in MoveHeadOutsideCompilationToHost()
435 (input_types.size() - num_distributed_vars) / num_replicas; in MoveHeadOutsideCompilationToHost()
439 for (int replica_id = 0; replica_id < num_replicas; replica_id++) { in MoveHeadOutsideCompilationToHost()
460 : (old_num_per_replica_inputs * num_replicas + in MoveHeadOutsideCompilationToHost()
524 new_input_types.reserve(num_replicas * new_num_per_replica_inputs + in MoveHeadOutsideCompilationToHost()
526 for (int replica_id = 0; replica_id < num_replicas; ++replica_id) { in MoveHeadOutsideCompilationToHost()
547 num_new_per_replica_input_types / num_replicas + num_distributed_vars + in MoveHeadOutsideCompilationToHost()
563 num_replicas * new_num_per_replica_inputs + num_other_inputs); in MoveHeadOutsideCompilationToHost()
565 num_replicas * new_num_per_replica_inputs + num_other_inputs - 1; in MoveHeadOutsideCompilationToHost()
[all …]
/external/tensorflow/tensorflow/python/eager/benchmarks/resnet50/
Dresnet50_test_util.py47 num_replicas=1): argument
50 replica_str = '' if num_replicas == 1 else 'replicas_%d_' % num_replicas
53 extras = {'examples_per_sec': (num_replicas * batch_size) / avg_time}
/external/tensorflow/tensorflow/python/tpu/tests/
Dtpu_embedding_v2_enqueue_mode_test.py180 num_replicas = strategy.num_replicas_in_sync
194 update = ([[0.3 * num_replicas], [0.3 * num_replicas * 2]],
195 [[0.3 * num_replicas * 2], [0.3 * num_replicas]],
196 [[0.1 * num_replicas], [0.1 / 3 * num_replicas]])
Dtpu_embedding_v2_correctness_sequence_feature_test.py47 num_replicas = strategy.num_replicas_in_sync
117 per_row_update = (0.3 * num_replicas,
118 0.3 * num_replicas,
119 0.1 * num_replicas)
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/tests/
Dtpu_cluster_formation.mlir9 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =…
20 // Test TPUReplicateMetadata ops `name` and `num_replicas` attributes are not
25 …", _replication_info = "replicate", device = "device", name = "name", num_replicas = 1, topology =…
30 // CHECK-NOT: num_replicas = 1
37 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =…
52 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =…
83 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =…
108 …ice_type = "TPU", _replication_info = "replicate", device = "device", num_replicas = 1, topology =…
127 …type = "TPU", _replication_info = "replicate_1", device = "device_1", num_replicas = 1, topology =…
132 …type = "TPU", _replication_info = "replicate_0", device = "device_0", num_replicas = 1, topology =…
[all …]
/external/tensorflow/tensorflow/compiler/xla/client/
Dexecutable_build_options.cc63 int num_replicas) { in set_num_replicas() argument
64 num_replicas_ = num_replicas; in set_num_replicas()
115 output.set_num_replicas(num_replicas()); in ToProto()
144 output.set_num_replicas(input.num_replicas()); in ExecutableBuildOptionsFromProto()
195 execution_options.set_num_replicas(build_options.num_replicas()); in CreateExecutionOptions()
/external/tensorflow/tensorflow/compiler/xla/python/tpu_driver/client/
Dtpu_client_extension.cc48 [](PyTpuClient* client, int num_replicas, int num_partitions) in PYBIND11_MODULE()
53 num_replicas, num_partitions)); in PYBIND11_MODULE()
55 result.resize(num_replicas); in PYBIND11_MODULE()
56 for (int r = 0; r < num_replicas; ++r) { in PYBIND11_MODULE()
69 [](PyTpuClient* client, int num_replicas) in PYBIND11_MODULE() argument
73 num_replicas, /*num_partitions=*/1)); in PYBIND11_MODULE()
75 for (int i = 0; i < num_replicas; ++i) { in PYBIND11_MODULE()
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/transforms/
Dtpu_variable_runtime_reformatting.cc133 int64_t num_replicas = replicate.n(); in AnnotateCompileOpAndGetExecuteArgToWhileArgsMapping() local
155 num_inputs = num_replicas; in AnnotateCompileOpAndGetExecuteArgToWhileArgsMapping()
228 int64_t num_replicas = replicate.n(); in AddInputsToReplicateOp() local
229 assert(new_inputs.size() == num_replicas); in AddInputsToReplicateOp()
237 .size() == num_replicas); in AddInputsToReplicateOp()
246 for (int64_t i = 0; i < num_replicas; ++i) { in AddInputsToReplicateOp()
263 replicate.getLoc(), num_replicas, devices, new_replicated_inputs, in AddInputsToReplicateOp()
337 int64_t num_replicas = replicate.n(); in HandleReplicateOp() local
338 if (num_replicas == 1) return; in HandleReplicateOp()
447 while_op.getLoc(), num_replicas, devices, unformat_replicate_operands, in HandleReplicateOp()
Dreplicate_to_island.cc143 int num_replicas, llvm::SmallVectorImpl<tf_executor::IslandOp>& replicas) { in ExpandReplicateIntoReplicas() argument
144 replicas.reserve(num_replicas); in ExpandReplicateIntoReplicas()
161 for (int i : llvm::seq<int>(0, num_replicas)) { in ExpandReplicateIntoReplicas()
241 const int num_replicas = replicate_op.n(); in CreateIslandsFromReplicate() local
246 replicate_op, num_replicas, replicas))) in CreateIslandsFromReplicate()
255 replicas_outputs[num_replicas * replica_result_and_idx.index() + in CreateIslandsFromReplicate()
/external/tensorflow/tensorflow/dtensor/mlir/utils/
Dupdate_tpu_metadata.cc87 int num_replicas = mesh_config.num_devices(); in UpdateTPUCompileMetadata() local
88 metadata_proto.set_num_replicas(num_replicas); in UpdateTPUCompileMetadata()
125 device_assignment.set_replica_count(num_replicas); in UpdateTPUCompileMetadata()
130 for (int i = 0; i < num_replicas; ++i) { in UpdateTPUCompileMetadata()
/external/tensorflow/tensorflow/core/ops/compat/ops_history_v2/
DTPUReplicateMetadata.pbtxt4 name: "num_replicas"
65 name: "num_replicas"
133 name: "num_replicas"
208 name: "num_replicas"
290 name: "num_replicas"
/external/tensorflow/tensorflow/core/grappler/optimizers/data/
Dauto_shard.cc182 int64_t num_replicas, GraphDef* output,
655 Status RewriteRebatchV2ToV1(const NodeDef& sink_node, int64_t num_replicas, in RewriteRebatchV2ToV1() argument
674 if (num_replicas < 1) { in RewriteRebatchV2ToV1()
678 num_replicas, ", but expected to be >= 1."); in RewriteRebatchV2ToV1()
680 auto num_replicas_node = graph_utils::AddScalarConstNode(num_replicas, graph); in RewriteRebatchV2ToV1()
705 int64_t num_replicas, MutableGraphView* graph) { in ShardByData() argument
720 TF_RETURN_IF_ERROR(RewriteRebatchV2ToV1(*shard_before, num_replicas, graph)); in ShardByData()
727 int64_t num_replicas, MutableGraphView* graph) { in ShardByHint() argument
758 int64_t num_replicas, MutableGraphView* graph, in ApplyAutoShard() argument
769 return ShardByData(sink_node, num_workers, index, num_replicas, graph); in ApplyAutoShard()
[all …]

1234567