Home
last modified time | relevance | path

Searched refs:all_reduce (Results 1 – 25 of 63) sorted by relevance

123

/external/tensorflow/tensorflow/compiler/xla/service/
Dall_reduce_simplifier.cc39 [this](const HloInstruction* all_reduce) -> int64 { in Run() argument
40 if (all_reduce->replica_groups().empty()) { in Run()
44 for (const auto& group : all_reduce->replica_groups()) { in Run()
80 auto all_reduce = all_reduce_and_group_size.first; in Run() local
83 TF_RETURN_IF_ERROR(all_reduce->parent()->ReplaceInstruction( in Run()
84 all_reduce, all_reduce->mutable_operand(0))); in Run()
88 if (all_reduce->to_apply()->instruction_count() != 3 || in Run()
89 all_reduce->to_apply()->num_parameters() != 2) { in Run()
93 switch (all_reduce->to_apply()->root_instruction()->opcode()) { in Run()
98 all_reduce->parent()->AddInstruction(HloInstruction::CreateConstant( in Run()
[all …]
Dwhile_loop_all_reduce_code_motion.cc66 MovableAllReduceContext IsAllReduceMovable(HloInstruction* all_reduce, in IsAllReduceMovable() argument
68 auto all_reduce_is_summation = [](HloInstruction* all_reduce) -> bool { in IsAllReduceMovable() argument
69 HloInstruction* to_apply_root = all_reduce->to_apply()->root_instruction(); in IsAllReduceMovable()
70 if (all_reduce->to_apply()->num_parameters() != 2) { in IsAllReduceMovable()
82 all_reduce->shape().element_type()) || in IsAllReduceMovable()
83 !all_reduce_is_summation(all_reduce)) { in IsAllReduceMovable()
271 HloInstruction* all_reduce, in IsAllReduceMovable()
280 to_visit.push(all_reduce); in IsAllReduceMovable()
343 return get_accumulation_contexts(all_reduce, while_body); in IsAllReduceMovable()
570 for (HloInstruction* all_reduce : while_body_all_reduces) { in Run()
[all …]
Dar_crs_combiner.cc96 auto all_reduce = Cast<HloAllReduceInstruction>(hlo); in HasCombinableReplicaGroup() local
97 auto replica_groups = all_reduce->replica_groups(); in HasCombinableReplicaGroup()
98 CHECK(all_reduce->IsCrossModuleAllReduce()); in HasCombinableReplicaGroup()
100 if (all_reduce->use_global_device_ids()) { in HasCombinableReplicaGroup()
543 auto all_reduce = pair.ar; in RewriteGraph() local
544 auto parent_computation = all_reduce->parent(); in RewriteGraph()
545 auto channel_id = all_reduce->channel_id(); in RewriteGraph()
546 auto prev = all_reduce->mutable_operand(0); in RewriteGraph()
547 auto next = all_reduce->users()[0]; in RewriteGraph()
548 TF_CHECK_OK(all_reduce->ReplaceUseWith(next, prev)); in RewriteGraph()
[all …]
Dar_crs_combiner.h97 ArCrsPair(HloInstruction* all_reduce, HloInstruction* cross_replica_sum, in ArCrsPair()
99 : ar(all_reduce), crs(cross_replica_sum), distance(dist) {} in ArCrsPair()
Dall_reduce_combiner.cc202 auto channel_id = [](const HloInstruction* all_reduce) { in CreateComputationGroups() argument
203 return all_reduce->IsCrossModuleAllReduce() in CreateComputationGroups()
204 ? all_reduce->channel_id().value() in CreateComputationGroups()
205 : -1 * all_reduce->unique_id(); in CreateComputationGroups()
Dhlo_reachability.cc141 for (HloInstruction* all_reduce : it->second) { in Build()
142 add_dependencies(all_reduce); in Build()
/external/tensorflow/tensorflow/compiler/xla/tests/
Dall_reduce_test.cc124 …(f32[] %p0.1), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
125 … %convert.11), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
126 ….12), index=0, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
127 ….12), index=1, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
128 …omputation.15, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
129 ….19), index=1, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
130 …e-element.21), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
131 ….19), index=0, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
166 …(f32[] %p0.1), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
167 … %convert.11), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F()
[all …]
/external/tensorflow/tensorflow/python/ops/
Dcollective_ops_gpu_test.py73 collectives.append(collective_ops.all_reduce(
92 collective_ops.all_reduce(
115 collectives.append(collective_ops.all_reduce(
137 collectives.append(collective_ops.all_reduce(
239 collective_ops.all_reduce(in0, self._group_size, group_key,
243 collective_ops.all_reduce(in1, self._group_size, group_key,
259 c0 = collective_ops.all_reduce(
264 c1 = collective_ops.all_reduce(
283 collective_ops.all_reduce(
300 collective_ops.all_reduce(
[all …]
Dcollective_ops_test.py75 collective_ops.all_reduce(
105 all_reduces.append(collective_ops.all_reduce(
216 collective_op = collective_ops.all_reduce(
267 colred0 = collective_ops.all_reduce(input0, group_size, group_key,
269 colred1 = collective_ops.all_reduce(input1, group_size, group_key,
466 c0 = collective_ops.all_reduce(
471 c1 = collective_ops.all_reduce(
504 collective_ops.all_reduce(all_args[0], group_size, group_key,
508 collective_ops.all_reduce(all_args[1], group_size, group_key,
545 reduced_tensor1 = collective_ops.all_reduce(
[all …]
Dcollective_ops_xla_test.py65 reduced_tensor1 = collective_ops.all_reduce(
67 reduced_tensor2 = collective_ops.all_reduce(
Dcollective_ops_benchmark.py56 r = collective_ops.all_reduce(t, group_size, group_key,
/external/tensorflow/tensorflow/python/kernel_tests/
Dcollective_ops_multi_worker_test.py113 collective_ops.all_reduce(
186 collective_ops.all_reduce(
204 collective_ops.all_reduce(
214 collective_ops.all_reduce(
226 collective_ops.all_reduce(
257 collective_ops.all_reduce(in_tensor, group_size, group_key,
263 collective_ops.all_reduce(in_tensor, group_size, group_key,
271 collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key)
286 collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key)
Dcollective_ops_test.py47 all_reduce = _collective_ops.all_reduce variable in CollectiveOpsV1
56 def all_reduce(t, group_size, group_key, instance_key, *args, **kwargs): member in CollectiveOpsV2
100 combinations.NamedObject('all_reduce', CollectiveOpsV1.all_reduce),
102 CollectiveOpsV2.all_reduce),
135 return collective_ops.all_reduce(
151 collective_ops.all_reduce(
159 collective_ops.all_reduce(
277 collective_ops.all_reduce(
281 collective_ops.all_reduce(
285 collective_ops.all_reduce(
[all …]
/external/tensorflow/third_party/nccl/
Darchive.patch5 diff --git a/src/collectives/device/all_reduce.cu b/src/collectives/device/all_reduce.cu.cc
7 rename from src/collectives/device/all_reduce.cu
8 rename to src/collectives/device/all_reduce.cu.cc
/external/tensorflow/tensorflow/python/distribute/v1/
DBUILD45 name = "all_reduce",
47 "all_reduce.py",
62 ":all_reduce",
/external/llvm-project/mlir/test/Dialect/GPU/
Dinvalid.mlir235 %res = "gpu.all_reduce"(%arg0) ({}) : (f32) -> (f32)
243 %res = "gpu.all_reduce"(%arg0) ({
253 %res = "gpu.all_reduce"(%arg0) ({}) {op = "foo"} : (f32) -> (f32)
261 %res = "gpu.all_reduce"(%arg0) ({}) {op = "and"} : (f32) -> (f32)
269 %res = "gpu.all_reduce"(%arg0) ({
279 %res = "gpu.all_reduce"(%arg0) ({
289 %res = "gpu.all_reduce"(%arg0) ({
299 %res = "gpu.all_reduce"(%arg0) ({
310 %res = "gpu.all_reduce"(%arg0) ({
Dmultiple-all-reduce.mlir13 %reduced0 = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32)
15 %reduced1 = "gpu.all_reduce"(%val) ({}) { op = "mul" } : (f32) -> (f32)
/external/tensorflow/tensorflow/python/keras/layers/
Dnormalization_v2.py154 y_sum = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM, local_sum)
155 y_squared_sum = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM,
157 global_batch_size = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM,
/external/tensorflow/tensorflow/core/api_def/python_api/
Dapi_def_CollectiveReduce.pbtxt4 name: "collective.all_reduce"
/external/llvm-project/mlir/test/mlir-cuda-runner/
Dmultiple-all-reduce.mlir53 %reduced0 = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32)
55 %reduced1 = "gpu.all_reduce"(%val) ({}) { op = "mul" } : (f32) -> (f32)
Dall-reduce-region.mlir15 %xor = "gpu.all_reduce"(%val) ({
Dall-reduce-op.mlir23 %sum = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32)
/external/tensorflow/tensorflow/python/distribute/
Dcross_device_utils.py335 def all_reduce(self, member in CollectiveReplicaLauncher
416 self.all_reduce(input_tensor, None, communication_hint, timeout))
427 reduced = self.all_reduce(
554 reduced = self.all_reduce(
/external/tensorflow/tensorflow/tools/api/golden/v1/
Dtensorflow.distribute.-replica-context.pbtxt27 name: "all_reduce"
/external/tensorflow/tensorflow/tools/api/golden/v2/
Dtensorflow.distribute.-replica-context.pbtxt31 name: "all_reduce"

123