/external/tensorflow/tensorflow/compiler/xla/service/ |
D | all_reduce_simplifier.cc | 39 [this](const HloInstruction* all_reduce) -> int64 { in Run() argument 40 if (all_reduce->replica_groups().empty()) { in Run() 44 for (const auto& group : all_reduce->replica_groups()) { in Run() 80 auto all_reduce = all_reduce_and_group_size.first; in Run() local 83 TF_RETURN_IF_ERROR(all_reduce->parent()->ReplaceInstruction( in Run() 84 all_reduce, all_reduce->mutable_operand(0))); in Run() 88 if (all_reduce->to_apply()->instruction_count() != 3 || in Run() 89 all_reduce->to_apply()->num_parameters() != 2) { in Run() 93 switch (all_reduce->to_apply()->root_instruction()->opcode()) { in Run() 98 all_reduce->parent()->AddInstruction(HloInstruction::CreateConstant( in Run() [all …]
|
D | while_loop_all_reduce_code_motion.cc | 66 MovableAllReduceContext IsAllReduceMovable(HloInstruction* all_reduce, in IsAllReduceMovable() argument 68 auto all_reduce_is_summation = [](HloInstruction* all_reduce) -> bool { in IsAllReduceMovable() argument 69 HloInstruction* to_apply_root = all_reduce->to_apply()->root_instruction(); in IsAllReduceMovable() 70 if (all_reduce->to_apply()->num_parameters() != 2) { in IsAllReduceMovable() 82 all_reduce->shape().element_type()) || in IsAllReduceMovable() 83 !all_reduce_is_summation(all_reduce)) { in IsAllReduceMovable() 271 HloInstruction* all_reduce, in IsAllReduceMovable() 280 to_visit.push(all_reduce); in IsAllReduceMovable() 343 return get_accumulation_contexts(all_reduce, while_body); in IsAllReduceMovable() 570 for (HloInstruction* all_reduce : while_body_all_reduces) { in Run() [all …]
|
D | ar_crs_combiner.cc | 96 auto all_reduce = Cast<HloAllReduceInstruction>(hlo); in HasCombinableReplicaGroup() local 97 auto replica_groups = all_reduce->replica_groups(); in HasCombinableReplicaGroup() 98 CHECK(all_reduce->IsCrossModuleAllReduce()); in HasCombinableReplicaGroup() 100 if (all_reduce->use_global_device_ids()) { in HasCombinableReplicaGroup() 543 auto all_reduce = pair.ar; in RewriteGraph() local 544 auto parent_computation = all_reduce->parent(); in RewriteGraph() 545 auto channel_id = all_reduce->channel_id(); in RewriteGraph() 546 auto prev = all_reduce->mutable_operand(0); in RewriteGraph() 547 auto next = all_reduce->users()[0]; in RewriteGraph() 548 TF_CHECK_OK(all_reduce->ReplaceUseWith(next, prev)); in RewriteGraph() [all …]
|
D | ar_crs_combiner.h | 97 ArCrsPair(HloInstruction* all_reduce, HloInstruction* cross_replica_sum, in ArCrsPair() 99 : ar(all_reduce), crs(cross_replica_sum), distance(dist) {} in ArCrsPair()
|
D | all_reduce_combiner.cc | 202 auto channel_id = [](const HloInstruction* all_reduce) { in CreateComputationGroups() argument 203 return all_reduce->IsCrossModuleAllReduce() in CreateComputationGroups() 204 ? all_reduce->channel_id().value() in CreateComputationGroups() 205 : -1 * all_reduce->unique_id(); in CreateComputationGroups()
|
D | hlo_reachability.cc | 141 for (HloInstruction* all_reduce : it->second) { in Build() 142 add_dependencies(all_reduce); in Build()
|
/external/tensorflow/tensorflow/compiler/xla/tests/ |
D | all_reduce_test.cc | 124 …(f32[] %p0.1), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() 125 … %convert.11), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() 126 ….12), index=0, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() 127 ….12), index=1, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() 128 …omputation.15, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() 129 ….19), index=1, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() 130 …e-element.21), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() 131 ….19), index=0, metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() 166 …(f32[] %p0.1), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() 167 … %convert.11), metadata={op_type="xla::cross_replica_sum" source_file="all_reduce@xla_model.py" so… in XLA_TEST_F() [all …]
|
/external/tensorflow/tensorflow/python/ops/ |
D | collective_ops_gpu_test.py | 73 collectives.append(collective_ops.all_reduce( 92 collective_ops.all_reduce( 115 collectives.append(collective_ops.all_reduce( 137 collectives.append(collective_ops.all_reduce( 239 collective_ops.all_reduce(in0, self._group_size, group_key, 243 collective_ops.all_reduce(in1, self._group_size, group_key, 259 c0 = collective_ops.all_reduce( 264 c1 = collective_ops.all_reduce( 283 collective_ops.all_reduce( 300 collective_ops.all_reduce( [all …]
|
D | collective_ops_test.py | 75 collective_ops.all_reduce( 105 all_reduces.append(collective_ops.all_reduce( 216 collective_op = collective_ops.all_reduce( 267 colred0 = collective_ops.all_reduce(input0, group_size, group_key, 269 colred1 = collective_ops.all_reduce(input1, group_size, group_key, 466 c0 = collective_ops.all_reduce( 471 c1 = collective_ops.all_reduce( 504 collective_ops.all_reduce(all_args[0], group_size, group_key, 508 collective_ops.all_reduce(all_args[1], group_size, group_key, 545 reduced_tensor1 = collective_ops.all_reduce( [all …]
|
D | collective_ops_xla_test.py | 65 reduced_tensor1 = collective_ops.all_reduce( 67 reduced_tensor2 = collective_ops.all_reduce(
|
D | collective_ops_benchmark.py | 56 r = collective_ops.all_reduce(t, group_size, group_key,
|
/external/tensorflow/tensorflow/python/kernel_tests/ |
D | collective_ops_multi_worker_test.py | 113 collective_ops.all_reduce( 186 collective_ops.all_reduce( 204 collective_ops.all_reduce( 214 collective_ops.all_reduce( 226 collective_ops.all_reduce( 257 collective_ops.all_reduce(in_tensor, group_size, group_key, 263 collective_ops.all_reduce(in_tensor, group_size, group_key, 271 collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key) 286 collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key)
|
D | collective_ops_test.py | 47 all_reduce = _collective_ops.all_reduce variable in CollectiveOpsV1 56 def all_reduce(t, group_size, group_key, instance_key, *args, **kwargs): member in CollectiveOpsV2 100 combinations.NamedObject('all_reduce', CollectiveOpsV1.all_reduce), 102 CollectiveOpsV2.all_reduce), 135 return collective_ops.all_reduce( 151 collective_ops.all_reduce( 159 collective_ops.all_reduce( 277 collective_ops.all_reduce( 281 collective_ops.all_reduce( 285 collective_ops.all_reduce( [all …]
|
/external/tensorflow/third_party/nccl/ |
D | archive.patch | 5 diff --git a/src/collectives/device/all_reduce.cu b/src/collectives/device/all_reduce.cu.cc 7 rename from src/collectives/device/all_reduce.cu 8 rename to src/collectives/device/all_reduce.cu.cc
|
/external/tensorflow/tensorflow/python/distribute/v1/ |
D | BUILD | 45 name = "all_reduce", 47 "all_reduce.py", 62 ":all_reduce",
|
/external/llvm-project/mlir/test/Dialect/GPU/ |
D | invalid.mlir | 235 %res = "gpu.all_reduce"(%arg0) ({}) : (f32) -> (f32) 243 %res = "gpu.all_reduce"(%arg0) ({ 253 %res = "gpu.all_reduce"(%arg0) ({}) {op = "foo"} : (f32) -> (f32) 261 %res = "gpu.all_reduce"(%arg0) ({}) {op = "and"} : (f32) -> (f32) 269 %res = "gpu.all_reduce"(%arg0) ({ 279 %res = "gpu.all_reduce"(%arg0) ({ 289 %res = "gpu.all_reduce"(%arg0) ({ 299 %res = "gpu.all_reduce"(%arg0) ({ 310 %res = "gpu.all_reduce"(%arg0) ({
|
D | multiple-all-reduce.mlir | 13 %reduced0 = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32) 15 %reduced1 = "gpu.all_reduce"(%val) ({}) { op = "mul" } : (f32) -> (f32)
|
/external/tensorflow/tensorflow/python/keras/layers/ |
D | normalization_v2.py | 154 y_sum = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM, local_sum) 155 y_squared_sum = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM, 157 global_batch_size = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM,
|
/external/tensorflow/tensorflow/core/api_def/python_api/ |
D | api_def_CollectiveReduce.pbtxt | 4 name: "collective.all_reduce"
|
/external/llvm-project/mlir/test/mlir-cuda-runner/ |
D | multiple-all-reduce.mlir | 53 %reduced0 = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32) 55 %reduced1 = "gpu.all_reduce"(%val) ({}) { op = "mul" } : (f32) -> (f32)
|
D | all-reduce-region.mlir | 15 %xor = "gpu.all_reduce"(%val) ({
|
D | all-reduce-op.mlir | 23 %sum = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32)
|
/external/tensorflow/tensorflow/python/distribute/ |
D | cross_device_utils.py | 335 def all_reduce(self, member in CollectiveReplicaLauncher 416 self.all_reduce(input_tensor, None, communication_hint, timeout)) 427 reduced = self.all_reduce( 554 reduced = self.all_reduce(
|
/external/tensorflow/tensorflow/tools/api/golden/v1/ |
D | tensorflow.distribute.-replica-context.pbtxt | 27 name: "all_reduce"
|
/external/tensorflow/tensorflow/tools/api/golden/v2/ |
D | tensorflow.distribute.-replica-context.pbtxt | 31 name: "all_reduce"
|