/external/tensorflow/tensorflow/python/ops/ |
D | collective_ops.py
    24   def all_reduce(t, group_size, group_key, instance_key, merge_op, final_op,  [argument]
    50   if group_size <= 1:
    53   group_size=group_size,
    61   def all_gather(t, group_size, group_key, instance_key):  [argument]
    79   if group_size <= 1:
    82   output_shape = [dims[0] * group_size] + dims[1:]
    85   group_size=group_size,
    90   def broadcast_send(t, shape, dtype, group_size, group_key, instance_key):  [argument]
    121  if group_size <= 1:
    132  group_size=group_size,
    [all …]
|
D | collective_ops_test.py
    53   group_size = 2
    56   config = config_pb2.ConfigProto(device_count={'CPU': group_size})
    59   for cpu in range(group_size):
    64   in_tensor, group_size, group_key, instance, 'Add', 'Div'))
    66   for i in range(group_size * num_instances):
|
/external/tensorflow/tensorflow/core/kernels/ |
D | mkl_batch_matmul_op.cc
    115  std::vector<MKL_INT> group_size(1, batch_size);  in Compute()  [local]
    131  &group_size[0]);  in Compute()
    144  const MKL_INT group_count, const MKL_INT *group_size) {  in MklCblasGemmBatch()  [argument]
    146  group_size[0], TransA ? CblasTrans : CblasNoTrans);  in MklCblasGemmBatch()
    148  group_size[0], TransB ? CblasTrans : CblasNoTrans);  in MklCblasGemmBatch()
    149  std::vector<float> alpha_Array(group_size[0], 1.0);  in MklCblasGemmBatch()
    150  std::vector<float> beta_Array(group_size[0], 0.0);  in MklCblasGemmBatch()
    154  group_count, group_size);  in MklCblasGemmBatch()
    163  const MKL_INT group_count, const MKL_INT *group_size) {  in MklCblasGemmBatch()  [argument]
    165  group_size[0], TransA ? CblasTrans : CblasNoTrans);  in MklCblasGemmBatch()
    [all …]
|
D | collective_nccl_reducer.cc
    82   Tensor group_size;  in Run()  [local]
    92   group_size_val.scalar<float>()() = col_params_->group.group_size;  in Run()
    95   group_size_val.scalar<double>()() = col_params_->group.group_size;  in Run()
    98   group_size_val.scalar<int32>()() = col_params_->group.group_size;  in Run()
    101  group_size_val.scalar<int64>()() = col_params_->group.group_size;  in Run()
    107  group_size = Tensor(  in Run()
    113  &group_size_val, col_ctx_->device, &group_size,  in Run()
    135  const int num_global_devices = col_params_->group.group_size;  in Run()
    197  col_params_->final_op.get(), col_ctx_->output, &group_size);  in Run()
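
The reducer above materializes group_size as a scalar Tensor in the reduction dtype so that, when the final op is a mean, the NCCL sum can be divided by the number of participants. A minimal sketch of that final division (plain C++ with hypothetical names, not the TensorFlow kernel):

    #include <cstddef>

    // After an all-reduce has summed buf across group_size ranks, divide
    // element-wise to turn the sum into a mean. T stands for the reduction
    // dtype (float/double/int32/int64 in the listing above).
    template <typename T>
    void FinalizeMean(T* buf, std::size_t n, int group_size) {
      const T divisor = static_cast<T>(group_size);
      for (std::size_t i = 0; i < n; ++i) buf[i] /= divisor;
    }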
|
D | roll_op.cc
    160  int64 group_size = 0;  in DoRollWithMemcpy()  [local]
    164  group_size = isd_indx_skip * isd_stride + remainder_offset;  in DoRollWithMemcpy()
    167  group_size = isd_indx_skip * isd_stride + remainder_offset;  in DoRollWithMemcpy()
    173  memcpy(out_ptr, in_ptr, group_size * sizeof(T));  in DoRollWithMemcpy()
    176  i += group_size;  in DoRollWithMemcpy()
    177  out_ptr += group_size;  in DoRollWithMemcpy()
    178  in_ptr += group_size;  in DoRollWithMemcpy()
    206  group_size = isd_indx_skip * isd_stride;  in DoRollWithMemcpy()
    209  group_size = isd_indx_skip * isd_stride;  in DoRollWithMemcpy()
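
DoRollWithMemcpy moves whole contiguous runs at once; group_size is the number of elements that can be copied with a single memcpy before the output position wraps or jumps. A much-simplified 1-D roll built on the same idea (assumed helper, trivially copyable T only):

    #include <cstring>
    #include <vector>

    // Roll a 1-D array right by shift using two contiguous memcpy groups:
    // the head of the input lands at offset shift, the tail wraps to offset 0.
    template <typename T>
    std::vector<T> Roll1D(const std::vector<T>& in, std::size_t shift) {
      const std::size_t n = in.size();
      std::vector<T> out(n);
      if (n == 0) return out;
      shift %= n;
      const std::size_t group_size = n - shift;  // longest contiguous copy
      std::memcpy(out.data() + shift, in.data(), group_size * sizeof(T));
      std::memcpy(out.data(), in.data() + group_size, shift * sizeof(T));
      return out;
    }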
|
D | sparse_softmax_op.cc
    103  const int group_size = group_vals.size();  in Compute()  [local]
    109  Eigen::Tensor<T, 1, Eigen::RowMajor> tmp(group_size);  in Compute()
    117  output_flat.data() + count, group_size);  in Compute()
    120  count += group_size;  in Compute()
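
Each sparse group of group_size values receives its own softmax, written back at the running offset count. A standalone, numerically stable version of that per-group step (plain C++ rather than the Eigen expression used in the kernel; group_size must be at least 1):

    #include <algorithm>
    #include <cmath>

    // Numerically stable softmax over one group of group_size values,
    // written to out at the same offset.
    void GroupSoftmax(const float* vals, float* out, int group_size) {
      const float max_val = *std::max_element(vals, vals + group_size);
      float sum = 0.f;
      for (int i = 0; i < group_size; ++i) {
        out[i] = std::exp(vals[i] - max_val);  // shift by the max for stability
        sum += out[i];
      }
      for (int i = 0; i < group_size; ++i) out[i] /= sum;
    }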
|
D | bias_op_gpu.cu.cc
    151  int group_size) {  in BiasGradNCHW_SharedAtomics()  [argument]
    168  index < total_count; index += blockDim.x * group_size) {  in BiasGradNCHW_SharedAtomics()
    224  int group_size = (config.block_count + bias_size - 1) / bias_size;  in compute()  [local]
    225  config.block_count = group_size * bias_size;  in compute()
    232  bias_backprop, batch, bias_size, image_size, group_size));  in compute()
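
Lines 224-225 round the launch configuration so every bias channel gets the same number of thread blocks: group_size is the per-channel block count and block_count becomes an exact multiple of bias_size. The same arithmetic in isolation (hypothetical struct and names):

    // Give each of bias_size channels the same number of thread blocks:
    // group_size = ceil(block_count / bias_size), then round block_count up
    // to an exact multiple of bias_size.
    struct LaunchConfig { int block_count; };

    int RoundBlocksPerChannel(LaunchConfig* config, int bias_size) {
      const int group_size = (config->block_count + bias_size - 1) / bias_size;
      config->block_count = group_size * bias_size;
      return group_size;
    }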
|
D | collective_ops.cc
    40   if (col_params_.group.group_size >  in CanProceedWithCompute()
    76   OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size));  in CollectiveGatherOpKernel()
    127  OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size));  in CollectiveReduceOpKernel()
    236  OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size));  in CollectiveBcastSendOpKernel()
    305  OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size));  in CollectiveBcastRecvOpKernel()
|
D | collective_nccl_reducer_test.cc
    120  col_params_.group.group_size = num_ranks;  in Init()
    206  .Attr("group_size", params.group.group_size)  in GetCollectiveReduce()
    226  col_params_.group.group_size = parent_->col_params_.group.group_size;  in DeviceInstance()
|
D | random_op.cc
    687  const size_t group_size = device.maxSyclThreadsPerBlock();  in operator ()()  [local]
    688  const size_t group_count = (size + group_size - 1) / group_size;  in operator ()()
    699  sycl::nd_range<1>(sycl::range<1>(group_count * group_size),  in operator ()()
    700  sycl::range<1>(group_size)),  in operator ()()
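
The SYCL path rounds the global range up to a whole number of work-groups, so group_count * group_size can exceed size and the kernel has to guard out-of-range indices. A sketch of that sizing rule (hypothetical helper, not the SYCL code itself):

    #include <cstddef>

    struct NdRange1D {
      std::size_t global_size;  // group_count * group_size, >= size
      std::size_t local_size;   // work-items per group
    };

    // Round the launch up to whole work-groups; the kernel must still check
    // index < size because global_size can exceed size.
    NdRange1D MakeNdRange(std::size_t size, std::size_t group_size) {
      const std::size_t group_count = (size + group_size - 1) / group_size;
      return {group_count * group_size, group_size};
    }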
|
/external/tensorflow/tensorflow/core/common_runtime/ |
D | collective_param_resolver_local.cc
    69   gr->group.group_size = cp->group.group_size;  in CompleteGroupLocal()
    73   << " group_size=" << gr->group.group_size;  in CompleteGroupLocal()
    89   } else if (cp->group.group_size != gr->group.group_size) {  in CompleteGroupLocal()
    92   cp->group.group_size, " and group_key", cp->group.group_key,  in CompleteGroupLocal()
    93   " but that group has size ", gr->group.group_size);  in CompleteGroupLocal()
    100  if (gr->device_set.size() == gr->group.group_size) {  in CompleteGroupLocal()
    119  << " group_size=" << gr->group.group_size  in CompleteGroupLocal()
    127  VLOG(2) << "group_size " << gr->group.group_size << " set size "  in CompleteGroupLocal()
    130  if (gr->device_set.size() < gr->group.group_size) {  in CompleteGroupLocal()
    134  CHECK_EQ(gr->device_set.size(), gr->group.group_size);  in CompleteGroupLocal()
    [all …]
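
CompleteGroupLocal keeps admitting devices until the set it has seen reaches the declared group_size, and rejects a joiner whose declared size disagrees with the existing group record. A stripped-down sketch of that bookkeeping (hypothetical types, not the TensorFlow resolver):

    #include <cstddef>
    #include <set>
    #include <string>

    struct GroupRec {
      int group_size = 0;               // declared size of the collective group
      std::set<std::string> devices;    // devices that have joined so far
    };

    // Returns true once every expected member has joined. A joiner whose
    // declared size disagrees with the record is reported through *ok.
    bool JoinGroup(GroupRec* gr, const std::string& device, int group_size,
                   bool* ok) {
      if (gr->devices.empty()) {
        gr->group_size = group_size;    // first joiner fixes the group size
      } else if (group_size != gr->group_size) {
        *ok = false;
        return false;
      }
      *ok = true;
      gr->devices.insert(device);
      return gr->devices.size() == static_cast<std::size_t>(gr->group_size);
    }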
|
D | hierarchical_tree_broadcaster_test.cc
    60   cp.group.group_size = D; \
    262  col_params_.group.group_size = num_workers * num_devices_per_worker;  in Init()
    380  .Attr("group_size", params.group.group_size)  in GetCollectiveBcastSend()
    398  .Attr("group_size", params.group.group_size)  in GetCollectiveBcastRecv()
    538  cp->group.group_size = num_tasks * num_gpus;  in PrepColParamsForSubdivPermsTest()
    567  col_params_.group.group_size = parent_->col_params_.group.group_size;  in DeviceInstance()
    577  int group_size = col_params_.group.group_size;  in DeviceInstance()  [local]
    578  CHECK_EQ(group_size, col_params_.instance.device_names.size());  in DeviceInstance()
    588  for (int i = 0; i < group_size; i++) {  in DeviceInstance()
    790  cp.group.group_size = 0;  in TEST_F()
    [all …]
|
D | hierarchical_tree_broadcaster.cc
    91   for (int di = 1; di < col_params->group.group_size; ++di) {  in InitializeCollectiveParams()
    247  int group_size = 0;  in TreeSendTo()  [local]
    250  group_size++;  in TreeSendTo()
    265  if (group_size > 1) {  in TreeSendTo()
    268  if (group_size > 2 && source_rank != 1) {  in TreeSendTo()
    273  if (successor_rank < group_size && successor_rank != source_rank) {  in TreeSendTo()
|
D | ring_gatherer_test.cc
    179  col_params_.group.group_size = num_workers * num_devices;  in Init()
    352  .Attr("group_size", params.group.group_size)  in GetCollectiveGather()
    390  col_params_.group.group_size = parent_->col_params_.group.group_size;  in DeviceInstance()
    396  int group_size = col_params_.group.group_size;  in DeviceInstance()  [local]
    397  CHECK_EQ(group_size,  in DeviceInstance()
    534  cp.group.group_size = kNumDevs;  in SetUpCollectiveParams()
|
D | ring_reducer_test.cc
    201  col_params_.group.group_size = num_workers * num_devices;  in Init()
    375  .Attr("group_size", params.group.group_size)  in GetCollectiveReduce()
    414  col_params_.group.group_size = parent_->col_params_.group.group_size;  in DeviceInstance()
    420  int group_size = col_params_.group.group_size;  in DeviceInstance()  [local]
    421  CHECK_EQ(group_size,  in DeviceInstance()
    561  cp.group.group_size = kNumDevs;  in SetUpCollectiveParams()
|
D | ring_alg.cc
    113  col_params->group.group_size / col_params->group.num_tasks;  in GenerateSubdivsInCollectiveParams()
    130  int num_chunks = col_params->group.group_size * num_subdivs;  in GenerateSubdivsInCollectiveParams()
    181  for (int di = 1; di < col_params->group.group_size; ++di) {  in InitializeCollectiveParams()
    236  DCHECK_EQ(col_params->group.group_size, perm.size());  in InitializeCollectiveParams()
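
The ring algorithms split the tensor into group_size * num_subdivs chunks so each device owns one chunk per subdivision. A simplified sketch of that chunk accounting (the real code may pad the tensor rather than give the remainder to one chunk):

    #include <vector>

    // One chunk per (device, subdivision) pair; num_chunks must be > 0.
    // The last chunk absorbs the remainder in this sketch.
    std::vector<long long> RingChunkSizes(long long total_elems, int group_size,
                                          int num_subdivs) {
      const int num_chunks = group_size * num_subdivs;
      std::vector<long long> sizes(num_chunks, total_elems / num_chunks);
      sizes.back() += total_elems % num_chunks;
      return sizes;
    }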
|
D | collective_param_resolver_local_test.cc
    89   cp.group.group_size = kNumGpus;  in TEST_F()
    164  cp->group.group_size = 3;  in TEST_F()
    207  cp->group.group_size = 3;  in InitializeCollectiveParamsForBroadcast()
|
/external/tensorflow/tensorflow/python/distribute/ |
D | cross_device_utils.py
    80   group_size = num_devices // 2
    83   group_1_main_device = (group_0_main_device + group_size) % num_devices
    84   if group_0_main_device < group_size:
    86   group_1_begin = group_size
    88   group_0_begin = group_size
    93   group_0_begin + group_size]
    100  group_1_begin + group_size]
    120  if (group_0_main_device < group_size) == (j < group_size):
    169  def group_device_names(devices, group_size):  [argument]
    185  if group_size > num_devices:
    [all …]
|
D | all_reduce.py
    497  group_size = span * 2
    500  if (d % group_size) >= (group_size / 2):
    532  group_size = span * 2
    535  if (d % group_size) >= (group_size / 2):
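
The test (d % group_size) >= (group_size / 2) decides whether device d sits in the upper half of its block of group_size = 2 * span devices, which fixes its exchange partner in a recursive halving/doubling schedule. The same rule as a C++ sketch (the Python file above is the actual implementation):

    // Devices are grouped into blocks of group_size = 2 * span. A device in
    // the upper half of its block pairs with the device span positions below
    // it; a device in the lower half pairs with the one span positions above.
    int ExchangePartner(int d, int span) {
      const int group_size = span * 2;
      const bool upper_half = (d % group_size) >= group_size / 2;
      return upper_half ? d - span : d + span;
    }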
|
/external/tensorflow/tensorflow/core/distributed_runtime/ |
D | collective_param_resolver_distributed.cc
    32   req_.set_group_size(group.group_size);  in CompleteGroupCall()
    59   req_.set_group_size(group.group_size);  in CompleteInstanceCall()
    117  cp.group.group_size = request->group_size();  in CompleteGroupAsync()
    128  response->set_group_size(gr->group.group_size);  in CompleteGroupAsync()
    150  cp->group.group_size = request->group_size();  in CompleteInstanceAsync()
    222  gr->group.group_size = resp.group_size();  in UpdateGroupCache()
    224  if (resp.device_name_size() != gr->group.group_size) {  in UpdateGroupCache()
    232  if (resp.task_name_size() != gr->group.group_size) {  in UpdateGroupCache()
    334  if (ir->known_count < cp->group.group_size) {  in UpdateInstanceCache()
    335  ir->known_count = cp->group.group_size;  in UpdateInstanceCache()
    [all …]
|
/external/mesa3d/src/mesa/state_tracker/ |
D | st_cb_compute.c
    39   const GLuint *group_size,  in st_dispatch_compute_common()  [argument]
    60   info.block[i] = group_size ? group_size[i] : prog->info.cs.local_size[i];  in st_dispatch_compute_common()
    89   const GLuint *group_size)  in st_dispatch_compute_group_size()  [argument]
    91   st_dispatch_compute_common(ctx, num_groups, group_size, NULL, 0);  in st_dispatch_compute_group_size()
|
/external/mesa3d/src/mesa/main/ |
D | compute.c
    104  const GLuint *group_size)  in validate_DispatchComputeGroupSizeARB()  [argument]
    150  if (group_size[i] == 0 ||  in validate_DispatchComputeGroupSizeARB()
    151  group_size[i] > ctx->Const.MaxComputeVariableGroupSize[i]) {  in validate_DispatchComputeGroupSizeARB()
    157  total_invocations *= group_size[i];  in validate_DispatchComputeGroupSizeARB()
    319  const GLuint group_size[3] = { group_size_x, group_size_y, group_size_z };  in dispatch_compute_group_size()  [local]
    330  !validate_DispatchComputeGroupSizeARB(ctx, num_groups, group_size))  in dispatch_compute_group_size()
    336  ctx->Driver.DispatchComputeGroupSize(ctx, num_groups, group_size);  in dispatch_compute_group_size()
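
validate_DispatchComputeGroupSizeARB rejects a zero or over-limit size in any dimension and also bounds the total invocation count. A condensed sketch of just those checks (limit names taken from the snippet, everything else hypothetical; the real function validates much more):

    // Reject a zero or over-limit size in any dimension, then bound the
    // total invocation count, mirroring the per-dimension loop above.
    bool ValidateVariableGroupSize(const unsigned group_size[3],
                                   const unsigned max_per_dim[3],
                                   unsigned long long max_total_invocations) {
      unsigned long long total_invocations = 1;
      for (int i = 0; i < 3; ++i) {
        if (group_size[i] == 0 || group_size[i] > max_per_dim[i]) return false;
        total_invocations *= group_size[i];
      }
      return total_invocations <= max_total_invocations;
    }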
|
/external/tensorflow/tensorflow/compiler/xla/service/ |
D | convolution_group_converter.cc
    116  std::vector<int32> GetMaskIds(int64 group_size, int64 group_count) {  in GetMaskIds()  [argument]
    119  for (int j = 0; j < group_size; ++j) {  in GetMaskIds()
    173  int64 group_size = filter_shape.dimensions(kernel_input_feature_dim);  in GetExpandedFilterMask()  [local]
    178  GetMaskIds(group_size, group_count);  in GetExpandedFilterMask()
    337  int64 group_size = filter->shape().dimensions(kernel_input_feature_dim);  in HandleConvolution()  [local]
    347  if (group_size == 1) {  in HandleConvolution()
    407  ShapeUtil::AppendMajorDimension(group_size, &reshaped_activation_shape);  in HandleConvolution()
    440  dim->set_size(group_size);  in HandleConvolution()
    507  activation_slice_starts[activation_input_feature_dim] = i * group_size;  in HandleConvolution()
    509  (i + 1) * group_size;  in HandleConvolution()
    [all …]
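
GetMaskIds appears to tag each of the group_count * group_size input-feature positions with the index of its group; the expanded filter mask built from it keeps each group's weights separate when the grouped convolution is rewritten as a full one. A sketch consistent with the visible loop (renamed to mark it as a reconstruction, not the verbatim source):

    #include <cstdint>
    #include <vector>

    // Group 0 repeated group_size times, then group 1, and so on, giving one
    // group id per input-feature position of the expanded filter.
    std::vector<int32_t> GroupMaskIds(int64_t group_size, int64_t group_count) {
      std::vector<int32_t> values;
      values.reserve(static_cast<std::size_t>(group_size * group_count));
      for (int64_t i = 0; i < group_count; ++i) {
        for (int64_t j = 0; j < group_size; ++j) {
          values.push_back(static_cast<int32_t>(i));
        }
      }
      return values;
    }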
|
/external/tensorflow/tensorflow/compiler/xla/tests/ |
D | grouped_convolution_test.cc
    37   int64 group_size, group_count;  [member]
    67   int64 group_size = option[5];  in GetConv2DTestCases()  [local]
    71   config.group_size = group_size;  in GetConv2DTestCases()
    72   config.group_count = input_feature / group_size;  in GetConv2DTestCases()
    80   config.kernel_dims = {kernel_size, kernel_size, group_size, output_feature};  in GetConv2DTestCases()
|
/external/mesa3d/src/mesa/drivers/dri/i965/ |
D | brw_compute.c
    124  unsigned group_size = prog_data->local_size[0] *  in brw_emit_gpgpu_walker()  [local]
    127  (group_size + simd_size - 1) / simd_size;  in brw_emit_gpgpu_walker()
    130  const unsigned right_non_aligned = group_size & (simd_size - 1);  in brw_emit_gpgpu_walker()
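
brw_emit_gpgpu_walker derives the hardware thread count per work-group by rounding group_size up to the SIMD width, and group_size & (simd_size - 1) gives the number of live channels in the last, partially filled thread (simd_size is a power of two). The two expressions in isolation (hedged sketch with hypothetical names):

    // simd_size must be a power of two (e.g. 8, 16 or 32).
    struct WalkerThreads {
      unsigned thread_count;       // HW threads needed for one work-group
      unsigned right_non_aligned;  // live channels in the last, partial thread
    };

    WalkerThreads ComputeWalkerThreads(unsigned group_size, unsigned simd_size) {
      WalkerThreads w;
      w.thread_count = (group_size + simd_size - 1) / simd_size;  // round up
      w.right_non_aligned = group_size & (simd_size - 1);         // 0 => fully aligned
      return w;
    }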
|