
Searched refs:group_size (Results 1 – 25 of 53) sorted by relevance


/external/tensorflow/tensorflow/python/ops/
collective_ops.py
24 def all_reduce(t, group_size, group_key, instance_key, merge_op, final_op, argument
50 if group_size <= 1:
53 group_size=group_size,
61 def all_gather(t, group_size, group_key, instance_key): argument
79 if group_size <= 1:
82 output_shape = [dims[0] * group_size] + dims[1:]
85 group_size=group_size,
90 def broadcast_send(t, shape, dtype, group_size, group_key, instance_key): argument
121 if group_size <= 1:
132 group_size=group_size,
[all …]
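
These wrappers are TensorFlow's internal collective-communication entry points; the test matched just below drives them across in-process CPU devices. A minimal usage sketch in that spirit, assuming a TF 1.x-style graph/session environment (this is an internal API and may differ across versions):

    import tensorflow as tf
    from tensorflow.python.ops import collective_ops

    group_size = 2     # number of participating devices
    group_key = 1      # identifies the device group
    instance_key = 1   # identifies this particular collective instance

    g = tf.Graph()
    with g.as_default():
        reduced = []
        for cpu in range(group_size):
            with tf.device('/CPU:%d' % cpu):
                t = tf.constant([float(cpu + 1)])
                # merge_op 'Add' sums the contributions; final_op 'Div'
                # rescales by group_size, so everyone receives the mean.
                reduced.append(collective_ops.all_reduce(
                    t, group_size, group_key, instance_key, 'Add', 'Div'))
    config = tf.compat.v1.ConfigProto(device_count={'CPU': group_size})
    with tf.compat.v1.Session(graph=g, config=config) as sess:
        print(sess.run(reduced))   # both replicas see [1.5]
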
collective_ops_test.py
53 group_size = 2
56 config = config_pb2.ConfigProto(device_count={'CPU': group_size})
59 for cpu in range(group_size):
64 in_tensor, group_size, group_key, instance, 'Add', 'Div'))
66 for i in range(group_size * num_instances):
/external/tensorflow/tensorflow/core/kernels/
mkl_batch_matmul_op.cc
115 std::vector<MKL_INT> group_size(1, batch_size); in Compute() local
131 &group_size[0]); in Compute()
144 const MKL_INT group_count, const MKL_INT *group_size) { in MklCblasGemmBatch() argument
146 group_size[0], TransA ? CblasTrans : CblasNoTrans); in MklCblasGemmBatch()
148 group_size[0], TransB ? CblasTrans : CblasNoTrans); in MklCblasGemmBatch()
149 std::vector<float> alpha_Array(group_size[0], 1.0); in MklCblasGemmBatch()
150 std::vector<float> beta_Array(group_size[0], 0.0); in MklCblasGemmBatch()
154 group_count, group_size); in MklCblasGemmBatch()
163 const MKL_INT group_count, const MKL_INT *group_size) { in MklCblasGemmBatch() argument
165 group_size[0], TransA ? CblasTrans : CblasNoTrans); in MklCblasGemmBatch()
[all …]
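
MKL's grouped batch-GEMM takes a group_count and a group_size array: group_size[g] is how many matrix products belong to group g, and all products within a group share shapes, transpose flags, and scaling. The kernel above uses a single group spanning the whole batch, which is why alpha_Array and beta_Array are filled with one repeated value each. A NumPy sketch of those semantics (illustrative, not the actual MKL calling convention):

    import numpy as np

    def gemm_batch(a_list, b_list, group_count, group_size, alpha_array):
        # Matrices are laid out group by group; beta is 0.0 in the kernel
        # above, so the accumulate term is omitted from this sketch.
        out, idx = [], 0
        for g in range(group_count):
            for _ in range(group_size[g]):
                out.append(alpha_array[g] * (a_list[idx] @ b_list[idx]))
                idx += 1
        return out

    batch_size = 4
    a = [np.random.rand(2, 3) for _ in range(batch_size)]
    b = [np.random.rand(3, 5) for _ in range(batch_size)]
    c = gemm_batch(a, b, group_count=1, group_size=[batch_size],
                   alpha_array=[1.0])   # one group holding the whole batch
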
collective_nccl_reducer.cc
82 Tensor group_size; in Run() local
92 group_size_val.scalar<float>()() = col_params_->group.group_size; in Run()
95 group_size_val.scalar<double>()() = col_params_->group.group_size; in Run()
98 group_size_val.scalar<int32>()() = col_params_->group.group_size; in Run()
101 group_size_val.scalar<int64>()() = col_params_->group.group_size; in Run()
107 group_size = Tensor( in Run()
113 &group_size_val, col_ctx_->device, &group_size, in Run()
135 const int num_global_devices = col_params_->group.group_size; in Run()
197 col_params_->final_op.get(), col_ctx_->output, &group_size); in Run()
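
The reducer stages group.group_size into an on-device scalar tensor, in whichever dtype the collective runs, so the final op can rescale the NCCL result, e.g. turning the elementwise sum into a mean. A NumPy sketch of the arithmetic it arranges:

    import numpy as np

    def nccl_mean(per_rank_tensors):
        group_size = len(per_rank_tensors)         # col_params_->group.group_size
        summed = np.sum(per_rank_tensors, axis=0)  # what the NCCL sum delivers
        return summed / group_size                 # final_op applied with group_size

    print(nccl_mean([np.array([1.0, 2.0]), np.array([3.0, 4.0])]))  # [2. 3.]
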
roll_op.cc
160 int64 group_size = 0; in DoRollWithMemcpy() local
164 group_size = isd_indx_skip * isd_stride + remainder_offset; in DoRollWithMemcpy()
167 group_size = isd_indx_skip * isd_stride + remainder_offset; in DoRollWithMemcpy()
173 memcpy(out_ptr, in_ptr, group_size * sizeof(T)); in DoRollWithMemcpy()
176 i += group_size; in DoRollWithMemcpy()
177 out_ptr += group_size; in DoRollWithMemcpy()
178 in_ptr += group_size; in DoRollWithMemcpy()
206 group_size = isd_indx_skip * isd_stride; in DoRollWithMemcpy()
209 group_size = isd_indx_skip * isd_stride; in DoRollWithMemcpy()
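
DoRollWithMemcpy moves runs of group_size contiguous elements with a single memcpy instead of shifting element by element; the run length is derived from the strides of the innermost shifted dimension, with shorter remainder runs at wrap points. A one-dimensional sketch of the chunked-copy idea (the real kernel is multi-dimensional and stride-based):

    import numpy as np

    def roll_by_chunks(flat, shift):
        n = flat.size
        shift %= n
        out = np.empty_like(flat)
        i = 0
        while i < n:
            dest = (i + shift) % n
            # group_size: the longest contiguous run that can be copied in
            # one shot before source or destination hits the buffer end.
            group_size = min(n - i, n - dest)
            out[dest:dest + group_size] = flat[i:i + group_size]
            i += group_size
        return out

    x = np.arange(8)
    assert (roll_by_chunks(x, 3) == np.roll(x, 3)).all()
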
sparse_softmax_op.cc
103 const int group_size = group_vals.size(); in Compute() local
109 Eigen::Tensor<T, 1, Eigen::RowMajor> tmp(group_size); in Compute()
117 output_flat.data() + count, group_size); in Compute()
120 count += group_size; in Compute()
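
Here group_size is simply the number of nonzero values in one group of the sparse tensor, and softmax is computed over each group independently. A NumPy sketch of the per-group step:

    import numpy as np

    def group_softmax(group_vals):
        # group_size in the kernel is group_vals.size: the softmax runs over
        # just this group's values, shifted by the group max for stability.
        tmp = np.exp(group_vals - np.max(group_vals))
        return tmp / np.sum(tmp)

    # Two groups, of group_size 2 and 1; each normalizes on its own.
    for group_vals in [np.array([1.0, 2.0]), np.array([0.5])]:
        print(group_softmax(group_vals))
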
bias_op_gpu.cu.cc
151 int group_size) { in BiasGradNCHW_SharedAtomics() argument
168 index < total_count; index += blockDim.x * group_size) { in BiasGradNCHW_SharedAtomics()
224 int group_size = (config.block_count + bias_size - 1) / bias_size; in compute() local
225 config.block_count = group_size * bias_size; in compute()
232 bias_backprop, batch, bias_size, image_size, group_size)); in compute()
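
In this CUDA launch setup, group_size is the number of thread blocks assigned to each bias element: the block count is rounded up to an exact multiple of bias_size, and the grid-stride loop then advances by blockDim.x * group_size. The rounding, sketched:

    def shape_bias_grad_launch(block_count, bias_size):
        group_size = (block_count + bias_size - 1) // bias_size  # ceil division
        block_count = group_size * bias_size   # exact multiple of bias_size
        return group_size, block_count

    print(shape_bias_grad_launch(block_count=1000, bias_size=64))  # (16, 1024)
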
collective_ops.cc
40 if (col_params_.group.group_size > in CanProceedWithCompute()
76 OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size)); in CollectiveGatherOpKernel()
127 OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size)); in CollectiveReduceOpKernel()
236 OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size)); in CollectiveBcastSendOpKernel()
305 OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size)); in CollectiveBcastRecvOpKernel()
collective_nccl_reducer_test.cc
120 col_params_.group.group_size = num_ranks; in Init()
206 .Attr("group_size", params.group.group_size) in GetCollectiveReduce()
226 col_params_.group.group_size = parent_->col_params_.group.group_size; in DeviceInstance()
random_op.cc
687 const size_t group_size = device.maxSyclThreadsPerBlock(); in operator ()() local
688 const size_t group_count = (size + group_size - 1) / group_size; in operator ()()
699 sycl::nd_range<1>(sycl::range<1>(group_count * group_size), in operator ()()
700 sycl::range<1>(group_size)), in operator ()()
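
The SYCL path uses the same ceiling division: group_size comes from the device's thread limit, and the global range is padded to group_count * group_size so it divides evenly into work groups of group_size. The launch math, sketched:

    def sycl_nd_range(size, max_threads_per_block):
        group_size = max_threads_per_block      # device.maxSyclThreadsPerBlock()
        group_count = (size + group_size - 1) // group_size  # ceil(size / gs)
        return group_count * group_size, group_size  # (padded global, local)

    print(sycl_nd_range(size=1000, max_threads_per_block=256))  # (1024, 256)
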
/external/tensorflow/tensorflow/core/common_runtime/
collective_param_resolver_local.cc
69 gr->group.group_size = cp->group.group_size; in CompleteGroupLocal()
73 << " group_size=" << gr->group.group_size; in CompleteGroupLocal()
89 } else if (cp->group.group_size != gr->group.group_size) { in CompleteGroupLocal()
92 cp->group.group_size, " and group_key", cp->group.group_key, in CompleteGroupLocal()
93 " but that group has size ", gr->group.group_size); in CompleteGroupLocal()
100 if (gr->device_set.size() == gr->group.group_size) { in CompleteGroupLocal()
119 << " group_size=" << gr->group.group_size in CompleteGroupLocal()
127 VLOG(2) << "group_size " << gr->group.group_size << " set size " in CompleteGroupLocal()
130 if (gr->device_set.size() < gr->group.group_size) { in CompleteGroupLocal()
134 CHECK_EQ(gr->device_set.size(), gr->group.group_size); in CompleteGroupLocal()
[all …]
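
These matches outline the local group-resolution protocol: the first participant fixes the group's size, later participants must present a matching group_size for the same group_key, and the group completes once the device set reaches that size. A simplified single-threaded sketch (the real resolver is mutex-guarded and callback-driven):

    class GroupResolver:
        def __init__(self):
            self._groups = {}   # group_key -> (group_size, set of devices)

        def complete_group_local(self, group_key, group_size, device):
            if group_key not in self._groups:
                self._groups[group_key] = (group_size, set())
            expected, devices = self._groups[group_key]
            if group_size != expected:
                raise ValueError(
                    'Collective Op has group_size %d and group_key %d but '
                    'that group has size %d' % (group_size, group_key, expected))
            devices.add(device)
            return len(devices) == expected   # True once the group is complete

    r = GroupResolver()
    print(r.complete_group_local(1, 2, '/job:worker/device:CPU:0'))  # False
    print(r.complete_group_local(1, 2, '/job:worker/device:CPU:1'))  # True
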
hierarchical_tree_broadcaster_test.cc
60 cp.group.group_size = D; \
262 col_params_.group.group_size = num_workers * num_devices_per_worker; in Init()
380 .Attr("group_size", params.group.group_size) in GetCollectiveBcastSend()
398 .Attr("group_size", params.group.group_size) in GetCollectiveBcastRecv()
538 cp->group.group_size = num_tasks * num_gpus; in PrepColParamsForSubdivPermsTest()
567 col_params_.group.group_size = parent_->col_params_.group.group_size; in DeviceInstance()
577 int group_size = col_params_.group.group_size; in DeviceInstance() local
578 CHECK_EQ(group_size, col_params_.instance.device_names.size()); in DeviceInstance()
588 for (int i = 0; i < group_size; i++) { in DeviceInstance()
790 cp.group.group_size = 0; in TEST_F()
[all …]
hierarchical_tree_broadcaster.cc
91 for (int di = 1; di < col_params->group.group_size; ++di) { in InitializeCollectiveParams()
247 int group_size = 0; in TreeSendTo() local
250 group_size++; in TreeSendTo()
265 if (group_size > 1) { in TreeSendTo()
268 if (group_size > 2 && source_rank != 1) { in TreeSendTo()
273 if (successor_rank < group_size && successor_rank != source_rank) { in TreeSendTo()
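
TreeSendTo walks a small broadcast tree bounded by group_size; in the spirit of the matches above, each rank forwards to children at 2*rank+1 and 2*rank+2, and any candidate at or beyond group_size falls off the tree. A sketch of that fan-out (the real routine also special-cases the source rank, omitted here):

    def tree_send_to(my_rank, group_size):
        targets = []
        for successor_rank in (2 * my_rank + 1, 2 * my_rank + 2):
            if successor_rank < group_size:
                targets.append(successor_rank)
        return targets

    # group_size = 5: rank 0 -> [1, 2], rank 1 -> [3, 4], rank 2 -> []
    for rank in range(3):
        print(rank, tree_send_to(rank, 5))
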
ring_gatherer_test.cc
179 col_params_.group.group_size = num_workers * num_devices; in Init()
352 .Attr("group_size", params.group.group_size) in GetCollectiveGather()
390 col_params_.group.group_size = parent_->col_params_.group.group_size; in DeviceInstance()
396 int group_size = col_params_.group.group_size; in DeviceInstance() local
397 CHECK_EQ(group_size, in DeviceInstance()
534 cp.group.group_size = kNumDevs; in SetUpCollectiveParams()
ring_reducer_test.cc
201 col_params_.group.group_size = num_workers * num_devices; in Init()
375 .Attr("group_size", params.group.group_size) in GetCollectiveReduce()
414 col_params_.group.group_size = parent_->col_params_.group.group_size; in DeviceInstance()
420 int group_size = col_params_.group.group_size; in DeviceInstance() local
421 CHECK_EQ(group_size, in DeviceInstance()
561 cp.group.group_size = kNumDevs; in SetUpCollectiveParams()
ring_alg.cc
113 col_params->group.group_size / col_params->group.num_tasks; in GenerateSubdivsInCollectiveParams()
130 int num_chunks = col_params->group.group_size * num_subdivs; in GenerateSubdivsInCollectiveParams()
181 for (int di = 1; di < col_params->group.group_size; ++di) { in InitializeCollectiveParams()
236 DCHECK_EQ(col_params->group.group_size, perm.size()); in InitializeCollectiveParams()
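
In the ring algorithms the tensor is cut into group_size * num_subdivs chunks, so each subdivision gives every ring member exactly one chunk to own per pass; the first match also shows the per-task device count (group_size / num_tasks) feeding subdivision generation. The chunk arithmetic, with illustrative numbers:

    def ring_chunking(group_size, num_subdivs, num_elements):
        num_chunks = group_size * num_subdivs
        chunk_size = (num_elements + num_chunks - 1) // num_chunks  # ceil
        return num_chunks, chunk_size

    print(ring_chunking(group_size=8, num_subdivs=2, num_elements=4096))
    # -> (16, 256): sixteen chunks of 256 elements circle the ring
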
collective_param_resolver_local_test.cc
89 cp.group.group_size = kNumGpus; in TEST_F()
164 cp->group.group_size = 3; in TEST_F()
207 cp->group.group_size = 3; in InitializeCollectiveParamsForBroadcast()
/external/tensorflow/tensorflow/python/distribute/
cross_device_utils.py
80 group_size = num_devices // 2
83 group_1_main_device = (group_0_main_device + group_size) % num_devices
84 if group_0_main_device < group_size:
86 group_1_begin = group_size
88 group_0_begin = group_size
93 group_0_begin + group_size]
100 group_1_begin + group_size]
120 if (group_0_main_device < group_size) == (j < group_size):
169 def group_device_names(devices, group_size): argument
185 if group_size > num_devices:
[all …]
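
The first matches split the devices into two halves (group_size = num_devices // 2) for the hierarchical copy; group_device_names then tiles a device list so it divides into groups of group_size, raising if group_size exceeds the device count. A sketch of the grouping helper, with behavior inferred from the matched lines (the real function's ordering may differ):

    def group_device_names(devices, group_size):
        num_devices = len(devices)
        if group_size > num_devices:
            raise ValueError('only %d devices, but group_size=%d'
                             % (num_devices, group_size))
        num_groups = (num_devices + group_size - 1) // group_size
        tiled = (devices * group_size)[:num_groups * group_size]
        return [tiled[i * group_size:(i + 1) * group_size]
                for i in range(num_groups)]

    print(group_device_names(['/gpu:0', '/gpu:1', '/gpu:2'], 2))
    # -> [['/gpu:0', '/gpu:1'], ['/gpu:2', '/gpu:0']]
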
all_reduce.py
497 group_size = span * 2
500 if (d % group_size) >= (group_size / 2):
532 group_size = span * 2
535 if (d % group_size) >= (group_size / 2):
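
These matches are the pairing rule of recursive halving/doubling: at each round the span doubles, group_size = span * 2, and a device's position within its group (d % group_size) decides whether its exchange partner sits span ahead or span behind. Sketched:

    def partner(d, span):
        group_size = span * 2
        if (d % group_size) >= (group_size / 2):
            return d - span   # upper half of its group: partner below
        return d + span       # lower half: partner above

    # 8 devices, span = 1: pairs (0,1)(2,3)(4,5)(6,7); span = 2: (0,2)(1,3)...
    print([partner(d, 1) for d in range(8)])  # [1, 0, 3, 2, 5, 4, 7, 6]
    print([partner(d, 2) for d in range(8)])  # [2, 3, 0, 1, 6, 7, 4, 5]
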
/external/tensorflow/tensorflow/core/distributed_runtime/
collective_param_resolver_distributed.cc
32 req_.set_group_size(group.group_size); in CompleteGroupCall()
59 req_.set_group_size(group.group_size); in CompleteInstanceCall()
117 cp.group.group_size = request->group_size(); in CompleteGroupAsync()
128 response->set_group_size(gr->group.group_size); in CompleteGroupAsync()
150 cp->group.group_size = request->group_size(); in CompleteInstanceAsync()
222 gr->group.group_size = resp.group_size(); in UpdateGroupCache()
224 if (resp.device_name_size() != gr->group.group_size) { in UpdateGroupCache()
232 if (resp.task_name_size() != gr->group.group_size) { in UpdateGroupCache()
334 if (ir->known_count < cp->group.group_size) { in UpdateInstanceCache()
335 ir->known_count = cp->group.group_size; in UpdateInstanceCache()
[all …]
/external/mesa3d/src/mesa/state_tracker/
st_cb_compute.c
39 const GLuint *group_size, in st_dispatch_compute_common() argument
60 info.block[i] = group_size ? group_size[i] : prog->info.cs.local_size[i]; in st_dispatch_compute_common()
89 const GLuint *group_size) in st_dispatch_compute_group_size() argument
91 st_dispatch_compute_common(ctx, num_groups, group_size, NULL, 0); in st_dispatch_compute_group_size()
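
The fallback matched above is the heart of ARB_compute_variable_group_size support: a caller-supplied group_size overrides the shader's compiled-in local size, otherwise the fixed size is used. Sketched:

    def resolve_block(group_size, compiled_local_size):
        # Mirrors: info.block[i] = group_size ? group_size[i] : local_size[i]
        return [group_size[i] if group_size else compiled_local_size[i]
                for i in range(3)]

    print(resolve_block(None, (8, 8, 1)))       # fixed pipeline: [8, 8, 1]
    print(resolve_block((4, 4, 2), (8, 8, 1)))  # variable dispatch: [4, 4, 2]
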
/external/mesa3d/src/mesa/main/
compute.c
104 const GLuint *group_size) in validate_DispatchComputeGroupSizeARB() argument
150 if (group_size[i] == 0 || in validate_DispatchComputeGroupSizeARB()
151 group_size[i] > ctx->Const.MaxComputeVariableGroupSize[i]) { in validate_DispatchComputeGroupSizeARB()
157 total_invocations *= group_size[i]; in validate_DispatchComputeGroupSizeARB()
319 const GLuint group_size[3] = { group_size_x, group_size_y, group_size_z }; in dispatch_compute_group_size() local
330 !validate_DispatchComputeGroupSizeARB(ctx, num_groups, group_size)) in dispatch_compute_group_size()
336 ctx->Driver.DispatchComputeGroupSize(ctx, num_groups, group_size); in dispatch_compute_group_size()
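
validate_DispatchComputeGroupSizeARB checks each group_size component against the per-axis MAX_COMPUTE_VARIABLE_GROUP_SIZE limit (rejecting zero), and the product of all three against the combined invocation limit. A sketch of those checks, with illustrative limits rather than any particular GPU's values:

    def validate_group_size(group_size, max_variable_group_size,
                            max_total_invocations):
        total_invocations = 1
        for i in range(3):
            if (group_size[i] == 0 or
                    group_size[i] > max_variable_group_size[i]):
                return False   # GL_INVALID_VALUE in the real code
            total_invocations *= group_size[i]
        return total_invocations <= max_total_invocations

    print(validate_group_size((8, 8, 4), (512, 512, 64), 1024))    # True: 256
    print(validate_group_size((64, 64, 1), (512, 512, 64), 1024))  # False: 4096
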
/external/tensorflow/tensorflow/compiler/xla/service/
convolution_group_converter.cc
116 std::vector<int32> GetMaskIds(int64 group_size, int64 group_count) { in GetMaskIds() argument
119 for (int j = 0; j < group_size; ++j) { in GetMaskIds()
173 int64 group_size = filter_shape.dimensions(kernel_input_feature_dim); in GetExpandedFilterMask() local
178 GetMaskIds(group_size, group_count); in GetExpandedFilterMask()
337 int64 group_size = filter->shape().dimensions(kernel_input_feature_dim); in HandleConvolution() local
347 if (group_size == 1) { in HandleConvolution()
407 ShapeUtil::AppendMajorDimension(group_size, &reshaped_activation_shape); in HandleConvolution()
440 dim->set_size(group_size); in HandleConvolution()
507 activation_slice_starts[activation_input_feature_dim] = i * group_size; in HandleConvolution()
509 (i + 1) * group_size; in HandleConvolution()
[all …]
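
In the XLA converter, group_size is the number of input features per convolution group (so group_count = input_features / group_size), and group i reads the activation slice [i * group_size, (i + 1) * group_size) along the input-feature dimension. A NumPy sketch of that slicing (NHWC layout chosen for illustration):

    import numpy as np

    def split_activations_by_group(activations, group_count):
        input_features = activations.shape[-1]
        group_size = input_features // group_count   # features per group
        return [activations[..., i * group_size:(i + 1) * group_size]
                for i in range(group_count)]

    x = np.zeros((1, 5, 5, 8))
    slices = split_activations_by_group(x, group_count=4)
    print([s.shape[-1] for s in slices])   # [2, 2, 2, 2] -> group_size = 2
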
/external/tensorflow/tensorflow/compiler/xla/tests/
grouped_convolution_test.cc
37 int64 group_size, group_count; member
67 int64 group_size = option[5]; in GetConv2DTestCases() local
71 config.group_size = group_size; in GetConv2DTestCases()
72 config.group_count = input_feature / group_size; in GetConv2DTestCases()
80 config.kernel_dims = {kernel_size, kernel_size, group_size, output_feature}; in GetConv2DTestCases()
/external/mesa3d/src/mesa/drivers/dri/i965/
brw_compute.c
124 unsigned group_size = prog_data->local_size[0] * in brw_emit_gpgpu_walker() local
127 (group_size + simd_size - 1) / simd_size; in brw_emit_gpgpu_walker()
130 const unsigned right_non_aligned = group_size & (simd_size - 1); in brw_emit_gpgpu_walker()
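
Here group_size is the workgroup's total invocation count (the product of the three local sizes), split into SIMD-width hardware threads; when it is not a multiple of the SIMD width, the last thread executes with only the remainder lanes enabled (the "right non-aligned" mask). The arithmetic, sketched:

    def gpgpu_walker_threads(local_size, simd_size):
        group_size = local_size[0] * local_size[1] * local_size[2]
        thread_count = (group_size + simd_size - 1) // simd_size
        right_non_aligned = group_size & (simd_size - 1)  # lanes in last thread
        return thread_count, right_non_aligned or simd_size

    print(gpgpu_walker_threads((10, 5, 1), 16))  # (4, 2): 50 = 3*16 + 2
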
