Searched refs:inner_size (Results 1 – 25 of 64) sorted by relevance

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
reduce_fp32.c
26 int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_dat… in ReduceMean() argument
36 const float *outer_src = src_data + j * axis_size * inner_size; in ReduceMean()
37 float *outer_dst = dst_data + j * inner_size; in ReduceMean()
38 for (k = 0; k < inner_size; k++) { in ReduceMean()
43 tmp += inner_src[i * inner_size]; in ReduceMean()
51 int IntReduceMean(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data… in IntReduceMean() argument
65 int block_mod = inner_size % C4NUM; in IntReduceMean()
66 int block_c4 = inner_size - block_mod; in IntReduceMean()
69 const int *outer_src = src_data + j * axis_size * inner_size; in IntReduceMean()
70 int *outer_dst = dst_data + j * inner_size; in IntReduceMean()
[all …]
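
Every match in reduce_fp32.c follows the same layout convention: the tensor is viewed as [outer_size, axis_size, inner_size], the reduced axis sits in the middle, and element (j, i, k) lives at (j * axis_size + i) * inner_size + k. The loop bodies are truncated above, so here is a minimal standalone C sketch of that pattern (my own reconstruction for illustration, not the MindSpore source):

#include <stdio.h>

/* Illustrative re-creation of the ReduceMean loop structure shown above:
 * src has shape [outer_size, axis_size, inner_size] flattened row-major,
 * dst has shape [outer_size, inner_size]. */
static void reduce_mean_sketch(int outer_size, int inner_size, int axis_size,
                               const float *src_data, float *dst_data) {
  for (int j = 0; j < outer_size; j++) {
    const float *outer_src = src_data + j * axis_size * inner_size;
    float *outer_dst = dst_data + j * inner_size;
    for (int k = 0; k < inner_size; k++) {
      const float *inner_src = outer_src + k;  /* column k of this outer block */
      float tmp = 0.0f;
      for (int i = 0; i < axis_size; i++) {
        tmp += inner_src[i * inner_size];      /* stride over the reduced axis */
      }
      outer_dst[k] = tmp / (float)axis_size;
    }
  }
}

int main(void) {
  /* shape [1, 2, 3]: mean over the middle axis */
  const float src[6] = {1, 2, 3, 4, 5, 6};
  float dst[3];
  reduce_mean_sketch(1, 3, 2, src, dst);
  printf("%g %g %g\n", dst[0], dst[1], dst[2]);  /* 2.5 3.5 4.5 */
  return 0;
}

The stride-inner_size access in the innermost loop is what lets one helper reduce over any axis once the caller has folded the shape into outer/axis/inner blocks.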
reduce_fp32.h
25 int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_dat…
27 int IntReduceMean(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data…
29 int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data…
31 int IntReduceSum(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data,…
33 int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data…
35 int IntReduceMax(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data,…
37 int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data…
39 int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data,…
41 int ReduceProd(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_dat…
43 int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data…
[all …]
log_softmax_fp32.c
50 int inner_size = 1; in LogSoftmax() local
57 inner_size *= input_shape[i]; in LogSoftmax()
60 int outter_offset = i * input_shape[axis] * inner_size; in LogSoftmax()
61 int sum_outter_offset = i * inner_size; in LogSoftmax()
62 for (int k = 0; k < inner_size; k++) { in LogSoftmax()
67 int axis_offset = inner_offset + j * inner_size; in LogSoftmax()
71 int axis_offset = inner_offset + j * inner_size; in LogSoftmax()
78 int outter_offset = i * input_shape[axis] * inner_size; in LogSoftmax()
79 int sum_outter_offset = i * inner_size; in LogSoftmax()
81 int axis_offset = outter_offset + j * inner_size; in LogSoftmax()
[all …]
softmax_fp32.c
109 int inner_size = 1; in Softmax() local
116 inner_size *= input_shape[i]; in Softmax()
119 int outter_offset = i * input_shape[axis] * inner_size; in Softmax()
120 int sum_outter_offset = i * inner_size; in Softmax()
121 for (int k = 0; k < inner_size; k++) { in Softmax()
126 int axis_offset = inner_offset + j * inner_size; in Softmax()
130 int axis_offset = inner_offset + j * inner_size; in Softmax()
137 int outter_offset = i * input_shape[axis] * inner_size; in Softmax()
138 int sum_outter_offset = i * inner_size; in Softmax()
140 int axis_offset = outter_offset + j * inner_size; in Softmax()
[all …]
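
The Softmax and LogSoftmax matches derive inner_size at runtime: dimensions before the softmax axis are folded into outter_size (the repo's spelling), dimensions after it into inner_size, and offsets are then built as outter_offset + k + j * inner_size. A small sketch of just that shape folding (an illustrative helper, not part of nnacl):

#include <stdio.h>

/* Fold an n_dim shape around `axis` into outter_size / inner_size,
 * mirroring the loop bounds used by the Softmax matches above. */
static void fold_shape(const int *input_shape, int n_dim, int axis,
                       int *outter_size, int *inner_size) {
  *outter_size = 1;
  *inner_size = 1;
  for (int i = 0; i < axis; i++) {
    *outter_size *= input_shape[i];
  }
  for (int i = axis + 1; i < n_dim; i++) {
    *inner_size *= input_shape[i];
  }
}

int main(void) {
  const int shape[4] = {2, 3, 4, 5};
  int outter_size, inner_size;
  fold_shape(shape, 4, 1, &outter_size, &inner_size);
  /* axis = 1: outter_size = 2, inner_size = 4 * 5 = 20, so element (i, j, k)
   * of the folded view lives at i * shape[1] * inner_size + j * inner_size + k. */
  printf("outter_size=%d inner_size=%d\n", outter_size, inner_size);
  return 0;
}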
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
scatter_functor_impl.cu
21 __global__ void ScatterUpdateKernel(const size_t inner_size, const size_t updates_size, const S *in… in ScatterUpdateKernel() argument
24 const size_t index = pos / inner_size; in ScatterUpdateKernel()
25 const size_t offset = pos % inner_size; in ScatterUpdateKernel()
26 const size_t current_pos = indices[index] * inner_size + offset; in ScatterUpdateKernel()
32 __global__ void ScatterAddKernel(const size_t inner_size, const size_t updates_size, const S *indic… in ScatterAddKernel() argument
35 const size_t index = pos / inner_size; in ScatterAddKernel()
36 const size_t offset = pos % inner_size; in ScatterAddKernel()
37 const size_t current_pos = indices[index] * inner_size + offset; in ScatterAddKernel()
43 __global__ void ScatterSubKernel(const size_t inner_size, const size_t updates_size, const S *indic… in ScatterSubKernel() argument
46 const size_t index = pos / inner_size; in ScatterSubKernel()
[all …]
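
The scatter kernels flatten the updates tensor to a 1-D index range and recover the (row, column) pair with one division and one modulo by inner_size. A sequential C equivalent of ScatterUpdateKernel's index math (a sketch only; the real code is a CUDA kernel, and ScatterAdd/ScatterSub differ only in the final assignment):

#include <stddef.h>
#include <stdio.h>

/* Sequential analogue of the ScatterUpdateKernel indexing: updates has shape
 * [num_indices, inner_size]; each row is written to input row indices[row].
 * No bounds checking, as in the snippet. */
static void scatter_update_sketch(size_t inner_size, size_t updates_size,
                                  const int *indices, const float *updates,
                                  float *input) {
  for (size_t pos = 0; pos < updates_size; pos++) {
    const size_t index = pos / inner_size;   /* which update row */
    const size_t offset = pos % inner_size;  /* position inside the row */
    const size_t current_pos = (size_t)indices[index] * inner_size + offset;
    input[current_pos] = updates[pos];       /* ScatterAdd would use += here */
  }
}

int main(void) {
  float input[3 * 2] = {0};        /* 3 rows of inner_size 2 */
  const int indices[2] = {2, 0};   /* update row 0 -> input row 2, row 1 -> row 0 */
  const float updates[2 * 2] = {1, 2, 3, 4};
  scatter_update_sketch(2, 4, indices, updates, input);
  printf("%g %g | %g %g\n", input[4], input[5], input[0], input[1]);  /* 1 2 | 3 4 */
  return 0;
}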
index_add_impl.cu
23 const size_t src_axis_size, const size_t dst_axis_size, const size_t inner_size) { in IndexAddAtomic() argument
25 const size_t src_axis_idx = (pos / inner_size) % src_axis_size; in IndexAddAtomic()
26 const size_t src_outer_idx = pos / (src_axis_size * inner_size); in IndexAddAtomic()
29 const size_t dst_inner_idx = pos % inner_size; in IndexAddAtomic()
30 …const size_t dst_idx = src_outer_idx * (dst_axis_size * inner_size) + dst_axis_idx * inner_size + … in IndexAddAtomic()
38 const size_t src_axis_size, const size_t dst_axis_size, const size_t inner_size) { in IndexAdd() argument
40 const size_t src_axis_idx = (pos / inner_size) % src_axis_size; in IndexAdd()
41 const size_t src_outer_idx = pos / (src_axis_size * inner_size); in IndexAdd()
44 const size_t dst_inner_idx = pos % inner_size; in IndexAdd()
45 …const size_t dst_idx = src_outer_idx * (dst_axis_size * inner_size) + dst_axis_idx * inner_size + … in IndexAdd()
[all …]
argmax_impl.cu
22 const size_t inner_size, S *output) { in Argmax() argument
23 for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < outer_size * inner_size; in Argmax()
25 size_t x = pos / inner_size % outer_size; in Argmax()
26 size_t y = pos % inner_size; in Argmax()
28 size_t input_offset = x * bound * inner_size + 0 * inner_size + y; in Argmax()
31 input_offset = x * bound * inner_size + i * inner_size + y; in Argmax()
42 void CalArgmax(const T *input, const S bound, const size_t outer_size, const size_t inner_size, in CalArgmax() argument
44 …gmax<<<GET_BLOCKS(outer_size), GET_THREADS, 0, cuda_stream>>>(input, bound, outer_size, inner_size, in CalArgmax()
50 const size_t inner_size, int *output, cudaStream_t cuda_stream);
52 const size_t inner_size, int *output, cudaStream_t cuda_stream);
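
Argmax uses the same three-way split with the reduced dimension ("bound") in the middle: x = pos / inner_size % outer_size, y = pos % inner_size, and the scan strides by inner_size. A plain-C sequential version of that indexing (illustrative only; the original is a CUDA kernel whose initialization and tie-breaking details are truncated above):

#include <stddef.h>
#include <stdio.h>

/* Sequential version of the Argmax indexing shown above: for every
 * (outer, inner) pair, find the index along the middle axis with the
 * largest value. Output has shape [outer_size, inner_size]. */
static void argmax_sketch(const float *input, size_t bound, size_t outer_size,
                          size_t inner_size, int *output) {
  for (size_t pos = 0; pos < outer_size * inner_size; pos++) {
    const size_t x = pos / inner_size % outer_size;  /* outer index */
    const size_t y = pos % inner_size;               /* inner index */
    size_t input_offset = x * bound * inner_size + 0 * inner_size + y;
    float best = input[input_offset];
    int best_i = 0;
    for (size_t i = 1; i < bound; i++) {
      input_offset = x * bound * inner_size + i * inner_size + y;
      if (input[input_offset] > best) {
        best = input[input_offset];
        best_i = (int)i;
      }
    }
    output[x * inner_size + y] = best_i;
  }
}

int main(void) {
  const float input[1 * 3 * 2] = {1, 9, 5, 2, 3, 4};  /* shape [1, 3, 2] */
  int out[2];
  argmax_sketch(input, 3, 1, 2, out);
  printf("%d %d\n", out[0], out[1]);  /* 1 0 */
  return 0;
}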
unsorted_segment_max.cu
22 size_t inner_size, bool fp16_flag, T init_K, T *output) { in UnsortedSegmentMax() argument
27 …ize_t t_idx = blockIdx.x * blockDim.x + threadIdx.x; t_idx < KWARPSIZE * num_segments * inner_size; in UnsortedSegmentMax()
29 size_t segment_id = t_idx / KWARPSIZE / inner_size; in UnsortedSegmentMax()
30 size_t inner_id = t_idx / KWARPSIZE % inner_size; in UnsortedSegmentMax()
36 T other_K = input[i * inner_size + inner_id]; in UnsortedSegmentMax()
53 output[segment_id * inner_size + inner_id] = threadK; in UnsortedSegmentMax()
61 size_t inner_size, T *output, cudaStream_t stream) { in CalUnsortedSegmentMax() argument
62 size_t size = (inner_size * KWARPSIZE * num_segments); in CalUnsortedSegmentMax()
70 inner_size, fp16_flag, init_K, output); in CalUnsortedSegmentMax()
75 size_t outer_size, size_t inner_size, float *output,
[all …]
general_reduction_impl.cu
47 __global__ void ThreadReduction(bool small, size_t outer_size, size_t bound, size_t inner_size, con… in ThreadReduction() argument
55 for (int t_idx = blockIdx.x * blockDim.x + threadIdx.x; t_idx < outer_size * inner_size; in ThreadReduction()
57 int outer_id = t_idx / inner_size; in ThreadReduction()
58 int inner_id = t_idx % inner_size; in ThreadReduction()
64 T other_K = input[outer_id * bound * inner_size + i * inner_size + inner_id]; in ThreadReduction()
72 output[outer_id * inner_size + inner_id] = threadK; in ThreadReduction()
73 output_index[outer_id * inner_size + inner_id] = threadV; in ThreadReduction()
78 __global__ void WarpReduction(bool small, size_t outer_size, size_t bound, size_t inner_size, const… in WarpReduction() argument
85 …or (int t_idx = blockIdx.x * blockDim.x + threadIdx.x; t_idx < kWarpSize * outer_size * inner_size; in WarpReduction()
87 int outer_id = t_idx / kWarpSize / inner_size; in WarpReduction()
[all …]
unsorted_segment_min.cu
32 size_t outer_size, size_t inner_size, T init_K, T *output) { in UnsortedSegmentMin() argument
34 … (int t_idx = blockIdx.x * blockDim.x + threadIdx.x; t_idx < KWARPSIZE * num_segments * inner_size; in UnsortedSegmentMin()
36 int segment_id = t_idx / KWARPSIZE / inner_size; in UnsortedSegmentMin()
37 int inner_id = t_idx / KWARPSIZE % inner_size; in UnsortedSegmentMin()
43 T other_K = input[i * inner_size + inner_id]; in UnsortedSegmentMin()
58 output[segment_id * inner_size + inner_id] = threadK; in UnsortedSegmentMin()
66 size_t inner_size, T *output, cudaStream_t stream) { in CalUnsortedSegmentMin() argument
67 int size = (inner_size * KWARPSIZE * num_segments); in CalUnsortedSegmentMin()
70 inner_size, init_K, output); in CalUnsortedSegmentMin()
75 … size_t outer_size, size_t inner_size, float *output, cudaStream_t stream);
[all …]
topk_impl.cu
27 …<<<block_num_limit, BLOCK, 0, stream>>>(outer_size, inner_size, input, output, output_index, k_cut…
100 inline __device__ void TopKStep(const int &outer_size, const int &inner_size, const T *input, T *ou… in TopKStep() argument
113 int limit = (inner_size / kWarpSize) * kWarpSize; in TopKStep()
135 LEFT_INSERT_THREAD_QUEUE((input[outer_id * inner_size + i]), (outer_id * inner_size + i)); in TopKStep()
153 if (i < inner_size) { in TopKStep()
154 LEFT_INSERT_THREAD_QUEUE((input[outer_id * inner_size + i]), (outer_id * inner_size + i)); in TopKStep()
171 output_index[outer_id * k_cut + (*k_prime) + i] = shared_V[i] % inner_size; in TopKStep()
178 __global__ void TopKBlock(int outer_size, int inner_size, const T *input, T *output, S *output_inde… in TopKBlock() argument
200 …outer_size, inner_size, input, output, output_index, k_cut, init_K, outer_id, shared_K, shared_V, … in TopKBlock()
207 void FastTopK(const int outer_size, const int inner_size, const T *input, S k_cut, T *output, S *ou… in FastTopK() argument
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/
reduce_fp16.c
21 int ReduceMeanFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float1… in ReduceMeanFp16() argument
31 const float16_t *outer_src = src_data + j * axis_size * inner_size; in ReduceMeanFp16()
32 float16_t *outer_dst = dst_data + j * inner_size; in ReduceMeanFp16()
33 for (k = 0; k < inner_size; k++) { in ReduceMeanFp16()
38 tmp += inner_src[i * inner_size]; in ReduceMeanFp16()
46 int ReduceMaxFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16… in ReduceMaxFp16() argument
53 const float16_t *outer_src = src_data + j * axis_size * inner_size; in ReduceMaxFp16()
54 float16_t *outer_dst = dst_data + j * inner_size; in ReduceMaxFp16()
55 for (k = 0; k < inner_size; k++) { in ReduceMaxFp16()
60 tmp = tmp > inner_src[i * inner_size] ? tmp : inner_src[i * inner_size]; in ReduceMaxFp16()
[all …]
log_softmax_fp16.c
53 int inner_size = 1; in LogSoftmaxFp16() local
60 inner_size *= input_shape[i]; in LogSoftmaxFp16()
63 int outter_offset = i * input_shape[axis] * inner_size; in LogSoftmaxFp16()
64 int sum_outter_offset = i * inner_size; in LogSoftmaxFp16()
65 for (int k = 0; k < inner_size; k++) { in LogSoftmaxFp16()
70 int axis_offset = inner_offset + j * inner_size; in LogSoftmaxFp16()
74 int axis_offset = inner_offset + j * inner_size; in LogSoftmaxFp16()
81 int outter_offset = i * input_shape[axis] * inner_size; in LogSoftmaxFp16()
82 int sum_outter_offset = i * inner_size; in LogSoftmaxFp16()
84 int axis_offset = outter_offset + j * inner_size; in LogSoftmaxFp16()
[all …]
softmax_fp16.c
99 int inner_size = 1; in SoftmaxFp16() local
106 inner_size *= input_shape[i]; in SoftmaxFp16()
109 int outter_offset = i * input_shape[axis] * inner_size; in SoftmaxFp16()
110 int sum_outter_offset = i * inner_size; in SoftmaxFp16()
111 for (int k = 0; k < inner_size; k++) { in SoftmaxFp16()
116 int axis_offset = inner_offset + j * inner_size; in SoftmaxFp16()
120 int axis_offset = inner_offset + j * inner_size; in SoftmaxFp16()
127 int outter_offset = i * input_shape[axis] * inner_size; in SoftmaxFp16()
128 int sum_outter_offset = i * inner_size; in SoftmaxFp16()
130 int axis_offset = outter_offset + j * inner_size; in SoftmaxFp16()
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/
reduce_int8.c
176 int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *… in ReduceMeanInt8() argument
183 const int32_t *outer_src = src_data + j * axis_size * inner_size; in ReduceMeanInt8()
184 int32_t *outer_dst = dst_data + j * inner_size; in ReduceMeanInt8()
185 for (k = 0; k < inner_size; k++) { in ReduceMeanInt8()
190 int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; in ReduceMeanInt8()
210 int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32… in ReduceMeanLastAxis() argument
217 const int32_t *outer_src = src_data + j * axis_size * inner_size; in ReduceMeanLastAxis()
218 int8_t *outer_dst = dst_data + j * inner_size; in ReduceMeanLastAxis()
219 for (k = 0; k < inner_size; k++) { in ReduceMeanLastAxis()
224 int32_t tmp = inner_src[i * inner_size] - quant->in_zp_; in ReduceMeanLastAxis()
[all …]
reduce_int8.h
43 int ReduceMeanInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *…
45 int ReduceMeanLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32…
47 int ReduceSumInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *s…
49 int ReduceSumLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_…
51 int ReduceMaxInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *s…
53 int ReduceMaxLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_…
55 int ReduceMinInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *s…
57 int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32_…
59 int ReduceProdLastAxis(const int outer_size, const int inner_size, const int axis_size, const int32…
61 int ReduceProdInt8(const int outer_size, const int inner_size, const int axis_size, const int32_t *…
[all …]
softmax_int8.c
27 int inner_size = 1; in SoftmaxInt8() local
32 inner_size *= input_shape[i]; in SoftmaxInt8()
36 int outter_offset = o * axis_shape_size * inner_size; in SoftmaxInt8()
38 for (int c = 0; c < inner_size; c++) { in SoftmaxInt8()
41 int axis_offset = outter_offset + c + i * inner_size; in SoftmaxInt8()
47 int axis_offset = outter_offset + c + i * inner_size; in SoftmaxInt8()
58 int axis_offset = outter_offset + i * inner_size; in SoftmaxInt8()
59 for (int c = 0; c < inner_size; ++c) { in SoftmaxInt8()
l2_norm_int8.c
23 const int inner_size = param->shape_[param->shape_num_ - 1]; in L2NormalizationInt8() local
27 for (int j = 0; j < inner_size; ++j) { in L2NormalizationInt8()
28 int32_t in = input_data[i * inner_size + j] - quant_param->in_.zp_; in L2NormalizationInt8()
34 for (int k = 0; k < inner_size; ++k) { in L2NormalizationInt8()
35 int32_t in = input_data[i * inner_size + k] - quant_param->in_.zp_; in L2NormalizationInt8()
37 output_data[i * inner_size + k] = MSMIN(127, MSMAX(-128, out)); in L2NormalizationInt8()
gather_int8.c
22 int GatherInt8(int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit, const … in GatherInt8() argument
29 const int8_t *inputm = in_data + inner_size * m * limit; in GatherInt8()
30 int8_t *outputm = out_data + inner_size * m * indices_element_size; in GatherInt8()
35 for (j = 0; j < inner_size; ++j) { in GatherInt8()
36 int32_t tmp = round(alpha * (inputm[indices[i] * inner_size + j] - z1)) + z2; in GatherInt8()
39 outputm[i * inner_size + j] = (int8_t)tmp; in GatherInt8()
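
GatherInt8 pairs the outer/inner addressing with the requantization step visible in the match: subtract the input zero point z1, rescale by alpha, add the output zero point z2, then clamp to int8. A self-contained sketch under those assumptions (the clamp bounds of [-128, 127] are not shown in the truncated lines and are assumed here):

#include <math.h>
#include <stdio.h>
#include <stdint.h>

/* Gather rows along an axis from an int8 tensor laid out as
 * [outer_size, limit, inner_size], requantizing each element:
 * out = round(alpha * (in - z1)) + z2, clamped to the int8 range. */
static void gather_int8_sketch(const int8_t *in_data, int8_t *out_data,
                               int outer_size, int inner_size, int limit,
                               const int *indices, int indices_element_size,
                               float alpha, int z1, int z2) {
  for (int m = 0; m < outer_size; m++) {
    const int8_t *inputm = in_data + inner_size * m * limit;
    int8_t *outputm = out_data + inner_size * m * indices_element_size;
    for (int i = 0; i < indices_element_size; i++) {
      for (int j = 0; j < inner_size; j++) {
        int32_t tmp = (int32_t)roundf(alpha * (inputm[indices[i] * inner_size + j] - z1)) + z2;
        if (tmp > 127) tmp = 127;    /* clamp assumed; not visible in the snippet */
        if (tmp < -128) tmp = -128;
        outputm[i * inner_size + j] = (int8_t)tmp;
      }
    }
  }
}

int main(void) {
  const int8_t in[1 * 2 * 3] = {10, 20, 30, 40, 50, 60};  /* outer=1, limit=2, inner=3 */
  int8_t out[3];
  const int indices[1] = {1};
  gather_int8_sketch(in, out, 1, 3, 2, indices, 1, 1.0f, 0, 0);
  printf("%d %d %d\n", out[0], out[1], out[2]);  /* 40 50 60 */
  return 0;
}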
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/
softmax.c
59 int inner_size) { in SoftMaxP1() argument
60 if (inner_size == 1) { in SoftMaxP1()
65 int outter_offset = i * length * inner_size; in SoftMaxP1()
66 int sum_outter_offset = i * inner_size; in SoftMaxP1()
67 for (int k = 0; k < inner_size; k++) { in SoftMaxP1()
71 int axis_offset = inner_offset + j * inner_size; in SoftMaxP1()
75 int axis_offset = inner_offset + j * inner_size; in SoftMaxP1()
80 int axis_offset = inner_offset + j * inner_size; in SoftMaxP1()
89 int inner_size) { in SoftMaxP2() argument
91 int outter_offset = i * length * inner_size; in SoftMaxP2()
[all …]
softmax_grad.c
29 int inner_size = 1, outter_size = 1; in SoftmaxGrad() local
34 inner_size *= input_shape[i]; in SoftmaxGrad()
37 for (int i = 0; i < inner_size * input_shape[axis]; i++) sum_mul[i] = 1.0; in SoftmaxGrad()
43 const int N = inner_size; in SoftmaxGrad()
46 memset(sum_data, 0, (size_t)(inner_size) * sizeof(float)); in SoftmaxGrad()
47 for (int k = 0; k < inner_size; k++) { in SoftmaxGrad()
50 int offset = inner_offset + j * inner_size; in SoftmaxGrad()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/
shift_cpu_kernel.cc
90 const int64_t inner_size = SizeToLong(axisIterator_.InnerSize()); in Launch() local
94 (void)std::fill_n(output, outer_size * axis_size * inner_size, fill_value); in Launch()
98 if (inputs[0]->size != outer_size * axis_size * inner_size * sizeof(T)) { in Launch()
103 if ((inner_size == 1) && (outer_size == 1)) { in Launch()
119 (void)tasks.emplace_back([this, i, fill_value, axis_size, inner_size, input, output, outputs] { in Launch()
120 size_t offset = i * axis_size * inner_size; in Launch()
121 size_t input_offset = offset + copy_src_begin_ * inner_size; in Launch()
122 size_t output_offset = offset + copy_dst_begin_ * inner_size; in Launch()
123 size_t copy_size = copy_size_ * inner_size * sizeof(T); in Launch()
129 size_t fill_offset = offset + fill_begin_ * inner_size; in Launch()
[all …]
/third_party/mindspore/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/
softmax_grad_fp32_tests.cc
61 int inner_size = 1; in TEST_F() local
64 inner_size *= softmax_param->input_shape_[i]; in TEST_F()
66 float *sum_data = new (std::nothrow) float[inner_size]; in TEST_F()
68 …float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->… in TEST_F()
121 int inner_size = 1; in TEST_F() local
124 inner_size *= softmax_param->input_shape_[i]; in TEST_F()
126 float *sum_data = new (std::nothrow) float[inner_size]; in TEST_F()
128 …float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->… in TEST_F()
186 int inner_size = 1; in TEST_F() local
189 inner_size *= softmax_param->input_shape_[i]; in TEST_F()
[all …]
/third_party/mindspore/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/operator_library/kernels/nnacl/fp32/
softmax_fp32.c
109 int inner_size = 1; in Softmax() local
116 inner_size *= input_shape[i]; in Softmax()
119 int outter_offset = i * input_shape[axis] * inner_size; in Softmax()
120 int sum_outter_offset = i * inner_size; in Softmax()
121 for (int k = 0; k < inner_size; k++) { in Softmax()
125 int axis_offset = inner_offset + j * inner_size; in Softmax()
129 int axis_offset = inner_offset + j * inner_size; in Softmax()
136 int outter_offset = i * input_shape[axis] * inner_size; in Softmax()
137 int sum_outter_offset = i * inner_size; in Softmax()
139 int axis_offset = outter_offset + j * inner_size; in Softmax()
[all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/
gather_base.c
19 int Gather(const void *input, int outer_size, int inner_size, int limit, const int *indices, int in… in Gather() argument
28 const int8_t *int8_in_m = int8_in + inner_size * m * limit * data_size; in Gather()
29 int8_t *int8_out_m = int8_out + inner_size * m * indices_element_size * data_size; in Gather()
39 memcpy(int8_out_m + i * inner_size * data_size, int8_in_m + index * inner_size * data_size, in Gather()
40 data_size * inner_size); in Gather()
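
The generic Gather in gather_base.c is type-agnostic: it treats the tensor as raw bytes and memcpy's inner_size * data_size bytes per gathered index, which is why the int8_t pointer casts appear in the match. A hedged C sketch of that byte-level variant (the real function also range-checks indices against limit and returns an error code; that handling is reduced to a comment here):

#include <string.h>
#include <stdio.h>
#include <stdint.h>

/* Type-agnostic gather: input is [outer_size, limit, inner_size] elements of
 * data_size bytes each; copy the rows named by `indices` into the output. */
static void gather_sketch(const void *input, int outer_size, int inner_size,
                          int limit, const int *indices,
                          int indices_element_size, void *output, int data_size) {
  const int8_t *int8_in = (const int8_t *)input;
  int8_t *int8_out = (int8_t *)output;
  for (int m = 0; m < outer_size; m++) {
    const int8_t *int8_in_m = int8_in + (size_t)inner_size * m * limit * data_size;
    int8_t *int8_out_m = int8_out + (size_t)inner_size * m * indices_element_size * data_size;
    for (int i = 0; i < indices_element_size; i++) {
      int index = indices[i];  /* the real code range-checks this against limit */
      memcpy(int8_out_m + (size_t)i * inner_size * data_size,
             int8_in_m + (size_t)index * inner_size * data_size,
             (size_t)data_size * inner_size);
    }
  }
}

int main(void) {
  const float in[2 * 2] = {1.f, 2.f, 3.f, 4.f};  /* outer=1, limit=2, inner=2 */
  float out[2];
  const int indices[1] = {1};
  gather_sketch(in, 1, 2, 2, indices, 1, out, (int)sizeof(float));
  printf("%g %g\n", out[0], out[1]);  /* 3 4 */
  return 0;
}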
