/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp16/ |
D | group_convolution_fp16.cc | 43 float16_t *src_ptr = reinterpret_cast<float16_t *>(ori_in_data_) + group_id * sub_in_channel; in SeparateInput() local 45 MS_ASSERT(src_ptr); in SeparateInput() 48 memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(float16_t)); in SeparateInput() 49 src_ptr += ori_in_channel; in SeparateInput() 53 float *src_ptr = reinterpret_cast<float *>(ori_in_data_) + group_id * sub_in_channel; in SeparateInput() local 55 MS_ASSERT(src_ptr); in SeparateInput() 58 memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(float)); in SeparateInput() 59 src_ptr += ori_in_channel; in SeparateInput() 75 float16_t *src_ptr = sub_out_data; in PostConcat() local 78 memcpy(dst_ptr, src_ptr, sub_out_channel * sizeof(float16_t)); in PostConcat() [all …]
|
D | matmul_base_fp16.cc | 162 void MatmulBaseFP16CPUKernel::InitMatrixA(const void *src_ptr) { in InitMatrixA() argument 163 NNACL_CHECK_NULL_RETURN_VOID(src_ptr); in InitMatrixA() 168 …Float32ToFloat16(reinterpret_cast<const float *>(src_ptr), a_pack_ptr_, params_->batch * params_->… in InitMatrixA() 170 memcpy(a_pack_ptr_, src_ptr, params_->batch * params_->deep_ * sizeof(float16_t)); in InitMatrixA() 175 const int8_t *int8_src = reinterpret_cast<const int8_t *>(src_ptr); in InitMatrixA() 196 void MatmulBaseFP16CPUKernel::InitMatrixB(const void *src_ptr, TypeId src_data_type) { in InitMatrixB() argument 197 NNACL_CHECK_NULL_RETURN_VOID(src_ptr); in InitMatrixB() 198 const int8_t *int8_src = reinterpret_cast<const int8_t *>(src_ptr); in InitMatrixB() 203 Float32ToFloat16(reinterpret_cast<const float *>(src_ptr), b_pack_ptr_, in InitMatrixB() 208 …const auto *b_src = reinterpret_cast<const float16_t *>(src_ptr) + i * params_->col_align_ * param… in InitMatrixB() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/ |
D | matmul_fp32.h | 43 void RowMajor2ColMajor(const float *src_ptr, float *dst_ptr, int row, int col); 44 void RowMajor2Row4Major(const float *src_ptr, float *dst_ptr, int row, int col); 45 void RowMajor2Row6Major(const float *src_ptr, float *dst_ptr, int row, int col); 46 void RowMajor2Row8Major(const float *src_ptr, float *dst_ptr, int row, int col); 47 void RowMajor2Row12Major(const float *src_ptr, float *dst_ptr, int row, int col); 48 void RowMajor2Row16Major(const float *src_ptr, float *dst_ptr, int row, int col); 49 void RowMajor2Row32Major(const float *src_ptr, float *dst_ptr, int row, int col); 50 void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col); 51 void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col); 52 void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col); [all …]
|
D | pack_fp32.c | 570 const float *src_ptr = src_batch + hw * channel + c; in PackNHWCToNCHWFp32() local 573 Transpose8X8Fp32Func_(src_ptr, dst_ptr, channel, plane); in PackNHWCToNCHWFp32() 577 dst_ptr[tc * plane + tr] = src_ptr[tr * channel + tc]; in PackNHWCToNCHWFp32() 583 const float *src_ptr = src_batch + hw * channel + c; in PackNHWCToNCHWFp32() local 586 dst_ptr[i] = src_ptr[i * channel]; in PackNHWCToNCHWFp32() 591 const float *src_ptr = src_batch + hw * channel; in PackNHWCToNCHWFp32() local 594 dst_ptr[i * plane] = src_ptr[i]; in PackNHWCToNCHWFp32() 605 inline void Transpose8X8Fp32Arm64(const float *src_ptr, float *dst_ptr, int src_stride, int dst_str… in Transpose8X8Fp32Arm64() argument 674 …: [ dst_ptr ] "r"(dst_ptr), [ src_ptr ] "r"(src_ptr), [ srcStride ] "r"(srcStride), [ dstStride ] … in Transpose8X8Fp32Arm64() 682 inline void Transpose8X8Fp32Arm32(const float *src_ptr, float *dst_ptr, int src_stride, int dst_str… in Transpose8X8Fp32Arm32() argument [all …]
|
D | pack_fp32.h | 56 typedef void (*Transpose8X8Fp32Func)(const float *src_ptr, float *dst_ptr, int src_stride, int dst_… 58 void Transpose8X8Fp32Arm64(const float *src_ptr, float *dst_ptr, int src_stride, int dst_stride); 61 void Transpose8X8Fp32Arm32(const float *src_ptr, float *dst_ptr, int src_stride, int dst_stride); 70 void Transpose8X8Fp32Avx(const float *src_ptr, float *dst_ptr, int src_stride, int dst_stride); 73 void Transpose8X8Fp32Sse(const float *src_ptr, float *dst_ptr, int src_stride, int dst_stride);
|
D | deconv_winograd_fp32.c | 166 void DeConvWgInputPack(const float *src_ptr, float *dst_ptr, int channel, int stride) { in DeConvWgInputPack() argument 169 const float *src = src_ptr; in DeConvWgInputPack() 224 void DeConvWgMergeArm32(const float *src_ptr, float *dst_ptr, size_t src_step, size_t dst_step) { in DeConvWgMergeArm32() argument 278 …: [ src_ptr ] "r"(src_ptr), [ dst_ptr ] "r"(dst_ptr), [ src_step ] "r"(src_step), [ dst_step ] "r"… in DeConvWgMergeArm32() 283 void DeConvWgMergeArm32(const float *src_ptr, float *dst_ptr, size_t src_step, size_t dst_step) { in DeConvWgMergeArm32() argument 337 …: [ src_ptr ] "r"(src_ptr), [ dst_ptr ] "r"(dst_ptr), [ src_step ] "r"(src_step), [ dst_step ] "r"… in DeConvWgMergeArm32() 345 const float *src_ptr = src; in DeConvWgMerge() local 407 …: [ src_ptr ] "r"(src_ptr), [ dst_ptr ] "r"(dst_ptr), [ src_step ] "r"(src_step), [ dst_step ] "r"… in DeConvWgMerge() 412 DeConvWgMergeArm32(src_ptr, dst_ptr, src_step, dst_step); in DeConvWgMerge() 415 const float *s = src_ptr + j * src_stride; in DeConvWgMerge() [all …]
|
D | winograd_transform.c | 135 const float *src_ptr = gemm_out + src_oc4_offset; in WinogradOutputNHWCTransform() local 138 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, output_channel, r_w, r_h, r_c); in WinogradOutputNHWCTransform() 147 const float *src_ptr = gemm_out + src_oc8_offset; in WinogradOutputNHWCTransform() local 150 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, output_channel, r_w, r_h, r_c); in WinogradOutputNHWCTransform() 192 const float *src_ptr = gemm_out + src_oc8_offset; in WinogradOutputNC4HW4Transform() local 195 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, r_c, r_w, r_h, r_c); in WinogradOutputNC4HW4Transform() 205 const float *src_ptr = gemm_out + src_oc4_offset; in WinogradOutputNC4HW4Transform() local 208 func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, r_c, r_w, r_h, r_c); in WinogradOutputNC4HW4Transform()
|
D | matmul_fp32.c | 29 void RowMajor2ColMajor(const float *src_ptr, float *dst_ptr, int row, int col) { in RowMajor2ColMajor() argument 32 dst_ptr[c * row + r] = src_ptr[r * col + c]; in RowMajor2ColMajor() 37 void RowMajor2Row4Major(const float *src_ptr, float *dst_ptr, int row, int col) { in RowMajor2Row4Major() argument 39 const float *src = src_ptr + r * col; in RowMajor2Row4Major() 55 void RowMajor2Row6Major(const float *src_ptr, float *dst_ptr, int row, int col) { in RowMajor2Row6Major() argument 57 const float *src = src_ptr + r * col; in RowMajor2Row6Major() 73 void RowMajor2Row8Major(const float *src_ptr, float *dst_ptr, int row, int col) { in RowMajor2Row8Major() argument 75 const float *src = src_ptr + r * col; in RowMajor2Row8Major() 91 void RowMajor2Row12Major(const float *src_ptr, float *dst_ptr, int row, int col) { in RowMajor2Row12Major() argument 93 const float *src = src_ptr + r * col; in RowMajor2Row12Major() [all …]
|
/third_party/mindspore/mindspore/lite/micro/coder/wrapper/fp32/ |
D | matmul_fp32_wrapper.c | 18 void InitMatrixA(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vect… in InitMatrixA() argument 20 memcpy(dst_ptr, src_ptr, (size_t)(params_->batch * params_->deep_) * sizeof(float)); in InitMatrixA() 24 const float *src = src_ptr + i * params_->deep_ * params_->row_; in InitMatrixA() 34 void InitMatrixB(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vect… in InitMatrixB() argument 37 …memcpy(dst_ptr, src_ptr, (size_t)(params_->batch * params_->col_ * params_->deep_) * sizeof(float)… in InitMatrixB() 40 const float *src = src_ptr + i * params_->deep_ * params_->col_; in InitMatrixB() 48 const float *src = src_ptr + i * params_->deep_ * params_->col_; in InitMatrixB()
|
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/control/ |
D | tensorlist_getitem.cc | 49 auto src_ptr = input0->GetTensor(index_); in Run() local 50 MS_ASSERT(src_ptr != nullptr); in Run() 51 if (src_ptr->data_type() != kTypeUnknown) { in Run() 52 if (src_ptr->ElementsNum() != out_tensors_.at(0)->ElementsNum()) { in Run() 53 MS_LOG(ERROR) << "src_ptr->ElementsNum():" << src_ptr->ElementsNum() in Run() 57 auto status = lite::Tensor::CopyTensorData(*src_ptr, out_tensors_.at(0)); in Run()
|
/third_party/flutter/skia/third_party/externals/libjpeg-turbo/ |
D | transupp.c | 206 JCOEFPTR src_ptr, dst_ptr; in do_flip_h() local 238 src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; in do_flip_h() 241 *dst_ptr++ = *src_ptr++; /* copy even column */ in do_flip_h() 242 *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */ in do_flip_h() 268 JCOEFPTR src_ptr, dst_ptr; in do_flip_v() local 314 src_ptr = src_row_ptr[dst_blk_x]; in do_flip_v() 318 *dst_ptr++ = *src_ptr++; in do_flip_v() 321 *dst_ptr++ = - *src_ptr++; in do_flip_v() 346 JCOEFPTR src_ptr, dst_ptr; in do_transpose() local 372 src_ptr = in do_transpose() [all …]
|
/third_party/libjpeg-turbo/ |
D | transupp.c | 206 JCOEFPTR src_ptr, dst_ptr; in do_flip_h() local 238 src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; in do_flip_h() 241 *dst_ptr++ = *src_ptr++; /* copy even column */ in do_flip_h() 242 *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */ in do_flip_h() 268 JCOEFPTR src_ptr, dst_ptr; in do_flip_v() local 314 src_ptr = src_row_ptr[dst_blk_x]; in do_flip_v() 318 *dst_ptr++ = *src_ptr++; in do_flip_v() 321 *dst_ptr++ = - *src_ptr++; in do_flip_v() 346 JCOEFPTR src_ptr, dst_ptr; in do_transpose() local 372 src_ptr = in do_transpose() [all …]
|
/third_party/ffmpeg/libavfilter/ |
D | vf_deband.c | 143 const uint8_t *src_ptr = (const uint8_t *)in->data[p]; in deband_8_c() local 159 … const int ref0 = src_ptr[av_clip(y + y_pos, 0, h) * src_linesize + av_clip(x + x_pos, 0, w)]; in deband_8_c() 160 … const int ref1 = src_ptr[av_clip(y + -y_pos, 0, h) * src_linesize + av_clip(x + x_pos, 0, w)]; in deband_8_c() 161 … const int ref2 = src_ptr[av_clip(y + -y_pos, 0, h) * src_linesize + av_clip(x + -x_pos, 0, w)]; in deband_8_c() 162 … const int ref3 = src_ptr[av_clip(y + y_pos, 0, h) * src_linesize + av_clip(x + -x_pos, 0, w)]; in deband_8_c() 163 const int src0 = src_ptr[y * src_linesize + x]; in deband_8_c() 202 const uint8_t *src_ptr = (const uint8_t *)in->data[p]; in deband_8_coupling_c() local 207 … const int ref0 = src_ptr[av_clip(y + y_pos, 0, h) * src_linesize + av_clip(x + x_pos, 0, w)]; in deband_8_coupling_c() 208 … const int ref1 = src_ptr[av_clip(y + -y_pos, 0, h) * src_linesize + av_clip(x + x_pos, 0, w)]; in deband_8_coupling_c() 209 … const int ref2 = src_ptr[av_clip(y + -y_pos, 0, h) * src_linesize + av_clip(x + -x_pos, 0, w)]; in deband_8_coupling_c() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/ |
D | gemm.c | 29 const float *src_ptr = v1; local 33 dst_ptr[c] += beta * src_ptr[c]; 35 src_ptr += stride; 56 static void RowMajor2Row4MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int le… argument 58 const float *src = src_ptr + r * lead; 68 static void RowMajor2Row8MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int le… argument 70 const float *src = src_ptr + r * lead; 81 static void RowMajor2Row12MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int l… argument 83 const float *src = src_ptr + r * lead; 93 static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col,… argument [all …]
|
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/int8/ |
D | group_convolution_int8.cc | 29 int8_t *src_ptr = reinterpret_cast<int8_t *>(ori_in_data_) + group_id * sub_in_channel; in SeparateInput() local 32 memcpy(dst_ptr, src_ptr, static_cast<size_t>(sub_in_channel) * sizeof(int8_t)); in SeparateInput() 33 src_ptr += ori_in_channel; in SeparateInput() 45 int8_t *src_ptr = sub_out_data; in PostConcat() local 48 memcpy(dst_ptr, src_ptr, static_cast<size_t>(sub_out_channel) * sizeof(int8_t)); in PostConcat() 49 src_ptr += sub_out_channel; in PostConcat()
|
/third_party/mindspore/mindspore/lite/src/delegate/npu/ |
D | transpose_kernel.cc | 34 const float *src_ptr = src_batch + hw * channel + c; in PackNHWCToNCHWFp32() local 36 Transpose8X8Fp32Arm64(src_ptr, dst_ptr, channel, plane); in PackNHWCToNCHWFp32() 40 const float *src_ptr = src_batch + hw * channel + c; in PackNHWCToNCHWFp32() local 43 dst_ptr[i] = src_ptr[i * channel]; in PackNHWCToNCHWFp32() 48 const float *src_ptr = src_batch + hw * channel; in PackNHWCToNCHWFp32() local 51 dst_ptr[i * plane] = src_ptr[i]; in PackNHWCToNCHWFp32()
|
/third_party/mindspore/mindspore/lite/src/runtime/kernel/arm/fp32/ |
D | group_convolution_fp32.cc | 35 float *src_ptr = reinterpret_cast<float *>(ori_in_data_) + group_id * sub_in_channel; in SeparateInput() local 38 memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(float)); in SeparateInput() 39 src_ptr += ori_in_channel; in SeparateInput() 56 float *src_ptr = sub_out_data; in PostConcat() local 59 memcpy(dst_ptr, src_ptr, sub_out_channel * sizeof(float)); in PostConcat() 60 src_ptr += sub_out_channel; in PostConcat()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/ |
D | warp_affine.cc | 132 size_t src_step, const uint8_t *src_ptr, uint8_t *dst_ptr) { in RemapBilinearNotCur1C() argument 136 const uint8_t *t_src_ptr = src_ptr + shy * src_step + shx; in RemapBilinearNotCur1C() 142 size_t src_step, const uint8_t *src_ptr, uint8_t *dst_ptr) { in RemapBilinearNotCur2C() argument 146 const uint8_t *t_src_ptr = src_ptr + shy * src_step + shx * 2; in RemapBilinearNotCur2C() 156 size_t src_step, const uint8_t *src_ptr, uint8_t *dst_ptr) { in RemapBilinearNotCur3C() argument 160 const uint8_t *t_src_ptr = src_ptr + shy * src_step + shx * 3; in RemapBilinearNotCur3C() 173 size_t src_step, const uint8_t *src_ptr, uint8_t *dst_ptr) { in RemapBilinearNotCur4C() argument 177 const uint8_t *t_src_ptr = src_ptr + shy * src_step + shx * 4; in RemapBilinearNotCur4C() 193 … size_t src_step, int cn, const uint8_t *src_ptr, uint8_t *dst_ptr) { in RemapBilinearNotCurMoreC() argument 197 const uint8_t *t_src_ptr = src_ptr + shy * src_step + shx * cn; in RemapBilinearNotCurMoreC() [all …]
|
/third_party/skia/third_party/externals/libjpeg-turbo/ |
D | transupp.c | 482 JCOEFPTR src_ptr, dst_ptr; in do_crop_ext_reflect() local 532 src_ptr = *src_row_ptr++; /* source goes right */ in do_crop_ext_reflect() 535 *dst_ptr++ = *src_ptr++; /* copy even column */ in do_crop_ext_reflect() 536 *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign in do_crop_ext_reflect() 551 src_ptr = *(--src_row_ptr); /* source goes left */ in do_crop_ext_reflect() 554 *dst_ptr++ = *src_ptr++; /* copy even column */ in do_crop_ext_reflect() 555 *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign in do_crop_ext_reflect() 665 JCOEFPTR src_ptr, dst_ptr; in do_reflect() local 687 src_ptr = *(--src_row_ptr); /* source goes left */ in do_reflect() 690 *dst_ptr++ = *src_ptr++; /* copy even column */ in do_reflect() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/ |
D | pack_fp16.c | 217 const float16_t *src_ptr = src_batch + hw * channel + c; in PackNHWCToNCHWFp16() local 222 Transpose16x8ARM64Fp16(src_ptr, dst_ptr, src_stride, dst_stride); in PackNHWCToNCHWFp16() 226 Transpose8x8A32Fp16(src_ptr, dst_ptr, src_stride, dst_stride); in PackNHWCToNCHWFp16() 230 dst_ptr[tc * plane + tr] = src_ptr[tr * channel + tc]; in PackNHWCToNCHWFp16() 236 const float16_t *src_ptr = src_batch + hw * channel + c; in PackNHWCToNCHWFp16() local 239 dst_ptr[i] = src_ptr[i * channel]; in PackNHWCToNCHWFp16() 244 const float16_t *src_ptr = src_batch + hw * channel; in PackNHWCToNCHWFp16() local 247 dst_ptr[i * plane] = src_ptr[i]; in PackNHWCToNCHWFp16() 662 inline void Transpose4x8ARM64Fp16(const float16_t *src_ptr, float16_t *dst_ptr, size_t src_stride, … in Transpose4x8ARM64Fp16() argument 697 : [ dst_ptr ] "r"(dst_ptr), [ src_ptr ] "r"(src_ptr), [ src_stride ] "r"(src_stride), in Transpose4x8ARM64Fp16() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/ |
D | split_with_over_lap_base.c | 30 const char *src_ptr = in_data + start_index * param->inner_stride_ * param->element_bytes_; in DoSplitWithOverlapParallel() local 34 (void)memcpy(dst_ptr + i * out_stride, src_ptr, out_stride); in DoSplitWithOverlapParallel() 35 src_ptr += input_stride; in DoSplitWithOverlapParallel()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/control/ |
D | tensorlist_setitem_infer.c | 92 TensorC *src_ptr = &input0->tensors_[i]; in TensorListSetItemInferShape() local 93 if (src_ptr == NULL) { in TensorListSetItemInferShape() 98 if (src_ptr->data_type_ != kTypeUnknown) { in TensorListSetItemInferShape() 99 out_shape.shape_[out_shape.size_] = src_ptr->shape_; in TensorListSetItemInferShape() 100 out_shape.shape_size_[out_shape.size_] = (int)(src_ptr->shape_size_); in TensorListSetItemInferShape()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/ |
D | matmul_int8.h | 31 void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col); 43 void RowMajor2Row8x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col); 51 void RowMajor2Row2x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col); 52 void RowMajor2Col16x2MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col); 60 void RowMajor2Row4x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
|
/third_party/mesa3d/src/gallium/auxiliary/util/ |
D | u_linear.c | 37 pipe_linear_to_tile(size_t src_stride, const void *src_ptr, in pipe_linear_to_tile() argument 51 ptr = (char*)src_ptr + src_stride * t->rows * y + bytes * x; in pipe_linear_to_tile() 61 void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, in pipe_linear_from_tile() argument 67 const char *src_ptr2 = (const char *) src_ptr; in pipe_linear_from_tile()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16_grad/ |
D | gemm_fp16.c | 26 static void Row2Col16Block16(const float16_t *src_ptr, float16_t *dst_ptr, size_t col) { in Row2Col16Block16() argument 128 : [ dst_c ] "r"(dst_ptr), [ src_c ] "r"(src_ptr), [ stride ] "r"(stride) in Row2Col16Block16() 136 const float16_t *src_ptr = v1; in AddMatrixFp16() local 146 float16x8_t src_0 = vld1q_f16(src_ptr + c); in AddMatrixFp16() 152 dst_ptr[c] += beta * src_ptr[c]; in AddMatrixFp16() 154 src_ptr += stride; in AddMatrixFp16() 306 const float16_t *src_ptr = src + r * stride; in RowMajor2Row8MajorStrideFp16() local 311 dst[cd8 * C8NUM * row + r * C8NUM + cm8] = src_ptr[c]; in RowMajor2Row8MajorStrideFp16()
|