/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/ |
D | winograd_utils.h | 167 MS_STQ_F32(dst_data, m[0]); \ 168 MS_STQ_F32(dst_data + out_c, m[1]); \ 169 MS_STQ_F32(dst_data + dst_step * out_c, m[2]); \ 170 MS_STQ_F32(dst_data + dst_step * out_c + out_c, m[3]); 173 MS_STQ_F32(dst_data, m[0]); \ 174 MS_STQ_F32(dst_data + out_c, m[1]); \ 175 MS_STQ_F32(dst_data + 2 * out_c, m[2]); \ 176 MS_STQ_F32(dst_data + dst_step * out_c, m[3]); \ 177 MS_STQ_F32(dst_data + dst_step * out_c + out_c, m[4]); \ 178 MS_STQ_F32(dst_data + dst_step * out_c + 2 * out_c, m[5]); \ [all …]
|
D | activation_fp32.c | 33 MS_STQ_F32(dst + i, MS_MAXQ_F32(MS_LDQ_F32(src + i), zero)); in Fp32Relu() 61 MS_STQ_F32(dst + i, dst_tmp); in Fp32Relu6() 94 MS_STQ_F32(dst + i, MS_BLENDQ_F32(mul_tmp, src_tmp, mask)); in LRelu() 116 …MS_STQ_F32(dst + i, MS_DIVQ_F32(MS_MOVQ_F32(1.0f), MS_ADDQ_F32(MS_MOVQ_F32(1.0f), MS_LDQ_F32(dst +… in Sigmoid() 151 MS_STQ_F32(dst + i, MS_TANHX4_F32(input)); in Tanh() 180 MS_STQ_F32(dst + index, result); in Swish() 261 MS_STQ_F32(dst + i, res); in Gelu() 276 MS_STQ_F32(dst + i, res); in Gelu() 305 MS_STQ_F32(dst + i, MS_BLENDQ_F32(elu_tmp, src_tmp, mask)); in Elu()
|
D | add_fp32.c | 42 MS_STQ_F32(out + index, vout); in ElementOptAdd() 60 MS_STQ_F32(out + index, vout); in ElementOptAdd() 144 MS_STQ_F32(out + index, vout); in ElementOptAddRelu() 162 MS_STQ_F32(out + index, vout); in ElementOptAddRelu() 198 MS_STQ_F32(out + index, vout); in ElementOptAddRelu6() 216 MS_STQ_F32(out + index, vout); in ElementOptAddRelu6() 248 MS_STQ_F32(out + index, vout); in ElementAdd() 276 MS_STQ_F32(out + index, vout); in ElementAddRelu() 305 MS_STQ_F32(out + index, vout); in ElementAddRelu6()
|
D | cumsum_fp32.c | 33 MS_STQ_F32(layer_output + j, val); in Cumsum() 47 MS_STQ_F32(layer_output + j, zero_val); in Cumsum() 68 MS_STQ_F32(layer_output + k, out_val); in Cumsum() 93 MS_STQ_F32(layer_output + j, val); in CumsumReverse() 107 MS_STQ_F32(layer_output + j, zero_val); in CumsumReverse() 128 MS_STQ_F32(layer_output - k - 3, out_val); in CumsumReverse()
|
D | mul_fp32.c | 40 MS_STQ_F32(out + index, vout); in ElementMul() 68 MS_STQ_F32(out + index, vout); in ElementMulRelu() 97 MS_STQ_F32(out + index, vout); in ElementMulRelu6() 203 MS_STQ_F32(out + index, vout); in ElementOptMul() 223 MS_STQ_F32(out + index, vout); in ElementOptMul() 251 MS_STQ_F32(out + index, vout); in ElementOptMulRelu() 273 MS_STQ_F32(out + index, vout); in ElementOptMulRelu() 303 MS_STQ_F32(out + index, vout); in ElementOptMulRelu6() 327 MS_STQ_F32(out + index, vout); in ElementOptMulRelu6()
|
D | scale_fp32.c | 45 MS_STQ_F32(out_data + in_offset, result); in ScaleInner() 78 MS_STQ_F32(out_data + in_offset, result); in ScaleAxis() 137 MS_STQ_F32(out_data + in_offset, result); in ScaleInnerRelu() 179 MS_STQ_F32(out_data + in_offset, result); in ScaleAxisRelu() 241 MS_STQ_F32(out_data + in_offset, result); in ScaleInnerRelu6() 285 MS_STQ_F32(out_data + in_offset, result); in ScaleAxisRelu6()
|
D | instance_norm_fp32.c | 104 MS_STQ_F32(dst + index, dstv4); in InstanceNorm() 165 MS_STQ_F32(dst + index * channel, outv), MS_STQ_F32(dst + index * channel + C4NUM, outv1); in InstanceNormC4HW4ArmSse() 166 …MS_STQ_F32(dst + index * channel + C8NUM, outv2), MS_STQ_F32(dst + index * channel + C12NUM, outv3… in InstanceNormC4HW4ArmSse() 195 MS_STQ_F32(dst + index * channel, outv); in InstanceNormC4HW4ArmSse() 196 MS_STQ_F32(dst + index * channel + C4NUM, outv1); in InstanceNormC4HW4ArmSse() 214 MS_STQ_F32(dst + index * channel, MS_ADDQ_F32(MS_MULQ_F32(outv, gammav), betav)); in InstanceNormC4HW4ArmSse()
|
D | conv_depthwise_fp32.c | 384 MS_STQ_F32(line + lw * ic, b0); in ConvDw3x3RowLeft() 385 MS_STQ_F32(line + lw * ic + 4, b1); in ConvDw3x3RowLeft() 386 MS_STQ_F32(line + lw * ic + 8, b2); in ConvDw3x3RowLeft() 387 MS_STQ_F32(line + lw * ic + 12, b3); in ConvDw3x3RowLeft() 416 MS_STQ_F32(line + lw * ic, b0); in ConvDw3x3RowMiddle() 417 MS_STQ_F32(line + lw * ic + 4, b1); in ConvDw3x3RowMiddle() 418 MS_STQ_F32(line + lw * ic + 8, b2); in ConvDw3x3RowMiddle() 419 MS_STQ_F32(line + lw * ic + 12, b3); in ConvDw3x3RowMiddle() 449 MS_STQ_F32(line + lw * ic, b0); in ConvDw3x3RowRight() 450 MS_STQ_F32(line + lw * ic + 4, b1); in ConvDw3x3RowRight() [all …]
|
D | prelu_fp32.c | 123 MS_STQ_F32(cur_out + j, res); in PRelu() 157 MS_STQ_F32(output + i, MS_BLENDQ_F32(mul_tmp, src_tmp, mask)); in PReluShareChannel()
|
D | winograd_utils.c | 77 MS_STQ_F32(dst_data + i * dst_step, m[i]); in InputTransform4x4Unit() 148 MS_STQ_F32(dst_data + i * dst_step, m[i]); in InputTransform6x6Unit() 246 MS_STQ_F32(dst_data + i * dst_step, m[i]); in InputTransform8x8Unit_block4() 2962 MS_STQ_F32(dst_data + dst_k_offset + 0 * out_c, m[m_k_offset]); in OutputTransform8x6Unit() 2963 MS_STQ_F32(dst_data + dst_k_offset + 1 * out_c, m[m_k_offset + 1]); in OutputTransform8x6Unit() 2964 MS_STQ_F32(dst_data + dst_k_offset + 2 * out_c, m[m_k_offset + 2]); in OutputTransform8x6Unit() 2965 MS_STQ_F32(dst_data + dst_k_offset + 3 * out_c, m[m_k_offset + 3]); in OutputTransform8x6Unit() 2966 MS_STQ_F32(dst_data + dst_k_offset + 4 * out_c, m[m_k_offset + 4]); in OutputTransform8x6Unit() 2967 MS_STQ_F32(dst_data + dst_k_offset + 5 * out_c, m[m_k_offset + 5]); in OutputTransform8x6Unit() 3090 MS_STQ_F32(dst_data + dst_k_offset + 0 * out_c, m[m_k_offset]); in OutputTransform8x6ReluUnit() [all …]
|
D | exp_fp32.c | 70 MS_STQ_F32(dst + i, MS_MULQ_F32(MS_LDQ_F32(dst + i), scale)); in ExpFusionFp32()
|
D | pooling_fp32.c | 107 MS_STQ_F32(dst_c_ptr, tmp_avg); in AvgPooling() 210 MS_STQ_F32(dst_c_ptr, tmp_max); in MaxPooling()
|
D | exp_fp32.h | 54 MS_STQ_F32(dst, VexpFp32(input)); in simd_exp()
|
D | power_fp32.c | 75 MS_STQ_F32(output + i, result); in PowerBroadCast()
|
D | resize_fp32.c | 184 MS_STQ_F32(linear_output + w * in_c + c, interp_value); in InterpRow() 220 MS_STQ_F32(output + w * in_c + c, interp_value); in InterpCol() 352 MS_STQ_F32(dst_w + c, interp_value); in BicubicInterpRow() 406 MS_STQ_F32(dst_w + c, interp_value); in BicubicInterpCol()
|
D | winograd_transform.c | 64 MS_STQ_F32(dst_addr, MS_LDQ_F32(src_addr)); in WinogradInputTransform()
|
D | pack_fp32.c | 333 MS_STQ_F32((float *)dst + dst_c_offset, MS_LDQ_F32((float *)src + src_c_offset)); in PackNC4HW4ToNHWCFp32()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ |
D | ms_simd_instructions.h | 57 #define MS_STQ_F32 vst1q_f32 macro 133 #define MS_STQ_F32 _mm_storeu_ps macro 177 MS_STQ_F32(output_ptr + 0 * num, dst##1); \ 178 MS_STQ_F32(output_ptr + 1 * num, dst##2); \ 179 MS_STQ_F32(output_ptr + 2 * num, dst##3); \ 180 MS_STQ_F32(output_ptr + 3 * num, dst##4); \ 181 MS_STQ_F32(output_ptr + 4 * num, dst##5); \ 182 MS_STQ_F32(output_ptr + 5 * num, dst##6); \ 183 MS_STQ_F32(output_ptr + 6 * num, dst##7); \ 184 MS_STQ_F32(output_ptr + 7 * num, dst##8);
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/ |
D | activation_grad.c | 128 …MS_STQ_F32(dst + i, MS_DIVQ_F32(MS_LDQ_F32(src0 + i), MS_ADDQ_F32(MS_MOVQ_F32(1.0f), MS_LDQ_F32(ds… in SoftplusGrad()
|