Home
last modified time | relevance | path

Searched refs:MS_STQ_F32 (Results 1 – 19 of 19) sorted by relevance

/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/
Dwinograd_utils.h167 MS_STQ_F32(dst_data, m[0]); \
168 MS_STQ_F32(dst_data + out_c, m[1]); \
169 MS_STQ_F32(dst_data + dst_step * out_c, m[2]); \
170 MS_STQ_F32(dst_data + dst_step * out_c + out_c, m[3]);
173 MS_STQ_F32(dst_data, m[0]); \
174 MS_STQ_F32(dst_data + out_c, m[1]); \
175 MS_STQ_F32(dst_data + 2 * out_c, m[2]); \
176 MS_STQ_F32(dst_data + dst_step * out_c, m[3]); \
177 MS_STQ_F32(dst_data + dst_step * out_c + out_c, m[4]); \
178 MS_STQ_F32(dst_data + dst_step * out_c + 2 * out_c, m[5]); \
[all …]
Dactivation_fp32.c33 MS_STQ_F32(dst + i, MS_MAXQ_F32(MS_LDQ_F32(src + i), zero)); in Fp32Relu()
61 MS_STQ_F32(dst + i, dst_tmp); in Fp32Relu6()
94 MS_STQ_F32(dst + i, MS_BLENDQ_F32(mul_tmp, src_tmp, mask)); in LRelu()
116MS_STQ_F32(dst + i, MS_DIVQ_F32(MS_MOVQ_F32(1.0f), MS_ADDQ_F32(MS_MOVQ_F32(1.0f), MS_LDQ_F32(dst +… in Sigmoid()
151 MS_STQ_F32(dst + i, MS_TANHX4_F32(input)); in Tanh()
180 MS_STQ_F32(dst + index, result); in Swish()
261 MS_STQ_F32(dst + i, res); in Gelu()
276 MS_STQ_F32(dst + i, res); in Gelu()
305 MS_STQ_F32(dst + i, MS_BLENDQ_F32(elu_tmp, src_tmp, mask)); in Elu()
Dadd_fp32.c42 MS_STQ_F32(out + index, vout); in ElementOptAdd()
60 MS_STQ_F32(out + index, vout); in ElementOptAdd()
144 MS_STQ_F32(out + index, vout); in ElementOptAddRelu()
162 MS_STQ_F32(out + index, vout); in ElementOptAddRelu()
198 MS_STQ_F32(out + index, vout); in ElementOptAddRelu6()
216 MS_STQ_F32(out + index, vout); in ElementOptAddRelu6()
248 MS_STQ_F32(out + index, vout); in ElementAdd()
276 MS_STQ_F32(out + index, vout); in ElementAddRelu()
305 MS_STQ_F32(out + index, vout); in ElementAddRelu6()
Dcumsum_fp32.c33 MS_STQ_F32(layer_output + j, val); in Cumsum()
47 MS_STQ_F32(layer_output + j, zero_val); in Cumsum()
68 MS_STQ_F32(layer_output + k, out_val); in Cumsum()
93 MS_STQ_F32(layer_output + j, val); in CumsumReverse()
107 MS_STQ_F32(layer_output + j, zero_val); in CumsumReverse()
128 MS_STQ_F32(layer_output - k - 3, out_val); in CumsumReverse()
Dmul_fp32.c40 MS_STQ_F32(out + index, vout); in ElementMul()
68 MS_STQ_F32(out + index, vout); in ElementMulRelu()
97 MS_STQ_F32(out + index, vout); in ElementMulRelu6()
203 MS_STQ_F32(out + index, vout); in ElementOptMul()
223 MS_STQ_F32(out + index, vout); in ElementOptMul()
251 MS_STQ_F32(out + index, vout); in ElementOptMulRelu()
273 MS_STQ_F32(out + index, vout); in ElementOptMulRelu()
303 MS_STQ_F32(out + index, vout); in ElementOptMulRelu6()
327 MS_STQ_F32(out + index, vout); in ElementOptMulRelu6()
Dscale_fp32.c45 MS_STQ_F32(out_data + in_offset, result); in ScaleInner()
78 MS_STQ_F32(out_data + in_offset, result); in ScaleAxis()
137 MS_STQ_F32(out_data + in_offset, result); in ScaleInnerRelu()
179 MS_STQ_F32(out_data + in_offset, result); in ScaleAxisRelu()
241 MS_STQ_F32(out_data + in_offset, result); in ScaleInnerRelu6()
285 MS_STQ_F32(out_data + in_offset, result); in ScaleAxisRelu6()
Dinstance_norm_fp32.c104 MS_STQ_F32(dst + index, dstv4); in InstanceNorm()
165 MS_STQ_F32(dst + index * channel, outv), MS_STQ_F32(dst + index * channel + C4NUM, outv1); in InstanceNormC4HW4ArmSse()
166MS_STQ_F32(dst + index * channel + C8NUM, outv2), MS_STQ_F32(dst + index * channel + C12NUM, outv3… in InstanceNormC4HW4ArmSse()
195 MS_STQ_F32(dst + index * channel, outv); in InstanceNormC4HW4ArmSse()
196 MS_STQ_F32(dst + index * channel + C4NUM, outv1); in InstanceNormC4HW4ArmSse()
214 MS_STQ_F32(dst + index * channel, MS_ADDQ_F32(MS_MULQ_F32(outv, gammav), betav)); in InstanceNormC4HW4ArmSse()
Dconv_depthwise_fp32.c384 MS_STQ_F32(line + lw * ic, b0); in ConvDw3x3RowLeft()
385 MS_STQ_F32(line + lw * ic + 4, b1); in ConvDw3x3RowLeft()
386 MS_STQ_F32(line + lw * ic + 8, b2); in ConvDw3x3RowLeft()
387 MS_STQ_F32(line + lw * ic + 12, b3); in ConvDw3x3RowLeft()
416 MS_STQ_F32(line + lw * ic, b0); in ConvDw3x3RowMiddle()
417 MS_STQ_F32(line + lw * ic + 4, b1); in ConvDw3x3RowMiddle()
418 MS_STQ_F32(line + lw * ic + 8, b2); in ConvDw3x3RowMiddle()
419 MS_STQ_F32(line + lw * ic + 12, b3); in ConvDw3x3RowMiddle()
449 MS_STQ_F32(line + lw * ic, b0); in ConvDw3x3RowRight()
450 MS_STQ_F32(line + lw * ic + 4, b1); in ConvDw3x3RowRight()
[all …]
Dprelu_fp32.c123 MS_STQ_F32(cur_out + j, res); in PRelu()
157 MS_STQ_F32(output + i, MS_BLENDQ_F32(mul_tmp, src_tmp, mask)); in PReluShareChannel()
Dwinograd_utils.c77 MS_STQ_F32(dst_data + i * dst_step, m[i]); in InputTransform4x4Unit()
148 MS_STQ_F32(dst_data + i * dst_step, m[i]); in InputTransform6x6Unit()
246 MS_STQ_F32(dst_data + i * dst_step, m[i]); in InputTransform8x8Unit_block4()
2962 MS_STQ_F32(dst_data + dst_k_offset + 0 * out_c, m[m_k_offset]); in OutputTransform8x6Unit()
2963 MS_STQ_F32(dst_data + dst_k_offset + 1 * out_c, m[m_k_offset + 1]); in OutputTransform8x6Unit()
2964 MS_STQ_F32(dst_data + dst_k_offset + 2 * out_c, m[m_k_offset + 2]); in OutputTransform8x6Unit()
2965 MS_STQ_F32(dst_data + dst_k_offset + 3 * out_c, m[m_k_offset + 3]); in OutputTransform8x6Unit()
2966 MS_STQ_F32(dst_data + dst_k_offset + 4 * out_c, m[m_k_offset + 4]); in OutputTransform8x6Unit()
2967 MS_STQ_F32(dst_data + dst_k_offset + 5 * out_c, m[m_k_offset + 5]); in OutputTransform8x6Unit()
3090 MS_STQ_F32(dst_data + dst_k_offset + 0 * out_c, m[m_k_offset]); in OutputTransform8x6ReluUnit()
[all …]
Dexp_fp32.c70 MS_STQ_F32(dst + i, MS_MULQ_F32(MS_LDQ_F32(dst + i), scale)); in ExpFusionFp32()
Dpooling_fp32.c107 MS_STQ_F32(dst_c_ptr, tmp_avg); in AvgPooling()
210 MS_STQ_F32(dst_c_ptr, tmp_max); in MaxPooling()
Dexp_fp32.h54 MS_STQ_F32(dst, VexpFp32(input)); in simd_exp()
Dpower_fp32.c75 MS_STQ_F32(output + i, result); in PowerBroadCast()
Dresize_fp32.c184 MS_STQ_F32(linear_output + w * in_c + c, interp_value); in InterpRow()
220 MS_STQ_F32(output + w * in_c + c, interp_value); in InterpCol()
352 MS_STQ_F32(dst_w + c, interp_value); in BicubicInterpRow()
406 MS_STQ_F32(dst_w + c, interp_value); in BicubicInterpCol()
Dwinograd_transform.c64 MS_STQ_F32(dst_addr, MS_LDQ_F32(src_addr)); in WinogradInputTransform()
Dpack_fp32.c333 MS_STQ_F32((float *)dst + dst_c_offset, MS_LDQ_F32((float *)src + src_c_offset)); in PackNC4HW4ToNHWCFp32()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/
Dms_simd_instructions.h57 #define MS_STQ_F32 vst1q_f32 macro
133 #define MS_STQ_F32 _mm_storeu_ps macro
177 MS_STQ_F32(output_ptr + 0 * num, dst##1); \
178 MS_STQ_F32(output_ptr + 1 * num, dst##2); \
179 MS_STQ_F32(output_ptr + 2 * num, dst##3); \
180 MS_STQ_F32(output_ptr + 3 * num, dst##4); \
181 MS_STQ_F32(output_ptr + 4 * num, dst##5); \
182 MS_STQ_F32(output_ptr + 5 * num, dst##6); \
183 MS_STQ_F32(output_ptr + 6 * num, dst##7); \
184 MS_STQ_F32(output_ptr + 7 * num, dst##8);
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/
Dactivation_grad.c128MS_STQ_F32(dst + i, MS_DIVQ_F32(MS_LDQ_F32(src0 + i), MS_ADDQ_F32(MS_MOVQ_F32(1.0f), MS_LDQ_F32(ds… in SoftplusGrad()