Home
last modified time | relevance | path

Searched refs: vaccOPQR (Results 1 – 25 of 34) sorted by relevance

1 2

/external/XNNPACK/src/qs8-dwconv/gen/
Dup32x9-minmax-neon-mul16.c95 int32x4_t vaccOPQR = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() local
114 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi0xOPQRSTUV), vget_low_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
132 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi1xOPQRSTUV), vget_low_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
150 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi2xOPQRSTUV), vget_low_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
168 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi3xOPQRSTUV), vget_low_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
186 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi4xOPQRSTUV), vget_low_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
204 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi5xOPQRSTUV), vget_low_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
222 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi6xOPQRSTUV), vget_low_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
240 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi7xOPQRSTUV), vget_low_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
258 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi8xOPQRSTUV), vget_low_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
[all …]
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-neon-ld64-x32.c59 int32x4_t vaccOPQR = vmulq_s32(vmovl_s16(vget_low_s16(vexOPQRSTUV)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32() local
68 vaccOPQR = vmlaq_s32(vaccOPQR, vmovl_s16(vget_low_s16(veyOPQRSTUV)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
77 vaccOPQR = vsraq_n_s32(vaccOPQR, vbicq_s32(vaccOPQR, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
86 vaccOPQR = vrshlq_s32(vaccOPQR, vright_shift); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
92 …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
Dminmax-wasmsimd-x32.c52 …v128_t vaccOPQR = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() local
61vaccOPQR = wasm_i32x4_add(vaccOPQR, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vyOPQRSTUV), vy_mult… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
70 …nst v128_t vremOPQR = wasm_i32x4_add(wasm_v128_and(vaccOPQR, vremainder_mask), wasm_i32x4_shr(vacc… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
79vaccOPQR = wasm_i32x4_sub(wasm_i32x4_shr(vaccOPQR, vshift), wasm_i32x4_gt(vremOPQR, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
85 …v128_t voutOPQRSTUV = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV), voutput… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
Dminmax-sse41-mul32-ld32-x32.c61 __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() local
70 vaccOPQR = _mm_add_epi32(vaccOPQR, _mm_mullo_epi32(vyOPQR, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
79 … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
88vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
94 __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
Dminmax-xop-mul32-ld32-x32.c66 __m128i vaccOPQR = _mm_macc_epi32(vxOPQR, vx_multiplier, vzero_point_product); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() local
75 vaccOPQR = _mm_macc_epi32(vyOPQR, vy_multiplier, vaccOPQR); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
84 … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
93vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
99 __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
Dminmax-sse41-mul16-ld64-x32.c90 …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() local
99 vaccOPQR = _mm_add_epi32(vaccOPQR, _mm_unpacklo_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
108 … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
117vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
123 __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
Dminmax-sse2-mul16-ld64-x32.c98 …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() local
107 vaccOPQR = _mm_add_epi32(vaccOPQR, _mm_unpacklo_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
116 … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
125vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
131 __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-neon-ld64-x32.c56 int32x4_t vaccOPQR = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vexOPQRSTUV)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32() local
65 vaccOPQR = vsraq_n_s32(vaccOPQR, vbicq_s32(vaccOPQR, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
74 vaccOPQR = vrshlq_s32(vaccOPQR, vright_shift); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
80 …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
Dminmax-wasmsimd-x32.c48 …v128_t vaccOPQR = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() local
57 …nst v128_t vremOPQR = wasm_i32x4_add(wasm_v128_and(vaccOPQR, vremainder_mask), wasm_i32x4_shr(vacc… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
66vaccOPQR = wasm_i32x4_sub(wasm_i32x4_shr(vaccOPQR, vshift), wasm_i32x4_gt(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
72 …v128_t voutOPQRSTUV = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV), voutput… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
Dminmax-sse41-mul32-ld32-x32.c54 __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() local
63 … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
72vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
78 __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
Dminmax-xop-mul32-ld32-x32.c59 __m128i vaccOPQR = _mm_macc_epi32(vxOPQR, vx_multiplier, vzero_point_product); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() local
68 … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
77vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
83 __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
Dminmax-sse2-mul16-ld64-x32.c73 …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() local
82 … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
91vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
97 __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
Dminmax-sse41-mul16-ld64-x32.c69 …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() local
78 … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
87vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
93 __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
/external/XNNPACK/src/f32-spmm/gen/
D32x1-minmax-wasmsimd-x86-pipelined-x2.c58 v128_t vaccOPQR = vw; in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() local
69 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
88 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
111 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
133 v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
D32x1-minmax-wasmsimd-x86-x4.c165 v128_t vaccOPQR = vaccOPQRx0; in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4() local
173 vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx1); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
181 vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx2); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
189 vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx3); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
210 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
220 v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
D32x1-minmax-wasmsimd-arm-x4.c165 v128_t vaccOPQR = vaccOPQRx0; in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4() local
173 vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx1); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
181 vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx2); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
189 vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx3); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
210 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
220 v128_t voutOPQR = wasm_f32x4_min(vaccOPQR, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
D32x1-minmax-wasmsimd-arm-pipelined-x2.c58 v128_t vaccOPQR = vw; in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() local
69 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
88 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
111 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
133 v128_t voutOPQR = wasm_f32x4_min(vaccOPQR, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
D32x1-minmax-wasmsimd-x86-x2.c109 v128_t vaccOPQR = vaccOPQRx0; in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2() local
117 vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx1); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2()
138 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2()
148 v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2()
D32x1-minmax-neon-x2.c111 float32x4_t vaccOPQR = vaccOPQRx0; in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() local
119 vaccOPQR = vaddq_f32(vaccOPQR, vaccOPQRx1); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()
143 vaccOPQR = vmlaq_f32(vaccOPQR, viOPQR, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()
153 float32x4_t voutOPQR = vminq_f32(vaccOPQR, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()
D32x1-minmax-neonfma-x2.c111 float32x4_t vaccOPQR = vaccOPQRx0; in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() local
119 vaccOPQR = vaddq_f32(vaccOPQR, vaccOPQRx1); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()
143 vaccOPQR = vfmaq_f32(vaccOPQR, viOPQR, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()
153 float32x4_t voutOPQR = vminq_f32(vaccOPQR, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()
D32x1-minmax-wasmsimd-arm-x2.c109 v128_t vaccOPQR = vaccOPQRx0; in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2() local
117 vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx1); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2()
138 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2()
148 v128_t voutOPQR = wasm_f32x4_min(vaccOPQR, vmax); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2()
D32x1-minmax-wasmsimd-x86.c48 v128_t vaccOPQR = vacc0123; in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local
69 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
79 v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
D32x1-minmax-wasmsimd-x86-pipelined.c58 v128_t vaccOPQR = vw; in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local
71 vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
93 v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
/external/XNNPACK/src/qs8-gavgpool/gen/
D7p7x-minmax-neon-c32-acc2.c107 const int32x4_t vaccOPQR = vaddw_s16(vbias, vget_low_s16(vacc0xOPQRSTUV)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2() local
116 vst1q_s32(b, vaccOPQR); b += 4; in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
224 int32x4_t vaccOPQR = vld1q_s32(b + 24); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2() local
233 vaccOPQR = vaddw_s16(vaccOPQR, vget_low_s16(vacc0xOPQRSTUV)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
242 vst1q_s32(b, vaccOPQR); b += 4; in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
378 int32x4_t vaccOPQR = vld1q_s32(buffer); buffer += 4; in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2() local
387 vaccOPQR = vaddw_s16(vaccOPQR, vget_low_s16(vacc0xOPQRSTUV)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
396 const int32x4_t vsgnaccOPQR = vreinterpretq_s32_u32(vcltq_s32(vaccOPQR, vmovq_n_s32(0))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
412 const int64x2_t vprodOP = vmull_s32(vget_low_s32(vaccOPQR), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
413 const int64x2_t vprodQR = vmull_high_s32(vaccOPQR, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
[all …]
D7x-minmax-neon-c32-acc2.c130 int32x4_t vaccOPQR = vaddw_s16(vbias, vget_low_s16(vacc0xOPQRSTUV)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2() local
139 const int32x4_t vsgnaccOPQR = vreinterpretq_s32_u32(vcltq_s32(vaccOPQR, vmovq_n_s32(0))); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
155 const int64x2_t vprodOP = vmull_s32(vget_low_s32(vaccOPQR), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
156 const int64x2_t vprodQR = vmull_high_s32(vaccOPQR, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
189 const int64x2_t vprodOP = vmull_s32(vget_low_s32(vaccOPQR), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
190 const int64x2_t vprodQR = vmull_s32(vget_high_s32(vaccOPQR), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
236 vaccOPQR = vuzp1q_s32(vreinterpretq_s32_s64(vaccOP), vreinterpretq_s32_s64(vaccQR)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
242 …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vaccOPQR), vaccSTUV), voutput… in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
253 vaccOPQR = vcombine_s32(vmovn_s64(vaccOP), vmovn_s64(vaccQR)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
259 …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV))… in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()

1 2