/external/XNNPACK/src/qs8-dwconv/gen/

D  up32x9-minmax-neon-mul16.c
      95  int32x4_t vaccOPQR = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() local
     114  vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi0xOPQRSTUV), vget_low_s16(vk0xOPQRSTUV));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
     132  vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi1xOPQRSTUV), vget_low_s16(vk1xOPQRSTUV));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
     150  vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi2xOPQRSTUV), vget_low_s16(vk2xOPQRSTUV));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
     168  vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi3xOPQRSTUV), vget_low_s16(vk3xOPQRSTUV));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
     186  vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi4xOPQRSTUV), vget_low_s16(vk4xOPQRSTUV));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
     204  vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi5xOPQRSTUV), vget_low_s16(vk5xOPQRSTUV));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
     222  vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi6xOPQRSTUV), vget_low_s16(vk6xOPQRSTUV));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
     240  vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi7xOPQRSTUV), vget_low_s16(vk7xOPQRSTUV));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
     258  vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi8xOPQRSTUV), vget_low_s16(vk8xOPQRSTUV));  in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
     [all …]
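A note on the pattern above: this generated QS8 depthwise-convolution kernel widens signed 8-bit inputs and weights to 16 bits and accumulates each of the nine taps into int32x4 lanes with vmlal_s16. A minimal sketch of that accumulation, assuming a single 4-lane accumulator, 9 taps, and 8-channel rows; names and layout are illustrative, not XNNPACK's packed-weight format, and requantization is omitted:

```c
#include <arm_neon.h>
#include <stdint.h>
#include <stddef.h>

static int32x4_t dwconv9_acc4(const int8_t* rows[9],   // 9 input rows, 8 channels each
                              const int8_t* weights,   // 9 taps x 8 channels, tap-major
                              const int32_t* bias)     // 4 per-channel biases
{
  int32x4_t vacc = vld1q_s32(bias);  // start from the bias, like the vld1q_s32(w) at line 95
  for (size_t t = 0; t < 9; t++) {
    const int16x8_t vi = vmovl_s8(vld1_s8(rows[t]));          // widen inputs s8 -> s16
    const int16x8_t vk = vmovl_s8(vld1_s8(weights + t * 8));  // widen weights s8 -> s16
    // Widening multiply-accumulate (s16 x s16 -> s32) on the low 4 lanes,
    // mirroring the per-tap vmlal_s16(vacc, vget_low_s16(vi), vget_low_s16(vk)) lines.
    vacc = vmlal_s16(vacc, vget_low_s16(vi), vget_low_s16(vk));
  }
  return vacc;
}
```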
/external/XNNPACK/src/qs8-vadd/gen/

D  minmax-neon-ld64-x32.c
     59  int32x4_t vaccOPQR = vmulq_s32(vmovl_s16(vget_low_s16(vexOPQRSTUV)), vx_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32() local
     68  vaccOPQR = vmlaq_s32(vaccOPQR, vmovl_s16(vget_low_s16(veyOPQRSTUV)), vy_multiplier);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
     77  vaccOPQR = vsraq_n_s32(vaccOPQR, vbicq_s32(vaccOPQR, vzero_shift_mask), 31);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
     86  vaccOPQR = vrshlq_s32(vaccOPQR, vright_shift);  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
     92  …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV))…  in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()

D  minmax-wasmsimd-x32.c
     52  v128_t vaccOPQR = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…  in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() local
     61  …vaccOPQR = wasm_i32x4_add(vaccOPQR, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vyOPQRSTUV), vy_mult…  in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
     70  …nst v128_t vremOPQR = wasm_i32x4_add(wasm_v128_and(vaccOPQR, vremainder_mask), wasm_i32x4_shr(vacc…  in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
     79  …vaccOPQR = wasm_i32x4_sub(wasm_i32x4_shr(vaccOPQR, vshift), wasm_i32x4_gt(vremOPQR, vremainder_thr…  in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
     85  …v128_t voutOPQRSTUV = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV), voutput…  in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()

D  minmax-sse41-mul32-ld32-x32.c
     61  __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier));  in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() local
     70  vaccOPQR = _mm_add_epi32(vaccOPQR, _mm_mullo_epi32(vyOPQR, vy_multiplier));  in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
     79  … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si…  in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
     88  …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…  in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
     94  __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()

D  minmax-xop-mul32-ld32-x32.c
     66  __m128i vaccOPQR = _mm_macc_epi32(vxOPQR, vx_multiplier, vzero_point_product);  in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() local
     75  vaccOPQR = _mm_macc_epi32(vyOPQR, vy_multiplier, vaccOPQR);  in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
     84  … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si…  in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
     93  …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…  in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
     99  __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point);  in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()

D  minmax-sse41-mul16-ld64-x32.c
     90  …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO…  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() local
     99  vaccOPQR = _mm_add_epi32(vaccOPQR, _mm_unpacklo_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi));  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
    108  … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si…  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
    117  …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
    123  __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()

D  minmax-sse2-mul16-ld64-x32.c
     98  …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO…  in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() local
    107  vaccOPQR = _mm_add_epi32(vaccOPQR, _mm_unpacklo_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi));  in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
    116  … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si…  in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
    125  …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…  in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
    131  __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point);  in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
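All six vadd variants above implement the same fixed-point recipe: widen both int8 inputs, form acc = zero_point_product + x·x_multiplier + y·y_multiplier in 32 bits, divide by 2^shift with a remainder-based round-to-nearest correction, then saturate-narrow and add the output zero point. A minimal SSE4.1 sketch of one 4-lane block of that recipe, with illustrative parameter names (a model of the pattern, not XNNPACK's exact code):

```c
#include <smmintrin.h>  // SSE4.1: _mm_mullo_epi32

static __m128i qs8_vadd_requant4(__m128i vx, __m128i vy,  // sign-extended int32 lanes
                                 __m128i vzero_point_product,
                                 __m128i vx_multiplier, __m128i vy_multiplier,
                                 __m128i vremainder_mask, __m128i vremainder_threshold,
                                 __m128i vshift)
{
  // acc = zero_point_product + x * x_multiplier + y * y_multiplier
  __m128i vacc = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx, vx_multiplier));
  vacc = _mm_add_epi32(vacc, _mm_mullo_epi32(vy, vy_multiplier));
  // rem = (acc & mask) + (acc < 0 ? -1 : 0): the compare yields all-ones for
  // negative lanes, biasing the remainder so negative values round correctly.
  const __m128i vrem = _mm_add_epi32(_mm_and_si128(vacc, vremainder_mask),
                                     _mm_cmpgt_epi32(_mm_setzero_si128(), vacc));
  // Arithmetic shift, then add 1 (subtract the all-ones compare mask) wherever
  // the remainder exceeds the threshold: a round-to-nearest division by 2^shift.
  vacc = _mm_sub_epi32(_mm_sra_epi32(vacc, vshift),
                       _mm_cmpgt_epi32(vrem, vremainder_threshold));
  return vacc;  // caller narrows with _mm_packs_epi32 and adds voutput_zero_point
}
```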
/external/XNNPACK/src/qs8-vaddc/gen/

D  minmax-neon-ld64-x32.c
     56  int32x4_t vaccOPQR = vmlaq_s32(vy_bias, vmovl_s16(vget_low_s16(vexOPQRSTUV)), vx_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32() local
     65  vaccOPQR = vsraq_n_s32(vaccOPQR, vbicq_s32(vaccOPQR, vzero_shift_mask), 31);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
     74  vaccOPQR = vrshlq_s32(vaccOPQR, vright_shift);  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
     80  …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV))…  in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()

D  minmax-wasmsimd-x32.c
     48  v128_t vaccOPQR = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…  in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() local
     57  …nst v128_t vremOPQR = wasm_i32x4_add(wasm_v128_and(vaccOPQR, vremainder_mask), wasm_i32x4_shr(vacc…  in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
     66  …vaccOPQR = wasm_i32x4_sub(wasm_i32x4_shr(vaccOPQR, vshift), wasm_i32x4_gt(vremOPQR, vremainder_thr…  in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
     72  …v128_t voutOPQRSTUV = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV), voutput…  in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()

D  minmax-sse41-mul32-ld32-x32.c
     54  __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier));  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() local
     63  … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si…  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
     72  …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
     78  __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()

D  minmax-xop-mul32-ld32-x32.c
     59  __m128i vaccOPQR = _mm_macc_epi32(vxOPQR, vx_multiplier, vzero_point_product);  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() local
     68  … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si…  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
     77  …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
     83  __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point);  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()

D  minmax-sse2-mul16-ld64-x32.c
     73  …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO…  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() local
     82  … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si…  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
     91  …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
     97  __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point);  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()

D  minmax-sse41-mul16-ld64-x32.c
     69  …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO…  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() local
     78  … vremOPQR = _mm_add_epi32(_mm_and_si128(vaccOPQR, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si…  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
     87  …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
     93  __m128i voutOPQRSTUV = _mm_adds_epi16(_mm_packs_epi32(vaccOPQR, vaccSTUV), voutput_zero_point);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
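The vaddc kernels add a scalar constant, so the constant operand's product is hoisted out of the loop: compare the vadd NEON kernel's vmulq_s32 + vmlaq_s32 pair against the single vmlaq_s32(vy_bias, …) at line 56 above. A scalar model of that structure, with illustrative names and the per-lane requantization deliberately simplified to a plain round-half-up shift (assumes shift >= 1):

```c
#include <stdint.h>
#include <stddef.h>

static void qs8_vaddc_model(const int8_t* x, int8_t y, int8_t* out, size_t n,
                            int32_t zero_point_product,
                            int32_t x_multiplier, int32_t y_multiplier,
                            int32_t shift, int32_t output_zero_point)
{
  // The constant operand's contribution is computed once, outside the loop
  // (this is what vy_bias / the precomputed vzero_point_product carry above).
  const int32_t bias = zero_point_product + (int32_t) y * y_multiplier;
  for (size_t i = 0; i < n; i++) {
    int32_t acc = bias + (int32_t) x[i] * x_multiplier;  // one multiply per element
    acc = (acc + (INT32_C(1) << (shift - 1))) >> shift;  // simplified rounding shift
    acc += output_zero_point;
    if (acc > INT8_MAX) acc = INT8_MAX;                  // saturate to int8, like the
    if (acc < INT8_MIN) acc = INT8_MIN;                  // vqmovn/_mm_packs narrowing
    out[i] = (int8_t) acc;
  }
}
```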
/external/XNNPACK/src/f32-spmm/gen/

D  32x1-minmax-wasmsimd-x86-pipelined-x2.c
     58  v128_t vaccOPQR = vw;  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() local
     69  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
     88  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
    111  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
    133  v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()

D  32x1-minmax-wasmsimd-x86-x4.c
    165  v128_t vaccOPQR = vaccOPQRx0;  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4() local
    173  vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx1);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
    181  vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx2);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
    189  vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx3);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
    210  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()
    220  v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x4()

D  32x1-minmax-wasmsimd-arm-x4.c
    165  v128_t vaccOPQR = vaccOPQRx0;  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4() local
    173  vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx1);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
    181  vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx2);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
    189  vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx3);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
    210  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
    220  v128_t voutOPQR = wasm_f32x4_min(vaccOPQR, vmax);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x4()
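The x2/x4 suffixes above unroll the nonzero loop with independent accumulators (vaccOPQRx0…x3) that are only summed at the end — the 165/173/181/189 lines — so consecutive multiply-adds don't serialize on a single register. A sketch of that structure for one 4-lane output slice, assuming a simplified sparse representation (plain value/index arrays rather than XNNPACK's packed sparse format):

```c
#include <wasm_simd128.h>
#include <stdint.h>
#include <stddef.h>

static v128_t spmm_acc4(const float* input,    // 4 floats per column index
                        const float* w,        // nonzero weight values
                        const int32_t* idx,    // column index of each nonzero
                        size_t nnz)
{
  // Four independent accumulators hide the latency of the dependent add chain.
  v128_t vaccx0 = wasm_f32x4_const_splat(0.0f);
  v128_t vaccx1 = vaccx0, vaccx2 = vaccx0, vaccx3 = vaccx0;
  size_t k = 0;
  for (; k + 4 <= nnz; k += 4) {
    vaccx0 = wasm_f32x4_add(vaccx0, wasm_f32x4_mul(wasm_v128_load(input + 4 * idx[k + 0]), wasm_f32x4_splat(w[k + 0])));
    vaccx1 = wasm_f32x4_add(vaccx1, wasm_f32x4_mul(wasm_v128_load(input + 4 * idx[k + 1]), wasm_f32x4_splat(w[k + 1])));
    vaccx2 = wasm_f32x4_add(vaccx2, wasm_f32x4_mul(wasm_v128_load(input + 4 * idx[k + 2]), wasm_f32x4_splat(w[k + 2])));
    vaccx3 = wasm_f32x4_add(vaccx3, wasm_f32x4_mul(wasm_v128_load(input + 4 * idx[k + 3]), wasm_f32x4_splat(w[k + 3])));
  }
  // Pairwise reduction, as in the vaccOPQR = add(vaccOPQR, vaccOPQRxN) lines.
  v128_t vacc = wasm_f32x4_add(wasm_f32x4_add(vaccx0, vaccx1),
                               wasm_f32x4_add(vaccx2, vaccx3));
  for (; k < nnz; k++) {  // remainder nonzeros, one at a time
    vacc = wasm_f32x4_add(vacc, wasm_f32x4_mul(wasm_v128_load(input + 4 * idx[k]), wasm_f32x4_splat(w[k])));
  }
  return vacc;
}
```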
D  32x1-minmax-wasmsimd-arm-pipelined-x2.c
     58  v128_t vaccOPQR = vw;  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() local
     69  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
     88  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
    111  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
    133  v128_t voutOPQR = wasm_f32x4_min(vaccOPQR, vmax);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()

D  32x1-minmax-wasmsimd-x86-x2.c
    109  v128_t vaccOPQR = vaccOPQRx0;  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2() local
    117  vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx1);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2()
    138  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2()
    148  v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_x2()

D  32x1-minmax-neon-x2.c
    111  float32x4_t vaccOPQR = vaccOPQRx0;  in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() local
    119  vaccOPQR = vaddq_f32(vaccOPQR, vaccOPQRx1);  in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()
    143  vaccOPQR = vmlaq_f32(vaccOPQR, viOPQR, vw);  in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()
    153  float32x4_t voutOPQR = vminq_f32(vaccOPQR, vmax);  in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()

D  32x1-minmax-neonfma-x2.c
    111  float32x4_t vaccOPQR = vaccOPQRx0;  in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() local
    119  vaccOPQR = vaddq_f32(vaccOPQR, vaccOPQRx1);  in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()
    143  vaccOPQR = vfmaq_f32(vaccOPQR, viOPQR, vw);  in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()
    153  float32x4_t voutOPQR = vminq_f32(vaccOPQR, vmax);  in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()

D  32x1-minmax-wasmsimd-arm-x2.c
    109  v128_t vaccOPQR = vaccOPQRx0;  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2() local
    117  vaccOPQR = wasm_f32x4_add(vaccOPQR, vaccOPQRx1);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2()
    138  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2()
    148  v128_t voutOPQR = wasm_f32x4_min(vaccOPQR, vmax);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_x2()

D  32x1-minmax-wasmsimd-x86.c
     48  v128_t vaccOPQR = vacc0123;  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local
     69  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
     79  v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()

D  32x1-minmax-wasmsimd-x86-pipelined.c
     58  v128_t vaccOPQR = vw;  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local
     71  vaccOPQR = wasm_f32x4_add(vaccOPQR, wasm_f32x4_mul(viOPQR, vw));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
     93  v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
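The only difference between the -arm and -x86 wasmsimd flavors above is the max clamp: -arm uses wasm_f32x4_min directly, while -x86 spells it as compare + bitselect. A plausible reason (an inference from the generated code, not stated in this listing) is that wasm's NaN-propagating f32x4.min lowers to a multi-instruction sequence on x86 SSE, whereas the explicit compare-and-select maps to a cmpps + blendvps pair:

```c
#include <wasm_simd128.h>

// The -arm flavor: a single wasm min (one fmin instruction on AArch64).
static v128_t clamp_max_arm(v128_t vacc, v128_t vmax) {
  return wasm_f32x4_min(vacc, vmax);
}

// The -x86 flavor: select vacc where vacc <= vmax, else vmax.
static v128_t clamp_max_x86(v128_t vacc, v128_t vmax) {
  return wasm_v128_bitselect(vacc, vmax, wasm_f32x4_le(vacc, vmax));
}
```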
/external/XNNPACK/src/qs8-gavgpool/gen/

D  7p7x-minmax-neon-c32-acc2.c
    107  const int32x4_t vaccOPQR = vaddw_s16(vbias, vget_low_s16(vacc0xOPQRSTUV));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2() local
    116  vst1q_s32(b, vaccOPQR); b += 4;  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    224  int32x4_t vaccOPQR = vld1q_s32(b + 24);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2() local
    233  vaccOPQR = vaddw_s16(vaccOPQR, vget_low_s16(vacc0xOPQRSTUV));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    242  vst1q_s32(b, vaccOPQR); b += 4;  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    378  int32x4_t vaccOPQR = vld1q_s32(buffer); buffer += 4;  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2() local
    387  vaccOPQR = vaddw_s16(vaccOPQR, vget_low_s16(vacc0xOPQRSTUV));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    396  const int32x4_t vsgnaccOPQR = vreinterpretq_s32_u32(vcltq_s32(vaccOPQR, vmovq_n_s32(0)));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    412  const int64x2_t vprodOP = vmull_s32(vget_low_s32(vaccOPQR), vget_low_s32(vmultiplier));  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    413  const int64x2_t vprodQR = vmull_high_s32(vaccOPQR, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
    [all …]

D  7x-minmax-neon-c32-acc2.c
    130  int32x4_t vaccOPQR = vaddw_s16(vbias, vget_low_s16(vacc0xOPQRSTUV));  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2() local
    139  const int32x4_t vsgnaccOPQR = vreinterpretq_s32_u32(vcltq_s32(vaccOPQR, vmovq_n_s32(0)));  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    155  const int64x2_t vprodOP = vmull_s32(vget_low_s32(vaccOPQR), vget_low_s32(vmultiplier));  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    156  const int64x2_t vprodQR = vmull_high_s32(vaccOPQR, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    189  const int64x2_t vprodOP = vmull_s32(vget_low_s32(vaccOPQR), vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    190  const int64x2_t vprodQR = vmull_s32(vget_high_s32(vaccOPQR), vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    236  vaccOPQR = vuzp1q_s32(vreinterpretq_s32_s64(vaccOP), vreinterpretq_s32_s64(vaccQR));  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    242  …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vaccOPQR), vaccSTUV), voutput…  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    253  vaccOPQR = vcombine_s32(vmovn_s64(vaccOP), vmovn_s64(vaccQR));  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
    259  …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV))…  in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
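Both global-average-pooling kernels above sum seven input rows into int16, widen into the int32 bias with vaddw_s16 (the 7p7x variant spills partial sums to a scratch buffer, b/buffer, between passes), then scale by a 32-bit multiplier in 64-bit arithmetic (the vmull_s32/vmull_high_s32 pairs) before narrowing. A scalar model of that final requantization, with illustrative names; the real code adjusts the rounding by the accumulator's sign (the vsgnaccOPQR lines), which this sketch folds into a plain round-half-up shift (assumes shift >= 1):

```c
#include <stdint.h>

static int8_t gavgpool_requant(int32_t acc,   // bias + sum over the 7 pooled rows
                               int32_t multiplier, int32_t shift,
                               int32_t output_zero_point)
{
  // 32 x 32 -> 64-bit product, like vmull_s32 on the low/high accumulator halves.
  const int64_t prod = (int64_t) acc * (int64_t) multiplier;
  const int64_t rounding = INT64_C(1) << (shift - 1);
  int32_t out = (int32_t) ((prod + rounding) >> shift) + output_zero_point;
  if (out > INT8_MAX) out = INT8_MAX;   // saturating narrow, like vqmovn
  if (out < INT8_MIN) out = INT8_MIN;
  return (int8_t) out;
}
```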