/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-neon-ld128-x16.c | 43 …const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), vget_low_s8(va_zero_point)… in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16() local 46 const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), va_zero_point); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16() local 50 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16() 51 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16() 76 const int16x8_t vxa01234567 = vsubl_s8(va01234567, vget_low_s8(va_zero_point)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16() local 78 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16() local 81 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16() 82 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16()
|
D | minmax-neon-ld64-x8.c | 38 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8() local 40 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8() 41 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8() 60 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8() local 62 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8() 63 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
|
D | minmax-neon-ld128-x32.c | 44 …const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), vget_low_s8(va_zero_point)… in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32() local 49 const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), va_zero_point); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32() local 55 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32() 56 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32() 95 const int16x8_t vxa01234567 = vsubl_s8(va01234567, vget_low_s8(va_zero_point)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32() local 97 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32() local 100 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32() 101 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32()
|
/external/XNNPACK/src/qu8-vaddc/gen/ |
D | minmax-neon-ld128-x16.c | 43 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), vget… in xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16() local 46 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), va_z… in xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16() local 50 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16() 51 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16() 76 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, vget_low_u8(va_zero_point… in xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16() local 78 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16() local 81 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16() 82 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16()
|
D | minmax-neon-ld64-x8.c | 38 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8() local 40 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8() 41 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8() 60 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8() local 62 int32x4_t vacc0123 = vmlaq_s32(vbias, vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8() 63 int32x4_t vacc4567 = vmlaq_s32(vbias, vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8()
|
/external/XNNPACK/src/qu8-vmulc/gen/ |
D | minmax-fp32-neonv8-ld128-x16.c | 42 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), vget… in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() local 45 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), va_z… in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() local 49 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() 50 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() 97 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, vget_low_u8(va_zero_point… in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() local 99 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() local 102 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() 103 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16()
|
D | minmax-fp32-neon-ld128-x16.c | 42 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), vget… in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() local 45 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), va_z… in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() local 49 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() 50 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() 100 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, vget_low_u8(va_zero_point… in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() local 102 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() local 105 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() 106 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld128_x16()
|
D | minmax-rndnu-neon-ld128-x16.c | 43 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), vget… in xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() local 46 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), va_z… in xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() local 50 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() 51 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() 98 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, vget_low_u8(va_zero_point… in xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() local 100 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() local 103 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() 104 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16()
|
D | minmax-fp32-sse41-mul16-ld64-x8.c | 40 const __m128i vxa01234567 = _mm_sub_epi16(va01234567, va_zero_point); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 42 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 43 const __m128i vprod01234567hi = _mm_mulhi_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 74 const __m128i vxa01234567 = _mm_sub_epi16(va01234567, va_zero_point); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 76 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 77 const __m128i vprod01234567hi = _mm_mulhi_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
D | minmax-fp32-neonv8-ld64-x8.c | 37 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() local 39 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 40 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 75 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() local 77 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 78 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8()
|
/external/XNNPACK/src/qs8-vmulc/gen/ |
D | minmax-fp32-neonv8-ld128-x16.c | 42 …const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), vget_low_s8(va_zero_point)… in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() local 45 const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), va_zero_point); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() local 49 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() 50 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() 97 const int16x8_t vxa01234567 = vsubl_s8(va01234567, vget_low_s8(va_zero_point)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() local 99 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() local 102 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16() 103 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16()
|
D | minmax-rndnu-neon-ld128-x16.c | 43 …const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), vget_low_s8(va_zero_point)… in xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() local 46 const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), va_zero_point); in xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() local 50 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() 51 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() 98 const int16x8_t vxa01234567 = vsubl_s8(va01234567, vget_low_s8(va_zero_point)); in xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() local 100 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() local 103 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16() 104 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16()
|
D | minmax-fp32-neon-ld128-x16.c | 42 …const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), vget_low_s8(va_zero_point)… in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() local 45 const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), va_zero_point); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() local 49 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() 50 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() 100 const int16x8_t vxa01234567 = vsubl_s8(va01234567, vget_low_s8(va_zero_point)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() local 102 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() local 105 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16() 106 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16()
|
D | minmax-fp32-sse41-mul16-ld64-x8.c | 40 const __m128i vxa01234567 = _mm_sub_epi16(va01234567, va_zero_point); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 42 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 43 const __m128i vprod01234567hi = _mm_mulhi_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 74 const __m128i vxa01234567 = _mm_sub_epi16(va01234567, va_zero_point); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 76 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 77 const __m128i vprod01234567hi = _mm_mulhi_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
D | minmax-fp32-neonv8-ld64-x8.c | 37 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() local 39 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 40 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 75 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() local 77 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 78 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8()
|
/external/XNNPACK/src/qs8-vmul/gen/ |
D | minmax-fp32-neonv8-ld128-x16.c | 42 …const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), vget_low_s8(va_zero_point)… in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() local 47 const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), va_zero_point); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() local 53 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() 54 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() 102 const int16x8_t vxa01234567 = vsubl_s8(va01234567, vget_low_s8(va_zero_point)); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() local 105 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() local 109 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() 110 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16()
|
D | minmax-rndnu-neon-ld128-x16.c | 43 …const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), vget_low_s8(va_zero_point)… in xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() local 48 const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), va_zero_point); in xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() local 54 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() 55 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() 103 const int16x8_t vxa01234567 = vsubl_s8(va01234567, vget_low_s8(va_zero_point)); in xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() local 106 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() local 110 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() 111 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld128_x16()
|
D | minmax-fp32-neon-ld128-x16.c | 42 …const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), vget_low_s8(va_zero_point)… in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16() local 47 const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), va_zero_point); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16() local 53 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16() 54 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16() 105 const int16x8_t vxa01234567 = vsubl_s8(va01234567, vget_low_s8(va_zero_point)); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16() local 108 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16() local 112 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16() 113 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16()
|
D | minmax-fp32-wasmsimd-mul32-ld64-x8.c | 39 const v128_t vxa01234567 = wasm_i16x8_sub(va01234567, va_zero_point); in xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8() local 42 …v128_t vacc0123 = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(vxa01234567), wasm_i32x4_extend_low_i… in xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8() 43 …v128_t vacc4567 = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(vxa01234567), wasm_i32x4_extend_high… in xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8() 74 const v128_t vxa01234567 = wasm_i16x8_sub(va01234567, va_zero_point); in xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8() local 77 …v128_t vacc0123 = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(vxa01234567), wasm_i32x4_extend_low_i… in xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8() 78 …v128_t vacc4567 = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(vxa01234567), wasm_i32x4_extend_high… in xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8()
|
D | minmax-fp32-neonv8-ld64-x8.c | 36 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() local 39 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() 40 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() 76 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() local 79 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() 80 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8()
|
/external/XNNPACK/src/qu8-vadd/gen/ |
D | minmax-neon-ld128-x16.c | 43 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), vget… in xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16() local 48 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), va_z… in xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16() local 54 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16() 55 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16() 86 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, vget_low_u8(va_zero_point… in xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16() local 89 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16() local 93 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16() 94 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon_ld128_x16()
|
D | minmax-neon-ld64-x8.c | 37 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8() local 40 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8() 41 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8() 64 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8() local 67 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8() 68 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qu8_vadd_minmax_ukernel__neon_ld64_x8()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-neon-ld128-x16.c | 43 …const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), vget_low_s8(va_zero_point)… in xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16() local 48 const int16x8_t vxa01234567 = vsubl_s8(vget_low_s8(va0123456789ABCDEF), va_zero_point); in xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16() local 54 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16() 55 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16() 86 const int16x8_t vxa01234567 = vsubl_s8(va01234567, vget_low_s8(va_zero_point)); in xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16() local 89 const int16x8_t vxa01234567 = vsubl_s8(va01234567, va_zero_point); in xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16() local 93 int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vxa01234567)), va_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16() 94 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vxa01234567)), va_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld128_x16()
|
/external/XNNPACK/src/qu8-vmul/gen/ |
D | minmax-fp32-neonv8-ld128-x16.c | 42 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), vget… in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() local 47 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), va_z… in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() local 53 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() 54 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() 102 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, vget_low_u8(va_zero_point… in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() local 105 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() local 109 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16() 110 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16()
|
D | minmax-rndnu-neon-ld128-x16.c | 43 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), vget… in xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() local 48 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(va0123456789ABCDEF), va_z… in xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() local 54 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() 55 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() 103 …const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, vget_low_u8(va_zero_point… in xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() local 106 const int16x8_t vxa01234567 = vreinterpretq_s16_u16(vsubl_u8(va01234567, va_zero_point)); in xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() local 110 int32x4_t vacc0123 = vmull_s16(vget_low_s16(vxa01234567), vget_low_s16(vxb01234567)); in xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld128_x16() 111 int32x4_t vacc4567 = vmull_s16(vget_high_s16(vxa01234567), vget_high_s16(vxb01234567)); in xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld128_x16()
|