/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-fp32-sse41-c8.c | 94 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() local 97 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 100 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 103 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 147 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() local 150 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 153 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 156 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8()
|
D | 7x-minmax-fp32-sse2-c8.c | 108 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() local 111 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 114 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 117 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 177 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() local 180 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 183 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 186 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8()
|
D | 7x-minmax-fp32-sse41-c16.c | 111 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() local 116 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 121 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 126 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 173 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() local 176 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 179 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 182 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16()
|
/external/XNNPACK/src/qu8-gavgpool/gen/ |
D | 7x-minmax-fp32-sse41-c8.c | 95 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() local 98 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 101 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 104 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 148 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() local 151 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 154 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8() 157 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c8()
|
D | 7x-minmax-fp32-sse2-c8.c | 108 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() local 111 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 114 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 117 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 176 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() local 179 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 182 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8() 185 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse2_c8()
|
D | 7x-minmax-fp32-sse41-c16.c | 112 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() local 117 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 122 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 127 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 174 __m128 vfpacc4567 = _mm_cvtepi32_ps(vacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() local 177 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 180 vfpacc4567 = _mm_min_ps(vfpacc4567, voutput_max_less_zero_point); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16() 183 vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__sse41_c16()
|
/external/XNNPACK/src/qu8-vmulc/gen/ |
D | minmax-fp32-sse41-mul16-ld64-x8.c | 49 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 52 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 55 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 83 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 86 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 89 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
D | minmax-fp32-neonv8-ld64-x8.c | 43 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() local 46 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 49 vacc4567 = vcvtnq_s32_f32(vfpacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 81 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() local 84 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 87 vacc4567 = vcvtnq_s32_f32(vfpacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8()
|
D | minmax-fp32-avx-mul16-ld64-x8.c | 49 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 52 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 55 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 83 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 86 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 89 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8()
|
D | minmax-fp32-neon-ld64-x8.c | 43 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() local 46 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() 49 vacc4567 = vreinterpretq_s32_f32(vaddq_f32(vfpacc4567, vmagic_bias)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() 83 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() local 86 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() 89 vacc4567 = vreinterpretq_s32_f32(vaddq_f32(vfpacc4567, vmagic_bias)); in xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld64_x8()
|
D | minmax-fp32-sse2-mul16-ld64-x8.c | 51 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 54 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 57 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 87 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 90 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 93 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8()
|
/external/XNNPACK/src/qs8-vmulc/gen/ |
D | minmax-fp32-sse41-mul16-ld64-x8.c | 49 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 52 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 55 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 83 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 86 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 89 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
D | minmax-fp32-neonv8-ld64-x8.c | 43 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() local 46 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 49 vacc4567 = vcvtnq_s32_f32(vfpacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 81 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() local 84 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8() 87 vacc4567 = vcvtnq_s32_f32(vfpacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8()
|
D | minmax-fp32-sse2-mul16-ld64-x8.c | 50 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 53 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 56 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 85 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 88 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 91 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8()
|
D | minmax-fp32-avx-mul16-ld64-x8.c | 49 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 52 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 55 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 83 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 86 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 89 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8()
|
D | minmax-fp32-neon-ld64-x8.c | 43 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() local 46 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() 49 vacc4567 = vreinterpretq_s32_f32(vaddq_f32(vfpacc4567, vmagic_bias)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() 83 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() local 86 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8() 89 vacc4567 = vreinterpretq_s32_f32(vaddq_f32(vfpacc4567, vmagic_bias)); in xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8()
|
/external/XNNPACK/src/qs8-vmul/gen/ |
D | minmax-fp32-neonv8-ld64-x8.c | 43 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() local 46 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() 49 vacc4567 = vcvtnq_s32_f32(vfpacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() 83 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() local 86 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() 89 vacc4567 = vcvtnq_s32_f32(vfpacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8()
|
D | minmax-fp32-sse41-mul16-ld64-x8.c | 50 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 53 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 56 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 86 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 89 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 92 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
D | minmax-fp32-avx-mul16-ld64-x8.c | 50 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 53 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 56 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 86 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 89 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 92 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8()
|
D | minmax-fp32-sse2-mul16-ld64-x8.c | 52 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 55 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 58 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 90 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 93 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 96 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8()
|
D | minmax-fp32-neon-ld64-x8.c | 43 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8() local 46 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8() 49 vacc4567 = vreinterpretq_s32_f32(vaddq_f32(vfpacc4567, vmagic_bias)); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8() 85 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8() local 88 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8() 91 vacc4567 = vreinterpretq_s32_f32(vaddq_f32(vfpacc4567, vmagic_bias)); in xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8()
|
/external/XNNPACK/src/qu8-vmul/gen/ |
D | minmax-fp32-neonv8-ld64-x8.c | 43 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() local 46 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() 49 vacc4567 = vcvtnq_s32_f32(vfpacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() 83 float32x4_t vfpacc4567 = vcvtq_f32_s32(vacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() local 86 vfpacc4567 = vmulq_f32(vfpacc4567, vscale); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8() 89 vacc4567 = vcvtnq_s32_f32(vfpacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8()
|
D | minmax-fp32-sse41-mul16-ld64-x8.c | 50 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 53 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 56 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 86 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 89 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 92 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
D | minmax-fp32-avx-mul16-ld64-x8.c | 50 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 53 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 56 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 86 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 89 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 92 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8()
|
D | minmax-fp32-sse2-mul16-ld64-x8.c | 53 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 56 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 59 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 92 __m128 vfpacc4567 = _mm_cvtepi32_ps(vprod4567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 95 vfpacc4567 = _mm_mul_ps(vfpacc4567, vscale); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 98 const __m128i vacc4567 = _mm_cvtps_epi32(vfpacc4567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8()
|