/external/XNNPACK/src/f32-qs8-vcvt/gen/
D | vcvt-avx512skx-x128.c | in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128():
     43  __m512 vxSTUV = _mm512_loadu_ps(x + 112);   (local definition)
     53  vxSTUV = _mm512_mul_ps(vxSTUV, vscale);
     62  vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point);
     71  const __m512i vaccSTUV = _mm512_cvtps_epi32(vxSTUV);
|
D | vcvt-sse41-x32.c | in xnn_f32_qs8_vcvt_ukernel__sse41_x32():
     43  __m128 vxSTUV = _mm_loadu_ps(x + 28);   (local definition)
     53  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
     62  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
     71  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);
|
D | vcvt-sse2-x32.c | in xnn_f32_qs8_vcvt_ukernel__sse2_x32():
     43  __m128 vxSTUV = _mm_loadu_ps(x + 28);   (local definition)
     53  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
     62  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
     71  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);
|
D | vcvt-wasmsimd-cvt-x32.c | in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32():
     42  v128_t vxSTUV = wasm_v128_load(x + 28);   (local definition)
     52  vxSTUV = wasm_f32x4_mul(vxSTUV, vscale);
     61  vxSTUV = wasm_f32x4_nearest(vxSTUV);
     70  v128_t vaccSTUV = wasm_i32x4_trunc_sat_f32x4(vxSTUV);
|
D | vcvt-neon-x32.c | in xnn_f32_qs8_vcvt_ukernel__neon_x32():
     43  float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     52  vxSTUV = vmulq_f32(vxSTUV, vscale);
     61  vxSTUV = vaddq_f32(vxSTUV, vmagic_bias);
     70  const int32x4_t vaccSTUV = vqsubq_s32(vreinterpretq_s32_f32(vxSTUV), vmagic_bias_less_zero_point);
|
D | vcvt-wasmsimd-magic-x32.c | in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x32():
     43  v128_t vxSTUV = wasm_v128_load(x + 28);   (local definition)
     53  vxSTUV = wasm_f32x4_mul(vxSTUV, vscale);
     62  vxSTUV = wasm_f32x4_add(vxSTUV, vmagic_bias);
     71  v128_t vaccSTUV = wasm_i32x4_max(vxSTUV, vmagic_min);
|
D | vcvt-neonv8-x32.c | in xnn_f32_qs8_vcvt_ukernel__neonv8_x32():
     42  float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     51  vxSTUV = vmulq_f32(vxSTUV, vscale);
     60  const int32x4_t vaccSTUV = vcvtnq_s32_f32(vxSTUV);
|
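For orientation: all of the f32-qs8-vcvt kernels above compute y = clamp(round_to_nearest_even(x * scale) + zero_point, INT8_MIN, INT8_MAX); they differ only in how the rounding is reached (_mm512_cvtps_epi32 / _mm_cvtps_epi32, wasm_f32x4_nearest, vcvtnq_s32_f32, or the magic-bias trick in the neon and wasmsimd-magic variants). A minimal scalar sketch of that contract, with a hypothetical helper name that is not part of XNNPACK:

    #include <math.h>
    #include <stdint.h>

    /* Hypothetical scalar reference for the f32 -> qs8 kernels listed above.
     * nearbyintf() rounds to nearest-even under the default FP environment,
     * matching the SIMD conversion instructions named in the lead-in. */
    static inline int8_t f32_to_qs8(float x, float scale, int32_t zero_point) {
      int32_t y = (int32_t) nearbyintf(x * scale) + zero_point;
      if (y < INT8_MIN) y = INT8_MIN;   /* saturate, as the min/pack steps do */
      if (y > INT8_MAX) y = INT8_MAX;
      return (int8_t) y;
    }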
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-avx512skx-x128.c | in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128():
     43  __m512 vxSTUV = _mm512_loadu_ps(x + 112);   (local definition)
     53  vxSTUV = _mm512_mul_ps(vxSTUV, vscale);
     62  vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point);
     71  const __m512i vaccSTUV = _mm512_cvtps_epi32(vxSTUV);
|
D | vcvt-sse2-x32.c | in xnn_f32_qu8_vcvt_ukernel__sse2_x32():
     43  __m128 vxSTUV = _mm_loadu_ps(x + 28);   (local definition)
     53  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
     62  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
     71  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);
|
D | vcvt-wasmsimd-cvt-x32.c | in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32():
     42  v128_t vxSTUV = wasm_v128_load(x + 28);   (local definition)
     52  vxSTUV = wasm_f32x4_mul(vxSTUV, vscale);
     61  vxSTUV = wasm_f32x4_nearest(vxSTUV);
     70  v128_t vaccSTUV = wasm_i32x4_trunc_sat_f32x4(vxSTUV);
|
D | vcvt-wasmsimd-magic-x32.c | in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x32():
     43  v128_t vxSTUV = wasm_v128_load(x + 28);   (local definition)
     53  vxSTUV = wasm_f32x4_mul(vxSTUV, vscale);
     62  vxSTUV = wasm_f32x4_add(vxSTUV, vmagic_bias);
     71  v128_t vaccSTUV = wasm_i32x4_max(vxSTUV, vmagic_min);
|
D | vcvt-neon-x32.c | in xnn_f32_qu8_vcvt_ukernel__neon_x32():
     43  float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     52  vxSTUV = vmulq_f32(vxSTUV, vscale);
     61  vxSTUV = vaddq_f32(vxSTUV, vmagic_bias);
     70  const int32x4_t vaccSTUV = vqsubq_s32(vreinterpretq_s32_f32(vxSTUV), vmagic_bias_less_zero_point);
|
D | vcvt-neonv8-x32.c | in xnn_f32_qu8_vcvt_ukernel__neonv8_x32():
     42  float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     51  vxSTUV = vmulq_f32(vxSTUV, vscale);
     60  const int32x4_t vaccSTUV = vcvtnq_s32_f32(vxSTUV);
|
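The neon-x32 and wasmsimd-magic-x32 variants in both groups above avoid a float-to-int conversion instruction with a "magic bias": adding 0x1.8p23f (12582912.0f) forces round-to-nearest(x) into the low mantissa bits, and the saturating subtract of vmagic_bias_less_zero_point (the bias' bit pattern minus the zero point) recovers the quantized value in one step. An illustrative scalar version, with a made-up helper name:

    #include <stdint.h>
    #include <string.h>

    /* Scalar sketch of the magic-bias rounding in the *neon_x32 and
     * *wasmsimd_magic_x32 kernels above. Valid only while x + 0x1.8p23f
     * stays within [2^23, 2^24), i.e. x in roughly [-2^22, 2^22). */
    static inline int32_t magic_bias_round(float x, int32_t zero_point) {
      float biased = x + 12582912.0f;        /* vmagic_bias = 0x1.8p23f */
      int32_t bits;
      memcpy(&bits, &biased, sizeof(bits));  /* vreinterpretq_s32_f32 */
      /* vmagic_bias_less_zero_point = 0x4B400000 - zero_point */
      return bits - (INT32_C(0x4B400000) - zero_point);
    }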
/external/XNNPACK/src/qu8-f32-vcvt/gen/ |
D | vcvt-avx-x32.c | in xnn_qu8_f32_vcvt_ukernel__avx_x32():
     40  __m128i vxSTUV = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) unaligned_load_s32(x + 28)));   (local definition)
     50  vxSTUV = _mm_add_epi32(vxSTUV, vminus_zero_point);
     55  const __m256i vxOPQRSTUV = _mm256_insertf128_si256(_mm256_castsi128_si256(vxOPQR), vxSTUV, 1);
|
D | vcvt-sse41-x32.c | in xnn_qu8_f32_vcvt_ukernel__sse41_x32():
     40  __m128i vxSTUV = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) unaligned_load_s32(x + 28)));   (local definition)
     50  vxSTUV = _mm_add_epi32(vxSTUV, vminus_zero_point);
     59  __m128 vySTUV = _mm_cvtepi32_ps(vxSTUV);
|
/external/XNNPACK/src/qs8-f32-vcvt/gen/ |
D | vcvt-avx-x32.c | in xnn_qs8_f32_vcvt_ukernel__avx_x32():
     40  __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_cvtsi32_si128((int) unaligned_load_s32(x + 28)));   (local definition)
     50  vxSTUV = _mm_add_epi32(vxSTUV, vminus_zero_point);
     55  const __m256i vxOPQRSTUV = _mm256_insertf128_si256(_mm256_castsi128_si256(vxOPQR), vxSTUV, 1);
|
D | vcvt-sse41-x32.c | in xnn_qs8_f32_vcvt_ukernel__sse41_x32():
     40  __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_cvtsi32_si128((int) unaligned_load_s32(x + 28)));   (local definition)
     50  vxSTUV = _mm_add_epi32(vxSTUV, vminus_zero_point);
     59  __m128 vySTUV = _mm_cvtepi32_ps(vxSTUV);
|
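Both integer-to-float groups above implement the inverse mapping y = (x - zero_point) * scale; the only difference between the qu8 and qs8 variants is zero extension (_mm_cvtepu8_epi32) versus sign extension (_mm_cvtepi8_epi32) of the loaded bytes. A scalar sketch, with hypothetical helper names:

    #include <stdint.h>

    /* Hypothetical scalar references for the qu8/qs8 -> f32 kernels above.
     * The kernels fold the subtraction into _mm_add_epi32 with a precomputed
     * vminus_zero_point, then convert (_mm_cvtepi32_ps) and scale. */
    static inline float qu8_to_f32(uint8_t x, int32_t zero_point, float scale) {
      return (float) ((int32_t) x - zero_point) * scale;  /* zero-extended */
    }

    static inline float qs8_to_f32(int8_t x, int32_t zero_point, float scale) {
      return (float) ((int32_t) x - zero_point) * scale;  /* sign-extended */
    }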
/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr1rsqrts1fma1adj-x32.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32():
     37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     46  float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
     64  const float32x4_t vcorrectionSTUV = vrsqrtsq_f32(vxSTUV, vrxSTUV);
     89  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
    125  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36():
     37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     47  float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
     67  const float32x4_t vcorrectionSTUV = vrsqrtsq_f32(vxSTUV, vrxSTUV);
     94  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
    135  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40():
     37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     48  float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
     70  const float32x4_t vcorrectionSTUV = vrsqrtsq_f32(vxSTUV, vrxSTUV);
     99  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
    145  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);
|
D | neonfma-nr2fma1adj-x32.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32():
     37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     46  const float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
     62  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
    124  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);
|
D | neonfma-nr2fma1adj-x36.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36():
     37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     47  const float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
     64  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
    134  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);
|
D | neonfma-nr2fma1adj-x40.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40():
     37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;   (local definition)
     48  const float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
     66  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
    144  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);
|
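The f32-vsqrt kernels above compute sqrt(x) as x * rsqrt(x): vrsqrteq_f32 supplies a rough (~9-bit) reciprocal-sqrt estimate, which is refined by Newton-Raphson steps (via vrsqrtsq_f32 or FMA), and a final FMA-based residual adjustment sharpens the last bit, which is the vfmsq_f32(vx, vsqrtx, vsqrtx) line in each listing. An illustrative scalar version of the nr2fma1adj scheme, with a made-up function name and a bit-hack estimate standing in for vrsqrteq_f32:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* Scalar sketch of the nr2fma1adj scheme: crude rsqrt estimate, two
     * Newton-Raphson iterations in fused-multiply-add form ("nr2fma"),
     * then one residual adjustment ("1adj"). Assumes positive normal x. */
    static float sqrt_nr2fma1adj(float x) {
      uint32_t bits;
      memcpy(&bits, &x, sizeof(bits));
      bits = UINT32_C(0x5F3759DF) - (bits >> 1);   /* rough rsqrt estimate */
      float rsqrtx;
      memcpy(&rsqrtx, &bits, sizeof(rsqrtx));

      float sqrtx = rsqrtx * x;            /* vmulq_f32(vrsqrtx, vx) */
      float halfrsqrtx = 0.5f * rsqrtx;
      for (int i = 0; i < 2; i++) {        /* two NR iterations */
        const float residual = fmaf(-sqrtx, halfrsqrtx, 0.5f);
        halfrsqrtx = fmaf(residual, halfrsqrtx, halfrsqrtx);
        sqrtx = fmaf(residual, sqrtx, sqrtx);
      }
      /* adjustment = x - sqrtx*sqrtx, i.e. vfmsq_f32(vx, vsqrtx, vsqrtx) */
      const float adjustment = fmaf(-sqrtx, sqrtx, x);
      return fmaf(halfrsqrtx, adjustment, sqrtx);
    }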
/external/XNNPACK/src/amalgam/ |
D | avx512skx.c | in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128():
    111  __m512 vxSTUV = _mm512_loadu_ps(x + 112);   (local definition)
    121  vxSTUV = _mm512_mul_ps(vxSTUV, vscale);
    130  vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point);
    139  const __m512i vaccSTUV = _mm512_cvtps_epi32(vxSTUV);
  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128():
    229  __m512 vxSTUV = _mm512_loadu_ps(x + 112);   (local definition)
    239  vxSTUV = _mm512_mul_ps(vxSTUV, vscale);
    248  vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point);
    257  const __m512i vaccSTUV = _mm512_cvtps_epi32(vxSTUV);
|
D | sse2.c | in xnn_f32_qs8_vcvt_ukernel__sse2_x32():
   1375  __m128 vxSTUV = _mm_loadu_ps(x + 28);   (local definition)
   1385  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
   1394  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
   1403  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);
  in xnn_f32_qu8_vcvt_ukernel__sse2_x32():
   1512  __m128 vxSTUV = _mm_loadu_ps(x + 28);   (local definition)
   1522  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
   1531  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
   1540  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);
|