
Searched for refs:vxSTUV (results 1 – 25 of 27, sorted by relevance; page 1 of 2)

/external/XNNPACK/src/f32-qs8-vcvt/gen/
vcvt-avx512skx-x128.c (in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128()):
    43  __m512 vxSTUV = _mm512_loadu_ps(x + 112);  [local]
    53  vxSTUV = _mm512_mul_ps(vxSTUV, vscale);
    62  vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point);
    71  const __m512i vaccSTUV = _mm512_cvtps_epi32(vxSTUV);

vcvt-sse41-x32.c (in xnn_f32_qs8_vcvt_ukernel__sse41_x32()):
    43  __m128 vxSTUV = _mm_loadu_ps(x + 28);  [local]
    53  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
    62  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
    71  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);

vcvt-sse2-x32.c (in xnn_f32_qs8_vcvt_ukernel__sse2_x32()):
    43  __m128 vxSTUV = _mm_loadu_ps(x + 28);  [local]
    53  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
    62  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
    71  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);

vcvt-wasmsimd-cvt-x32.c (in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32()):
    42  v128_t vxSTUV = wasm_v128_load(x + 28);  [local]
    52  vxSTUV = wasm_f32x4_mul(vxSTUV, vscale);
    61  vxSTUV = wasm_f32x4_nearest(vxSTUV);
    70  v128_t vaccSTUV = wasm_i32x4_trunc_sat_f32x4(vxSTUV);

vcvt-neon-x32.c (in xnn_f32_qs8_vcvt_ukernel__neon_x32()):
    43  float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    52  vxSTUV = vmulq_f32(vxSTUV, vscale);
    61  vxSTUV = vaddq_f32(vxSTUV, vmagic_bias);
    70  … const int32x4_t vaccSTUV = vqsubq_s32(vreinterpretq_s32_f32(vxSTUV), vmagic_bias_less_zero_point);

vcvt-wasmsimd-magic-x32.c (in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x32()):
    43  v128_t vxSTUV = wasm_v128_load(x + 28);  [local]
    53  vxSTUV = wasm_f32x4_mul(vxSTUV, vscale);
    62  vxSTUV = wasm_f32x4_add(vxSTUV, vmagic_bias);
    71  v128_t vaccSTUV = wasm_i32x4_max(vxSTUV, vmagic_min);

vcvt-neonv8-x32.c (in xnn_f32_qs8_vcvt_ukernel__neonv8_x32()):
    42  float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    51  vxSTUV = vmulq_f32(vxSTUV, vscale);
    60  const int32x4_t vaccSTUV = vcvtnq_s32_f32(vxSTUV);
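
Every file in this group traces the same per-vector pipeline for vxSTUV: load, multiply by the scale, clamp or round, convert to int32. (The STUV suffix is XNNPACK's base-32 position naming, digits 0–9 then A–V; vxSTUV is the last slice of the unrolled tile, hence the x + 28 loads in the x32 kernels and x + 112 in the x128 kernels.) Below is a minimal sketch of how those steps fit together on the SSE2 path for one vector; the helper name, parameter layout, and the narrowing/zero-point step are assumptions rather than XNNPACK's exact code:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>
    #include <string.h>

    /* Hypothetical helper: convert 4 floats to 4 qs8 values, following
     * the load -> mul -> min -> cvtps_epi32 sequence in the matches above. */
    static void f32_to_qs8_x4(const float* x, int8_t* y,
                              float scale, int8_t zero_point) {
      const __m128 vscale = _mm_set1_ps(scale);
      /* Clamp above at 127 - zero_point so adding the zero point later
       * cannot exceed INT8_MAX; the low end saturates in the packs below. */
      const __m128 voutput_max_less_zero_point =
          _mm_set1_ps((float) (127 - (int) zero_point));

      __m128 vx = _mm_loadu_ps(x);                       /* load 4 floats    */
      vx = _mm_mul_ps(vx, vscale);                       /* apply scale      */
      vx = _mm_min_ps(vx, voutput_max_less_zero_point);  /* clamp top end    */
      __m128i vacc = _mm_cvtps_epi32(vx);                /* round to nearest */
      vacc = _mm_add_epi32(vacc, _mm_set1_epi32((int) zero_point));

      /* Narrow int32 -> int16 -> int8 with signed saturation (assumed). */
      const __m128i v16 = _mm_packs_epi32(vacc, vacc);
      const __m128i v8 = _mm_packs_epi16(v16, v16);
      const int out = _mm_cvtsi128_si32(v8);
      memcpy(y, &out, sizeof(out));
    }

The real x32 kernels apply these same steps to all eight slices (vx0123 … vxSTUV) per loop iteration; only the rounding/clamping idiom changes per target, as the neon and wasmsimd-magic matches show.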
/external/XNNPACK/src/f32-qu8-vcvt/gen/
vcvt-avx512skx-x128.c (in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128()):
    43  __m512 vxSTUV = _mm512_loadu_ps(x + 112);  [local]
    53  vxSTUV = _mm512_mul_ps(vxSTUV, vscale);
    62  vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point);
    71  const __m512i vaccSTUV = _mm512_cvtps_epi32(vxSTUV);

vcvt-sse2-x32.c (in xnn_f32_qu8_vcvt_ukernel__sse2_x32()):
    43  __m128 vxSTUV = _mm_loadu_ps(x + 28);  [local]
    53  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
    62  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
    71  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);

vcvt-wasmsimd-cvt-x32.c (in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32()):
    42  v128_t vxSTUV = wasm_v128_load(x + 28);  [local]
    52  vxSTUV = wasm_f32x4_mul(vxSTUV, vscale);
    61  vxSTUV = wasm_f32x4_nearest(vxSTUV);
    70  v128_t vaccSTUV = wasm_i32x4_trunc_sat_f32x4(vxSTUV);

vcvt-wasmsimd-magic-x32.c (in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x32()):
    43  v128_t vxSTUV = wasm_v128_load(x + 28);  [local]
    53  vxSTUV = wasm_f32x4_mul(vxSTUV, vscale);
    62  vxSTUV = wasm_f32x4_add(vxSTUV, vmagic_bias);
    71  v128_t vaccSTUV = wasm_i32x4_max(vxSTUV, vmagic_min);

vcvt-neon-x32.c (in xnn_f32_qu8_vcvt_ukernel__neon_x32()):
    43  float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    52  vxSTUV = vmulq_f32(vxSTUV, vscale);
    61  vxSTUV = vaddq_f32(vxSTUV, vmagic_bias);
    70  … const int32x4_t vaccSTUV = vqsubq_s32(vreinterpretq_s32_f32(vxSTUV), vmagic_bias_less_zero_point);

vcvt-neonv8-x32.c (in xnn_f32_qu8_vcvt_ukernel__neonv8_x32()):
    42  float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    51  vxSTUV = vmulq_f32(vxSTUV, vscale);
    60  const int32x4_t vaccSTUV = vcvtnq_s32_f32(vxSTUV);
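
In the neon and wasmsimd-magic files of both this group and the qs8 group, rounding happens through the magic-bias trick rather than a dedicated round-to-nearest conversion, which plain ARMv7 NEON lacks (the neonv8 files can use vcvtnq_s32_f32 instead). Here is a sketch of the signed variant; the helper is hypothetical, and the constants follow the standard form of the trick rather than code visible in these results:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical helper: scale 4 floats and round them to
     * (int32 + zero_point) without a float->int conversion instruction.
     * Adding 1.5 * 2^23 leaves the rounded integer in the low mantissa
     * bits, so a reinterpret plus an integer subtract recovers it. */
    static int32x4_t f32_to_s32_magic(float32x4_t vx,
                                      float scale, int32_t zero_point) {
      const float32x4_t vscale = vdupq_n_f32(scale);
      const float32x4_t vmagic_bias = vdupq_n_f32(12582912.0f);  /* 1.5 * 2^23 */
      /* Bit pattern of the magic bias, minus the zero point (assumed form). */
      const int32x4_t vmagic_bias_less_zero_point =
          vdupq_n_s32(INT32_C(0x4B400000) - zero_point);

      vx = vmulq_f32(vx, vscale);        /* apply scale                     */
      vx = vaddq_f32(vx, vmagic_bias);   /* rounded value now in mantissa   */
      /* Saturating subtract yields round(x * scale) + zero_point. */
      return vqsubq_s32(vreinterpretq_s32_f32(vx), vmagic_bias_less_zero_point);
    }

The wasmsimd-magic kernels follow the same idea, with the wasm_i32x4_max against vmagic_min seen above serving as the lower output clamp while the value is still in biased form.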
/external/XNNPACK/src/qu8-f32-vcvt/gen/
vcvt-avx-x32.c (in xnn_qu8_f32_vcvt_ukernel__avx_x32()):
    40  __m128i vxSTUV = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) unaligned_load_s32(x + 28)));  [local]
    50  vxSTUV = _mm_add_epi32(vxSTUV, vminus_zero_point);
    55  const __m256i vxOPQRSTUV = _mm256_insertf128_si256(_mm256_castsi128_si256(vxOPQR), vxSTUV, 1);

vcvt-sse41-x32.c (in xnn_qu8_f32_vcvt_ukernel__sse41_x32()):
    40  __m128i vxSTUV = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) unaligned_load_s32(x + 28)));  [local]
    50  vxSTUV = _mm_add_epi32(vxSTUV, vminus_zero_point);
    59  __m128 vySTUV = _mm_cvtepi32_ps(vxSTUV);
/external/XNNPACK/src/qs8-f32-vcvt/gen/
vcvt-avx-x32.c (in xnn_qs8_f32_vcvt_ukernel__avx_x32()):
    40  __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_cvtsi32_si128((int) unaligned_load_s32(x + 28)));  [local]
    50  vxSTUV = _mm_add_epi32(vxSTUV, vminus_zero_point);
    55  const __m256i vxOPQRSTUV = _mm256_insertf128_si256(_mm256_castsi128_si256(vxOPQR), vxSTUV, 1);

vcvt-sse41-x32.c (in xnn_qs8_f32_vcvt_ukernel__sse41_x32()):
    40  __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_cvtsi32_si128((int) unaligned_load_s32(x + 28)));  [local]
    50  vxSTUV = _mm_add_epi32(vxSTUV, vminus_zero_point);
    59  __m128 vySTUV = _mm_cvtepi32_ps(vxSTUV);
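
The qu8-f32 and qs8-f32 groups run the conversion the other way, and differ only in the widening instruction: _mm_cvtepu8_epi32 zero-extends unsigned bytes, _mm_cvtepi8_epi32 sign-extends signed ones (the avx variant additionally pairs vxOPQR and vxSTUV into a 256-bit vector before converting). A sketch of the signed SSE4.1 path; the helper name is hypothetical, XNNPACK's unaligned_load_s32 is approximated with memcpy, and the final scale multiply is assumed since it is not among the matched lines:

    #include <smmintrin.h>  /* SSE4.1 */
    #include <stdint.h>
    #include <string.h>

    /* Hypothetical helper: dequantize 4 qs8 values to floats, following
     * the widen -> subtract zero point -> convert flow in the matches. */
    static void qs8_to_f32_x4(const int8_t* x, float* y,
                              float scale, int8_t zero_point) {
      const __m128i vminus_zero_point = _mm_set1_epi32(-(int32_t) zero_point);
      const __m128 vscale = _mm_set1_ps(scale);

      int32_t bits;
      memcpy(&bits, x, sizeof(bits));  /* stand-in for unaligned_load_s32 */
      /* Sign-extend 4 int8 lanes to int32 (_mm_cvtepu8_epi32 for qu8). */
      __m128i vx = _mm_cvtepi8_epi32(_mm_cvtsi32_si128(bits));
      vx = _mm_add_epi32(vx, vminus_zero_point);  /* x - zero_point      */
      __m128 vy = _mm_cvtepi32_ps(vx);            /* int32 -> float      */
      vy = _mm_mul_ps(vy, vscale);                /* apply scale (assumed) */
      _mm_storeu_ps(y, vy);
    }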
/external/XNNPACK/src/f32-vsqrt/gen/
neonfma-nr1rsqrts1fma1adj-x32.c (in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()):
    37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    46  float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
    64  const float32x4_t vcorrectionSTUV = vrsqrtsq_f32(vxSTUV, vrxSTUV);
    89  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
   125  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);

neonfma-nr1rsqrts1fma1adj-x36.c (in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()):
    37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    47  float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
    67  const float32x4_t vcorrectionSTUV = vrsqrtsq_f32(vxSTUV, vrxSTUV);
    94  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
   135  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);

neonfma-nr1rsqrts1fma1adj-x40.c (in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()):
    37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    48  float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
    70  const float32x4_t vcorrectionSTUV = vrsqrtsq_f32(vxSTUV, vrxSTUV);
    99  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
   145  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);

neonfma-nr2fma1adj-x32.c (in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()):
    37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    46  const float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
    62  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
   124  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);

neonfma-nr2fma1adj-x36.c (in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()):
    37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    47  const float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
    64  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
   134  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);

neonfma-nr2fma1adj-x40.c (in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()):
    37  const float32x4_t vxSTUV = vld1q_f32(x); x += 4;  [local]
    48  const float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV);
    66  float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV);
   144  const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV);
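
The f32-vsqrt matches show two refinement schemes built on the vrsqrteq_f32 estimate, as the file names encode: nr1rsqrts1fma1adj takes one Newton step via vrsqrtsq_f32 plus one FMA step, while nr2fma1adj takes two FMA steps; both end with the same vfmsq_f32 adjustment (x - sqrtx * sqrtx). A sketch of the nr2fma1adj flavor follows; the half-reciprocal bookkeeping between the matched lines is an assumption about code not shown in these results:

    #include <arm_neon.h>

    /* Hypothetical helper following the nr2fma1adj pattern: rsqrt
     * estimate, two coupled Newton-Raphson steps via FMA, then one
     * final adjustment. Requires NEON with FMA (vfmaq/vfmsq). */
    static float32x4_t vsqrt_nr2fma1adj(float32x4_t vx) {
      const float32x4_t vhalf = vdupq_n_f32(0.5f);

      const float32x4_t vrsqrtx = vrsqrteq_f32(vx);        /* ~1/sqrt(x)   */
      float32x4_t vsqrtx = vmulq_f32(vrsqrtx, vx);         /* ~sqrt(x)     */
      float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf); /* ~0.5/sqrt(x) */

      /* Two Newton-Raphson steps; the residual 0.5 - sqrtx * halfrsqrtx
       * goes to zero as vsqrtx converges to sqrt(x). */
      for (int i = 0; i < 2; i++) {
        const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx);
        vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx);
        vsqrtx = vfmaq_f32(vsqrtx, vresidual, vsqrtx);
      }

      /* Final adjustment, as in the matches above: adj = x - sqrtx^2,
       * then sqrtx += halfrsqrtx * adj. */
      const float32x4_t vadjustment = vfmsq_f32(vx, vsqrtx, vsqrtx);
      return vfmaq_f32(vsqrtx, vhalfrsqrtx, vadjustment);
    }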
/external/XNNPACK/src/amalgam/
avx512skx.c (in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128()):
   111  __m512 vxSTUV = _mm512_loadu_ps(x + 112);  [local]
   121  vxSTUV = _mm512_mul_ps(vxSTUV, vscale);
   130  vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point);
   139  const __m512i vaccSTUV = _mm512_cvtps_epi32(vxSTUV);

avx512skx.c (in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128()):
   229  __m512 vxSTUV = _mm512_loadu_ps(x + 112);  [local]
   239  vxSTUV = _mm512_mul_ps(vxSTUV, vscale);
   248  vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point);
   257  const __m512i vaccSTUV = _mm512_cvtps_epi32(vxSTUV);

sse2.c (in xnn_f32_qs8_vcvt_ukernel__sse2_x32()):
  1375  __m128 vxSTUV = _mm_loadu_ps(x + 28);  [local]
  1385  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
  1394  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
  1403  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);

sse2.c (in xnn_f32_qu8_vcvt_ukernel__sse2_x32()):
  1512  __m128 vxSTUV = _mm_loadu_ps(x + 28);  [local]
  1522  vxSTUV = _mm_mul_ps(vxSTUV, vscale);
  1531  vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point);
  1540  const __m128i vySTUV = _mm_cvtps_epi32(vxSTUV);
