Home
last modified time | relevance | path

Searched refs:vx_hi (Results 1 – 25 of 84) sorted by relevance

1234

/external/XNNPACK/src/f32-qs8-vcvt/gen/
Dvcvt-sse41-x8.c37 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() local
41 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x8()
44 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x8()
47 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8()
60 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() local
63 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x8()
66 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x8()
69 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8()
Dvcvt-wasmsimd-cvt-x8.c36 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8() local
40 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8()
43 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8()
46 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8()
63 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8() local
66 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8()
69 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8()
72 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8()
Dvcvt-sse2-x8.c37 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() local
41 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x8()
44 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x8()
47 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8()
60 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() local
63 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x8()
66 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x8()
69 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8()
Dvcvt-wasmsimd-magic-x8.c37 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8() local
41 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8()
44 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8()
47 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8()
64 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8() local
67 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8()
70 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8()
73 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8()
Dvcvt-neon-x8.c37 float32x4_t vx_hi = vld1q_f32(x); x += 4; in xnn_f32_qs8_vcvt_ukernel__neon_x8() local
40 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x8()
43 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__neon_x8()
46 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__neon_x8()
60 float32x4_t vx_hi = vld1q_f32(x_hi); in xnn_f32_qs8_vcvt_ukernel__neon_x8() local
63 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x8()
66 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__neon_x8()
69 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__neon_x8()
Dvcvt-sse41-x16.c73 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() local
77 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x16()
80 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x16()
83 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x16()
96 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() local
99 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x16()
102 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x16()
105 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x16()
Dvcvt-wasmsimd-cvt-x16.c73 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() local
77 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16()
80 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16()
83 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16()
100 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() local
103 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16()
106 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16()
109 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16()
Dvcvt-sse2-x16.c74 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() local
78 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x16()
81 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x16()
84 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x16()
97 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() local
100 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x16()
103 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x16()
106 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x16()
Dvcvt-neon-x16.c69 float32x4_t vx_hi = vld1q_f32(x); x += 4; in xnn_f32_qs8_vcvt_ukernel__neon_x16() local
72 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x16()
75 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__neon_x16()
78 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__neon_x16()
92 float32x4_t vx_hi = vld1q_f32(x_hi); in xnn_f32_qs8_vcvt_ukernel__neon_x16() local
95 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x16()
98 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__neon_x16()
101 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__neon_x16()
Dvcvt-wasmsimd-cvt-x24.c87 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() local
91 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24()
94 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24()
97 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24()
114 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() local
117 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24()
120 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24()
123 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24()
Dvcvt-sse2-x24.c87 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() local
91 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x24()
94 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24()
97 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x24()
110 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() local
113 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x24()
116 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24()
119 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x24()
Dvcvt-sse41-x24.c86 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() local
90 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x24()
93 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24()
96 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x24()
109 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() local
112 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x24()
115 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24()
118 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x24()
Dvcvt-wasmsimd-magic-x16.c74 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16() local
78 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16()
81 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16()
84 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16()
101 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16() local
104 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16()
107 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16()
110 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16()
Dvcvt-wasmsimd-magic-x24.c88 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24() local
92 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24()
95 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24()
98 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24()
115 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24() local
118 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24()
121 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24()
124 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24()
/external/XNNPACK/src/f32-qu8-vcvt/gen/
Dvcvt-sse2-x8.c37 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() local
41 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x8()
44 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x8()
47 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8()
60 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() local
63 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x8()
66 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x8()
69 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8()
Dvcvt-wasmsimd-cvt-x8.c36 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8() local
40 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8()
43 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8()
46 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8()
63 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8() local
66 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8()
69 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8()
72 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8()
Dvcvt-neon-x8.c37 float32x4_t vx_hi = vld1q_f32(x); x += 4; in xnn_f32_qu8_vcvt_ukernel__neon_x8() local
40 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__neon_x8()
43 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__neon_x8()
46 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__neon_x8()
60 float32x4_t vx_hi = vld1q_f32(x_hi); in xnn_f32_qu8_vcvt_ukernel__neon_x8() local
63 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__neon_x8()
66 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__neon_x8()
69 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__neon_x8()
Dvcvt-wasmsimd-magic-x8.c37 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8() local
41 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8()
44 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8()
47 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8()
64 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8() local
67 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8()
70 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8()
73 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8()
Dvcvt-wasmsimd-cvt-x16.c73 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() local
77 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16()
80 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16()
83 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16()
100 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() local
103 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16()
106 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16()
109 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16()
Dvcvt-sse2-x16.c73 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() local
77 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x16()
80 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x16()
83 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x16()
96 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() local
99 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x16()
102 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x16()
105 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x16()
Dvcvt-wasmsimd-cvt-x24.c87 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() local
91 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24()
94 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24()
97 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24()
114 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() local
117 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24()
120 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24()
123 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24()
Dvcvt-neon-x16.c69 float32x4_t vx_hi = vld1q_f32(x); x += 4; in xnn_f32_qu8_vcvt_ukernel__neon_x16() local
72 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__neon_x16()
75 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__neon_x16()
78 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__neon_x16()
92 float32x4_t vx_hi = vld1q_f32(x_hi); in xnn_f32_qu8_vcvt_ukernel__neon_x16() local
95 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__neon_x16()
98 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__neon_x16()
101 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__neon_x16()
Dvcvt-sse2-x24.c86 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() local
90 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x24()
93 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24()
96 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x24()
109 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() local
112 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x24()
115 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24()
118 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x24()
Dvcvt-wasmsimd-magic-x16.c74 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() local
78 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16()
81 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16()
84 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16()
101 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() local
104 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16()
107 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16()
110 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16()
Dvcvt-wasmsimd-magic-x24.c88 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24() local
92 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24()
95 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24()
98 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24()
115 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24() local
118 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24()
121 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24()
124 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24()

1234