/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-sse41-x8.c | 37 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() local 41 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() 44 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() 47 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() 60 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() local 63 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() 66 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() 69 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8()
|
D | vcvt-wasmsimd-cvt-x8.c | 36 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8() local 40 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8() 43 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8() 46 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8() 63 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8() local 66 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8() 69 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8() 72 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x8()
|
D | vcvt-sse2-x8.c | 37 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() local 41 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() 44 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() 47 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() 60 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() local 63 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() 66 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() 69 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8()
|
D | vcvt-wasmsimd-magic-x8.c | 37 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8() local 41 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8() 44 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8() 47 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8() 64 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8() local 67 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8() 70 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8() 73 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x8()
|
D | vcvt-neon-x8.c | 37 float32x4_t vx_hi = vld1q_f32(x); x += 4; in xnn_f32_qs8_vcvt_ukernel__neon_x8() local 40 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x8() 43 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__neon_x8() 46 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__neon_x8() 60 float32x4_t vx_hi = vld1q_f32(x_hi); in xnn_f32_qs8_vcvt_ukernel__neon_x8() local 63 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x8() 66 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__neon_x8() 69 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__neon_x8()
|
D | vcvt-sse41-x16.c | 73 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() local 77 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() 80 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() 83 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() 96 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() local 99 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() 102 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() 105 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x16()
|
D | vcvt-wasmsimd-cvt-x16.c | 73 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() local 77 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() 80 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() 83 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() 100 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() local 103 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() 106 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() 109 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16()
|
D | vcvt-sse2-x16.c | 74 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() local 78 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() 81 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() 84 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() 97 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() local 100 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() 103 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() 106 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x16()
|
D | vcvt-neon-x16.c | 69 float32x4_t vx_hi = vld1q_f32(x); x += 4; in xnn_f32_qs8_vcvt_ukernel__neon_x16() local 72 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x16() 75 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__neon_x16() 78 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__neon_x16() 92 float32x4_t vx_hi = vld1q_f32(x_hi); in xnn_f32_qs8_vcvt_ukernel__neon_x16() local 95 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x16() 98 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__neon_x16() 101 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__neon_x16()
|
D | vcvt-wasmsimd-cvt-x24.c | 87 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() local 91 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() 94 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() 97 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() 114 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() local 117 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() 120 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() 123 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24()
|
D | vcvt-sse2-x24.c | 87 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() local 91 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 94 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 97 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 110 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() local 113 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 116 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 119 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x24()
|
D | vcvt-sse41-x24.c | 86 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() local 90 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 93 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 96 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 109 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() local 112 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 115 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 118 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x24()
|
D | vcvt-wasmsimd-magic-x16.c | 74 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16() local 78 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16() 81 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16() 84 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16() 101 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16() local 104 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16() 107 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16() 110 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x16()
|
D | vcvt-wasmsimd-magic-x24.c | 88 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24() local 92 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24() 95 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24() 98 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24() 115 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24() local 118 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24() 121 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24() 124 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x24()
|
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-sse2-x8.c | 37 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() local 41 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() 44 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() 47 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() 60 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() local 63 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() 66 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() 69 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8()
|
D | vcvt-wasmsimd-cvt-x8.c | 36 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8() local 40 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8() 43 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8() 46 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8() 63 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8() local 66 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8() 69 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8() 72 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x8()
|
D | vcvt-neon-x8.c | 37 float32x4_t vx_hi = vld1q_f32(x); x += 4; in xnn_f32_qu8_vcvt_ukernel__neon_x8() local 40 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__neon_x8() 43 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__neon_x8() 46 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__neon_x8() 60 float32x4_t vx_hi = vld1q_f32(x_hi); in xnn_f32_qu8_vcvt_ukernel__neon_x8() local 63 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__neon_x8() 66 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__neon_x8() 69 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__neon_x8()
|
D | vcvt-wasmsimd-magic-x8.c | 37 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8() local 41 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8() 44 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8() 47 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8() 64 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8() local 67 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8() 70 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8() 73 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x8()
|
D | vcvt-wasmsimd-cvt-x16.c | 73 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() local 77 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() 80 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() 83 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() 100 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() local 103 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() 106 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() 109 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16()
|
D | vcvt-sse2-x16.c | 73 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() local 77 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() 80 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() 83 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() 96 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() local 99 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() 102 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() 105 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x16()
|
D | vcvt-wasmsimd-cvt-x24.c | 87 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() local 91 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() 94 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() 97 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() 114 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() local 117 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() 120 vx_hi = wasm_f32x4_nearest(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() 123 v128_t vacc_hi = wasm_i32x4_trunc_sat_f32x4(vx_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24()
|
D | vcvt-neon-x16.c | 69 float32x4_t vx_hi = vld1q_f32(x); x += 4; in xnn_f32_qu8_vcvt_ukernel__neon_x16() local 72 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__neon_x16() 75 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__neon_x16() 78 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__neon_x16() 92 float32x4_t vx_hi = vld1q_f32(x_hi); in xnn_f32_qu8_vcvt_ukernel__neon_x16() local 95 vx_hi = vmulq_f32(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__neon_x16() 98 vx_hi = vaddq_f32(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__neon_x16() 101 const int32x4_t vacc_hi = vqsubq_s32(vreinterpretq_s32_f32(vx_hi), vmagic_bias_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__neon_x16()
|
D | vcvt-sse2-x24.c | 86 __m128 vx_hi = _mm_loadu_ps(x + 4); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() local 90 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 93 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 96 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 109 __m128 vx_hi = _mm_loadu_ps(x_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() local 112 vx_hi = _mm_mul_ps(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 115 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 118 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x24()
|
D | vcvt-wasmsimd-magic-x16.c | 74 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() local 78 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() 81 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() 84 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() 101 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() local 104 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() 107 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() 110 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16()
|
D | vcvt-wasmsimd-magic-x24.c | 88 v128_t vx_hi = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24() local 92 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24() 95 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24() 98 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24() 115 v128_t vx_hi = wasm_v128_load(x_hi); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24() local 118 vx_hi = wasm_f32x4_mul(vx_hi, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24() 121 vx_hi = wasm_f32x4_add(vx_hi, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24() 124 v128_t vacc_hi = wasm_i32x4_max(vx_hi, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x24()
|