/external/XNNPACK/src/f32-vlrelu/gen/ |
D | vlrelu-wasmsimd-minmax-x8.c | 31 v128_t vx4567 = wasm_v128_load(x + 4); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() local 36 v128_t vacc4567 = wasm_i32x4_max(vx4567, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() 37 vx4567 = wasm_i32x4_min(vx4567, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() 40 vacc4567 = wasm_f32x4_add(vacc4567, wasm_f32x4_mul(vx4567, vslope)); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8()
|
D | vlrelu-sse-x8.c | 31 __m128 vx4567 = _mm_loadu_ps(x + 4); in xnn_f32_vlrelu_ukernel__sse_x8() local 36 __m128 vacc4567 = _mm_max_ps(_mm_setzero_ps(), vx4567); in xnn_f32_vlrelu_ukernel__sse_x8() 37 vx4567 = _mm_min_ps(vx4567, vzero); in xnn_f32_vlrelu_ukernel__sse_x8() 40 vacc4567 = _mm_add_ps(vacc4567, _mm_mul_ps(vx4567, vslope)); in xnn_f32_vlrelu_ukernel__sse_x8()
|
D | vlrelu-sse41-x8.c | 30 const __m128 vx4567 = _mm_loadu_ps(x + 4); in xnn_f32_vlrelu_ukernel__sse41_x8() local 34 __m128 vacc4567 = _mm_mul_ps(vx4567, vslope); in xnn_f32_vlrelu_ukernel__sse41_x8() 37 vacc4567 = _mm_blendv_ps(vx4567, vacc4567, vx4567); in xnn_f32_vlrelu_ukernel__sse41_x8()
|
D | vlrelu-wasmsimd-bitselect-x8.c | 30 const v128_t vx4567 = wasm_v128_load(x + 4); in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8() local 35 v128_t vacc4567 = wasm_f32x4_mul(vx4567, vslope); in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8() 36 const v128_t vmask4567 = wasm_i32x4_shr(vx4567, 31); in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8() 39 vacc4567 = wasm_v128_bitselect(vacc4567, vx4567, vmask4567); in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8()
|
/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-avx512skx-x32.c | 37 __m512 vx4567 = _mm512_loadu_ps(x + 16); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32() local 41 vx4567 = _mm512_mul_ps(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32() 44 vx4567 = _mm512_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32() 47 const __m512i vacc4567 = _mm512_cvtps_epi32(vx4567); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32()
|
D | vcvt-avx512skx-x64.c | 37 __m512 vx4567 = _mm512_loadu_ps(x + 16); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64() local 43 vx4567 = _mm512_mul_ps(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64() 48 vx4567 = _mm512_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64() 53 const __m512i vacc4567 = _mm512_cvtps_epi32(vx4567); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64()
|
D | vcvt-sse41-x16.c | 37 __m128 vx4567 = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() local 43 vx4567 = _mm_mul_ps(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() 48 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x16() 53 const __m128i vy4567 = _mm_cvtps_epi32(vx4567); in xnn_f32_qs8_vcvt_ukernel__sse41_x16()
|
D | vcvt-wasmsimd-cvt-x16.c | 36 v128_t vx4567 = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() local 42 vx4567 = wasm_f32x4_mul(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() 47 vx4567 = wasm_f32x4_nearest(vx4567); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16() 52 v128_t vacc4567 = wasm_i32x4_trunc_sat_f32x4(vx4567); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x16()
|
D | vcvt-sse2-x16.c | 37 __m128 vx4567 = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() local 43 vx4567 = _mm_mul_ps(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() 48 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x16() 53 const __m128i vy4567 = _mm_cvtps_epi32(vx4567); in xnn_f32_qs8_vcvt_ukernel__sse2_x16()
|
D | vcvt-neon-x16.c | 37 float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_qs8_vcvt_ukernel__neon_x16() local 42 vx4567 = vmulq_f32(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x16() 47 vx4567 = vaddq_f32(vx4567, vmagic_bias); in xnn_f32_qs8_vcvt_ukernel__neon_x16() 52 … const int32x4_t vacc4567 = vqsubq_s32(vreinterpretq_s32_f32(vx4567), vmagic_bias_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__neon_x16()
|
D | vcvt-wasmsimd-cvt-x24.c | 36 v128_t vx4567 = wasm_v128_load(x + 4); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() local 44 vx4567 = wasm_f32x4_mul(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() 51 vx4567 = wasm_f32x4_nearest(vx4567); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24() 58 v128_t vacc4567 = wasm_i32x4_trunc_sat_f32x4(vx4567); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x24()
|
D | vcvt-sse2-x24.c | 37 __m128 vx4567 = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() local 45 vx4567 = _mm_mul_ps(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 52 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 59 const __m128i vy4567 = _mm_cvtps_epi32(vx4567); in xnn_f32_qs8_vcvt_ukernel__sse2_x24()
|
D | vcvt-sse41-x24.c | 37 __m128 vx4567 = _mm_loadu_ps(x + 4); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() local 45 vx4567 = _mm_mul_ps(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 52 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 59 const __m128i vy4567 = _mm_cvtps_epi32(vx4567); in xnn_f32_qs8_vcvt_ukernel__sse41_x24()
|
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-avx512skx-x32.c | 37 __m512 vx4567 = _mm512_loadu_ps(x + 16); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32() local 41 vx4567 = _mm512_mul_ps(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32() 44 vx4567 = _mm512_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32() 47 const __m512i vacc4567 = _mm512_cvtps_epi32(vx4567); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32()
|
D | vcvt-avx512skx-x64.c | 37 __m512 vx4567 = _mm512_loadu_ps(x + 16); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64() local 43 vx4567 = _mm512_mul_ps(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64() 48 vx4567 = _mm512_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64() 53 const __m512i vacc4567 = _mm512_cvtps_epi32(vx4567); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64()
|
D | vcvt-wasmsimd-cvt-x16.c | 36 v128_t vx4567 = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() local 42 vx4567 = wasm_f32x4_mul(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() 47 vx4567 = wasm_f32x4_nearest(vx4567); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16() 52 v128_t vacc4567 = wasm_i32x4_trunc_sat_f32x4(vx4567); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x16()
|
D | vcvt-sse2-x16.c | 37 __m128 vx4567 = _mm_loadu_ps(x + 4); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() local 43 vx4567 = _mm_mul_ps(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() 48 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x16() 53 const __m128i vy4567 = _mm_cvtps_epi32(vx4567); in xnn_f32_qu8_vcvt_ukernel__sse2_x16()
|
D | vcvt-wasmsimd-cvt-x24.c | 36 v128_t vx4567 = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() local 44 vx4567 = wasm_f32x4_mul(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() 51 vx4567 = wasm_f32x4_nearest(vx4567); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24() 58 v128_t vacc4567 = wasm_i32x4_trunc_sat_f32x4(vx4567); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x24()
|
D | vcvt-neon-x16.c | 37 float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_qu8_vcvt_ukernel__neon_x16() local 42 vx4567 = vmulq_f32(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__neon_x16() 47 vx4567 = vaddq_f32(vx4567, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__neon_x16() 52 … const int32x4_t vacc4567 = vqsubq_s32(vreinterpretq_s32_f32(vx4567), vmagic_bias_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__neon_x16()
|
D | vcvt-sse2-x24.c | 37 __m128 vx4567 = _mm_loadu_ps(x + 4); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() local 45 vx4567 = _mm_mul_ps(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 52 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 59 const __m128i vy4567 = _mm_cvtps_epi32(vx4567); in xnn_f32_qu8_vcvt_ukernel__sse2_x24()
|
D | vcvt-avx512skx-x96.c | 38 __m512 vx4567 = _mm512_loadu_ps(x + 16); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96() local 46 vx4567 = _mm512_mul_ps(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96() 53 vx4567 = _mm512_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96() 60 const __m512i vacc4567 = _mm512_cvtps_epi32(vx4567); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96()
|
D | vcvt-wasmsimd-magic-x16.c | 37 v128_t vx4567 = wasm_v128_load(x + 4); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() local 43 vx4567 = wasm_f32x4_mul(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() 48 vx4567 = wasm_f32x4_add(vx4567, vmagic_bias); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16() 53 v128_t vacc4567 = wasm_i32x4_max(vx4567, vmagic_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x16()
|
/external/XNNPACK/src/f32-vhswish/gen/ |
D | vhswish-wasmsimd-x8.c | 34 v128_t vx4567 = wasm_v128_load(x + 4); in xnn_f32_vhswish_ukernel__wasmsimd_x8() local 39 v128_t vacc4567 = wasm_f32x4_add(vx4567, vthree); in xnn_f32_vhswish_ukernel__wasmsimd_x8() 40 vx4567 = wasm_f32x4_mul(vx4567, vsixth); in xnn_f32_vhswish_ukernel__wasmsimd_x8() 49 vacc4567 = wasm_f32x4_mul(vacc4567, vx4567); in xnn_f32_vhswish_ukernel__wasmsimd_x8()
|
D | vhswish-neon-x8.c | 34 float32x4_t vx4567 = vld1q_f32(x); x += 4; in xnn_f32_vhswish_ukernel__neon_x8() local 38 float32x4_t vacc4567 = vaddq_f32(vx4567, vthree); in xnn_f32_vhswish_ukernel__neon_x8() 39 vx4567 = vmulq_f32(vx4567, vsixth); in xnn_f32_vhswish_ukernel__neon_x8() 48 vacc4567 = vmulq_f32(vacc4567, vx4567); in xnn_f32_vhswish_ukernel__neon_x8()
|
D | vhswish-wasmsimd-x16.c | 34 v128_t vx4567 = wasm_v128_load(x + 4); in xnn_f32_vhswish_ukernel__wasmsimd_x16() local 41 v128_t vacc4567 = wasm_f32x4_add(vx4567, vthree); in xnn_f32_vhswish_ukernel__wasmsimd_x16() 42 vx4567 = wasm_f32x4_mul(vx4567, vsixth); in xnn_f32_vhswish_ukernel__wasmsimd_x16() 59 vacc4567 = wasm_f32x4_mul(vacc4567, vx4567); in xnn_f32_vhswish_ukernel__wasmsimd_x16()
|