/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-avx512skx-x32.c |
    36  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32() local
    40  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32()
    43  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32()
    46  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32()
    63  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32() local
    64  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32()
    65  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32()
    68  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32()
    87  __m512 vx0123 = _mm512_maskz_loadu_ps(vmask, x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32() local
    88  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x32()
    [all …]
|
D | vcvt-avx512skx-x64.c |
    36  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64() local
    42  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64()
    47  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64()
    52  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64()
    73  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64() local
    74  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64()
    75  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64()
    78  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64()
    97  __m512 vx0123 = _mm512_maskz_loadu_ps(vmask, x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64() local
    98  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x64()
    [all …]
|
D | vcvt-avx512skx-x96.c |
    37  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96() local
    45  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96()
    52  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96()
    59  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96()
    88  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96() local
    89  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96()
    90  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96()
    93  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96()
    112  __m512 vx0123 = _mm512_maskz_loadu_ps(vmask, x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96() local
    113  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x96()
    [all …]
|
D | vcvt-avx512skx-x128.c |
    36  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() local
    46  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128()
    55  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128()
    64  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128()
    97  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() local
    98  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128()
    99  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128()
    102  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128()
    121  __m512 vx0123 = _mm512_maskz_loadu_ps(vmask, x);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() local
    122  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128()
    [all …]
|
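Read in order, the matched lines above are the body of the f32→qs8 convert microkernel: load a vector, scale it, clamp the top end against voutput_max_less_zero_point, and convert with round-to-nearest. A minimal scalar sketch of one element follows, assuming the usual XNNPACK convert semantics; the zero-point add and the low clamp happen in the saturating pack tail that is truncated out of the excerpts, so output_zero_point and output_min here are illustrative names, not the kernel's excerpted constants. The f32→qu8 entries below have the same shape, with an unsigned output range.

    #include <math.h>
    #include <stdint.h>

    static inline int8_t f32_to_qs8(float x, float scale,
                                    float output_max_less_zero_point,
                                    int32_t output_zero_point,
                                    int32_t output_min)
    {
      float v = x * scale;                       /* vx = _mm512_mul_ps(vx, vscale) */
      v = fminf(v, output_max_less_zero_point);  /* vx = _mm512_min_ps(vx, ...)    */
      int32_t acc = (int32_t) lrintf(v);         /* _mm512_cvtps_epi32: round to nearest-even */
      acc += output_zero_point;                  /* added during the pack in the SIMD tail */
      if (acc < output_min) acc = output_min;    /* low clamp via saturation in the SIMD tail */
      return (int8_t) acc;
    }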
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-avx512skx-x32.c |
    36  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32() local
    40  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32()
    43  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32()
    46  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32()
    63  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32() local
    64  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32()
    65  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32()
    68  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32()
    87  __m512 vx0123 = _mm512_maskz_loadu_ps(vmask, x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32() local
    88  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x32()
    [all …]
|
D | vcvt-avx512skx-x64.c |
    36  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64() local
    42  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64()
    47  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64()
    52  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64()
    73  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64() local
    74  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64()
    75  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64()
    78  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64()
    97  __m512 vx0123 = _mm512_maskz_loadu_ps(vmask, x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64() local
    98  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x64()
    [all …]
|
D | vcvt-avx512skx-x96.c |
    37  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96() local
    45  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96()
    52  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96()
    59  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96()
    88  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96() local
    89  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96()
    90  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96()
    93  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96()
    112  __m512 vx0123 = _mm512_maskz_loadu_ps(vmask, x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96() local
    113  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x96()
    [all …]
|
D | vcvt-avx512skx-x128.c |
    36  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() local
    46  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128()
    55  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128()
    64  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128()
    97  __m512 vx0123 = _mm512_loadu_ps(x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() local
    98  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128()
    99  vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128()
    102  const __m512i vacc0123 = _mm512_cvtps_epi32(vx0123);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128()
    121  __m512 vx0123 = _mm512_maskz_loadu_ps(vmask, x);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() local
    122  vx0123 = _mm512_mul_ps(vx0123, vscale);  in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128()
    [all …]
|
/external/XNNPACK/src/f32-vhswish/gen/ |
D | vhswish-sse-x8.c |
    33  const __m128 vx0123 = _mm_loadu_ps(x);  in xnn_f32_vhswish_ukernel__sse_x8() local
    37  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);  in xnn_f32_vhswish_ukernel__sse_x8()
    49  vacc0123 = _mm_mul_ps(vacc0123, vx0123);  in xnn_f32_vhswish_ukernel__sse_x8()
    57  const __m128 vx0123 = _mm_loadu_ps(x);  in xnn_f32_vhswish_ukernel__sse_x8() local
    59  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);  in xnn_f32_vhswish_ukernel__sse_x8()
    63  vacc0123 = _mm_mul_ps(vacc0123, vx0123);  in xnn_f32_vhswish_ukernel__sse_x8()
    68  const __m128 vx0123 = _mm_loadu_ps(x);  in xnn_f32_vhswish_ukernel__sse_x8() local
    69  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);  in xnn_f32_vhswish_ukernel__sse_x8()
    73  vacc0123 = _mm_mul_ps(vacc0123, vx0123);  in xnn_f32_vhswish_ukernel__sse_x8()
|
D | vhswish-sse-x4.c |
    33  const __m128 vx0123 = _mm_loadu_ps(x);  in xnn_f32_vhswish_ukernel__sse_x4() local
    36  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);  in xnn_f32_vhswish_ukernel__sse_x4()
    44  vacc0123 = _mm_mul_ps(vacc0123, vx0123);  in xnn_f32_vhswish_ukernel__sse_x4()
    50  const __m128 vx0123 = _mm_loadu_ps(x);  in xnn_f32_vhswish_ukernel__sse_x4() local
    51  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);  in xnn_f32_vhswish_ukernel__sse_x4()
    55  vacc0123 = _mm_mul_ps(vacc0123, vx0123);  in xnn_f32_vhswish_ukernel__sse_x4()
|
D | vhswish-wasmsimd-x8.c |
    33  v128_t vx0123 = wasm_v128_load(x);  in xnn_f32_vhswish_ukernel__wasmsimd_x8() local
    37  v128_t vacc0123 = wasm_f32x4_add(vx0123, vthree);  in xnn_f32_vhswish_ukernel__wasmsimd_x8()
    38  vx0123 = wasm_f32x4_mul(vx0123, vsixth);  in xnn_f32_vhswish_ukernel__wasmsimd_x8()
    48  vacc0123 = wasm_f32x4_mul(vacc0123, vx0123);  in xnn_f32_vhswish_ukernel__wasmsimd_x8()
|
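All three vhswish variants compute hardswish, hswish(x) = x * clamp(x + 3, 0, 6) / 6, with the division folded into the premultiplied vsixth constant; the SSE form equivalently clamps x/6 + 1/2 to [0, 1]. A scalar sketch following the wasmsimd structure (the clamp constants are not in the excerpts, but follow from the definition):

    #include <math.h>

    static inline float hswish(float x)
    {
      float acc = x + 3.0f;                 /* vacc = wasm_f32x4_add(vx, vthree) */
      acc = fminf(fmaxf(acc, 0.0f), 6.0f);  /* clamp to [0, 6]; done with vector constants in SIMD */
      return (x * (1.0f / 6.0f)) * acc;     /* vx = mul(vx, vsixth); vacc = mul(vacc, vx) */
    }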
/external/XNNPACK/src/qs8-vcvt/gen/ |
D | vcvt-armsimd32-x8.c |
    30  const int8x4_t vx0123 = (int8x4_t) unaligned_indexed_load_u32(x, 0);  in xnn_qs8_vcvt_ukernel__armsimd32_x8() local
    34  const int16x2_t vx02 = __sxtab16(vminus_input_zero_point, vx0123);  in xnn_qs8_vcvt_ukernel__armsimd32_x8()
    35  const int16x2_t vx13 = __sxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qs8_vcvt_ukernel__armsimd32_x8()
    68  const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(x);  in xnn_qs8_vcvt_ukernel__armsimd32_x8() local
    71  const int16x2_t vx02 = __sxtab16(vminus_input_zero_point, vx0123);  in xnn_qs8_vcvt_ukernel__armsimd32_x8()
    72  const int16x2_t vx13 = __sxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qs8_vcvt_ukernel__armsimd32_x8()
    91  const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(x);  in xnn_qs8_vcvt_ukernel__armsimd32_x8() local
    93  const int16x2_t vx02 = __sxtab16(vminus_input_zero_point, vx0123);  in xnn_qs8_vcvt_ukernel__armsimd32_x8()
    94  const int16x2_t vx13 = __sxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qs8_vcvt_ukernel__armsimd32_x8()
|
D | vcvt-armsimd32-x4.c |
    30  const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(x);  in xnn_qs8_vcvt_ukernel__armsimd32_x4() local
    33  const int16x2_t vx02 = __sxtab16(vminus_input_zero_point, vx0123);  in xnn_qs8_vcvt_ukernel__armsimd32_x4()
    34  const int16x2_t vx13 = __sxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qs8_vcvt_ukernel__armsimd32_x4()
    53  const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(x);  in xnn_qs8_vcvt_ukernel__armsimd32_x4() local
    55  const int16x2_t vx02 = __sxtab16(vminus_input_zero_point, vx0123);  in xnn_qs8_vcvt_ukernel__armsimd32_x4()
    56  const int16x2_t vx13 = __sxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qs8_vcvt_ukernel__armsimd32_x4()
|
/external/XNNPACK/src/qu8-vcvt/gen/ |
D | vcvt-armsimd32-x8.c |
    30  const uint8x4_t vx0123 = (uint8x4_t) unaligned_indexed_load_u32(x, 0);  in xnn_qu8_vcvt_ukernel__armsimd32_x8() local
    34  const uint16x2_t vx02 = __uxtab16(vminus_input_zero_point, vx0123);  in xnn_qu8_vcvt_ukernel__armsimd32_x8()
    35  const uint16x2_t vx13 = __uxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qu8_vcvt_ukernel__armsimd32_x8()
    68  const uint8x4_t vx0123 = (uint8x4_t) unaligned_load_u32(x);  in xnn_qu8_vcvt_ukernel__armsimd32_x8() local
    71  const uint16x2_t vx02 = __uxtab16(vminus_input_zero_point, vx0123);  in xnn_qu8_vcvt_ukernel__armsimd32_x8()
    72  const uint16x2_t vx13 = __uxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qu8_vcvt_ukernel__armsimd32_x8()
    91  const uint8x4_t vx0123 = (uint8x4_t) unaligned_load_u32(x);  in xnn_qu8_vcvt_ukernel__armsimd32_x8() local
    93  const uint16x2_t vx02 = __uxtab16(vminus_input_zero_point, vx0123);  in xnn_qu8_vcvt_ukernel__armsimd32_x8()
    94  const uint16x2_t vx13 = __uxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qu8_vcvt_ukernel__armsimd32_x8()
|
D | vcvt-armsimd32-x4.c |
    30  const uint8x4_t vx0123 = (uint8x4_t) unaligned_load_u32(x);  in xnn_qu8_vcvt_ukernel__armsimd32_x4() local
    33  const uint16x2_t vx02 = __uxtab16(vminus_input_zero_point, vx0123);  in xnn_qu8_vcvt_ukernel__armsimd32_x4()
    34  const uint16x2_t vx13 = __uxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qu8_vcvt_ukernel__armsimd32_x4()
    53  const uint8x4_t vx0123 = (uint8x4_t) unaligned_load_u32(x);  in xnn_qu8_vcvt_ukernel__armsimd32_x4() local
    55  const uint16x2_t vx02 = __uxtab16(vminus_input_zero_point, vx0123);  in xnn_qu8_vcvt_ukernel__armsimd32_x4()
    56  const uint16x2_t vx13 = __uxtab16(vminus_input_zero_point, __ror(vx0123, 8));  in xnn_qu8_vcvt_ukernel__armsimd32_x4()
|
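Both vcvt-armsimd32 families above re-quantize a tensor from one zero point/scale pair to another. The excerpts show only the widening step: __sxtab16/__uxtab16 add the pre-negated input zero point (vminus_input_zero_point) to two byte lanes at once, and __ror exposes the other two lanes. The multiply/saturate tail is truncated out of the listing, so the sketch below fills it in with a generic rounding fixed-point rescale; the Q8 format, multiplier, and output_zero_point are assumptions for illustration, not the kernel's actual constants or intrinsics:

    #include <stdint.h>

    static inline int8_t qs8_requantize(int8_t x, int16_t minus_input_zero_point,
                                        int32_t multiplier, int16_t output_zero_point)
    {
      int32_t acc = (int32_t) x + minus_input_zero_point;  /* __sxtab16: widen and re-center */
      acc = (acc * multiplier + (1 << 7)) >> 8;            /* assumed Q8 rounding rescale */
      acc += output_zero_point;
      if (acc < INT8_MIN) acc = INT8_MIN;                  /* saturate to the qs8 range */
      if (acc > INT8_MAX) acc = INT8_MAX;
      return (int8_t) acc;
    }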
/external/XNNPACK/src/f32-vlrelu/gen/ |
D | vlrelu-sse-x4.c |
    30  __m128 vx0123 = _mm_loadu_ps(x);  in xnn_f32_vlrelu_ukernel__sse_x4() local
    33  __m128 vacc0123 = _mm_max_ps(_mm_setzero_ps(), vx0123);  in xnn_f32_vlrelu_ukernel__sse_x4()
    34  vx0123 = _mm_min_ps(vx0123, vzero);  in xnn_f32_vlrelu_ukernel__sse_x4()
    36  vacc0123 = _mm_add_ps(vacc0123, _mm_mul_ps(vx0123, vslope));  in xnn_f32_vlrelu_ukernel__sse_x4()
|
D | vlrelu-wasmsimd-minmax-x8.c |
    30  v128_t vx0123 = wasm_v128_load(x);  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() local
    34  v128_t vacc0123 = wasm_i32x4_max(vx0123, vzero);  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8()
    35  vx0123 = wasm_i32x4_min(vx0123, vzero);  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8()
    39  vacc0123 = wasm_f32x4_add(vacc0123, wasm_f32x4_mul(vx0123, vslope));  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8()
|
D | vlrelu-sse-x8.c |
    30  __m128 vx0123 = _mm_loadu_ps(x);  in xnn_f32_vlrelu_ukernel__sse_x8() local
    34  __m128 vacc0123 = _mm_max_ps(_mm_setzero_ps(), vx0123);  in xnn_f32_vlrelu_ukernel__sse_x8()
    35  vx0123 = _mm_min_ps(vx0123, vzero);  in xnn_f32_vlrelu_ukernel__sse_x8()
    39  vacc0123 = _mm_add_ps(vacc0123, _mm_mul_ps(vx0123, vslope));  in xnn_f32_vlrelu_ukernel__sse_x8()
|
D | vlrelu-sse41-x4.c |
    29  const __m128 vx0123 = _mm_loadu_ps(x);  in xnn_f32_vlrelu_ukernel__sse41_x4() local
    32  __m128 vacc0123 = _mm_mul_ps(vx0123, vslope);  in xnn_f32_vlrelu_ukernel__sse41_x4()
    34  vacc0123 = _mm_blendv_ps(vx0123, vacc0123, vx0123);  in xnn_f32_vlrelu_ukernel__sse41_x4()
|
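All four vlrelu variants compute leaky ReLU: y = x for x >= 0, and y = x * slope otherwise. The sse and wasmsimd-minmax kernels split x into its positive and negative parts and recombine (the wasmsimd-minmax one takes the min/max on the raw i32x4 bit pattern, which orders correctly against zero for IEEE floats); the sse41 one multiplies unconditionally and lets _mm_blendv_ps select on the sign bit of vx0123. A scalar equivalent of the split form:

    static inline float lrelu(float x, float slope)
    {
      float pos = x > 0.0f ? x : 0.0f;   /* vacc = max(0, vx)        */
      float neg = x < 0.0f ? x : 0.0f;   /* vx   = min(vx, vzero)    */
      return pos + neg * slope;          /* vacc += vx * vslope      */
    }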
/external/XNNPACK/src/qs8-vlrelu/gen/ |
D | vlrelu-armsimd32-x8.c |
    31  const int8x4_t vx0123 = (int8x4_t) unaligned_indexed_load_u32(x, 0);  in xnn_qs8_vlrelu_ukernel__armsimd32_x8() local
    35  int16x2_t vx02 = __sxtb16(vx0123);  in xnn_qs8_vlrelu_ukernel__armsimd32_x8()
    36  int16x2_t vx13 = __sxtb16(__ror(vx0123, 8));  in xnn_qs8_vlrelu_ukernel__armsimd32_x8()
    78  const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(x);  in xnn_qs8_vlrelu_ukernel__armsimd32_x8() local
    81  int16x2_t vx02 = __sxtb16(vx0123);  in xnn_qs8_vlrelu_ukernel__armsimd32_x8()
    82  int16x2_t vx13 = __sxtb16(__ror(vx0123, 8));  in xnn_qs8_vlrelu_ukernel__armsimd32_x8()
    106  const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(x);  in xnn_qs8_vlrelu_ukernel__armsimd32_x8() local
    108  int16x2_t vx02 = __sxtb16(vx0123);  in xnn_qs8_vlrelu_ukernel__armsimd32_x8()
    109  int16x2_t vx13 = __sxtb16(__ror(vx0123, 8));  in xnn_qs8_vlrelu_ukernel__armsimd32_x8()
|
D | vlrelu-armsimd32-x4.c |
    31  const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(x);  in xnn_qs8_vlrelu_ukernel__armsimd32_x4() local
    34  int16x2_t vx02 = __sxtb16(vx0123);  in xnn_qs8_vlrelu_ukernel__armsimd32_x4()
    35  int16x2_t vx13 = __sxtb16(__ror(vx0123, 8));  in xnn_qs8_vlrelu_ukernel__armsimd32_x4()
    59  const int8x4_t vx0123 = (int8x4_t) unaligned_load_u32(x);  in xnn_qs8_vlrelu_ukernel__armsimd32_x4() local
    61  int16x2_t vx02 = __sxtb16(vx0123);  in xnn_qs8_vlrelu_ukernel__armsimd32_x4()
    62  int16x2_t vx13 = __sxtb16(__ror(vx0123, 8));  in xnn_qs8_vlrelu_ukernel__armsimd32_x4()
|
/external/XNNPACK/src/qu8-vlrelu/gen/ |
D | vlrelu-armsimd32-x8.c |
    31  const uint8x4_t vx0123 = (uint8x4_t) unaligned_indexed_load_u32(x, 0);  in xnn_qu8_vlrelu_ukernel__armsimd32_x8() local
    35  uint16x2_t vx02 = __uxtb16(vx0123);  in xnn_qu8_vlrelu_ukernel__armsimd32_x8()
    36  uint16x2_t vx13 = __uxtb16(__ror(vx0123, 8));  in xnn_qu8_vlrelu_ukernel__armsimd32_x8()
    78  const uint8x4_t vx0123 = (uint8x4_t) unaligned_load_u32(x);  in xnn_qu8_vlrelu_ukernel__armsimd32_x8() local
    81  uint16x2_t vx02 = __uxtb16(vx0123);  in xnn_qu8_vlrelu_ukernel__armsimd32_x8()
    82  uint16x2_t vx13 = __uxtb16(__ror(vx0123, 8));  in xnn_qu8_vlrelu_ukernel__armsimd32_x8()
    106  const uint8x4_t vx0123 = (uint8x4_t) unaligned_load_u32(x);  in xnn_qu8_vlrelu_ukernel__armsimd32_x8() local
    108  uint16x2_t vx02 = __uxtb16(vx0123);  in xnn_qu8_vlrelu_ukernel__armsimd32_x8()
    109  uint16x2_t vx13 = __uxtb16(__ror(vx0123, 8));  in xnn_qu8_vlrelu_ukernel__armsimd32_x8()
|
D | vlrelu-armsimd32-x4.c |
    31  const uint8x4_t vx0123 = (uint8x4_t) unaligned_load_u32(x);  in xnn_qu8_vlrelu_ukernel__armsimd32_x4() local
    34  uint16x2_t vx02 = __uxtb16(vx0123);  in xnn_qu8_vlrelu_ukernel__armsimd32_x4()
    35  uint16x2_t vx13 = __uxtb16(__ror(vx0123, 8));  in xnn_qu8_vlrelu_ukernel__armsimd32_x4()
    59  const uint8x4_t vx0123 = (uint8x4_t) unaligned_load_u32(x);  in xnn_qu8_vlrelu_ukernel__armsimd32_x4() local
    61  uint16x2_t vx02 = __uxtb16(vx0123);  in xnn_qu8_vlrelu_ukernel__armsimd32_x4()
    62  uint16x2_t vx13 = __uxtb16(__ror(vx0123, 8));  in xnn_qu8_vlrelu_ukernel__armsimd32_x4()
|
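The quantized vlrelu kernels apply the same idea in the integer domain: __sxtb16/__uxtb16 widen two byte lanes at once (with __ror reaching the other two), the lanes are re-centered on the input zero point, and a sign-dependent multiplier rescales them before re-quantization. The excerpts stop at the widening step, so everything past the first statement of this sketch is a hedged reconstruction; the multiplier selection and the Q8 rescale format are assumptions:

    #include <stdint.h>

    static inline int8_t qs8_lrelu(int8_t x, int16_t input_zero_point,
                                   int32_t positive_multiplier,
                                   int32_t negative_multiplier,
                                   int16_t output_zero_point)
    {
      int32_t acc = (int32_t) x - input_zero_point;  /* __sxtb16 widens; re-centering assumed */
      const int32_t mult = acc >= 0 ? positive_multiplier : negative_multiplier;
      acc = (acc * mult + (1 << 7)) >> 8;            /* assumed Q8 rounding rescale */
      acc += output_zero_point;
      if (acc < INT8_MIN) acc = INT8_MIN;            /* saturate to the qs8 range */
      if (acc > INT8_MAX) acc = INT8_MAX;
      return (int8_t) acc;
    }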
/external/XNNPACK/src/f32-vrnd/gen/ |
D | vrndd-neon-x4.c |
    31  const float32x4_t vx0123 = vld1q_f32(x); x += 4;  in xnn_f32_vrndd_ukernel__neon_x4() local
    33  const int32x4_t vintx0123 = vcvtq_s32_f32(vx0123);  in xnn_f32_vrndd_ukernel__neon_x4()
    35  uint32x4_t vrndmask0123 = vcaltq_f32(vx0123, vintegral_threshold);  in xnn_f32_vrndd_ukernel__neon_x4()
    41  const float32x4_t vrndx0123 = vbslq_f32(vrndmask0123, vprerndx0123, vx0123);  in xnn_f32_vrndd_ukernel__neon_x4()
    43  const uint32x4_t vadjmask0123 = vcgtq_f32(vrndx0123, vx0123);  in xnn_f32_vrndd_ukernel__neon_x4()
|
D | vrndu-neon-x4.c |
    31  const float32x4_t vx0123 = vld1q_f32(x); x += 4;  in xnn_f32_vrndu_ukernel__neon_x4() local
    33  const int32x4_t vintx0123 = vcvtq_s32_f32(vx0123);  in xnn_f32_vrndu_ukernel__neon_x4()
    35  uint32x4_t vrndmask0123 = vcaltq_f32(vx0123, vintegral_threshold);  in xnn_f32_vrndu_ukernel__neon_x4()
    41  const float32x4_t vrndx0123 = vbslq_f32(vrndmask0123, vprerndx0123, vx0123);  in xnn_f32_vrndu_ukernel__neon_x4()
    43  uint32x4_t vadjmask0123 = vcgeq_f32(vrndx0123, vx0123);  in xnn_f32_vrndu_ukernel__neon_x4()
|
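vrndd and vrndu emulate floor and ceil on NEON targets without dedicated rounding instructions: truncate through an int32 conversion, keep x unchanged where |x| >= 2^23 (vintegral_threshold; such magnitudes are already integral, and the vcaltq_f32 mask also passes NaN through), then step by one where truncation landed on the wrong side of x. A scalar sketch of the floor variant:

    #include <math.h>
    #include <stdint.h>

    static inline float rndd(float x)                  /* floor */
    {
      const float vintegral_threshold = 8388608.0f;    /* 2^23 */
      float rnd;
      if (fabsf(x) < vintegral_threshold) {
        rnd = (float) (int32_t) x;                     /* vcvtq_s32_f32: truncate toward zero */
      } else {
        rnd = x;                                       /* already integral (or NaN): vcaltq/vbslq select */
      }
      if (rnd > x) {                                   /* vcgtq_f32: truncation overshot */
        rnd -= 1.0f;
      }
      return rnd;                                      /* vrndu instead adds 1 where vcgeq_f32 fails */
    }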