/external/XNNPACK/src/qs8-f32-vcvt/gen/ |
D | vcvt-sse2-x16.c | in xnn_qs8_f32_vcvt_ukernel__sse2_x16():
    37  __m128i vx89ABCDEF = _mm_loadl_epi64((const __m128i*) (x + 8));  (local)
    41  vx89ABCDEF = _mm_xor_si128(vx89ABCDEF, vsign_mask);
    44  vx89ABCDEF = _mm_unpacklo_epi8(vx89ABCDEF, vzero);
    48  __m128 vy89AB = _mm_castsi128_ps(_mm_unpacklo_epi16(vx89ABCDEF, vmagic_exp));
    49  __m128 vyCDEF = _mm_castsi128_ps(_mm_unpackhi_epi16(vx89ABCDEF, vmagic_exp));
|
D | vcvt-sse2-x24.c | in xnn_qs8_f32_vcvt_ukernel__sse2_x24():
    37  __m128i vx89ABCDEF = _mm_loadl_epi64((const __m128i*) (x + 8));  (local)
    42  vx89ABCDEF = _mm_xor_si128(vx89ABCDEF, vsign_mask);
    46  vx89ABCDEF = _mm_unpacklo_epi8(vx89ABCDEF, vzero);
    51  __m128 vy89AB = _mm_castsi128_ps(_mm_unpacklo_epi16(vx89ABCDEF, vmagic_exp));
    52  __m128 vyCDEF = _mm_castsi128_ps(_mm_unpackhi_epi16(vx89ABCDEF, vmagic_exp));
|
D | vcvt-sse2-x32.c | in xnn_qs8_f32_vcvt_ukernel__sse2_x32():
    37  __m128i vx89ABCDEF = _mm_loadl_epi64((const __m128i*) (x + 8));  (local)
    43  vx89ABCDEF = _mm_xor_si128(vx89ABCDEF, vsign_mask);
    48  vx89ABCDEF = _mm_unpacklo_epi8(vx89ABCDEF, vzero);
    54  __m128 vy89AB = _mm_castsi128_ps(_mm_unpacklo_epi16(vx89ABCDEF, vmagic_exp));
    55  __m128 vyCDEF = _mm_castsi128_ps(_mm_unpackhi_epi16(vx89ABCDEF, vmagic_exp));
|
D | vcvt-wasmsimd-x16.c | in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16():
    34  v128_t vx89ABCDEF = wasm_i16x8_load8x8(x + 8);  (local)
    38  vx89ABCDEF = wasm_i16x8_add(vx89ABCDEF, vminus_zero_point);
    42  v128_t vy89AB = wasm_i32x4_extend_low_i16x8(vx89ABCDEF);
    43  v128_t vyCDEF = wasm_i32x4_extend_high_i16x8(vx89ABCDEF);
|
D | vcvt-wasmsimd-x24.c | in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24():
    34  v128_t vx89ABCDEF = wasm_i16x8_load8x8(x + 8);  (local)
    39  vx89ABCDEF = wasm_i16x8_add(vx89ABCDEF, vminus_zero_point);
    44  v128_t vy89AB = wasm_i32x4_extend_low_i16x8(vx89ABCDEF);
    45  v128_t vyCDEF = wasm_i32x4_extend_high_i16x8(vx89ABCDEF);
|
D | vcvt-avx2-x16.c | in xnn_qs8_f32_vcvt_ukernel__avx2_x16():
    34  __m256i vx89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (x + 8)));  (local)
    38  vx89ABCDEF = _mm256_add_epi32(vx89ABCDEF, vminus_zero_point);
    41  __m256 vy89ABCDEF = _mm256_cvtepi32_ps(vx89ABCDEF);
|
D | vcvt-wasmsimd-x32.c | in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32():
    34  v128_t vx89ABCDEF = wasm_i16x8_load8x8(x + 8);  (local)
    40  vx89ABCDEF = wasm_i16x8_add(vx89ABCDEF, vminus_zero_point);
    46  v128_t vy89AB = wasm_i32x4_extend_low_i16x8(vx89ABCDEF);
    47  v128_t vyCDEF = wasm_i32x4_extend_high_i16x8(vx89ABCDEF);
|
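The matches above come from XNNPACK's generated QS8→F32 dequantization micro-kernels; vx89ABCDEF holds elements 8..15 of the current tile, and every variant computes y = (x - zero_point) * scale. The SSE2 files avoid widening to 32-bit integers: they XOR the sign bit so the int8 values become 128-biased uint8, widen to 16 bits, then splice a "magic" exponent into the upper half-word so the integer lands in the mantissa of 2^23; subtracting a matching bias yields x - zero_point directly. A minimal sketch of that trick, with assumed parameter names and a magic-bias value inferred from the pattern above (not the XNNPACK params struct):

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>

/* Dequantize 8 signed 8-bit values: y[i] = (x[i] - zero_point) * scale. */
static void qs8_to_f32_x8(const int8_t* x, float* y, int32_t zero_point, float scale) {
  const __m128i vsign_mask = _mm_set1_epi8((char) 0x80);  /* XOR turns int8 into 128-biased uint8 */
  const __m128i vmagic_exp = _mm_set1_epi16(0x4B00);      /* exponent field of 2^23 */
  const __m128 vmagic_bias = _mm_set1_ps(8388608.0f + 128.0f + (float) zero_point);
  const __m128 vscale = _mm_set1_ps(scale);

  __m128i vx = _mm_loadl_epi64((const __m128i*) x);       /* 8 x int8 */
  vx = _mm_xor_si128(vx, vsign_mask);                     /* x + 128, now unsigned */
  vx = _mm_unpacklo_epi8(vx, _mm_setzero_si128());        /* widen to 8 x uint16 */

  /* Interleave with the magic exponent: each 32-bit lane becomes the float 2^23 + (x + 128). */
  __m128 vy_lo = _mm_castsi128_ps(_mm_unpacklo_epi16(vx, vmagic_exp));
  __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp));
  vy_lo = _mm_mul_ps(_mm_sub_ps(vy_lo, vmagic_bias), vscale);
  vy_hi = _mm_mul_ps(_mm_sub_ps(vy_hi, vmagic_bias), vscale);

  _mm_storeu_ps(y, vy_lo);
  _mm_storeu_ps(y + 4, vy_hi);
}

The AVX2 and WASM SIMD variants skip the bit trick: they sign-extend, add the negated zero point, and convert with a hardware int32→float instruction.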
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-avx-x16.c | in xnn_f32_qu8_vcvt_ukernel__avx_x16():
    37  __m256 vx89ABCDEF = _mm256_loadu_ps(x + 8);  (local)
    41  vx89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vscale);
    44  vx89ABCDEF = _mm256_min_ps(vx89ABCDEF, voutput_max_less_zero_point);
    47  const __m256i vacc89ABCDEF = _mm256_cvtps_epi32(vx89ABCDEF);
|
D | vcvt-avx-x24.c | in xnn_f32_qu8_vcvt_ukernel__avx_x24():
    37  __m256 vx89ABCDEF = _mm256_loadu_ps(x + 8);  (local)
    42  vx89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vscale);
    46  vx89ABCDEF = _mm256_min_ps(vx89ABCDEF, voutput_max_less_zero_point);
    50  const __m256i vacc89ABCDEF = _mm256_cvtps_epi32(vx89ABCDEF);
|
D | vcvt-avx-x32.c | in xnn_f32_qu8_vcvt_ukernel__avx_x32():
    37  __m256 vx89ABCDEF = _mm256_loadu_ps(x + 8);  (local)
    43  vx89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vscale);
    48  vx89ABCDEF = _mm256_min_ps(vx89ABCDEF, voutput_max_less_zero_point);
    53  const __m256i vacc89ABCDEF = _mm256_cvtps_epi32(vx89ABCDEF);
|
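These AVX kernels run the opposite direction, F32→QU8 quantization: multiply by the inverse of the quantization scale, clamp the upper end against output_max - zero_point while still in floating point, round-convert to int32, then add the zero point and pack with unsigned saturation (the pack and the lower clamp happen on lines not matched above). A simplified 128-bit sketch of that sequence, under assumed parameter names:

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>

/* Quantize 8 floats: q[i] = clamp(round(x[i] * inv_scale) + zero_point, qmin, qmax). */
static void f32_to_qu8_x8(const float* x, uint8_t* y, float inv_scale,
                          int16_t zero_point, uint8_t qmin, uint8_t qmax) {
  const __m128 vscale = _mm_set1_ps(inv_scale);
  const __m128 voutput_max_less_zero_point =
      _mm_set1_ps((float) ((int32_t) qmax - (int32_t) zero_point));
  const __m128i vzero_point = _mm_set1_epi16(zero_point);
  const __m128i voutput_min = _mm_set1_epi8((char) qmin);

  __m128 vx_lo = _mm_mul_ps(_mm_loadu_ps(x), vscale);
  __m128 vx_hi = _mm_mul_ps(_mm_loadu_ps(x + 4), vscale);
  vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point);  /* upper clamp before rounding */
  vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point);

  const __m128i vacc_lo = _mm_cvtps_epi32(vx_lo);          /* round-to-nearest-even */
  const __m128i vacc_hi = _mm_cvtps_epi32(vx_hi);
  const __m128i vacc = _mm_adds_epi16(_mm_packs_epi32(vacc_lo, vacc_hi), vzero_point);
  __m128i vy = _mm_packus_epi16(vacc, vacc);               /* saturate to [0, 255] */
  vy = _mm_max_epu8(vy, voutput_min);                      /* lower output clamp */
  _mm_storel_epi64((__m128i*) y, vy);
}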
/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-avx-x16.c | in xnn_f32_qs8_vcvt_ukernel__avx_x16():
    37  __m256 vx89ABCDEF = _mm256_loadu_ps(x + 8);  (local)
    41  vx89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vscale);
    44  vx89ABCDEF = _mm256_min_ps(vx89ABCDEF, voutput_max_less_zero_point);
    47  const __m256i vacc89ABCDEF = _mm256_cvtps_epi32(vx89ABCDEF);
|
D | vcvt-avx-x24.c | in xnn_f32_qs8_vcvt_ukernel__avx_x24():
    37  __m256 vx89ABCDEF = _mm256_loadu_ps(x + 8);  (local)
    42  vx89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vscale);
    46  vx89ABCDEF = _mm256_min_ps(vx89ABCDEF, voutput_max_less_zero_point);
    50  const __m256i vacc89ABCDEF = _mm256_cvtps_epi32(vx89ABCDEF);
|
D | vcvt-avx-x32.c | in xnn_f32_qs8_vcvt_ukernel__avx_x32():
    37  __m256 vx89ABCDEF = _mm256_loadu_ps(x + 8);  (local)
    43  vx89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vscale);
    48  vx89ABCDEF = _mm256_min_ps(vx89ABCDEF, voutput_max_less_zero_point);
    53  const __m256i vacc89ABCDEF = _mm256_cvtps_epi32(vx89ABCDEF);
|
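The F32→QS8 kernels in this directory are structurally identical to the F32→QU8 group above; only the final narrowing switches to signed saturation. A sketch of one 8-element step in the 256-bit style of these AVX kernels (assumed parameter names; _mm_max_epi8 is SSE4.1, which every AVX-capable target provides):

#include <immintrin.h>  /* AVX + SSE4.1 */
#include <stdint.h>

/* Quantize 8 floats to int8 using one 256-bit lane. */
static void f32_to_qs8_x8(const float* x, int8_t* y, float inv_scale,
                          int16_t zero_point, int8_t qmin, int8_t qmax) {
  const __m256 vscale = _mm256_set1_ps(inv_scale);
  const __m256 voutput_max_less_zero_point =
      _mm256_set1_ps((float) ((int32_t) qmax - (int32_t) zero_point));

  __m256 vx = _mm256_mul_ps(_mm256_loadu_ps(x), vscale);
  vx = _mm256_min_ps(vx, voutput_max_less_zero_point);  /* upper clamp before rounding */
  const __m256i vacc = _mm256_cvtps_epi32(vx);          /* round-to-nearest-even */

  /* Narrow 8 x int32 -> 8 x int8 in the 128-bit domain. */
  __m128i vacc16 = _mm_packs_epi32(_mm256_castsi256_si128(vacc),
                                   _mm256_extractf128_si256(vacc, 1));
  vacc16 = _mm_adds_epi16(vacc16, _mm_set1_epi16(zero_point));
  __m128i vy = _mm_packs_epi16(vacc16, vacc16);         /* saturate to [-128, 127] */
  vy = _mm_max_epi8(vy, _mm_set1_epi8(qmin));           /* lower output clamp */
  _mm_storel_epi64((__m128i*) y, vy);
}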
/external/XNNPACK/src/qu8-f32-vcvt/gen/ |
D | vcvt-wasmsimd-x16.c | in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16():
    34  v128_t vx89ABCDEF = wasm_u16x8_load8x8(x + 8);  (local)
    38  vx89ABCDEF = wasm_i16x8_add(vx89ABCDEF, vminus_zero_point);
    42  v128_t vy89AB = wasm_i32x4_extend_low_i16x8(vx89ABCDEF);
    43  v128_t vyCDEF = wasm_i32x4_extend_high_i16x8(vx89ABCDEF);
|
D | vcvt-wasmsimd-x24.c | in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24():
    34  v128_t vx89ABCDEF = wasm_u16x8_load8x8(x + 8);  (local)
    39  vx89ABCDEF = wasm_i16x8_add(vx89ABCDEF, vminus_zero_point);
    44  v128_t vy89AB = wasm_i32x4_extend_low_i16x8(vx89ABCDEF);
    45  v128_t vyCDEF = wasm_i32x4_extend_high_i16x8(vx89ABCDEF);
|
D | vcvt-sse2-x16.c | in xnn_qu8_f32_vcvt_ukernel__sse2_x16():
    36  __m128i vx89ABCDEF = _mm_loadl_epi64((const __m128i*) (x + 8));  (local)
    41  vx89ABCDEF = _mm_unpacklo_epi8(vx89ABCDEF, vzero);
    45  __m128 vy89AB = _mm_castsi128_ps(_mm_unpacklo_epi16(vx89ABCDEF, vmagic_exp));
    46  __m128 vyCDEF = _mm_castsi128_ps(_mm_unpackhi_epi16(vx89ABCDEF, vmagic_exp));
|
D | vcvt-avx2-x16.c | in xnn_qu8_f32_vcvt_ukernel__avx2_x16():
    34  __m256i vx89ABCDEF = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) (x + 8)));  (local)
    38  vx89ABCDEF = _mm256_add_epi32(vx89ABCDEF, vminus_zero_point);
    41  __m256 vy89ABCDEF = _mm256_cvtepi32_ps(vx89ABCDEF);
|
D | vcvt-wasmsimd-x32.c | in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32():
    34  v128_t vx89ABCDEF = wasm_u16x8_load8x8(x + 8);  (local)
    40  vx89ABCDEF = wasm_i16x8_add(vx89ABCDEF, vminus_zero_point);
    46  v128_t vy89AB = wasm_i32x4_extend_low_i16x8(vx89ABCDEF);
    47  v128_t vyCDEF = wasm_i32x4_extend_high_i16x8(vx89ABCDEF);
|
D | vcvt-sse2-x24.c | in xnn_qu8_f32_vcvt_ukernel__sse2_x24():
    36  __m128i vx89ABCDEF = _mm_loadl_epi64((const __m128i*) (x + 8));  (local)
    42  vx89ABCDEF = _mm_unpacklo_epi8(vx89ABCDEF, vzero);
    47  __m128 vy89AB = _mm_castsi128_ps(_mm_unpacklo_epi16(vx89ABCDEF, vmagic_exp));
    48  __m128 vyCDEF = _mm_castsi128_ps(_mm_unpackhi_epi16(vx89ABCDEF, vmagic_exp));
|
D | vcvt-avx2-x24.c | in xnn_qu8_f32_vcvt_ukernel__avx2_x24():
    34  __m256i vx89ABCDEF = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) (x + 8)));  (local)
    39  vx89ABCDEF = _mm256_add_epi32(vx89ABCDEF, vminus_zero_point);
    43  __m256 vy89ABCDEF = _mm256_cvtepi32_ps(vx89ABCDEF);
|
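The QU8→F32 kernels mirror the QS8 group at the top of this listing, but the inputs are unsigned: the SSE2 variant drops the sign-bit XOR, and the AVX2/WASM SIMD variants zero-extend before subtracting the zero point. A sketch in the style of the AVX2 kernels here (assumed parameter names, not the XNNPACK params struct):

#include <immintrin.h>  /* AVX2 */
#include <stdint.h>

/* Dequantize 8 unsigned 8-bit values: y[i] = (x[i] - zero_point) * scale. */
static void qu8_to_f32_x8(const uint8_t* x, float* y, int32_t zero_point, float scale) {
  const __m256i vminus_zero_point = _mm256_set1_epi32(-zero_point);
  const __m256 vscale = _mm256_set1_ps(scale);

  __m256i vx = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) x));  /* zero-extend u8 -> i32 */
  vx = _mm256_add_epi32(vx, vminus_zero_point);                            /* x - zero_point */
  const __m256 vy = _mm256_mul_ps(_mm256_cvtepi32_ps(vx), vscale);
  _mm256_storeu_ps(y, vy);
}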
/external/XNNPACK/src/f16-vhswish/gen/ |
D | vhswish-neonfp16arith-x16.c | in xnn_f16_vhswish_ukernel__neonfp16arith_x16():
    37  float16x8_t vx89ABCDEF = vld1q_f16(x); x += 8;  (local)
    41  float16x8_t vacc89ABCDEF = vaddq_f16(vx89ABCDEF, vthree);
    42  vx89ABCDEF = vmulq_f16(vx89ABCDEF, vsixth);
    51  vacc89ABCDEF = vmulq_f16(vacc89ABCDEF, vx89ABCDEF);
|
D | vhswish-f16c-x16.c | in xnn_f16_vhswish_ukernel__f16c_x16():
    38  __m256 vx89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (x + 8)));  (local)
    43  __m128i vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(vx89ABCDEF, vthree), _MM_FROUND_NO_EXC);
    44  …vx89ABCDEF = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_mul_ps(vx89ABCDEF, vsixth), _MM_FROUND_NO_EXC)…
    53  …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_mul_ps(_mm256_cvtph_ps(vacc89ABCDEF), vx89ABCDEF), _MM_FROUN…
|
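Both f16 hswish kernels evaluate hswish(x) = x * min(max(x + 3, 0), 6) / 6, folding the division into a multiply by one-sixth that proceeds alongside the clamp; the F16C variant additionally round-trips each intermediate through fp16 (the _mm256_cvtps_ph / _mm256_cvtph_ps pairs above) so intermediates stay at half precision. A NEON FP16 sketch of the ordering (hypothetical helper; needs ARMv8.2-A half-precision arithmetic):

#include <arm_neon.h>  /* requires __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

/* hswish over 8 half-precision lanes: y = x * min(max(x + 3, 0), 6) / 6. */
static void f16_hswish_x8(const float16_t* x, float16_t* y) {
  const float16x8_t vthree = vdupq_n_f16((float16_t) 3.0f);
  const float16x8_t vsixth = vdupq_n_f16((float16_t) (1.0f / 6.0f));
  const float16x8_t vsix = vdupq_n_f16((float16_t) 6.0f);
  const float16x8_t vzero = vdupq_n_f16((float16_t) 0.0f);

  const float16x8_t vx = vld1q_f16(x);
  float16x8_t vacc = vaddq_f16(vx, vthree);       /* x + 3 */
  const float16x8_t vxs = vmulq_f16(vx, vsixth);  /* x / 6, computed while the clamp runs */
  vacc = vmaxq_f16(vacc, vzero);                  /* clamp low at 0 */
  vacc = vminq_f16(vacc, vsix);                   /* clamp high at 6 */
  vst1q_f16(y, vmulq_f16(vacc, vxs));             /* (x / 6) * clamp(x + 3, 0, 6) */
}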
/external/XNNPACK/src/f32-vlrelu/gen/ |
D | vlrelu-avx-x16.c | in xnn_f32_vlrelu_ukernel__avx_x16():
    30  const __m256 vx89ABCDEF = _mm256_loadu_ps(x + 8);  (local)
    34  __m256 vacc89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vslope);
    37  vacc89ABCDEF = _mm256_blendv_ps(vx89ABCDEF, vacc89ABCDEF, vx89ABCDEF);
|
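The AVX leaky-ReLU kernel computes the scaled value unconditionally and then passes x itself as the blend mask: _mm256_blendv_ps selects by sign bit, so negative lanes take x * slope and the rest keep x, with no compare instruction needed. A sketch under assumed names:

#include <immintrin.h>  /* AVX */

/* Leaky ReLU over 8 single-precision lanes: y = x >= 0 ? x : x * slope. */
static void f32_lrelu_x8(const float* x, float* y, float slope) {
  const __m256 vslope = _mm256_set1_ps(slope);
  const __m256 vx = _mm256_loadu_ps(x);
  const __m256 vneg = _mm256_mul_ps(vx, vslope);
  /* blendv picks the second value wherever the mask's sign bit is set, i.e. for negative x. */
  _mm256_storeu_ps(y, _mm256_blendv_ps(vx, vneg, vx));
}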
/external/XNNPACK/src/f16-vlrelu/gen/ |
D | vlrelu-neonfp16arith-x16.c | in xnn_f16_vlrelu_ukernel__neonfp16arith_x16():
    32  const float16x8_t vx89ABCDEF = vld1q_f16(i); i += 8;  (local)
    36  float16x8_t vacc89ABCDEF = vmulq_f16(vx89ABCDEF, vslope);
    37  const uint16x8_t vmask89ABCDEF = vcltq_s16(vreinterpretq_s16_f16(vx89ABCDEF), vmovq_n_s16(0));
    40  vacc89ABCDEF = vbslq_f16(vmask89ABCDEF, vacc89ABCDEF, vx89ABCDEF);
|
D | vlrelu-f16c-x16.c | in xnn_f16_vlrelu_ukernel__f16c_x16():
    33  const __m256 vx89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (i + 8)));  (local)
    37  __m256 vacc89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vslope);
    40  vacc89ABCDEF = _mm256_blendv_ps(vx89ABCDEF, vacc89ABCDEF, vx89ABCDEF);
|
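The half-precision leaky-ReLU kernels use the same select-on-sign-bit idea; the neonfp16arith variant builds the lane mask by comparing the raw fp16 bits, reinterpreted as signed 16-bit integers, against zero. A sketch (assumed names; ARMv8.2-A FP16 arithmetic required):

#include <arm_neon.h>  /* requires __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

/* Leaky ReLU over 8 half-precision lanes: y = x >= 0 ? x : x * slope. */
static void f16_lrelu_x8(const float16_t* x, float16_t* y, float16_t slope) {
  const float16x8_t vslope = vdupq_n_f16(slope);
  const float16x8_t vx = vld1q_f16(x);
  const float16x8_t vneg = vmulq_f16(vx, vslope);
  /* Sign-bit test on the raw fp16 encoding: lanes with the bit set take the scaled value. */
  const uint16x8_t vmask = vcltq_s16(vreinterpretq_s16_f16(vx), vmovq_n_s16(0));
  vst1q_f16(y, vbslq_f16(vmask, vneg, vx));
}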