/external/XNNPACK/src/qs8-f32-vcvt/gen/ |
D | vcvt-sse2-x16.c | local vx01234567 in xnn_qs8_f32_vcvt_ukernel__sse2_x16():
    36  __m128i vx01234567 = _mm_loadl_epi64((const __m128i*) x);
    40  vx01234567 = _mm_xor_si128(vx01234567, vsign_mask);
    43  vx01234567 = _mm_unpacklo_epi8(vx01234567, vzero);
    46  __m128 vy0123 = _mm_castsi128_ps(_mm_unpacklo_epi16(vx01234567, vmagic_exp));
    47  __m128 vy4567 = _mm_castsi128_ps(_mm_unpackhi_epi16(vx01234567, vmagic_exp));
|
D | vcvt-sse2-x24.c | local vx01234567 in xnn_qs8_f32_vcvt_ukernel__sse2_x24():
    36  __m128i vx01234567 = _mm_loadl_epi64((const __m128i*) x);
    41  vx01234567 = _mm_xor_si128(vx01234567, vsign_mask);
    45  vx01234567 = _mm_unpacklo_epi8(vx01234567, vzero);
    49  __m128 vy0123 = _mm_castsi128_ps(_mm_unpacklo_epi16(vx01234567, vmagic_exp));
    50  __m128 vy4567 = _mm_castsi128_ps(_mm_unpackhi_epi16(vx01234567, vmagic_exp));
|
D | vcvt-sse2-x32.c | local vx01234567 in xnn_qs8_f32_vcvt_ukernel__sse2_x32():
    36  __m128i vx01234567 = _mm_loadl_epi64((const __m128i*) x);
    42  vx01234567 = _mm_xor_si128(vx01234567, vsign_mask);
    47  vx01234567 = _mm_unpacklo_epi8(vx01234567, vzero);
    52  __m128 vy0123 = _mm_castsi128_ps(_mm_unpacklo_epi16(vx01234567, vmagic_exp));
    53  __m128 vy4567 = _mm_castsi128_ps(_mm_unpackhi_epi16(vx01234567, vmagic_exp));
|
D | vcvt-wasmsimd-x16.c | local vx01234567 in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16():
    33  v128_t vx01234567 = wasm_i16x8_load8x8(x);
    37  vx01234567 = wasm_i16x8_add(vx01234567, vminus_zero_point);
    40  v128_t vy0123 = wasm_i32x4_extend_low_i16x8(vx01234567);
    41  v128_t vy4567 = wasm_i32x4_extend_high_i16x8(vx01234567);
|
D | vcvt-avx2-x8.c | local vx01234567 in xnn_qs8_f32_vcvt_ukernel__avx2_x8():
    33  __m256i vx01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) x));
    36  vx01234567 = _mm256_add_epi32(vx01234567, vminus_zero_point);
    38  __m256 vy01234567 = _mm256_cvtepi32_ps(vx01234567);
|
D | vcvt-wasmsimd-x24.c | local vx01234567 in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24():
    33  v128_t vx01234567 = wasm_i16x8_load8x8(x);
    38  vx01234567 = wasm_i16x8_add(vx01234567, vminus_zero_point);
    42  v128_t vy0123 = wasm_i32x4_extend_low_i16x8(vx01234567);
    43  v128_t vy4567 = wasm_i32x4_extend_high_i16x8(vx01234567);
|
D | vcvt-avx2-x16.c | local vx01234567 in xnn_qs8_f32_vcvt_ukernel__avx2_x16():
    33  __m256i vx01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) x));
    37  vx01234567 = _mm256_add_epi32(vx01234567, vminus_zero_point);
    40  __m256 vy01234567 = _mm256_cvtepi32_ps(vx01234567);
|
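The AVX2 kernels above widen, rebias, and convert in three instructions: sign-extend the int8 lanes straight to int32, subtract the zero point as an integer add of its negation, then convert to float in one instruction (the WAsm SIMD kernels do the same, splitting the widening into an 8-to-16 load followed by 16-to-32 extends). A minimal sketch of that pipeline, with hypothetical zero_point and scale arguments standing in for the params struct the generated kernels actually read:

    #include <immintrin.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Dequantize n int8 values to float (n assumed a multiple of 8).
     * A sketch of the technique, not the XNNPACK implementation. */
    static void qs8_to_f32_avx2(const int8_t* x, float* y, size_t n,
                                int32_t zero_point, float scale) {
      const __m256i vminus_zero_point = _mm256_set1_epi32(-zero_point);
      const __m256 vscale = _mm256_set1_ps(scale);
      for (; n >= 8; n -= 8) {
        /* Sign-extend 8 int8 lanes directly to int32. */
        __m256i vx = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) x));
        /* Subtract the zero point by adding its negation. */
        vx = _mm256_add_epi32(vx, vminus_zero_point);
        /* One-instruction int32 -> float, then apply the scale. */
        _mm256_storeu_ps(y, _mm256_mul_ps(_mm256_cvtepi32_ps(vx), vscale));
        x += 8;
        y += 8;
      }
    }

The unsigned qu8 counterparts further down swap _mm256_cvtepi8_epi32 for _mm256_cvtepu8_epi32 and are otherwise identical.
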
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-avx-x16.c | local vx01234567 in xnn_f32_qu8_vcvt_ukernel__avx_x16():
    36  __m256 vx01234567 = _mm256_loadu_ps(x);
    40  vx01234567 = _mm256_mul_ps(vx01234567, vscale);
    43  vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point);
    46  const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567);
|
D | vcvt-avx-x24.c | local vx01234567 in xnn_f32_qu8_vcvt_ukernel__avx_x24():
    36  __m256 vx01234567 = _mm256_loadu_ps(x);
    41  vx01234567 = _mm256_mul_ps(vx01234567, vscale);
    45  vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point);
    49  const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567);
|
D | vcvt-avx-x32.c | local vx01234567 in xnn_f32_qu8_vcvt_ukernel__avx_x32():
    36  __m256 vx01234567 = _mm256_loadu_ps(x);
    42  vx01234567 = _mm256_mul_ps(vx01234567, vscale);
    47  vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point);
    52  const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567);
|
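The quantizing direction scales, clamps against the representable range expressed relative to the zero point, round-converts to int32, re-adds the zero point, and narrows with saturation. The rows above show only the top clamp in float; the low end is evidently enforced later in the kernel, but for a self-contained sketch it is simpler to clamp both ends up front. All names below are illustrative stand-ins for the kernels' params struct:

    #include <immintrin.h>
    #include <stdint.h>

    /* Quantize 8 floats to uint8. scale is the reciprocal of the
     * dequantization scale. A sketch under the assumptions above,
     * not the XNNPACK implementation. */
    static void f32_to_qu8_avx(const float* x, uint8_t* y,
                               float scale, int32_t zero_point) {
      __m256 vx = _mm256_loadu_ps(x);
      vx = _mm256_mul_ps(vx, _mm256_set1_ps(scale));
      vx = _mm256_max_ps(vx, _mm256_set1_ps((float) (0 - zero_point)));
      vx = _mm256_min_ps(vx, _mm256_set1_ps((float) (255 - zero_point)));
      /* Converts with the current rounding mode (nearest-even by default). */
      const __m256i vacc = _mm256_cvtps_epi32(vx);
      /* AVX1 has no 256-bit integer add, so re-add the zero point per half. */
      const __m128i vzp = _mm_set1_epi32(zero_point);
      const __m128i vlo = _mm_add_epi32(_mm256_castsi256_si128(vacc), vzp);
      const __m128i vhi = _mm_add_epi32(_mm256_extractf128_si256(vacc, 1), vzp);
      const __m128i v16 = _mm_packs_epi32(vlo, vhi);          /* -> 8 x int16 */
      _mm_storel_epi64((__m128i*) y, _mm_packus_epi16(v16, v16));  /* -> uint8 */
    }
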
/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-avx-x16.c | local vx01234567 in xnn_f32_qs8_vcvt_ukernel__avx_x16():
    36  __m256 vx01234567 = _mm256_loadu_ps(x);
    40  vx01234567 = _mm256_mul_ps(vx01234567, vscale);
    43  vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point);
    46  const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567);
|
D | vcvt-avx-x24.c | local vx01234567 in xnn_f32_qs8_vcvt_ukernel__avx_x24():
    36  __m256 vx01234567 = _mm256_loadu_ps(x);
    41  vx01234567 = _mm256_mul_ps(vx01234567, vscale);
    45  vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point);
    49  const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567);
|
D | vcvt-avx-x32.c | local vx01234567 in xnn_f32_qs8_vcvt_ukernel__avx_x32():
    36  __m256 vx01234567 = _mm256_loadu_ps(x);
    42  vx01234567 = _mm256_mul_ps(vx01234567, vscale);
    47  vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point);
    52  const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567);
|
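The signed kernels here differ from the qu8 ones above only in the output range and the final saturating narrow. For completeness, the signed counterpart of the same hypothetical sketch (same headers as above):

    /* Quantize 8 floats to int8: clamp to the signed range, and pack
     * with signed saturation (_mm_packs_epi16) instead of unsigned.
     * Illustrative sketch, not the XNNPACK implementation. */
    static void f32_to_qs8_avx(const float* x, int8_t* y,
                               float scale, int32_t zero_point) {
      __m256 vx = _mm256_mul_ps(_mm256_loadu_ps(x), _mm256_set1_ps(scale));
      vx = _mm256_max_ps(vx, _mm256_set1_ps((float) (-128 - zero_point)));
      vx = _mm256_min_ps(vx, _mm256_set1_ps((float) (127 - zero_point)));
      const __m256i vacc = _mm256_cvtps_epi32(vx);
      const __m128i vzp = _mm_set1_epi32(zero_point);
      const __m128i vlo = _mm_add_epi32(_mm256_castsi256_si128(vacc), vzp);
      const __m128i vhi = _mm_add_epi32(_mm256_extractf128_si256(vacc, 1), vzp);
      const __m128i v16 = _mm_packs_epi32(vlo, vhi);
      _mm_storel_epi64((__m128i*) y, _mm_packs_epi16(v16, v16));  /* -> int8 */
    }
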
/external/XNNPACK/src/f32-vlrelu/gen/ |
D | vlrelu-avx-x8.c | local vx01234567 in xnn_f32_vlrelu_ukernel__avx_x8():
    29  const __m256 vx01234567 = _mm256_loadu_ps(x);
    32  __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vslope);
    34  vacc01234567 = _mm256_blendv_ps(vx01234567, vacc01234567, vx01234567);
|
D | vlrelu-avx-x16.c | local vx01234567 in xnn_f32_vlrelu_ukernel__avx_x16():
    29  const __m256 vx01234567 = _mm256_loadu_ps(x);
    33  __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vslope);
    36  vacc01234567 = _mm256_blendv_ps(vx01234567, vacc01234567, vx01234567);
|
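The blendv line is the interesting one: _mm256_blendv_ps selects per lane on the sign bit of its mask operand, so passing vx itself as the mask picks the slope-scaled value exactly where x is negative, with no separate compare instruction. A minimal sketch, assuming a hypothetical slope parameter:

    #include <immintrin.h>
    #include <stddef.h>

    /* LeakyReLU: y = x >= 0 ? x : slope * x, 8 lanes per iteration
     * (n assumed a multiple of 8). Sketch of the blendv trick. */
    static void f32_vlrelu_avx(const float* x, float* y, size_t n, float slope) {
      const __m256 vslope = _mm256_set1_ps(slope);
      for (; n >= 8; n -= 8) {
        const __m256 vx = _mm256_loadu_ps(x);
        const __m256 vneg = _mm256_mul_ps(vx, vslope);
        /* vx as its own mask: the sign bit selects vneg when x < 0. */
        _mm256_storeu_ps(y, _mm256_blendv_ps(vx, vneg, vx));
        x += 8;
        y += 8;
      }
    }
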
/external/XNNPACK/src/qu8-f32-vcvt/gen/ |
D | vcvt-wasmsimd-x16.c | local vx01234567 in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16():
    33  v128_t vx01234567 = wasm_u16x8_load8x8(x);
    37  vx01234567 = wasm_i16x8_add(vx01234567, vminus_zero_point);
    40  v128_t vy0123 = wasm_i32x4_extend_low_i16x8(vx01234567);
    41  v128_t vy4567 = wasm_i32x4_extend_high_i16x8(vx01234567);
|
D | vcvt-avx2-x8.c | local vx01234567 in xnn_qu8_f32_vcvt_ukernel__avx2_x8():
    33  __m256i vx01234567 = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) x));
    36  vx01234567 = _mm256_add_epi32(vx01234567, vminus_zero_point);
    38  __m256 vy01234567 = _mm256_cvtepi32_ps(vx01234567);
|
D | vcvt-wasmsimd-x24.c | local vx01234567 in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24():
    33  v128_t vx01234567 = wasm_u16x8_load8x8(x);
    38  vx01234567 = wasm_i16x8_add(vx01234567, vminus_zero_point);
    42  v128_t vy0123 = wasm_i32x4_extend_low_i16x8(vx01234567);
    43  v128_t vy4567 = wasm_i32x4_extend_high_i16x8(vx01234567);
|
D | vcvt-sse2-x16.c | local vx01234567 in xnn_qu8_f32_vcvt_ukernel__sse2_x16():
    35  __m128i vx01234567 = _mm_loadl_epi64((const __m128i*) x);
    40  vx01234567 = _mm_unpacklo_epi8(vx01234567, vzero);
    43  __m128 vy0123 = _mm_castsi128_ps(_mm_unpacklo_epi16(vx01234567, vmagic_exp));
    44  __m128 vy4567 = _mm_castsi128_ps(_mm_unpackhi_epi16(vx01234567, vmagic_exp));
|
D | vcvt-avx2-x16.c | local vx01234567 in xnn_qu8_f32_vcvt_ukernel__avx2_x16():
    33  __m256i vx01234567 = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) x));
    37  vx01234567 = _mm256_add_epi32(vx01234567, vminus_zero_point);
    40  __m256 vy01234567 = _mm256_cvtepi32_ps(vx01234567);
|
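The SSE2 kernels need a different trick, since SSE2 has no byte-to-int32 widening and no unsigned int-to-float conversion. Interleaving each 16-bit lane with a "magic exponent" half-word builds the bit pattern of the float 2^23 + x directly in a register; subtracting a magic bias (2^23 plus the zero point) then yields x - zero_point with no integer conversion at all. A sketch of one 8-element step, with illustrative names in place of the kernels' params constants:

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    /* Dequantize 8 uint8 values via the magic-exponent trick.
     * Sketch under the assumptions above, not the XNNPACK code. */
    static void qu8_to_f32_sse2(const uint8_t* x, float* y,
                                int32_t zero_point, float scale) {
      const __m128i vmagic_exp = _mm_set1_epi16(0x4B00);  /* high half of 2^23 */
      const __m128 vmagic_bias = _mm_set1_ps(8388608.0f + (float) zero_point);
      const __m128 vscale = _mm_set1_ps(scale);
      __m128i vx = _mm_loadl_epi64((const __m128i*) x);
      vx = _mm_unpacklo_epi8(vx, _mm_setzero_si128());  /* u8 -> u16 */
      /* Each u16 lane + magic exponent = bit pattern of 2^23 + x. */
      __m128 vy0123 = _mm_castsi128_ps(_mm_unpacklo_epi16(vx, vmagic_exp));
      __m128 vy4567 = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp));
      vy0123 = _mm_mul_ps(_mm_sub_ps(vy0123, vmagic_bias), vscale);
      vy4567 = _mm_mul_ps(_mm_sub_ps(vy4567, vmagic_bias), vscale);
      _mm_storeu_ps(y, vy0123);
      _mm_storeu_ps(y + 4, vy4567);
    }

The signed qs8 kernels at the top of this listing prepend one step: _mm_xor_si128 with 0x80 per byte rebiases int8 to the unsigned range (x + 128), and the extra 128 is folded into the magic bias.
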
/external/XNNPACK/src/f16-vhswish/gen/ |
D | vhswish-neonfp16arith-x16.c | local vx01234567 in xnn_f16_vhswish_ukernel__neonfp16arith_x16():
    36  float16x8_t vx01234567 = vld1q_f16(x); x += 8;
    39  float16x8_t vacc01234567 = vaddq_f16(vx01234567, vthree);
    40  vx01234567 = vmulq_f16(vx01234567, vsixth);
    50  vacc01234567 = vmulq_f16(vacc01234567, vx01234567);
|
D | vhswish-f16c-x16.c | local vx01234567 in xnn_f16_vhswish_ukernel__f16c_x16():
    37  __m256 vx01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) x));
    41  __m128i vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(vx01234567, vthree), _MM_FROUND_NO_EXC);
    42  vx01234567 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_mul_ps(vx01234567, vsixth), _MM_FROUND_NO_EXC));
    52  vacc01234567 = _mm256_cvtps_ph(_mm256_mul_ps(_mm256_cvtph_ps(vacc01234567), vx01234567), _MM_FROUND_NO_EXC);
|
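Both f16 kernels compute hardswish(x) = x * min(max(x + 3, 0), 6) / 6, factored so the division by 6 becomes a multiply: one register accumulates x + 3 (the clamp against [0, 6] falls in the lines the xref omits) while x itself is pre-scaled by 1/6. The f16c variant additionally round-trips through _mm256_cvtps_ph/_mm256_cvtph_ps after each step so the intermediate results round the way native fp16 arithmetic would. A scalar sketch of the same factoring, in float for portability:

    #include <stddef.h>

    /* hardswish(x) = x * clamp(x + 3, 0, 6) / 6, factored as
     * clamp(x + 3, 0, 6) * (x * 1/6) to mirror the kernels' data flow. */
    static void hswish_scalar(const float* x, float* y, size_t n) {
      for (size_t i = 0; i < n; i++) {
        float vacc = x[i] + 3.0f;                 /* vaddq_f16(vx, vthree) */
        const float vxs = x[i] * (1.0f / 6.0f);   /* vmulq_f16(vx, vsixth) */
        if (vacc < 0.0f) vacc = 0.0f;             /* clamp low (elided lines) */
        if (vacc > 6.0f) vacc = 6.0f;             /* clamp high (elided lines) */
        y[i] = vacc * vxs;                        /* vmulq_f16(vacc, vx) */
      }
    }
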
/external/XNNPACK/src/f16-vlrelu/gen/ |
D | vlrelu-neonfp16arith-x8.c | local vx01234567 in xnn_f16_vlrelu_ukernel__neonfp16arith_x8():
    31  const float16x8_t vx01234567 = vld1q_f16(i); i += 8;
    33  float16x8_t vacc01234567 = vmulq_f16(vx01234567, vslope);
    34  const uint16x8_t vmask01234567 = vcltq_s16(vreinterpretq_s16_f16(vx01234567), vmovq_n_s16(0));
    36  vacc01234567 = vbslq_f16(vmask01234567, vacc01234567, vx01234567);
|
D | vlrelu-neonfp16arith-x16.c | local vx01234567 in xnn_f16_vlrelu_ukernel__neonfp16arith_x16():
    31  const float16x8_t vx01234567 = vld1q_f16(i); i += 8;
    34  float16x8_t vacc01234567 = vmulq_f16(vx01234567, vslope);
    35  const uint16x8_t vmask01234567 = vcltq_s16(vreinterpretq_s16_f16(vx01234567), vmovq_n_s16(0));
    39  vacc01234567 = vbslq_f16(vmask01234567, vacc01234567, vx01234567);
|
D | vlrelu-f16c-x16.c | local vx01234567 in xnn_f16_vlrelu_ukernel__f16c_x16():
    32  const __m256 vx01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i));
    36  __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vslope);
    39  vacc01234567 = _mm256_blendv_ps(vx01234567, vacc01234567, vx01234567);
|
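The NEON fp16 kernels test the sign without a floating-point compare: reinterpreting the fp16 lanes as int16 and comparing against zero sets the mask exactly for inputs with the sign bit set (including -0.0, which is harmless since slope * -0.0 is still -0.0), and vbslq_f16 then selects the scaled value under that mask. A sketch, assuming a target with the fp16 arithmetic extension (e.g. -march=armv8.2-a+fp16):

    #include <arm_neon.h>
    #include <stddef.h>

    /* LeakyReLU on 8 fp16 lanes per step (n assumed a multiple of 8),
     * using the integer sign test from the kernels above. */
    static void f16_vlrelu_neon(const __fp16* x, __fp16* y, size_t n,
                                __fp16 slope) {
      const float16x8_t vslope = vdupq_n_f16(slope);
      for (; n >= 8; n -= 8) {
        const float16x8_t vx = vld1q_f16(x); x += 8;
        const float16x8_t vneg = vmulq_f16(vx, vslope);
        /* int16 compare < 0 is true exactly when the fp16 sign bit is set. */
        const uint16x8_t vmask = vcltq_s16(vreinterpretq_s16_f16(vx), vmovq_n_s16(0));
        vst1q_f16(y, vbslq_f16(vmask, vneg, vx)); y += 8;
      }
    }

The f16c variant above sidesteps this entirely: after converting to f32 it reuses the same _mm256_blendv_ps sign-bit trick as the f32-vlrelu kernels.
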