Searched refs:vx01234567 (Results 1 – 25 of 69) sorted by relevance


/external/XNNPACK/src/qs8-f32-vcvt/gen/
vcvt-sse2-x16.c:36 __m128i vx01234567 = _mm_loadl_epi64((const __m128i*) x); in xnn_qs8_f32_vcvt_ukernel__sse2_x16() local
40 vx01234567 = _mm_xor_si128(vx01234567, vsign_mask); in xnn_qs8_f32_vcvt_ukernel__sse2_x16()
43 vx01234567 = _mm_unpacklo_epi8(vx01234567, vzero); in xnn_qs8_f32_vcvt_ukernel__sse2_x16()
46 __m128 vy0123 = _mm_castsi128_ps(_mm_unpacklo_epi16(vx01234567, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x16()
47 __m128 vy4567 = _mm_castsi128_ps(_mm_unpackhi_epi16(vx01234567, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x16()
vcvt-sse2-x24.c:36 __m128i vx01234567 = _mm_loadl_epi64((const __m128i*) x); in xnn_qs8_f32_vcvt_ukernel__sse2_x24() local
41 vx01234567 = _mm_xor_si128(vx01234567, vsign_mask); in xnn_qs8_f32_vcvt_ukernel__sse2_x24()
45 vx01234567 = _mm_unpacklo_epi8(vx01234567, vzero); in xnn_qs8_f32_vcvt_ukernel__sse2_x24()
49 __m128 vy0123 = _mm_castsi128_ps(_mm_unpacklo_epi16(vx01234567, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x24()
50 __m128 vy4567 = _mm_castsi128_ps(_mm_unpackhi_epi16(vx01234567, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x24()
vcvt-sse2-x32.c:36 __m128i vx01234567 = _mm_loadl_epi64((const __m128i*) x); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() local
42 vx01234567 = _mm_xor_si128(vx01234567, vsign_mask); in xnn_qs8_f32_vcvt_ukernel__sse2_x32()
47 vx01234567 = _mm_unpacklo_epi8(vx01234567, vzero); in xnn_qs8_f32_vcvt_ukernel__sse2_x32()
52 __m128 vy0123 = _mm_castsi128_ps(_mm_unpacklo_epi16(vx01234567, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x32()
53 __m128 vy4567 = _mm_castsi128_ps(_mm_unpackhi_epi16(vx01234567, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x32()
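Note: the SSE2 kernels above use the magic-exponent trick rather than a native byte-to-float conversion. The XOR with vsign_mask rebiases the signed bytes to unsigned, the epi8 unpack zero-extends them to 16 bits, and the epi16 unpack with vmagic_exp splices each value into the mantissa of a float whose exponent field encodes 2^23; a final subtraction of the magic bias (which also folds in the sign-flip offset and zero point, on lines this search does not match) yields the float value. A minimal scalar sketch of the core step, with hypothetical names:

#include <stdint.h>
#include <string.h>

/* Scalar model of the magic-exponent step: OR a 16-bit value into the
 * mantissa of 2^23 (bit pattern 0x4B000000) so the float reads
 * 8388608.0f + x, then subtract the bias to recover x exactly. */
static float magic_u16_to_f32(uint16_t x) {
  uint32_t bits = 0x4B000000u | (uint32_t) x;  /* exponent of 2^23, mantissa = x */
  float f;
  memcpy(&f, &bits, sizeof f);                 /* well-defined bit-cast */
  return f - 8388608.0f;                       /* (2^23 + x) - 2^23 == (float) x */
}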
vcvt-wasmsimd-x16.c:33 v128_t vx01234567 = wasm_i16x8_load8x8(x); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() local
37 vx01234567 = wasm_i16x8_add(vx01234567, vminus_zero_point); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16()
40 v128_t vy0123 = wasm_i32x4_extend_low_i16x8(vx01234567); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16()
41 v128_t vy4567 = wasm_i32x4_extend_high_i16x8(vx01234567); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16()
vcvt-avx2-x8.c:33 __m256i vx01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) x)); in xnn_qs8_f32_vcvt_ukernel__avx2_x8() local
36 vx01234567 = _mm256_add_epi32(vx01234567, vminus_zero_point); in xnn_qs8_f32_vcvt_ukernel__avx2_x8()
38 __m256 vy01234567 = _mm256_cvtepi32_ps(vx01234567); in xnn_qs8_f32_vcvt_ukernel__avx2_x8()
vcvt-wasmsimd-x24.c:33 v128_t vx01234567 = wasm_i16x8_load8x8(x); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() local
38 vx01234567 = wasm_i16x8_add(vx01234567, vminus_zero_point); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24()
42 v128_t vy0123 = wasm_i32x4_extend_low_i16x8(vx01234567); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24()
43 v128_t vy4567 = wasm_i32x4_extend_high_i16x8(vx01234567); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24()
vcvt-avx2-x16.c:33 __m256i vx01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) x)); in xnn_qs8_f32_vcvt_ukernel__avx2_x16() local
37 vx01234567 = _mm256_add_epi32(vx01234567, vminus_zero_point); in xnn_qs8_f32_vcvt_ukernel__avx2_x16()
40 __m256 vy01234567 = _mm256_cvtepi32_ps(vx01234567); in xnn_qs8_f32_vcvt_ukernel__avx2_x16()
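The AVX2 and WAsm SIMD kernels in this group take the direct route instead: widen the bytes, apply the zero point in the integer domain, then convert to float. A hedged AVX2 sketch of that shape (the kernels add a pre-negated vminus_zero_point, which is equivalent to the subtraction here; the trailing scale multiply and vscale are assumptions, not part of the hits):

#include <stdint.h>
#include <immintrin.h>

/* Sign-extend 8 bytes to i32, remove the zero point, convert, scale. */
static __m256 dequant8_qs8_avx2(const int8_t* x, int32_t zero_point, __m256 vscale) {
  __m256i vx = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) x));
  vx = _mm256_sub_epi32(vx, _mm256_set1_epi32(zero_point));
  return _mm256_mul_ps(_mm256_cvtepi32_ps(vx), vscale);
}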
/external/XNNPACK/src/f32-qu8-vcvt/gen/
vcvt-avx-x16.c:36 __m256 vx01234567 = _mm256_loadu_ps(x); in xnn_f32_qu8_vcvt_ukernel__avx_x16() local
40 vx01234567 = _mm256_mul_ps(vx01234567, vscale); in xnn_f32_qu8_vcvt_ukernel__avx_x16()
43 vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx_x16()
46 const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567); in xnn_f32_qu8_vcvt_ukernel__avx_x16()
vcvt-avx-x24.c:36 __m256 vx01234567 = _mm256_loadu_ps(x); in xnn_f32_qu8_vcvt_ukernel__avx_x24() local
41 vx01234567 = _mm256_mul_ps(vx01234567, vscale); in xnn_f32_qu8_vcvt_ukernel__avx_x24()
45 vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx_x24()
49 const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567); in xnn_f32_qu8_vcvt_ukernel__avx_x24()
vcvt-avx-x32.c:36 __m256 vx01234567 = _mm256_loadu_ps(x); in xnn_f32_qu8_vcvt_ukernel__avx_x32() local
42 vx01234567 = _mm256_mul_ps(vx01234567, vscale); in xnn_f32_qu8_vcvt_ukernel__avx_x32()
47 vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx_x32()
52 const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567); in xnn_f32_qu8_vcvt_ukernel__avx_x32()
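These f32 -> qu8 hits show the quantize core: multiply by the scale, then clamp against output_max_less_zero_point before _mm256_cvtps_epi32 rounds to nearest-even (the default MXCSR mode). The matching lower clamp and the zero-point addition sit on lines this search does not match. A scalar model under those assumptions, parameter names hypothetical:

#include <math.h>
#include <stdint.h>

static uint8_t quantize_qu8(float x, float scale,
                            float output_min_less_zero_point,
                            float output_max_less_zero_point,
                            int32_t zero_point) {
  float v = x * scale;
  v = fmaxf(v, output_min_less_zero_point);  /* lower clamp, not in the hits */
  v = fminf(v, output_max_less_zero_point);  /* the min seen in the hits */
  return (uint8_t) ((int32_t) nearbyintf(v) + zero_point);
}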
/external/XNNPACK/src/f32-qs8-vcvt/gen/
vcvt-avx-x16.c:36 __m256 vx01234567 = _mm256_loadu_ps(x); in xnn_f32_qs8_vcvt_ukernel__avx_x16() local
40 vx01234567 = _mm256_mul_ps(vx01234567, vscale); in xnn_f32_qs8_vcvt_ukernel__avx_x16()
43 vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx_x16()
46 const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567); in xnn_f32_qs8_vcvt_ukernel__avx_x16()
vcvt-avx-x24.c:36 __m256 vx01234567 = _mm256_loadu_ps(x); in xnn_f32_qs8_vcvt_ukernel__avx_x24() local
41 vx01234567 = _mm256_mul_ps(vx01234567, vscale); in xnn_f32_qs8_vcvt_ukernel__avx_x24()
45 vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx_x24()
49 const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567); in xnn_f32_qs8_vcvt_ukernel__avx_x24()
vcvt-avx-x32.c:36 __m256 vx01234567 = _mm256_loadu_ps(x); in xnn_f32_qs8_vcvt_ukernel__avx_x32() local
42 vx01234567 = _mm256_mul_ps(vx01234567, vscale); in xnn_f32_qs8_vcvt_ukernel__avx_x32()
47 vx01234567 = _mm256_min_ps(vx01234567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx_x32()
52 const __m256i vacc01234567 = _mm256_cvtps_epi32(vx01234567); in xnn_f32_qs8_vcvt_ukernel__avx_x32()
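The f32 -> qs8 hits here follow the same pattern, line for line, as the qu8 group above; only the output range and zero point differ, so the scalar model after that group applies with int8_t in place of uint8_t.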
/external/XNNPACK/src/f32-vlrelu/gen/
vlrelu-avx-x8.c:29 const __m256 vx01234567 = _mm256_loadu_ps(x); in xnn_f32_vlrelu_ukernel__avx_x8() local
32 __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vslope); in xnn_f32_vlrelu_ukernel__avx_x8()
34 vacc01234567 = _mm256_blendv_ps(vx01234567, vacc01234567, vx01234567); in xnn_f32_vlrelu_ukernel__avx_x8()
vlrelu-avx-x16.c:29 const __m256 vx01234567 = _mm256_loadu_ps(x); in xnn_f32_vlrelu_ukernel__avx_x16() local
33 __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vslope); in xnn_f32_vlrelu_ukernel__avx_x16()
36 vacc01234567 = _mm256_blendv_ps(vx01234567, vacc01234567, vx01234567); in xnn_f32_vlrelu_ukernel__avx_x16()
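The blendv line above is not a bug: _mm256_blendv_ps selects per lane on the mask's sign bit, so passing vx itself as the mask picks the scaled value exactly where x < 0. A self-contained sketch of the trick:

#include <immintrin.h>

static __m256 leaky_relu_avx(__m256 vx, __m256 vslope) {
  __m256 vacc = _mm256_mul_ps(vx, vslope);  /* x * slope */
  return _mm256_blendv_ps(vx, vacc, vx);    /* x >= 0 ? x : x * slope */
}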
/external/XNNPACK/src/qu8-f32-vcvt/gen/
vcvt-wasmsimd-x16.c:33 v128_t vx01234567 = wasm_u16x8_load8x8(x); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() local
37 vx01234567 = wasm_i16x8_add(vx01234567, vminus_zero_point); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16()
40 v128_t vy0123 = wasm_i32x4_extend_low_i16x8(vx01234567); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16()
41 v128_t vy4567 = wasm_i32x4_extend_high_i16x8(vx01234567); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16()
vcvt-avx2-x8.c:33 __m256i vx01234567 = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) x)); in xnn_qu8_f32_vcvt_ukernel__avx2_x8() local
36 vx01234567 = _mm256_add_epi32(vx01234567, vminus_zero_point); in xnn_qu8_f32_vcvt_ukernel__avx2_x8()
38 __m256 vy01234567 = _mm256_cvtepi32_ps(vx01234567); in xnn_qu8_f32_vcvt_ukernel__avx2_x8()
vcvt-wasmsimd-x24.c:33 v128_t vx01234567 = wasm_u16x8_load8x8(x); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() local
38 vx01234567 = wasm_i16x8_add(vx01234567, vminus_zero_point); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24()
42 v128_t vy0123 = wasm_i32x4_extend_low_i16x8(vx01234567); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24()
43 v128_t vy4567 = wasm_i32x4_extend_high_i16x8(vx01234567); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24()
vcvt-sse2-x16.c:35 __m128i vx01234567 = _mm_loadl_epi64((const __m128i*) x); in xnn_qu8_f32_vcvt_ukernel__sse2_x16() local
40 vx01234567 = _mm_unpacklo_epi8(vx01234567, vzero); in xnn_qu8_f32_vcvt_ukernel__sse2_x16()
43 __m128 vy0123 = _mm_castsi128_ps(_mm_unpacklo_epi16(vx01234567, vmagic_exp)); in xnn_qu8_f32_vcvt_ukernel__sse2_x16()
44 __m128 vy4567 = _mm_castsi128_ps(_mm_unpackhi_epi16(vx01234567, vmagic_exp)); in xnn_qu8_f32_vcvt_ukernel__sse2_x16()
vcvt-avx2-x16.c:33 __m256i vx01234567 = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i*) x)); in xnn_qu8_f32_vcvt_ukernel__avx2_x16() local
37 vx01234567 = _mm256_add_epi32(vx01234567, vminus_zero_point); in xnn_qu8_f32_vcvt_ukernel__avx2_x16()
40 __m256 vy01234567 = _mm256_cvtepi32_ps(vx01234567); in xnn_qu8_f32_vcvt_ukernel__avx2_x16()
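The qu8 variants differ from the qs8 ones only in how they widen: zero-extension (wasm_u16x8_load8x8, _mm256_cvtepu8_epi32, or an unpack with vzero) instead of sign-extension, and the SSE2 path needs no sign-mask XOR. A hedged WAsm SIMD sketch of the widen-add-convert shape (vminus_zero_point, vscale, and the scale multiply are assumptions):

#include <stdint.h>
#include <wasm_simd128.h>

static void dequant8_qu8_wasmsimd(const uint8_t* x, v128_t vminus_zero_point,
                                  v128_t vscale, float* y) {
  v128_t vx = wasm_u16x8_load8x8(x);           /* 8 bytes -> 8 x i16 */
  vx = wasm_i16x8_add(vx, vminus_zero_point);  /* still fits in 16 bits */
  v128_t vy_lo = wasm_f32x4_convert_i32x4(wasm_i32x4_extend_low_i16x8(vx));
  v128_t vy_hi = wasm_f32x4_convert_i32x4(wasm_i32x4_extend_high_i16x8(vx));
  wasm_v128_store(y,     wasm_f32x4_mul(vy_lo, vscale));
  wasm_v128_store(y + 4, wasm_f32x4_mul(vy_hi, vscale));
}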
/external/XNNPACK/src/f16-vhswish/gen/
vhswish-neonfp16arith-x16.c:36 float16x8_t vx01234567 = vld1q_f16(x); x += 8; in xnn_f16_vhswish_ukernel__neonfp16arith_x16() local
39 float16x8_t vacc01234567 = vaddq_f16(vx01234567, vthree); in xnn_f16_vhswish_ukernel__neonfp16arith_x16()
40 vx01234567 = vmulq_f16(vx01234567, vsixth); in xnn_f16_vhswish_ukernel__neonfp16arith_x16()
50 vacc01234567 = vmulq_f16(vacc01234567, vx01234567); in xnn_f16_vhswish_ukernel__neonfp16arith_x16()
vhswish-f16c-x16.c:37 __m256 vx01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) x)); in xnn_f16_vhswish_ukernel__f16c_x16() local
41 __m128i vacc01234567 = _mm256_cvtps_ph(_mm256_add_ps(vx01234567, vthree), _MM_FROUND_NO_EXC); in xnn_f16_vhswish_ukernel__f16c_x16()
42 vx01234567 = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_mul_ps(vx01234567, vsixth), _MM_FROUND_NO_EXC)… in xnn_f16_vhswish_ukernel__f16c_x16()
52 …vacc01234567 = _mm256_cvtps_ph(_mm256_mul_ps(_mm256_cvtph_ps(vacc01234567), vx01234567), _MM_FROUN… in xnn_f16_vhswish_ukernel__f16c_x16()
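Both f16 hswish kernels compute y = x * clamp(x + 3, 0, 6) / 6, folding the division into a multiply by vsixth; the clamp itself sits on lines this search does not match. A scalar reference:

#include <math.h>

static float hardswish(float x) {
  float acc = fminf(fmaxf(x + 3.0f, 0.0f), 6.0f);  /* clamp(x + 3, 0, 6) */
  return (x * (1.0f / 6.0f)) * acc;                /* kernels scale x by 1/6 first */
}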
/external/XNNPACK/src/f16-vlrelu/gen/
vlrelu-neonfp16arith-x8.c:31 const float16x8_t vx01234567 = vld1q_f16(i); i += 8; in xnn_f16_vlrelu_ukernel__neonfp16arith_x8() local
33 float16x8_t vacc01234567 = vmulq_f16(vx01234567, vslope); in xnn_f16_vlrelu_ukernel__neonfp16arith_x8()
34 const uint16x8_t vmask01234567 = vcltq_s16(vreinterpretq_s16_f16(vx01234567), vmovq_n_s16(0)); in xnn_f16_vlrelu_ukernel__neonfp16arith_x8()
36 vacc01234567 = vbslq_f16(vmask01234567, vacc01234567, vx01234567); in xnn_f16_vlrelu_ukernel__neonfp16arith_x8()
vlrelu-neonfp16arith-x16.c:31 const float16x8_t vx01234567 = vld1q_f16(i); i += 8; in xnn_f16_vlrelu_ukernel__neonfp16arith_x16() local
34 float16x8_t vacc01234567 = vmulq_f16(vx01234567, vslope); in xnn_f16_vlrelu_ukernel__neonfp16arith_x16()
35 const uint16x8_t vmask01234567 = vcltq_s16(vreinterpretq_s16_f16(vx01234567), vmovq_n_s16(0)); in xnn_f16_vlrelu_ukernel__neonfp16arith_x16()
39 vacc01234567 = vbslq_f16(vmask01234567, vacc01234567, vx01234567); in xnn_f16_vlrelu_ukernel__neonfp16arith_x16()
vlrelu-f16c-x16.c:32 const __m256 vx01234567 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i)); in xnn_f16_vlrelu_ukernel__f16c_x16() local
36 __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vslope); in xnn_f16_vlrelu_ukernel__f16c_x16()
39 vacc01234567 = _mm256_blendv_ps(vx01234567, vacc01234567, vx01234567); in xnn_f16_vlrelu_ukernel__f16c_x16()
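The neonfp16arith kernels build their mask by reinterpreting the f16 lanes as s16 and comparing against zero: the IEEE sign bit makes any negative half compare below zero, so no FP16 compare is needed (negative zero also takes the scaled path, which is value-preserving). A sketch mirroring the hits, requiring the fp16 arithmetic extension:

#include <arm_neon.h>

static float16x8_t leaky_relu_f16(float16x8_t vx, float16x8_t vslope) {
  float16x8_t vacc = vmulq_f16(vx, vslope);
  uint16x8_t vmask = vcltq_s16(vreinterpretq_s16_f16(vx), vmovq_n_s16(0));
  return vbslq_f16(vmask, vacc, vx);  /* scaled where x < 0, x otherwise */
}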
