
Searched refs:vshift (Results 1–25 of 280), sorted by relevance


/external/XNNPACK/src/qu8-requantization/
precise-ssse3.c
44 const __m128i vshift = _mm_cvtsi32_si128((int) shift); in xnn_qu8_requantize_precise__ssse3() local
73 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
74 const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
75 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
76 const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
77 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
78 const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
79 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
80 const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
precise-sse2.c
44 const __m128i vshift = _mm_cvtsi32_si128((int) shift); in xnn_qu8_requantize_precise__sse2() local
78 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
79 const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
80 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
81 const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
82 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
83 const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
84 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
85 const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
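Note: in both the SSSE3 and SSE2 precise-requantization kernels above, vshift feeds _mm_srl_epi64, a logical right shift of each 64-bit lane whose count is taken from the low 64 bits of an XMM register. A minimal scalar model of the step being vectorized (a sketch of the arithmetic, not the XNNPACK API; the names are illustrative):

#include <stdint.h>

// Scale |x| by a fixed-point multiplier into a 64-bit product, add a
// rounding bias, then shift right by `shift` -- the scalar equivalent of
// the (absmul + vrounding) >> vshift lanes above.
static inline uint32_t scale_abs_and_round(int32_t x, uint32_t multiplier, uint32_t shift) {
  const uint32_t abs_x = (x >= 0) ? (uint32_t) x : -(uint32_t) x;  // well-defined even for INT32_MIN
  const uint64_t product = (uint64_t) abs_x * multiplier;
  const uint64_t rounding = UINT64_C(1) << (shift - 1);            // assumes 1 <= shift <= 63
  return (uint32_t) ((product + rounding) >> shift);
}

The kernels restore the sign and add the output zero point after this step.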
q31-neon.c
48 const int32x4_t vshift = vdupq_n_s32(-shift); in xnn_qu8_requantize_q31__neon() local
49 const int32x4_t vshift_eq_0_mask = vreinterpretq_s32_u32(vceqq_s32(vshift, vmovq_n_s32(0))); in xnn_qu8_requantize_q31__neon()
77 const int32x4_t x_scaled = vrshlq_s32(x_adjusted_product, vshift); in xnn_qu8_requantize_q31__neon()
78 const int32x4_t y_scaled = vrshlq_s32(y_adjusted_product, vshift); in xnn_qu8_requantize_q31__neon()
79 const int32x4_t z_scaled = vrshlq_s32(z_adjusted_product, vshift); in xnn_qu8_requantize_q31__neon()
80 const int32x4_t w_scaled = vrshlq_s32(w_adjusted_product, vshift); in xnn_qu8_requantize_q31__neon()
precise-neon.c
45 const int64x2_t vshift = vdupq_n_s64(-shift); in xnn_qu8_requantize_precise__neon() local
100 const int64x2_t x01_scaled = vrshlq_s64(x01_adjusted_product, vshift); in xnn_qu8_requantize_precise__neon()
101 const int64x2_t x23_scaled = vrshlq_s64(x23_adjusted_product, vshift); in xnn_qu8_requantize_precise__neon()
102 const int64x2_t y01_scaled = vrshlq_s64(y01_adjusted_product, vshift); in xnn_qu8_requantize_precise__neon()
103 const int64x2_t y23_scaled = vrshlq_s64(y23_adjusted_product, vshift); in xnn_qu8_requantize_precise__neon()
104 const int64x2_t z01_scaled = vrshlq_s64(z01_adjusted_product, vshift); in xnn_qu8_requantize_precise__neon()
105 const int64x2_t z23_scaled = vrshlq_s64(z23_adjusted_product, vshift); in xnn_qu8_requantize_precise__neon()
106 const int64x2_t w01_scaled = vrshlq_s64(w01_adjusted_product, vshift); in xnn_qu8_requantize_precise__neon()
107 const int64x2_t w23_scaled = vrshlq_s64(w23_adjusted_product, vshift); in xnn_qu8_requantize_precise__neon()
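Note: the NEON kernels express the same rounding right shift with vrshlq_s32/vrshlq_s64 and a negated count, since VRSHL by a negative amount is a rounding shift right. A scalar sketch of those semantics, assumed from the ARM intrinsic's definition (the q31 kernels additionally special-case shift == 0 via vshift_eq_0_mask):

#include <stdint.h>

// Scalar model of vrshlq_s64(x, vdupq_n_s64(-shift)): arithmetic shift
// right by `shift`, round to nearest, ties rounded up.
static inline int64_t rounding_shift_right_s64(int64_t x, uint32_t shift) {
  if (shift == 0) return x;  // VRSHL by 0 is the identity
  const int64_t rounding = INT64_C(1) << (shift - 1);
  // The hardware uses a wider intermediate; this sketch can overflow for x
  // near INT64_MAX and is illustrative only.
  return (x + rounding) >> shift;
}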
/external/XNNPACK/src/qs8-requantization/
precise-ssse3.c
44 const __m128i vshift = _mm_cvtsi32_si128((int) shift); in xnn_qs8_requantize_precise__ssse3() local
73 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
74 const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
75 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
76 const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
77 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
78 const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
79 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
80 const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
precise-sse2.c
44 const __m128i vshift = _mm_cvtsi32_si128((int) shift); in xnn_qs8_requantize_precise__sse2() local
78 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
79 const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
80 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
81 const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
82 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
83 const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
84 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
85 const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
q31-neon.c
48 const int32x4_t vshift = vdupq_n_s32(-shift); in xnn_qs8_requantize_q31__neon() local
49 const int32x4_t vshift_eq_0_mask = vreinterpretq_s32_u32(vceqq_s32(vshift, vmovq_n_s32(0))); in xnn_qs8_requantize_q31__neon()
77 const int32x4_t x_scaled = vrshlq_s32(x_adjusted_product, vshift); in xnn_qs8_requantize_q31__neon()
78 const int32x4_t y_scaled = vrshlq_s32(y_adjusted_product, vshift); in xnn_qs8_requantize_q31__neon()
79 const int32x4_t z_scaled = vrshlq_s32(z_adjusted_product, vshift); in xnn_qs8_requantize_q31__neon()
80 const int32x4_t w_scaled = vrshlq_s32(w_adjusted_product, vshift); in xnn_qs8_requantize_q31__neon()
precise-neon.c
45 const int64x2_t vshift = vdupq_n_s64(-shift); in xnn_qs8_requantize_precise__neon() local
100 const int64x2_t x01_scaled = vrshlq_s64(x01_adjusted_product, vshift); in xnn_qs8_requantize_precise__neon()
101 const int64x2_t x23_scaled = vrshlq_s64(x23_adjusted_product, vshift); in xnn_qs8_requantize_precise__neon()
102 const int64x2_t y01_scaled = vrshlq_s64(y01_adjusted_product, vshift); in xnn_qs8_requantize_precise__neon()
103 const int64x2_t y23_scaled = vrshlq_s64(y23_adjusted_product, vshift); in xnn_qs8_requantize_precise__neon()
104 const int64x2_t z01_scaled = vrshlq_s64(z01_adjusted_product, vshift); in xnn_qs8_requantize_precise__neon()
105 const int64x2_t z23_scaled = vrshlq_s64(z23_adjusted_product, vshift); in xnn_qs8_requantize_precise__neon()
106 const int64x2_t w01_scaled = vrshlq_s64(w01_adjusted_product, vshift); in xnn_qs8_requantize_precise__neon()
107 const int64x2_t w23_scaled = vrshlq_s64(w23_adjusted_product, vshift); in xnn_qs8_requantize_precise__neon()
/external/XNNPACK/src/qs8-vaddc/gen/
minmax-wasmsimd-x32.c
27 const int32_t vshift = params->wasmsimd.shift; in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32() local
60 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
61 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
62 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
63 …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
64 …vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vaccGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
65 …vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vaccKLMN, vshift), wasm_i32x4_gt(vremKLMN, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
66 …vaccOPQR = wasm_i32x4_sub(wasm_i32x4_shr(vaccOPQR, vshift), wasm_i32x4_gt(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
67 …vaccSTUV = wasm_i32x4_sub(wasm_i32x4_shr(vaccSTUV, vshift), wasm_i32x4_gt(vremSTUV, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
98 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
[all …]
minmax-sse41-mul32-ld32-x32.c
28 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() local
66 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
67 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
68 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
69 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
70 …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
71 …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
72 …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
73 …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
109 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
[all …]
minmax-xop-mul32-ld32-x32.c
33 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() local
71 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
72 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
73 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
74 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
75 …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
76 …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
77 …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
78 …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
114 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
[all …]
minmax-wasmsimd-x24.c
27 const int32_t vshift = params->wasmsimd.shift; in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24() local
55 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
56 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
57 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
58 …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
59 …vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vaccGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
60 …vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vaccKLMN, vshift), wasm_i32x4_gt(vremKLMN, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
90 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
91 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
minmax-xop-mul32-ld32-x24.c
33 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() local
65 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
66 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
67 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
68 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
69 …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
70 …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
103 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
104 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
minmax-sse41-mul32-ld32-x24.c
28 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() local
60 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
61 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
62 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
63 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
64 …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
65 …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
98 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
99 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
minmax-sse2-mul16-ld64-x32.c
28 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() local
85 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
86 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
87 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
88 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
89 …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
90 …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
91 …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
92 …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
136 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
[all …]
minmax-sse41-mul16-ld64-x32.c
28 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32() local
81 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
82 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
83 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
84 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
85 …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
86 …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
87 …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
88 …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
131 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32()
[all …]
minmax-wasmsimd-x16.c
27 const int32_t vshift = params->wasmsimd.shift; in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() local
50 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
51 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
52 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
53 …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
78 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
79 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
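Note: the vaddc kernels round differently from the requantization kernels: they take a plain arithmetic shift and then subtract a comparison mask (all-ones, i.e. -1, when a biased remainder exceeds a threshold), which increments the quotient. A scalar sketch, assuming vremainder_mask is the low `shift` bits and vremainder_threshold is half the divisor minus one; the remainder bias for negative inputs is also an assumption, since the vrem computation does not match vshift and is not shown above:

#include <stdint.h>

// Scalar model of: vacc = (vacc >> vshift) - (vrem > vremainder_threshold mask).
// Divides by 2^shift and rounds to nearest, ties away from zero.
static inline int32_t shr_round_nearest(int32_t acc, uint32_t shift) {
  const int32_t mask = (int32_t) ((UINT32_C(1) << shift) - 1);  // assumed vremainder_mask
  const int32_t threshold = mask >> 1;                          // assumed vremainder_threshold
  const int32_t rem = (acc & mask) + (acc >> 31);               // bias the remainder for negatives
  return (acc >> shift) + (rem > threshold);                    // subtracting a -1 mask == adding 1
}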
/external/XNNPACK/src/qs8-gavgpool/gen/
7x-minmax-sse41-c24-acc2.c
59 const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local
147 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
148 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
149 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
150 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
151 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
152 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
153 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
154 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
155 const __m128i vabsoutGI = _mm_srl_epi64(_mm_add_epi64(vabsprodGI, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
[all …]
7x-minmax-wasmsimd-c24-acc2.c
59 const int32_t vshift = params->wasmsimd.shift; in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local
160 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
161 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
162 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
163 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
164 const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
165 const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
166 const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
167 const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
168 const v128_t vabsoutGH = wasm_u64x2_shr(wasm_i64x2_add(vabsprodGH, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
[all …]
7x-minmax-sse41-c16-acc2.c
59 const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local
124 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
125 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
126 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
127 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
128 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
129 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
130 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
131 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
201 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
[all …]
7x-minmax-wasmsimd-c16-acc2.c
59 const int32_t vshift = params->wasmsimd.shift; in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local
133 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
134 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
135 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
136 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
137 const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
138 const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
139 const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
140 const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
211 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
[all …]
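Note: the gavgpool SSE kernels load vshift with _mm_loadl_epi64 from a params field, while the requantization kernels above build it with _mm_cvtsi32_si128; both leave the count in the low 64 bits of an XMM register, which is how _mm_srl_epi64 and _mm_sra_epi32 consume it. A sketch of the equivalence (the 64-bit params field is an assumption about layout):

#include <stdint.h>
#include <emmintrin.h>  // SSE2

// Both forms put the shift count in bits 0..63 of the XMM register and zero
// the rest, which is all _mm_srl_epi64/_mm_sra_epi32 look at.
static __m128i count_from_scalar(uint32_t shift) {
  return _mm_cvtsi32_si128((int) shift);                 // zero-extend into lane 0
}
static __m128i count_from_params(const uint64_t* shift_field /* hypothetical 64-bit field */) {
  return _mm_loadl_epi64((const __m128i*) shift_field);  // load 64 bits, zero the upper half
}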
/external/XNNPACK/src/qs8-vadd/gen/
minmax-wasmsimd-x32.c
29 const int32_t vshift = params->wasmsimd.shift; in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() local
73 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
74 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
75 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
76 …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
77 …vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vaccGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
78 …vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vaccKLMN, vshift), wasm_i32x4_gt(vremKLMN, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
79 …vaccOPQR = wasm_i32x4_sub(wasm_i32x4_shr(vaccOPQR, vshift), wasm_i32x4_gt(vremOPQR, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
80 …vaccSTUV = wasm_i32x4_sub(wasm_i32x4_shr(vaccSTUV, vshift), wasm_i32x4_gt(vremSTUV, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
116 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
[all …]
minmax-wasmsimd-x24.c
29 const int32_t vshift = params->wasmsimd.shift; in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24() local
65 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
66 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
67 …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
68 …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
69 …vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vaccGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
70 …vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vaccKLMN, vshift), wasm_i32x4_gt(vremKLMN, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
105 …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
106 …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
minmax-sse41-mul32-ld32-x32.c
30 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() local
82 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
83 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
84 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
85 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
86 …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
87 …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
88 …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
89 …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
131 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
[all …]
minmax-xop-mul32-ld32-x32.c
35 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() local
87 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
88 …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
89 …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
90 …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
91 …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
92 …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
93 …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
94 …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
136 …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
[all …]
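Note: the vadd kernels repeat the vaddc rounding pattern, reading vshift together with a remainder mask and threshold from params. A plausible precomputation of those constants, inferred from the comparisons above rather than copied from the XNNPACK initialization code:

#include <stdint.h>

struct rounding_shift_params {  // hypothetical struct; XNNPACK's real layout differs per arch
  uint32_t shift;               // right-shift amount, assumed 1..31
  int32_t remainder_mask;       // low `shift` bits of the accumulator
  int32_t remainder_threshold;  // rounding cutoff: half the divisor, minus one
};

static inline struct rounding_shift_params make_rounding_shift_params(uint32_t shift) {
  struct rounding_shift_params p;
  p.shift = shift;
  p.remainder_mask = (int32_t) ((UINT32_C(1) << shift) - 1);
  p.remainder_threshold = p.remainder_mask >> 1;
  return p;
}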
