/external/XNNPACK/src/qu8-requantization/

precise-ssse3.c (matches in xnn_qu8_requantize_precise__ssse3(); vshift is a local):
  44: const __m128i vshift = _mm_cvtsi32_si128((int) shift);
  73: const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift);
  74: const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift);
  75: const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift);
  76: const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift);
  77: const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift);
  78: const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift);
  79: const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift);
  80: const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift);

precise-sse2.c (matches in xnn_qu8_requantize_precise__sse2(); vshift is a local):
  44: const __m128i vshift = _mm_cvtsi32_si128((int) shift);
  78: const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift);
  79: const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift);
  80: const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift);
  81: const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift);
  82: const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift);
  83: const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift);
  84: const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift);
  85: const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift);

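Both SSE flavors above vectorize the same rounding step: each 64-bit product of the absolute input and the fixed-point multiplier is biased by vrounding, then logically shifted right by vshift. A one-lane scalar sketch of that step, assuming (the listing does not show it) that vrounding holds 1 << (shift - 1) as in XNNPACK's scalar requantization; names are illustrative:

    #include <stdint.h>
    #include <stdlib.h>

    /* One lane of the abs_scaled computation: round-half-up division of the
     * absolute product by 2^shift. Sign restoration and zero-point addition
     * happen later in the kernel and are omitted here. */
    static uint32_t abs_scaled_lane(int32_t x, uint32_t multiplier, uint32_t shift) {
      const uint64_t rounding = UINT64_C(1) << (shift - 1);  /* assumed vrounding; needs shift >= 1 */
      const uint64_t absmul = (uint64_t) (uint32_t) llabs((int64_t) x) * multiplier;  /* the absmul lanes */
      return (uint32_t) ((absmul + rounding) >> shift);      /* the _mm_srl_epi64 step */
    }

The shift is logical (_mm_srl_epi64) precisely because it operates on absolute values, which also sidesteps the fact that SSE2 has no 64-bit arithmetic shift.
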
q31-neon.c (matches in xnn_qu8_requantize_q31__neon(); vshift is a local):
  48: const int32x4_t vshift = vdupq_n_s32(-shift);
  49: const int32x4_t vshift_eq_0_mask = vreinterpretq_s32_u32(vceqq_s32(vshift, vmovq_n_s32(0)));
  77: const int32x4_t x_scaled = vrshlq_s32(x_adjusted_product, vshift);
  78: const int32x4_t y_scaled = vrshlq_s32(y_adjusted_product, vshift);
  79: const int32x4_t z_scaled = vrshlq_s32(z_adjusted_product, vshift);
  80: const int32x4_t w_scaled = vrshlq_s32(w_adjusted_product, vshift);

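The Q31 NEON variant encodes the right shift as vrshlq_s32 with a negated count: NEON rounding shifts treat positive counts as left shifts, so a right shift by shift is passed as -shift. A per-lane scalar model of vrshlq_s32 (a sketch; the vshift_eq_0_mask built on line 49 feeds a separate fixup of the Q31 product, elided from this listing, so that the shift-by-zero case stays exact):

    #include <stdint.h>

    /* Model of one lane of vrshlq_s32(v, vdupq_n_s32(-shift)) for shift >= 0:
     * round to nearest, ties toward positive infinity. */
    static int32_t vrshl_s32_lane(int32_t v, uint32_t shift) {
      if (shift == 0) return v;  /* no rounding bias when the count is zero */
      return (int32_t) (((int64_t) v + ((int64_t) 1 << (shift - 1))) >> shift);
    }
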
precise-neon.c (matches in xnn_qu8_requantize_precise__neon(); vshift is a local):
  45: const int64x2_t vshift = vdupq_n_s64(-shift);
  100: const int64x2_t x01_scaled = vrshlq_s64(x01_adjusted_product, vshift);
  101: const int64x2_t x23_scaled = vrshlq_s64(x23_adjusted_product, vshift);
  102: const int64x2_t y01_scaled = vrshlq_s64(y01_adjusted_product, vshift);
  103: const int64x2_t y23_scaled = vrshlq_s64(y23_adjusted_product, vshift);
  104: const int64x2_t z01_scaled = vrshlq_s64(z01_adjusted_product, vshift);
  105: const int64x2_t z23_scaled = vrshlq_s64(z23_adjusted_product, vshift);
  106: const int64x2_t w01_scaled = vrshlq_s64(w01_adjusted_product, vshift);
  107: const int64x2_t w23_scaled = vrshlq_s64(w23_adjusted_product, vshift);

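The precise NEON variant applies the same negated-count idiom in 64 bits, rounding the full input-times-multiplier products two lanes at a time. A per-lane sketch of the vrshlq_s64 step (the "_adjusted_" in the operand names appears to refer to a sign-dependent bias applied earlier, not shown in this listing, which turns this round-half-up shift into round-ties-away-from-zero):

    #include <stdint.h>

    /* Model of one lane of vrshlq_s64(p, vdupq_n_s64(-shift)), 1 <= shift <= 62. */
    static int64_t vrshl_s64_lane(int64_t p, uint32_t shift) {
      return (p + ((int64_t) 1 << (shift - 1))) >> shift;
    }
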
/external/XNNPACK/src/qs8-requantization/

precise-ssse3.c (matches in xnn_qs8_requantize_precise__ssse3(); vshift is a local):
  44: const __m128i vshift = _mm_cvtsi32_si128((int) shift);
  73: const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift);
  74: const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift);
  75: const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift);
  76: const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift);
  77: const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift);
  78: const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift);
  79: const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift);
  80: const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift);

precise-sse2.c (matches in xnn_qs8_requantize_precise__sse2(); vshift is a local):
  44: const __m128i vshift = _mm_cvtsi32_si128((int) shift);
  78: const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift);
  79: const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift);
  80: const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift);
  81: const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift);
  82: const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift);
  83: const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift);
  84: const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift);
  85: const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift);

q31-neon.c (matches in xnn_qs8_requantize_q31__neon(); vshift is a local):
  48: const int32x4_t vshift = vdupq_n_s32(-shift);
  49: const int32x4_t vshift_eq_0_mask = vreinterpretq_s32_u32(vceqq_s32(vshift, vmovq_n_s32(0)));
  77: const int32x4_t x_scaled = vrshlq_s32(x_adjusted_product, vshift);
  78: const int32x4_t y_scaled = vrshlq_s32(y_adjusted_product, vshift);
  79: const int32x4_t z_scaled = vrshlq_s32(z_adjusted_product, vshift);
  80: const int32x4_t w_scaled = vrshlq_s32(w_adjusted_product, vshift);

precise-neon.c (matches in xnn_qs8_requantize_precise__neon(); vshift is a local):
  45: const int64x2_t vshift = vdupq_n_s64(-shift);
  100: const int64x2_t x01_scaled = vrshlq_s64(x01_adjusted_product, vshift);
  101: const int64x2_t x23_scaled = vrshlq_s64(x23_adjusted_product, vshift);
  102: const int64x2_t y01_scaled = vrshlq_s64(y01_adjusted_product, vshift);
  103: const int64x2_t y23_scaled = vrshlq_s64(y23_adjusted_product, vshift);
  104: const int64x2_t z01_scaled = vrshlq_s64(z01_adjusted_product, vshift);
  105: const int64x2_t z23_scaled = vrshlq_s64(z23_adjusted_product, vshift);
  106: const int64x2_t w01_scaled = vrshlq_s64(w01_adjusted_product, vshift);
  107: const int64x2_t w23_scaled = vrshlq_s64(w23_adjusted_product, vshift);

/external/XNNPACK/src/qs8-vaddc/gen/

minmax-wasmsimd-x32.c (matches in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32(); vshift is a local):
  27: const int32_t vshift = params->wasmsimd.shift;
  60: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  61: …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr…
  62: …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr…
  63: …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr…
  64: …vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vaccGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vremainder_thr…
  65: …vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vaccKLMN, vshift), wasm_i32x4_gt(vremKLMN, vremainder_thr…
  66: …vaccOPQR = wasm_i32x4_sub(wasm_i32x4_shr(vaccOPQR, vshift), wasm_i32x4_gt(vremOPQR, vremainder_thr…
  67: …vaccSTUV = wasm_i32x4_sub(wasm_i32x4_shr(vaccSTUV, vshift), wasm_i32x4_gt(vremSTUV, vremainder_thr…
  98: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  [all …]

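Every kernel in this directory (and in qs8-vadd further down) rounds with the same two-intrinsic idiom: an arithmetic right shift that truncates toward negative infinity, followed by subtracting the all-ones comparison mask, which adds 1 back in lanes where the discarded remainder exceeds half a ULP. The net effect is round-to-nearest with ties away from zero. A one-lane scalar sketch, reconstructing the truncated vremainder_thr… operand under the usual XNNPACK params convention (an assumption, since the listing cuts those lines off):

    #include <stdint.h>

    /* One lane of the shr/sub pair. Assumes the params hold
     *   remainder_mask      = (1 << shift) - 1
     *   remainder_threshold = remainder_mask >> 1
     * and that >> on a negative int32_t is arithmetic, as the intrinsics guarantee. */
    static int32_t rounding_shift_lane(int32_t vacc, uint32_t shift) {
      const int32_t remainder_mask = (int32_t) ((UINT32_C(1) << shift) - 1);
      const int32_t remainder_threshold = remainder_mask >> 1;
      /* The comparison lanes (wasm_i32x4_lt in these kernels) are -1 when true,
       * biasing the remainder of negative accumulators down by one. */
      const int32_t vrem = (vacc & remainder_mask) + (vacc < 0 ? -1 : 0);
      /* wasm_i32x4_gt / _mm_cmpgt_epi32 lanes are also -1 when true, so the
       * vector code subtracts the mask; the scalar form adds the carry. */
      return (vacc >> shift) + (vrem > remainder_threshold ? 1 : 0);
    }
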
minmax-sse41-mul32-ld32-x32.c (matches in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32(); vshift is a local):
  28: const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);
  66: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  67: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…
  68: …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr…
  69: …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr…
  70: …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr…
  71: …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr…
  72: …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…
  73: …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr…
  109: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  [all …]

minmax-xop-mul32-ld32-x32.c (matches in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32(); vshift is a local):
  33: const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);
  71: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  72: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…
  73: …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr…
  74: …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr…
  75: …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr…
  76: …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr…
  77: …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…
  78: …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr…
  114: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  [all …]

minmax-wasmsimd-x24.c (matches in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24(); vshift is a local):
  27: const int32_t vshift = params->wasmsimd.shift;
  55: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  56: …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr…
  57: …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr…
  58: …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr…
  59: …vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vaccGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vremainder_thr…
  60: …vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vaccKLMN, vshift), wasm_i32x4_gt(vremKLMN, vremainder_thr…
  90: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  91: …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr…

minmax-xop-mul32-ld32-x24.c (matches in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24(); vshift is a local):
  33: const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);
  65: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  66: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…
  67: …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr…
  68: …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr…
  69: …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr…
  70: …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr…
  103: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  104: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…

minmax-sse41-mul32-ld32-x24.c (matches in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24(); vshift is a local):
  28: const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);
  60: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  61: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…
  62: …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr…
  63: …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr…
  64: …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr…
  65: …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr…
  98: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  99: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…

minmax-sse2-mul16-ld64-x32.c (matches in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32(); vshift is a local):
  28: const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);
  85: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  86: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…
  87: …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr…
  88: …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr…
  89: …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr…
  90: …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr…
  91: …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…
  92: …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr…
  136: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  [all …]

minmax-sse41-mul16-ld64-x32.c (matches in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32(); vshift is a local):
  28: const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);
  81: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  82: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…
  83: …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr…
  84: …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr…
  85: …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr…
  86: …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr…
  87: …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…
  88: …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr…
  131: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  [all …]

minmax-wasmsimd-x16.c (matches in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16(); vshift is a local):
  27: const int32_t vshift = params->wasmsimd.shift;
  50: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  51: …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr…
  52: …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr…
  53: …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr…
  78: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  79: …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr…

/external/XNNPACK/src/qs8-gavgpool/gen/

7x-minmax-sse41-c24-acc2.c (matches in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2(); vshift is a local):
  59: const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
  147: const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
  148: const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
  149: const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
  150: const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
  151: const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift);
  152: const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift);
  153: const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift);
  154: const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift);
  155: const __m128i vabsoutGI = _mm_srl_epi64(_mm_add_epi64(vabsprodGI, vrounding), vshift);
  [all …]

7x-minmax-wasmsimd-c24-acc2.c (matches in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2(); vshift is a local):
  59: const int32_t vshift = params->wasmsimd.shift;
  160: const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
  161: const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
  162: const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
  163: const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
  164: const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift);
  165: const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift);
  166: const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift);
  167: const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift);
  168: const v128_t vabsoutGH = wasm_u64x2_shr(wasm_i64x2_add(vabsprodGH, vrounding), vshift);
  [all …]

7x-minmax-sse41-c16-acc2.c (matches in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2(); vshift is a local):
  59: const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
  124: const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
  125: const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
  126: const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
  127: const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
  128: const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift);
  129: const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift);
  130: const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift);
  131: const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift);
  201: const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
  [all …]

7x-minmax-wasmsimd-c16-acc2.c (matches in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2(); vshift is a local):
  59: const int32_t vshift = params->wasmsimd.shift;
  133: const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
  134: const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
  135: const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
  136: const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
  137: const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift);
  138: const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift);
  139: const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift);
  140: const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift);
  211: const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
  [all …]

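The gavgpool kernels reuse the 64-bit rounding scheme from the requantization files at the top of this listing, and the two definition forms of vshift show a platform difference: _mm_srl_epi64 takes its count from the low 64 bits of a vector operand, which is why the SSE4.1 kernels load it via _mm_loadl_epi64, while wasm_u64x2_shr takes a plain integer count, so the wasmsimd kernels keep vshift as a scalar int32_t. A one-lane sketch of the shared step:

    #include <stdint.h>

    /* One 64-bit lane of the vabsout computation: round-half-up division of
     * the absolute product by 2^vshift; the sign is reapplied afterwards. */
    static uint64_t vabsout_lane(uint64_t vabsprod, uint64_t vrounding, uint32_t vshift) {
      return (vabsprod + vrounding) >> vshift;
    }
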
/external/XNNPACK/src/qs8-vadd/gen/

minmax-wasmsimd-x32.c (matches in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32(); vshift is a local):
  29: const int32_t vshift = params->wasmsimd.shift;
  73: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  74: …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr…
  75: …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr…
  76: …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr…
  77: …vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vaccGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vremainder_thr…
  78: …vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vaccKLMN, vshift), wasm_i32x4_gt(vremKLMN, vremainder_thr…
  79: …vaccOPQR = wasm_i32x4_sub(wasm_i32x4_shr(vaccOPQR, vshift), wasm_i32x4_gt(vremOPQR, vremainder_thr…
  80: …vaccSTUV = wasm_i32x4_sub(wasm_i32x4_shr(vaccSTUV, vshift), wasm_i32x4_gt(vremSTUV, vremainder_thr…
  116: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  [all …]

minmax-wasmsimd-x24.c (matches in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24(); vshift is a local):
  29: const int32_t vshift = params->wasmsimd.shift;
  65: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  66: …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr…
  67: …vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vacc89AB, vshift), wasm_i32x4_gt(vrem89AB, vremainder_thr…
  68: …vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vaccCDEF, vshift), wasm_i32x4_gt(vremCDEF, vremainder_thr…
  69: …vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vaccGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vremainder_thr…
  70: …vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vaccKLMN, vshift), wasm_i32x4_gt(vremKLMN, vremainder_thr…
  105: …vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vacc0123, vshift), wasm_i32x4_gt(vrem0123, vremainder_thr…
  106: …vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr…

minmax-sse41-mul32-ld32-x32.c (matches in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32(); vshift is a local):
  30: const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);
  82: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  83: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…
  84: …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr…
  85: …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr…
  86: …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr…
  87: …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr…
  88: …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…
  89: …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr…
  131: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  [all …]

minmax-xop-mul32-ld32-x32.c (matches in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32(); vshift is a local):
  35: const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);
  87: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  88: …vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr…
  89: …vacc89AB = _mm_sub_epi32(_mm_sra_epi32(vacc89AB, vshift), _mm_cmpgt_epi32(vrem89AB, vremainder_thr…
  90: …vaccCDEF = _mm_sub_epi32(_mm_sra_epi32(vaccCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_thr…
  91: …vaccGHIJ = _mm_sub_epi32(_mm_sra_epi32(vaccGHIJ, vshift), _mm_cmpgt_epi32(vremGHIJ, vremainder_thr…
  92: …vaccKLMN = _mm_sub_epi32(_mm_sra_epi32(vaccKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_thr…
  93: …vaccOPQR = _mm_sub_epi32(_mm_sra_epi32(vaccOPQR, vshift), _mm_cmpgt_epi32(vremOPQR, vremainder_thr…
  94: …vaccSTUV = _mm_sub_epi32(_mm_sra_epi32(vaccSTUV, vshift), _mm_cmpgt_epi32(vremSTUV, vremainder_thr…
  136: …vacc0123 = _mm_sub_epi32(_mm_sra_epi32(vacc0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_thr…
  [all …]