
Searched refs:vmultiplier (Results 1 – 25 of 400) sorted by relevance

/external/XNNPACK/src/qu8-requantization/
precise-neon.c
40 const int32x4_t vmultiplier = vdupq_n_s32(multiplier); in xnn_qu8_requantize_precise__neon() local
42 const int32x2_t vmultiplier = vdup_n_s32(multiplier); in xnn_qu8_requantize_precise__neon() local
61 const int64x2_t x01_product = vmull_s32(vget_low_s32(x), vget_low_s32(vmultiplier)); in xnn_qu8_requantize_precise__neon()
62 const int64x2_t x23_product = vmull_high_s32(x, vmultiplier); in xnn_qu8_requantize_precise__neon()
63 const int64x2_t y01_product = vmull_s32(vget_low_s32(y), vget_low_s32(vmultiplier)); in xnn_qu8_requantize_precise__neon()
64 const int64x2_t y23_product = vmull_high_s32(y, vmultiplier); in xnn_qu8_requantize_precise__neon()
65 const int64x2_t z01_product = vmull_s32(vget_low_s32(z), vget_low_s32(vmultiplier)); in xnn_qu8_requantize_precise__neon()
66 const int64x2_t z23_product = vmull_high_s32(z, vmultiplier); in xnn_qu8_requantize_precise__neon()
67 const int64x2_t w01_product = vmull_s32(vget_low_s32(w), vget_low_s32(vmultiplier)); in xnn_qu8_requantize_precise__neon()
68 const int64x2_t w23_product = vmull_high_s32(w, vmultiplier); in xnn_qu8_requantize_precise__neon()
[all …]
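
All of the NEON matches in this file follow one widening-multiply pattern: split the int32x4_t accumulator into halves, multiply each half by vmultiplier into 64-bit lanes, then round, shift, and narrow. Below is a minimal sketch of that pattern, not XNNPACK's exact kernel; the function name, the rounding-shift step, and the final narrowing are illustrative assumptions (and vmull_high_s32 is AArch64-only).

#include <arm_neon.h>
#include <stdint.h>

/* Sketch: scale four int32 accumulators by a scalar multiplier through
 * 64-bit intermediates, mirroring the vmull_s32/vmull_high_s32 pairs above. */
static inline int32x4_t scale_by_multiplier(int32x4_t vacc, int32_t multiplier, int64_t shift) {
  const int32x4_t vmultiplier = vdupq_n_s32(multiplier);
  const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc), vget_low_s32(vmultiplier));
  const int64x2_t vprod23 = vmull_high_s32(vacc, vmultiplier);
  /* vrshlq_s64 with a negative count is a rounding right shift. */
  const int64x2_t vshift = vdupq_n_s64(-shift);
  const int64x2_t vscaled01 = vrshlq_s64(vprod01, vshift);
  const int64x2_t vscaled23 = vrshlq_s64(vprod23, vshift);
  /* Narrow the 64-bit products back to 32 bits. */
  return vcombine_s32(vmovn_s64(vscaled01), vmovn_s64(vscaled23));
}
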
precise-sse4.c
40 const __m128i vmultiplier = _mm_set1_epi32(multiplier); in xnn_qu8_requantize_precise__sse4() local
64 const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier); in xnn_qu8_requantize_precise__sse4()
65 const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier); in xnn_qu8_requantize_precise__sse4()
66 const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier); in xnn_qu8_requantize_precise__sse4()
67 const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier); in xnn_qu8_requantize_precise__sse4()
69 const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier); in xnn_qu8_requantize_precise__sse4()
70 const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier); in xnn_qu8_requantize_precise__sse4()
71 const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier); in xnn_qu8_requantize_precise__sse4()
72 const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier); in xnn_qu8_requantize_precise__sse4()
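
The SSE results pair each *_abs0123 row with an *_abs1032 row because _mm_mul_epu32 only multiplies lanes 0 and 2 of its operands. A sketch of that even/odd split follows; mul_all_lanes and its out-parameters are assumed names, not XNNPACK's.

#include <emmintrin.h>  /* SSE2; the SSE4/SSSE3/SSE2 variants differ only in the earlier abs step */
#include <stdint.h>

/* Sketch: 32x32 -> 64-bit multiply of all four unsigned lanes.
 * The odd lanes are rotated into even positions first. */
static inline void mul_all_lanes(__m128i vabs0123, int32_t multiplier,
                                 __m128i* vabsmul02, __m128i* vabsmul13) {
  const __m128i vmultiplier = _mm_set1_epi32(multiplier);
  const __m128i vabs1032 = _mm_shuffle_epi32(vabs0123, _MM_SHUFFLE(2, 3, 0, 1));
  *vabsmul02 = _mm_mul_epu32(vabs0123, vmultiplier);  /* 64-bit products of lanes 0, 2 */
  *vabsmul13 = _mm_mul_epu32(vabs1032, vmultiplier);  /* 64-bit products of lanes 1, 3 */
}
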
precise-ssse3.c
40 const __m128i vmultiplier = _mm_set1_epi32(multiplier); in xnn_qu8_requantize_precise__ssse3() local
63 const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier); in xnn_qu8_requantize_precise__ssse3()
64 const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier); in xnn_qu8_requantize_precise__ssse3()
65 const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier); in xnn_qu8_requantize_precise__ssse3()
66 const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier); in xnn_qu8_requantize_precise__ssse3()
68 const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier); in xnn_qu8_requantize_precise__ssse3()
69 const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier); in xnn_qu8_requantize_precise__ssse3()
70 const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier); in xnn_qu8_requantize_precise__ssse3()
71 const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier); in xnn_qu8_requantize_precise__ssse3()
q31-sse4.c
46 const __m128i vmultiplier = _mm_set1_epi32(multiplier); in xnn_qu8_requantize_q31__sse4() local
67 const __m128i x_product_even = _mm_add_epi64(_mm_mul_epi32(x, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
68 const __m128i y_product_even = _mm_add_epi64(_mm_mul_epi32(y, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
69 const __m128i z_product_even = _mm_add_epi64(_mm_mul_epi32(z, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
70 const __m128i w_product_even = _mm_add_epi64(_mm_mul_epi32(w, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
72 const __m128i x_product_odd = _mm_add_epi64(_mm_mul_epi32(x_rev, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
73 const __m128i y_product_odd = _mm_add_epi64(_mm_mul_epi32(y_rev, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
74 const __m128i z_product_odd = _mm_add_epi64(_mm_mul_epi32(z_rev, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
75 const __m128i w_product_odd = _mm_add_epi64(_mm_mul_epi32(w_rev, vmultiplier), vq31rounding); in xnn_qu8_requantize_q31__sse4()
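
The q31-sse4.c rows compute the per-lane Q31 product x * multiplier + 2^30, with even lanes taken from x directly and odd lanes from the shuffled x_rev. A sketch under assumed names (q31_products is mine), omitting the 31-bit shift and even/odd recombination the real kernel still performs:

#include <smmintrin.h>  /* SSE4.1 for the signed _mm_mul_epi32 */
#include <stdint.h>

/* Sketch: per-lane x * multiplier + 2^30, the Q31 products matched above. */
static inline void q31_products(__m128i x, int32_t multiplier,
                                __m128i* product_even, __m128i* product_odd) {
  const __m128i vmultiplier = _mm_set1_epi32(multiplier);
  const __m128i vq31rounding = _mm_set1_epi64x(INT64_C(0x40000000));
  const __m128i x_rev = _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
  *product_even = _mm_add_epi64(_mm_mul_epi32(x, vmultiplier), vq31rounding);
  *product_odd = _mm_add_epi64(_mm_mul_epi32(x_rev, vmultiplier), vq31rounding);
}
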
precise-sse2.c
40 const __m128i vmultiplier = _mm_set1_epi32(multiplier); in xnn_qu8_requantize_precise__sse2() local
68 const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier); in xnn_qu8_requantize_precise__sse2()
69 const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier); in xnn_qu8_requantize_precise__sse2()
70 const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier); in xnn_qu8_requantize_precise__sse2()
71 const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier); in xnn_qu8_requantize_precise__sse2()
73 const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier); in xnn_qu8_requantize_precise__sse2()
74 const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier); in xnn_qu8_requantize_precise__sse2()
75 const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier); in xnn_qu8_requantize_precise__sse2()
76 const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier); in xnn_qu8_requantize_precise__sse2()
q31-wasmsimd.c
48 const v128_t vmultiplier = wasm_i64x2_make(twice_multiplier, twice_multiplier); in xnn_qu8_requantize_q31__wasmsimd() local
79 … const v128_t x_product_lo = wasm_i64x2_add(wasm_i64x2_mul(x_lo, vmultiplier), vtwice_q31rounding); in xnn_qu8_requantize_q31__wasmsimd()
80 … const v128_t y_product_lo = wasm_i64x2_add(wasm_i64x2_mul(y_lo, vmultiplier), vtwice_q31rounding); in xnn_qu8_requantize_q31__wasmsimd()
81 … const v128_t z_product_lo = wasm_i64x2_add(wasm_i64x2_mul(z_lo, vmultiplier), vtwice_q31rounding); in xnn_qu8_requantize_q31__wasmsimd()
82 … const v128_t w_product_lo = wasm_i64x2_add(wasm_i64x2_mul(w_lo, vmultiplier), vtwice_q31rounding); in xnn_qu8_requantize_q31__wasmsimd()
84 … const v128_t x_product_hi = wasm_i64x2_add(wasm_i64x2_mul(x_hi, vmultiplier), vtwice_q31rounding); in xnn_qu8_requantize_q31__wasmsimd()
85 … const v128_t y_product_hi = wasm_i64x2_add(wasm_i64x2_mul(y_hi, vmultiplier), vtwice_q31rounding); in xnn_qu8_requantize_q31__wasmsimd()
86 … const v128_t z_product_hi = wasm_i64x2_add(wasm_i64x2_mul(z_hi, vmultiplier), vtwice_q31rounding); in xnn_qu8_requantize_q31__wasmsimd()
87 … const v128_t w_product_hi = wasm_i64x2_add(wasm_i64x2_mul(w_hi, vmultiplier), vtwice_q31rounding); in xnn_qu8_requantize_q31__wasmsimd()
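
q31-wasmsimd.c feeds vmultiplier with twice_multiplier, and its rounding term is likewise doubled. My reading of these matches is that scaling both constants by 2 turns the Q31 ">> 31" into ">> 32", so the result can be taken as the high 32-bit half of each 64-bit lane with a shuffle; a sketch under that assumption (q31_product_lo and the constants are illustrative):

#include <wasm_simd128.h>
#include <stdint.h>

/* Sketch: doubled multiplier and doubled Q31 rounding, so the final shift
 * becomes >> 32. The extraction/recombination code is not shown. */
static inline v128_t q31_product_lo(v128_t x_lo, int32_t multiplier) {
  const int64_t twice_multiplier = 2 * (int64_t) multiplier;
  const v128_t vmultiplier = wasm_i64x2_make(twice_multiplier, twice_multiplier);
  const v128_t vtwice_q31rounding = wasm_i64x2_make(INT64_C(0x80000000), INT64_C(0x80000000));
  return wasm_i64x2_add(wasm_i64x2_mul(x_lo, vmultiplier), vtwice_q31rounding);
}
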
/external/XNNPACK/src/qs8-requantization/
precise-neon.c
40 const int32x4_t vmultiplier = vdupq_n_s32(multiplier); in xnn_qs8_requantize_precise__neon() local
42 const int32x2_t vmultiplier = vdup_n_s32(multiplier); in xnn_qs8_requantize_precise__neon() local
61 const int64x2_t x01_product = vmull_s32(vget_low_s32(x), vget_low_s32(vmultiplier)); in xnn_qs8_requantize_precise__neon()
62 const int64x2_t x23_product = vmull_high_s32(x, vmultiplier); in xnn_qs8_requantize_precise__neon()
63 const int64x2_t y01_product = vmull_s32(vget_low_s32(y), vget_low_s32(vmultiplier)); in xnn_qs8_requantize_precise__neon()
64 const int64x2_t y23_product = vmull_high_s32(y, vmultiplier); in xnn_qs8_requantize_precise__neon()
65 const int64x2_t z01_product = vmull_s32(vget_low_s32(z), vget_low_s32(vmultiplier)); in xnn_qs8_requantize_precise__neon()
66 const int64x2_t z23_product = vmull_high_s32(z, vmultiplier); in xnn_qs8_requantize_precise__neon()
67 const int64x2_t w01_product = vmull_s32(vget_low_s32(w), vget_low_s32(vmultiplier)); in xnn_qs8_requantize_precise__neon()
68 const int64x2_t w23_product = vmull_high_s32(w, vmultiplier); in xnn_qs8_requantize_precise__neon()
[all …]
precise-sse4.c
40 const __m128i vmultiplier = _mm_set1_epi32(multiplier); in xnn_qs8_requantize_precise__sse4() local
64 const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier); in xnn_qs8_requantize_precise__sse4()
65 const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier); in xnn_qs8_requantize_precise__sse4()
66 const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier); in xnn_qs8_requantize_precise__sse4()
67 const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier); in xnn_qs8_requantize_precise__sse4()
69 const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier); in xnn_qs8_requantize_precise__sse4()
70 const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier); in xnn_qs8_requantize_precise__sse4()
71 const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier); in xnn_qs8_requantize_precise__sse4()
72 const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier); in xnn_qs8_requantize_precise__sse4()
q31-sse4.c
46 const __m128i vmultiplier = _mm_set1_epi32(multiplier); in xnn_qs8_requantize_q31__sse4() local
67 const __m128i x_product_even = _mm_add_epi64(_mm_mul_epi32(x, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
68 const __m128i y_product_even = _mm_add_epi64(_mm_mul_epi32(y, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
69 const __m128i z_product_even = _mm_add_epi64(_mm_mul_epi32(z, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
70 const __m128i w_product_even = _mm_add_epi64(_mm_mul_epi32(w, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
72 const __m128i x_product_odd = _mm_add_epi64(_mm_mul_epi32(x_rev, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
73 const __m128i y_product_odd = _mm_add_epi64(_mm_mul_epi32(y_rev, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
74 const __m128i z_product_odd = _mm_add_epi64(_mm_mul_epi32(z_rev, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
75 const __m128i w_product_odd = _mm_add_epi64(_mm_mul_epi32(w_rev, vmultiplier), vq31rounding); in xnn_qs8_requantize_q31__sse4()
precise-ssse3.c
40 const __m128i vmultiplier = _mm_set1_epi32(multiplier); in xnn_qs8_requantize_precise__ssse3() local
63 const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier); in xnn_qs8_requantize_precise__ssse3()
64 const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier); in xnn_qs8_requantize_precise__ssse3()
65 const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier); in xnn_qs8_requantize_precise__ssse3()
66 const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier); in xnn_qs8_requantize_precise__ssse3()
68 const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier); in xnn_qs8_requantize_precise__ssse3()
69 const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier); in xnn_qs8_requantize_precise__ssse3()
70 const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier); in xnn_qs8_requantize_precise__ssse3()
71 const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier); in xnn_qs8_requantize_precise__ssse3()
q31-wasmsimd.c
48 const v128_t vmultiplier = wasm_i64x2_make(twice_multiplier, twice_multiplier); in xnn_qs8_requantize_q31__wasmsimd() local
79 … const v128_t x_product_lo = wasm_i64x2_add(wasm_i64x2_mul(x_lo, vmultiplier), vtwice_q31rounding); in xnn_qs8_requantize_q31__wasmsimd()
80 … const v128_t y_product_lo = wasm_i64x2_add(wasm_i64x2_mul(y_lo, vmultiplier), vtwice_q31rounding); in xnn_qs8_requantize_q31__wasmsimd()
81 … const v128_t z_product_lo = wasm_i64x2_add(wasm_i64x2_mul(z_lo, vmultiplier), vtwice_q31rounding); in xnn_qs8_requantize_q31__wasmsimd()
82 … const v128_t w_product_lo = wasm_i64x2_add(wasm_i64x2_mul(w_lo, vmultiplier), vtwice_q31rounding); in xnn_qs8_requantize_q31__wasmsimd()
84 … const v128_t x_product_hi = wasm_i64x2_add(wasm_i64x2_mul(x_hi, vmultiplier), vtwice_q31rounding); in xnn_qs8_requantize_q31__wasmsimd()
85 … const v128_t y_product_hi = wasm_i64x2_add(wasm_i64x2_mul(y_hi, vmultiplier), vtwice_q31rounding); in xnn_qs8_requantize_q31__wasmsimd()
86 … const v128_t z_product_hi = wasm_i64x2_add(wasm_i64x2_mul(z_hi, vmultiplier), vtwice_q31rounding); in xnn_qs8_requantize_q31__wasmsimd()
87 … const v128_t w_product_hi = wasm_i64x2_add(wasm_i64x2_mul(w_hi, vmultiplier), vtwice_q31rounding); in xnn_qs8_requantize_q31__wasmsimd()
precise-sse2.c
40 const __m128i vmultiplier = _mm_set1_epi32(multiplier); in xnn_qs8_requantize_precise__sse2() local
68 const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier); in xnn_qs8_requantize_precise__sse2()
69 const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier); in xnn_qs8_requantize_precise__sse2()
70 const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier); in xnn_qs8_requantize_precise__sse2()
71 const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier); in xnn_qs8_requantize_precise__sse2()
73 const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier); in xnn_qs8_requantize_precise__sse2()
74 const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier); in xnn_qs8_requantize_precise__sse2()
75 const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier); in xnn_qs8_requantize_precise__sse2()
76 const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier); in xnn_qs8_requantize_precise__sse2()
/external/XNNPACK/src/qs8-gavgpool/gen/
7x-minmax-neon-c32-acc2.c
58 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2() local
60 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2() local
143 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
144 const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
145 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
146 const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
147 const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
148 const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
149 const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
150 const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2()
[all …]
7x-minmax-neon-c24-acc2.c
58 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2() local
60 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2() local
126 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
127 const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
128 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
129 const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
130 const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
131 const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
132 const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
133 const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c24_acc2()
[all …]
7x-minmax-neon-c16-acc2.c
58 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2() local
60 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2() local
109 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
110 const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
111 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
112 const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
113 const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
114 const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
115 const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
116 const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c16_acc2()
[all …]
7x-minmax-neon-c8-acc2.c
58 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2() local
60 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2() local
92 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
93 const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
94 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
95 const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
102 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
103 const int64x2_t vprod23 = vmull_s32(vget_high_s32(vacc0123), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
104 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
105 const int64x2_t vprod67 = vmull_s32(vget_high_s32(vacc4567), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c8_acc2()
[all …]
7p7x-minmax-neon-c16-acc2.c
173 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2() local
175 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2() local
229 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
230 const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
231 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
232 const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
233 const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
234 const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
235 const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
236 const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2()
[all …]
7p7x-minmax-neon-c32-acc2.c
306 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2() local
308 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2() local
400 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
401 const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
402 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
403 const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
404 const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
405 const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
406 const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
407 const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2()
[all …]
7p7x-minmax-neon-c8-acc2.c
137 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local
139 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local
174 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
175 const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
176 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
177 const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
184 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
185 const int64x2_t vprod23 = vmull_s32(vget_high_s32(vacc0123), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
186 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
187 const int64x2_t vprod67 = vmull_s32(vget_high_s32(vacc4567), vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
[all …]
7p7x-minmax-neon-c24-acc2.c
270 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2() local
272 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2() local
345 const int64x2_t vprod01 = vmull_s32(vget_low_s32(vacc0123), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
346 const int64x2_t vprod23 = vmull_high_s32(vacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
347 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
348 const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
349 const int64x2_t vprod89 = vmull_s32(vget_low_s32(vacc89AB), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
350 const int64x2_t vprodAB = vmull_high_s32(vacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
351 const int64x2_t vprodCD = vmull_s32(vget_low_s32(vaccCDEF), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
352 const int64x2_t vprodEF = vmull_high_s32(vaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2()
[all …]
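
Unlike the requantization kernels, these gavgpool kernels broadcast vmultiplier straight from a params struct via vld1q_dup_s32 (with vld1_dup_s32 for the 2-lane register two lines later in each match). A sketch with a stand-in struct; XNNPACK's real params carry more fields (bias, shift, output clamps):

#include <arm_neon.h>
#include <stdint.h>

/* Stand-in params struct, illustrative only. */
struct neon_avgpool_params {
  int32_t multiplier;
};

/* vld1q_dup_s32 loads one int32 from memory and replicates it to all
 * four lanes. */
static inline int32x4_t load_multiplier(const struct neon_avgpool_params* params) {
  return vld1q_dup_s32(&params->multiplier);
}
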
7x-minmax-sse41-c24-acc2.c
57 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local
134 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
135 const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
136 const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
137 const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
138 const __m128i vabsprod8A = _mm_mul_epu32(vabsacc89AB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
139 const __m128i vabsprod9B = _mm_mul_epu32(vabsacc9B, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
140 const __m128i vabsprodCE = _mm_mul_epu32(vabsaccCDEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
141 const __m128i vabsprodDF = _mm_mul_epu32(vabsaccDF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
142 const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
[all …]
7x-minmax-wasmsimd-c24-acc2.c
57 const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local
147 const v128_t vabsprod01 = wasm_i64x2_mul(vabsacc01, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
148 const v128_t vabsprod23 = wasm_i64x2_mul(vabsacc23, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
149 const v128_t vabsprod45 = wasm_i64x2_mul(vabsacc45, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
150 const v128_t vabsprod67 = wasm_i64x2_mul(vabsacc67, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
151 const v128_t vabsprod89 = wasm_i64x2_mul(vabsacc89, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
152 const v128_t vabsprodAB = wasm_i64x2_mul(vabsaccAB, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
153 const v128_t vabsprodCD = wasm_i64x2_mul(vabsaccCD, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
154 const v128_t vabsprodEF = wasm_i64x2_mul(vabsaccEF, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
155 const v128_t vabsprodGH = wasm_i64x2_mul(vabsaccGH, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
[all …]
/external/XNNPACK/src/qu8-gavgpool/
7x-minmax-neon-c8.c
58 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8() local
60 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8() local
90 const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
91 const int64x2_t vproduct23 = vmull_high_s32(vacc_lo, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
92 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
93 const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
100 const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
101 const int64x2_t vproduct23 = vmull_s32(vget_high_s32(vacc_lo), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
102 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
103 const int64x2_t vproduct67 = vmull_s32(vget_high_s32(vacc_hi), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7x__neon_c8()
[all …]
7p7x-minmax-neon-c8.c
104 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8() local
106 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8() local
166 const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
167 const int64x2_t vproduct23 = vmull_high_s32(vacc_lo, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
168 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
169 const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
176 const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
177 const int64x2_t vproduct23 = vmull_s32(vget_high_s32(vacc_lo), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
178 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
179 const int64x2_t vproduct67 = vmull_s32(vget_high_s32(vacc_hi), vmultiplier); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
[all …]
/external/XNNPACK/src/qu8-avgpool/
9x-minmax-neon-c8.c
36 const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8() local
38 const int32x2_t vmultiplier = vld1_dup_s32(&params->neon.multiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8() local
145 const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vget_low_s32(vmultiplier)); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
146 const int64x2_t vproduct23 = vmull_high_s32(vacc_lo, vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
147 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vget_low_s32(vmultiplier)); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
148 const int64x2_t vproduct67 = vmull_high_s32(vacc_hi, vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
155 const int64x2_t vproduct01 = vmull_s32(vget_low_s32(vacc_lo), vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
156 const int64x2_t vproduct23 = vmull_s32(vget_high_s32(vacc_lo), vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
157 const int64x2_t vproduct45 = vmull_s32(vget_low_s32(vacc_hi), vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
158 const int64x2_t vproduct67 = vmull_s32(vget_high_s32(vacc_hi), vmultiplier); in xnn_qu8_avgpool_minmax_ukernel_9x__neon_c8()
[all …]
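
The paired declarations near the top of each NEON file (an int32x4_t vmultiplier at one line, an int32x2_t two lines later) and the duplicated product rows reflect two build paths: vmull_high_s32 exists only on AArch64, so 32-bit ARM reaches the upper-lane products through vget_high_s32 instead. A minimal sketch of both paths; the function name is an assumption:

#include <arm_neon.h>

/* Sketch: upper-lane products on both build paths. The multiplier vector
 * has equal lanes, so taking its low half on the 32-bit path loses nothing. */
static inline int64x2_t high_lane_products(int32x4_t vacc, int32x4_t vmultiplier) {
#if defined(__aarch64__)
  return vmull_high_s32(vacc, vmultiplier);
#else
  return vmull_s32(vget_high_s32(vacc), vget_low_s32(vmultiplier));
#endif
}
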
