
Searched refs:vrounding (Results 1 – 25 of 214) sorted by relevance


/external/XNNPACK/src/qs8-requantization/
Dprecise-sse4.c46 const __m128i vrounding = _mm_set1_epi64x(rounding); in xnn_qs8_requantize_precise__sse4() local
74 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshiftlo); in xnn_qs8_requantize_precise__sse4()
75 const __m128i x_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(x_absmul13, vrounding), vshifthi); in xnn_qs8_requantize_precise__sse4()
76 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshiftlo); in xnn_qs8_requantize_precise__sse4()
77 const __m128i y_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(y_absmul13, vrounding), vshifthi); in xnn_qs8_requantize_precise__sse4()
78 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshiftlo); in xnn_qs8_requantize_precise__sse4()
79 const __m128i z_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(z_absmul13, vrounding), vshifthi); in xnn_qs8_requantize_precise__sse4()
80 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshiftlo); in xnn_qs8_requantize_precise__sse4()
81 const __m128i w_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(w_absmul13, vrounding), vshifthi); in xnn_qs8_requantize_precise__sse4()
Dprecise-ssse3.c45 const __m128i vrounding = _mm_set1_epi64x(rounding); in xnn_qs8_requantize_precise__ssse3() local
73 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
74 const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
75 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
76 const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
77 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
78 const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
79 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
80 const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__ssse3()
Dprecise-sse2.c45 const __m128i vrounding = _mm_set1_epi64x(rounding); in xnn_qs8_requantize_precise__sse2() local
78 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
79 const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
80 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
81 const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
82 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
83 const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
84 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
85 const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift); in xnn_qs8_requantize_precise__sse2()
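All of the qs8 precise-requantization matches above share one step: the absolute value of each 64-bit product is offset by vrounding and then shifted right logically. A minimal scalar sketch of that step, assuming vrounding was broadcast from 2^(shift-1) as the surrounding kernels set it up; the helper name below is illustrative, not an XNNPACK API:

    #include <stdint.h>

    /* Scalar model of the _mm_add_epi64 + _mm_srl_epi64 pairs above:
     * adding 2^(shift-1) before a logical right shift rounds the scaled
     * magnitude to nearest instead of truncating it. */
    static inline uint32_t rounding_shift_right(uint64_t abs_product, uint32_t shift) {
      const uint64_t rounding = UINT64_C(1) << (shift - 1);  /* value broadcast into vrounding */
      return (uint32_t) ((abs_product + rounding) >> shift);
    }

The qu8-requantization matches below apply the same step; only the output zero point and clamping range differ.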
/external/XNNPACK/src/qu8-requantization/
Dprecise-sse4.c46 const __m128i vrounding = _mm_set1_epi64x(rounding); in xnn_qu8_requantize_precise__sse4() local
74 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshiftlo); in xnn_qu8_requantize_precise__sse4()
75 const __m128i x_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(x_absmul13, vrounding), vshifthi); in xnn_qu8_requantize_precise__sse4()
76 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshiftlo); in xnn_qu8_requantize_precise__sse4()
77 const __m128i y_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(y_absmul13, vrounding), vshifthi); in xnn_qu8_requantize_precise__sse4()
78 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshiftlo); in xnn_qu8_requantize_precise__sse4()
79 const __m128i z_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(z_absmul13, vrounding), vshifthi); in xnn_qu8_requantize_precise__sse4()
80 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshiftlo); in xnn_qu8_requantize_precise__sse4()
81 const __m128i w_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(w_absmul13, vrounding), vshifthi); in xnn_qu8_requantize_precise__sse4()
Dprecise-ssse3.c45 const __m128i vrounding = _mm_set1_epi64x(rounding); in xnn_qu8_requantize_precise__ssse3() local
73 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
74 const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
75 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
76 const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
77 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
78 const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
79 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
80 const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__ssse3()
Dprecise-sse2.c45 const __m128i vrounding = _mm_set1_epi64x(rounding); in xnn_qu8_requantize_precise__sse2() local
78 const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
79 const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
80 const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
81 const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
82 const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
83 const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
84 const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
85 const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift); in xnn_qu8_requantize_precise__sse2()
/external/XNNPACK/src/qs8-gavgpool/gen/
D7x-minmax-sse41-c24-acc2.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local
147 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
148 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
149 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
150 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
151 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
152 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
153 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
154 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
155 const __m128i vabsoutGI = _mm_srl_epi64(_mm_add_epi64(vabsprodGI, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
[all …]
D7x-minmax-wasmsimd-c24-acc2.c58 const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local
160 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
161 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
162 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
163 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
164 const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
165 const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
166 const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
167 const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
168 const v128_t vabsoutGH = wasm_u64x2_shr(wasm_i64x2_add(vabsprodGH, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
[all …]
D7x-minmax-sse41-c16-acc2.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local
124 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
125 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
126 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
127 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
128 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
129 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
130 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
131 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
201 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
[all …]
D7x-minmax-wasmsimd-c16-acc2.c58 const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local
133 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
134 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
135 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
136 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
137 const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
138 const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
139 const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
140 const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
211 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
[all …]
D7x-minmax-ssse3-c24-acc2.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() local
171 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
172 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
173 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
174 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
175 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
176 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
177 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
178 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
179 const __m128i vabsoutGI = _mm_srl_epi64(_mm_add_epi64(vabsprodGI, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
[all …]
D7x-minmax-sse2-c24-acc2.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() local
178 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
179 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
180 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
181 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
182 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
183 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
184 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
185 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
186 const __m128i vabsoutGI = _mm_srl_epi64(_mm_add_epi64(vabsprodGI, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
[all …]
D7x-minmax-wasmsimd-c8-acc2.c58 const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local
106 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
107 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
108 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
109 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
175 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
176 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
177 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
178 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
D7x-minmax-sse41-c8-acc2.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local
101 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
102 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
103 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
104 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
168 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
169 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
170 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
171 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
D7x-minmax-ssse3-c16-acc2.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local
140 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
141 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
142 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
143 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
144 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
145 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
146 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
147 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
234 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
[all …]
D7x-minmax-sse2-c16-acc2.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local
145 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
146 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
147 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
148 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
149 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
150 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
151 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
152 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
242 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
[all …]
D7x-minmax-ssse3-c8-acc2.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local
109 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
110 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
111 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
112 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
189 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
190 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
191 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
192 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
D7x-minmax-sse2-c8-acc2.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local
112 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
113 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
114 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
115 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
195 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
196 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
197 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
198 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
D7p7x-minmax-wasmsimd-c16-acc2.c184 const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local
260 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
261 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
262 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
263 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
264 const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
265 const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
266 const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
267 const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
339 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
[all …]
D7p7x-minmax-sse41-c16-acc2.c186 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local
253 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
254 const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
255 const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
256 const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
257 const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
258 const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
259 const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
260 const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
331 const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
[all …]
D7p7x-minmax-wasmsimd-c24-acc2.c292 const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local
395 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
396 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
397 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
398 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
399 const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
400 const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
401 const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
402 const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
403 const v128_t vabsoutGH = wasm_u64x2_shr(wasm_i64x2_add(vabsprodGH, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
[all …]
D7p7x-minmax-wasmsimd-c8-acc2.c150 const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local
199 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
200 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
201 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
202 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
269 const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
270 const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
271 const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
272 const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
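The qs8-gavgpool matches apply the same vrounding offset to per-lane products of sign-split accumulators (the vabsprod*/vabsout* temporaries above). A scalar sketch of one output lane, under the assumption that the kernels follow the usual precise recipe of multiply, round, shift, restore the sign, add the output zero point, and clamp; the function and parameter names are hypothetical:

    #include <stdint.h>

    static inline int8_t requantize_lane(int32_t acc, uint32_t multiplier, uint32_t shift,
                                         int32_t zero_point, int32_t qmin, int32_t qmax) {
      const uint64_t rounding = UINT64_C(1) << (shift - 1);                /* vrounding */
      const uint32_t abs_acc = acc >= 0 ? (uint32_t) acc : -(uint32_t) acc;
      const uint64_t abs_prod = (uint64_t) abs_acc * multiplier;           /* vabsprod* */
      const int32_t abs_out = (int32_t) ((abs_prod + rounding) >> shift);  /* vabsout*  */
      int32_t out = (acc >= 0 ? abs_out : -abs_out) + zero_point;
      if (out < qmin) out = qmin;
      if (out > qmax) out = qmax;
      return (int8_t) out;
    }

The qu8-gavgpool matches that follow use the same rounding step, with the result clamped to the uint8 range instead of the int8 range.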
/external/XNNPACK/src/qu8-gavgpool/
D7x-minmax-sse2-c8.c58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() local
104 …const __m128i vabs_scaled_lo02 = _mm_srl_epi64(_mm_add_epi64(vabsmul_lo02, vrounding), vright_shif… in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
105 …const __m128i vabs_scaled_lo13 = _mm_srl_epi64(_mm_add_epi64(vabsmul_lo13, vrounding), vright_shif… in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
106 …const __m128i vabs_scaled_hi02 = _mm_srl_epi64(_mm_add_epi64(vabsmul_hi02, vrounding), vright_shif… in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
107 …const __m128i vabs_scaled_hi13 = _mm_srl_epi64(_mm_add_epi64(vabsmul_hi13, vrounding), vright_shif… in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
173 …const __m128i vabs_scaled_lo02 = _mm_srl_epi64(_mm_add_epi64(vabsmul_lo02, vrounding), vright_shif… in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
174 …const __m128i vabs_scaled_lo13 = _mm_srl_epi64(_mm_add_epi64(vabsmul_lo13, vrounding), vright_shif… in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
175 …const __m128i vabs_scaled_hi02 = _mm_srl_epi64(_mm_add_epi64(vabsmul_hi02, vrounding), vright_shif… in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
176 …const __m128i vabs_scaled_hi13 = _mm_srl_epi64(_mm_add_epi64(vabsmul_hi13, vrounding), vright_shif… in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
/external/XNNPACK/src/qs8-dwconv/gen/
Dup24x9-minmax-wasmsimd-mul16.c285 const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16() local
286 const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
287 const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
288 const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
289 const v128_t vprod67 = wasm_i64x2_add(wasm_i64x2_mul(vacc67, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
290 const v128_t vprod89 = wasm_i64x2_add(wasm_i64x2_mul(vacc89, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
291 const v128_t vprodAB = wasm_i64x2_add(wasm_i64x2_mul(vaccAB, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
292 const v128_t vprodCD = wasm_i64x2_add(wasm_i64x2_mul(vaccCD, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
293 const v128_t vprodEF = wasm_i64x2_add(wasm_i64x2_mul(vaccEF, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
294 const v128_t vprodGH = wasm_i64x2_add(wasm_i64x2_mul(vaccGH, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
[all …]
Dup16x9-minmax-wasmsimd-mul16.c232 const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() local
233 const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
234 const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
235 const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
236 const v128_t vprod67 = wasm_i64x2_add(wasm_i64x2_mul(vacc67, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
237 const v128_t vprod89 = wasm_i64x2_add(wasm_i64x2_mul(vacc89, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
238 const v128_t vprodAB = wasm_i64x2_add(wasm_i64x2_mul(vaccAB, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
239 const v128_t vprodCD = wasm_i64x2_add(wasm_i64x2_mul(vaccCD, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
240 const v128_t vprodEF = wasm_i64x2_add(wasm_i64x2_mul(vaccEF, vmultiplier), vrounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
371 const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() local
[all …]
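The qs8-dwconv matches show the multiply-and-offset half of the idiom (wasm_i64x2_mul followed by wasm_i64x2_add with vrounding); the gavgpool wasmsimd matches earlier pair the same addition with wasm_u64x2_shr. A minimal sketch that combines the two operations, assuming a toolchain with WebAssembly SIMD enabled (e.g. clang with -msimd128) and a scalar shift amount; the helper name is illustrative only:

    #include <stdint.h>
    #include <wasm_simd128.h>

    /* Two 64-bit lanes per vector: multiply by the fixed-point multiplier,
     * add the rounding bias, then shift right logically so each lane is
     * rounded to nearest rather than truncated. */
    static inline v128_t scale_and_round_i64x2(v128_t vacc, v128_t vmultiplier,
                                               v128_t vrounding, uint32_t shift) {
      const v128_t vprod = wasm_i64x2_add(wasm_i64x2_mul(vacc, vmultiplier), vrounding);
      return wasm_u64x2_shr(vprod, shift);
    }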
