/external/rust/crates/quiche/deps/boringssl/src/crypto/poly1305/ |
D | poly1305_vec.c |
  206  p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);  in poly1305_first_block()
  207  p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);  in poly1305_first_block()
  208  p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);  in poly1305_first_block()
  209  p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);  in poly1305_first_block()
  260  T0 = _mm_mul_epu32(H0, p->R20.v);  in poly1305_blocks()
  261  T1 = _mm_mul_epu32(H0, p->R21.v);  in poly1305_blocks()
  262  T2 = _mm_mul_epu32(H0, p->R22.v);  in poly1305_blocks()
  263  T3 = _mm_mul_epu32(H0, p->R23.v);  in poly1305_blocks()
  264  T4 = _mm_mul_epu32(H0, p->R24.v);  in poly1305_blocks()
  265  T5 = _mm_mul_epu32(H1, p->S24.v);  in poly1305_blocks()
  [all …]
|
/external/boringssl/src/crypto/poly1305/ |
D | poly1305_vec.c |
  210  p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);  in poly1305_first_block()
  211  p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);  in poly1305_first_block()
  212  p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);  in poly1305_first_block()
  213  p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);  in poly1305_first_block()
  264  T0 = _mm_mul_epu32(H0, p->R20.v);  in poly1305_blocks()
  265  T1 = _mm_mul_epu32(H0, p->R21.v);  in poly1305_blocks()
  266  T2 = _mm_mul_epu32(H0, p->R22.v);  in poly1305_blocks()
  267  T3 = _mm_mul_epu32(H0, p->R23.v);  in poly1305_blocks()
  268  T4 = _mm_mul_epu32(H0, p->R24.v);  in poly1305_blocks()
  269  T5 = _mm_mul_epu32(H1, p->S24.v);  in poly1305_blocks()
  [all …]
|
/external/rust/crates/ring/crypto/poly1305/ |
D | poly1305_vec.c |
  214  p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);  in poly1305_first_block()
  215  p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);  in poly1305_first_block()
  216  p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);  in poly1305_first_block()
  217  p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);  in poly1305_first_block()
  268  T0 = _mm_mul_epu32(H0, p->R20.v);  in poly1305_blocks()
  269  T1 = _mm_mul_epu32(H0, p->R21.v);  in poly1305_blocks()
  270  T2 = _mm_mul_epu32(H0, p->R22.v);  in poly1305_blocks()
  271  T3 = _mm_mul_epu32(H0, p->R23.v);  in poly1305_blocks()
  272  T4 = _mm_mul_epu32(H0, p->R24.v);  in poly1305_blocks()
  273  T5 = _mm_mul_epu32(H1, p->S24.v);  in poly1305_blocks()
  [all …]
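The three poly1305_vec.c hits above are vendored copies of the same vectorized Poly1305 code (line numbers differ by a few lines per copy). _mm_mul_epu32 multiplies the unsigned 32-bit values in lanes 0 and 2 of its operands and returns two 64-bit products, which suits Poly1305's radix-2^26 limbs: sums of five 26x26-bit products stay well inside 64 bits. The S2x values are precomputed as 5*R2x because reduction mod 2^130 - 5 folds high limbs back in with a factor of 5. A minimal sketch of the pattern under those assumptions; the names and the flat H[]/R[] interface are hypothetical (the real code keeps limbs in a struct and interleaves two blocks per vector):

    #include <emmintrin.h>  /* SSE2 */

    /* Hypothetical sketch: precompute S = 5*R for the mod 2^130 - 5
       reduction, then accumulate column 0 of the limb product H * r.
       Each __m128i holds two 26-bit limbs (lanes 0 and 2), so every
       _mm_mul_epu32 yields two independent 64-bit partial products. */
    static __m128i poly1305_column0_sketch(const __m128i H[5],
                                           const __m128i R[5]) {
      const __m128i FIVE = _mm_set_epi32(0, 5, 0, 5);
      const __m128i S1 = _mm_mul_epu32(R[1], FIVE);  /* 5*r1, cf. S21 above */
      const __m128i S2 = _mm_mul_epu32(R[2], FIVE);
      const __m128i S3 = _mm_mul_epu32(R[3], FIVE);
      const __m128i S4 = _mm_mul_epu32(R[4], FIVE);
      /* T0 = h0*r0 + h1*(5*r4) + h2*(5*r3) + h3*(5*r2) + h4*(5*r1) */
      __m128i T0 = _mm_mul_epu32(H[0], R[0]);
      T0 = _mm_add_epi64(T0, _mm_mul_epu32(H[1], S4));
      T0 = _mm_add_epi64(T0, _mm_mul_epu32(H[2], S3));
      T0 = _mm_add_epi64(T0, _mm_mul_epu32(H[3], S2));
      T0 = _mm_add_epi64(T0, _mm_mul_epu32(H[4], S1));
      return T0;  /* carries into the T1..T4 columns are propagated later */
    }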
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | highbd_idct4x4_add_sse2.c |
  43  temp1[0] = _mm_mul_epu32(temp1[0], cospi_p16_p16); // ([0] + [2])*cospi_16_64  in highbd_idct4_small_sse2()
  44  temp1[1] = _mm_mul_epu32(temp1[1], cospi_p16_p16); // ([0] + [2])*cospi_16_64  in highbd_idct4_small_sse2()
  45  temp2[0] = _mm_mul_epu32(temp2[0], cospi_p16_p16); // ([0] - [2])*cospi_16_64  in highbd_idct4_small_sse2()
  46  temp2[1] = _mm_mul_epu32(temp2[1], cospi_p16_p16); // ([0] - [2])*cospi_16_64  in highbd_idct4_small_sse2()
  52  temp1[0] = _mm_mul_epu32(io[1], cospi_p24_p24); // input[1] * cospi_24_64  in highbd_idct4_small_sse2()
  53  temp1[1] = _mm_mul_epu32(temp1[3], cospi_p24_p24); // input[1] * cospi_24_64  in highbd_idct4_small_sse2()
  54  temp2[0] = _mm_mul_epu32(io[1], cospi_p08_p08); // input[1] * cospi_8_64  in highbd_idct4_small_sse2()
  55  temp2[1] = _mm_mul_epu32(temp1[3], cospi_p08_p08); // input[1] * cospi_8_64  in highbd_idct4_small_sse2()
  56  temp1[2] = _mm_mul_epu32(io[3], cospi_p08_p08); // input[3] * cospi_8_64  in highbd_idct4_small_sse2()
  57  temp1[3] = _mm_mul_epu32(temp2[3], cospi_p08_p08); // input[3] * cospi_8_64  in highbd_idct4_small_sse2()
  [all …]
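High-bitdepth IDCT intermediates no longer fit in 16 bits, so the usual _mm_madd_epi16 path is unavailable; this kernel instead keeps one 32-bit coefficient per 64-bit half-lane and multiplies by the cosine constants with _mm_mul_epu32. A hedged sketch of the multiply-and-round step, assuming non-negative inputs that fit back into 32 bits after rounding and a constant broadcast into lanes 0 and 2 (the real kernel pre-conditions values; libvpx's DCT_CONST_BITS is 14, the only constant taken from the library here):

    #include <emmintrin.h>  /* SSE2 */

    /* Hypothetical sketch: multiply four 32-bit values by a 32-bit
       cosine constant with 64-bit intermediates, then round-shift. */
    static __m128i mul_by_cospi_sketch(__m128i in, __m128i cospi) {
      const __m128i round = _mm_set1_epi64x(1 << 13);       /* 2^(14-1) */
      __m128i even = _mm_mul_epu32(in, cospi);              /* lanes 0,2 */
      __m128i odd  = _mm_mul_epu32(_mm_srli_epi64(in, 32),  /* lanes 1,3 */
                                   cospi);
      even = _mm_srli_epi64(_mm_add_epi64(even, round), 14);
      odd  = _mm_srli_epi64(_mm_add_epi64(odd, round), 14);
      /* results sit in the low dword of each 64-bit lane; re-interleave */
      return _mm_or_si128(even, _mm_slli_epi64(odd, 32));
    }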
|
/external/XNNPACK/src/qs8-requantization/ |
D | precise-sse4.c |
  64  const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__sse4()
  65  const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__sse4()
  66  const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__sse4()
  67  const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__sse4()
  69  const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__sse4()
  70  const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__sse4()
  71  const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__sse4()
  72  const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__sse4()
|
D | precise-ssse3.c |
  63  const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__ssse3()
  64  const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__ssse3()
  65  const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__ssse3()
  66  const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__ssse3()
  68  const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__ssse3()
  69  const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__ssse3()
  70  const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__ssse3()
  71  const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__ssse3()
|
D | precise-sse2.c |
  68  const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__sse2()
  69  const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__sse2()
  70  const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__sse2()
  71  const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier);  in xnn_qs8_requantize_precise__sse2()
  73  const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__sse2()
  74  const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__sse2()
  75  const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__sse2()
  76  const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier);  in xnn_qs8_requantize_precise__sse2()
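These three "precise" requantizers are one algorithm at three ISA levels (SSE4.1, SSSE3, SSE2); each needs a full 32x32 -> 64-bit product of |x| with the fixed-point multiplier. Because _mm_mul_epu32 only multiplies lanes 0 and 2, every input is multiplied twice: once as-is for lanes 0 and 2, and once after a 1-0-3-2 shuffle that exposes lanes 1 and 3. A minimal sketch of that pairing (the helper name is hypothetical):

    #include <emmintrin.h>  /* SSE2 */

    /* Hypothetical sketch: full 64-bit products for all four 32-bit
       lanes, produced as two vectors of two products each. */
    static void mul_all_lanes_sketch(__m128i x_abs, __m128i vmultiplier,
                                     __m128i *prod02, __m128i *prod13) {
      const __m128i x_abs_rev =
          _mm_shuffle_epi32(x_abs, _MM_SHUFFLE(2, 3, 0, 1));  /* 1,0,3,2 */
      *prod02 = _mm_mul_epu32(x_abs, vmultiplier);      /* lanes 0 and 2 */
      *prod13 = _mm_mul_epu32(x_abs_rev, vmultiplier);  /* lanes 1 and 3 */
    }

The qu8 variants further down use the identical lane-pairing trick; only the surrounding sign handling differs.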
|
D | q31-ssse3.c |
  77  const __m128i x_abs_product_even = _mm_mul_epu32(x_abs, vmultiplier);  in xnn_qs8_requantize_q31__ssse3()
  78  const __m128i y_abs_product_even = _mm_mul_epu32(y_abs, vmultiplier);  in xnn_qs8_requantize_q31__ssse3()
  79  const __m128i z_abs_product_even = _mm_mul_epu32(z_abs, vmultiplier);  in xnn_qs8_requantize_q31__ssse3()
  80  const __m128i w_abs_product_even = _mm_mul_epu32(w_abs, vmultiplier);  in xnn_qs8_requantize_q31__ssse3()
  97  const __m128i x_abs_product_odd = _mm_mul_epu32(x_abs_rev, vmultiplier);  in xnn_qs8_requantize_q31__ssse3()
  98  const __m128i y_abs_product_odd = _mm_mul_epu32(y_abs_rev, vmultiplier);  in xnn_qs8_requantize_q31__ssse3()
  99  const __m128i z_abs_product_odd = _mm_mul_epu32(z_abs_rev, vmultiplier);  in xnn_qs8_requantize_q31__ssse3()
  100  const __m128i w_abs_product_odd = _mm_mul_epu32(w_abs_rev, vmultiplier);  in xnn_qs8_requantize_q31__ssse3()
|
D | q31-sse2.c |
  77  const __m128i x_abs_product_even = _mm_mul_epu32(x_abs, vmultiplier);  in xnn_qs8_requantize_q31__sse2()
  78  const __m128i y_abs_product_even = _mm_mul_epu32(y_abs, vmultiplier);  in xnn_qs8_requantize_q31__sse2()
  79  const __m128i z_abs_product_even = _mm_mul_epu32(z_abs, vmultiplier);  in xnn_qs8_requantize_q31__sse2()
  80  const __m128i w_abs_product_even = _mm_mul_epu32(w_abs, vmultiplier);  in xnn_qs8_requantize_q31__sse2()
  97  const __m128i x_abs_product_odd = _mm_mul_epu32(x_abs_rev, vmultiplier);  in xnn_qs8_requantize_q31__sse2()
  98  const __m128i y_abs_product_odd = _mm_mul_epu32(y_abs_rev, vmultiplier);  in xnn_qs8_requantize_q31__sse2()
  99  const __m128i z_abs_product_odd = _mm_mul_epu32(z_abs_rev, vmultiplier);  in xnn_qs8_requantize_q31__sse2()
  100  const __m128i w_abs_product_odd = _mm_mul_epu32(w_abs_rev, vmultiplier);  in xnn_qs8_requantize_q31__sse2()
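The q31 variants instead emulate a rounding-doubling high multiply by a Q31 fixed-point multiplier, roughly (a*m + 2^30) >> 31 per lane, computed on absolute values because _mm_mul_epu32 is unsigned; signs are restored afterwards. A sketch of one lane pair under those assumptions (hypothetical name; the multiplier is assumed broadcast into lanes 0 and 2):

    #include <stdint.h>
    #include <emmintrin.h>  /* SSE2 */

    /* Hypothetical sketch: Q31 rounded high half for lanes 0 and 2;
       the real kernels repeat this on the shuffled lanes 1 and 3 and
       then handle sign restoration and the overflow nudge. */
    static __m128i q31_high_sketch(__m128i abs_even, __m128i vmultiplier) {
      const __m128i vq31rounding = _mm_set1_epi64x(UINT64_C(0x40000000));
      __m128i product = _mm_mul_epu32(abs_even, vmultiplier);
      product = _mm_add_epi64(product, vq31rounding);
      return _mm_srli_epi64(product, 31);  /* rounded high half, lanes 0,2 */
    }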
|
/external/XNNPACK/src/qu8-requantization/ |
D | precise-sse4.c |
  64  const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__sse4()
  65  const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__sse4()
  66  const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__sse4()
  67  const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__sse4()
  69  const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__sse4()
  70  const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__sse4()
  71  const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__sse4()
  72  const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__sse4()
|
D | precise-ssse3.c |
  63  const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__ssse3()
  64  const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__ssse3()
  65  const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__ssse3()
  66  const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__ssse3()
  68  const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__ssse3()
  69  const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__ssse3()
  70  const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__ssse3()
  71  const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__ssse3()
|
D | precise-sse2.c |
  68  const __m128i x_absmul02 = _mm_mul_epu32(x_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__sse2()
  69  const __m128i y_absmul02 = _mm_mul_epu32(y_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__sse2()
  70  const __m128i z_absmul02 = _mm_mul_epu32(z_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__sse2()
  71  const __m128i w_absmul02 = _mm_mul_epu32(w_abs0123, vmultiplier);  in xnn_qu8_requantize_precise__sse2()
  73  const __m128i x_absmul13 = _mm_mul_epu32(x_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__sse2()
  74  const __m128i y_absmul13 = _mm_mul_epu32(y_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__sse2()
  75  const __m128i z_absmul13 = _mm_mul_epu32(z_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__sse2()
  76  const __m128i w_absmul13 = _mm_mul_epu32(w_abs1032, vmultiplier);  in xnn_qu8_requantize_precise__sse2()
|
D | q31-ssse3.c |
  77  const __m128i x_abs_product_even = _mm_mul_epu32(x_abs, vmultiplier);  in xnn_qu8_requantize_q31__ssse3()
  78  const __m128i y_abs_product_even = _mm_mul_epu32(y_abs, vmultiplier);  in xnn_qu8_requantize_q31__ssse3()
  79  const __m128i z_abs_product_even = _mm_mul_epu32(z_abs, vmultiplier);  in xnn_qu8_requantize_q31__ssse3()
  80  const __m128i w_abs_product_even = _mm_mul_epu32(w_abs, vmultiplier);  in xnn_qu8_requantize_q31__ssse3()
  97  const __m128i x_abs_product_odd = _mm_mul_epu32(x_abs_rev, vmultiplier);  in xnn_qu8_requantize_q31__ssse3()
  98  const __m128i y_abs_product_odd = _mm_mul_epu32(y_abs_rev, vmultiplier);  in xnn_qu8_requantize_q31__ssse3()
  99  const __m128i z_abs_product_odd = _mm_mul_epu32(z_abs_rev, vmultiplier);  in xnn_qu8_requantize_q31__ssse3()
  100  const __m128i w_abs_product_odd = _mm_mul_epu32(w_abs_rev, vmultiplier);  in xnn_qu8_requantize_q31__ssse3()
|
D | q31-sse2.c |
  77  const __m128i x_abs_product_even = _mm_mul_epu32(x_abs, vmultiplier);  in xnn_qu8_requantize_q31__sse2()
  78  const __m128i y_abs_product_even = _mm_mul_epu32(y_abs, vmultiplier);  in xnn_qu8_requantize_q31__sse2()
  79  const __m128i z_abs_product_even = _mm_mul_epu32(z_abs, vmultiplier);  in xnn_qu8_requantize_q31__sse2()
  80  const __m128i w_abs_product_even = _mm_mul_epu32(w_abs, vmultiplier);  in xnn_qu8_requantize_q31__sse2()
  97  const __m128i x_abs_product_odd = _mm_mul_epu32(x_abs_rev, vmultiplier);  in xnn_qu8_requantize_q31__sse2()
  98  const __m128i y_abs_product_odd = _mm_mul_epu32(y_abs_rev, vmultiplier);  in xnn_qu8_requantize_q31__sse2()
  99  const __m128i z_abs_product_odd = _mm_mul_epu32(z_abs_rev, vmultiplier);  in xnn_qu8_requantize_q31__sse2()
  100  const __m128i w_abs_product_odd = _mm_mul_epu32(w_abs_rev, vmultiplier);  in xnn_qu8_requantize_q31__sse2()
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-sse41-c24-acc2.c |
  134  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  135  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  136  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  137  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  138  const __m128i vabsprod8A = _mm_mul_epu32(vabsacc89AB, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  139  const __m128i vabsprod9B = _mm_mul_epu32(vabsacc9B, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  140  const __m128i vabsprodCE = _mm_mul_epu32(vabsaccCDEF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  141  const __m128i vabsprodDF = _mm_mul_epu32(vabsaccDF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  142  const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  143  const __m128i vabsprodHJ = _mm_mul_epu32(vabsaccHJ, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
  [all …]
|
D | 7x-minmax-sse41-c16-acc2.c |
  115  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  116  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  117  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  118  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  119  const __m128i vabsprod8A = _mm_mul_epu32(vabsacc89AB, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  120  const __m128i vabsprod9B = _mm_mul_epu32(vabsacc9B, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  121  const __m128i vabsprodCE = _mm_mul_epu32(vabsaccCDEF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  122  const __m128i vabsprodDF = _mm_mul_epu32(vabsaccDF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  196  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  197  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
  [all …]
|
D | 7x-minmax-ssse3-c24-acc2.c |
  158  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  159  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  160  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  161  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  162  const __m128i vabsprod8A = _mm_mul_epu32(vabsacc89AB, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  163  const __m128i vabsprod9B = _mm_mul_epu32(vabsacc9B, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  164  const __m128i vabsprodCE = _mm_mul_epu32(vabsaccCDEF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  165  const __m128i vabsprodDF = _mm_mul_epu32(vabsaccDF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  166  const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  167  const __m128i vabsprodHJ = _mm_mul_epu32(vabsaccHJ, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
  [all …]
|
D | 7x-minmax-sse2-c24-acc2.c |
  165  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  166  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  167  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  168  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  169  const __m128i vabsprod8A = _mm_mul_epu32(vabsacc89AB, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  170  const __m128i vabsprod9B = _mm_mul_epu32(vabsacc9B, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  171  const __m128i vabsprodCE = _mm_mul_epu32(vabsaccCDEF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  172  const __m128i vabsprodDF = _mm_mul_epu32(vabsaccDF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  173  const __m128i vabsprodGI = _mm_mul_epu32(vabsaccGHIJ, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  174  const __m128i vabsprodHJ = _mm_mul_epu32(vabsaccHJ, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
  [all …]
|
D | 7x-minmax-sse41-c8-acc2.c |
  96  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
  97  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
  98  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
  99  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
  163  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
  164  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
  165  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
  166  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
|
D | 7x-minmax-ssse3-c16-acc2.c |
  131  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  132  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  133  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  134  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  135  const __m128i vabsprod8A = _mm_mul_epu32(vabsacc89AB, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  136  const __m128i vabsprod9B = _mm_mul_epu32(vabsacc9B, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  137  const __m128i vabsprodCE = _mm_mul_epu32(vabsaccCDEF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  138  const __m128i vabsprodDF = _mm_mul_epu32(vabsaccDF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  229  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  230  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
  [all …]
|
D | 7x-minmax-sse2-c16-acc2.c |
  136  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  137  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  138  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  139  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  140  const __m128i vabsprod8A = _mm_mul_epu32(vabsacc89AB, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  141  const __m128i vabsprod9B = _mm_mul_epu32(vabsacc9B, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  142  const __m128i vabsprodCE = _mm_mul_epu32(vabsaccCDEF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  143  const __m128i vabsprodDF = _mm_mul_epu32(vabsaccDF, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  237  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  238  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
  [all …]
|
D | 7x-minmax-ssse3-c8-acc2.c |
  104  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
  105  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
  106  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
  107  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
  184  const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
  185  const __m128i vabsprod13 = _mm_mul_epu32(vabsacc13, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
  186  const __m128i vabsprod46 = _mm_mul_epu32(vabsacc4567, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
  187  const __m128i vabsprod57 = _mm_mul_epu32(vabsacc57, vmultiplier);  in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
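The gavgpool kernels requantize their pooled accumulators with the same even/odd-lane multiply shown above (vabsprod02/vabsprod13 per group of four channels, vabsprod46/vabsprod57 for the next group, and so on). After the multiplies, the two vectors of 64-bit products have to be rounded, shifted, and merged back into four 32-bit lanes; a hedged SSE2 sketch of that step, with a hypothetical name and vshift assumed to hold the right-shift count as produced by _mm_cvtsi32_si128:

    #include <emmintrin.h>  /* SSE2 */

    /* Hypothetical sketch: round and merge the 64-bit products for
       lanes {0,2} and {1,3} into one vector of four 32-bit results
       (assumes the shifted results fit in 32 bits). */
    static __m128i merge_products_sketch(__m128i prod02, __m128i prod13,
                                         __m128i vrounding, __m128i vshift) {
      prod02 = _mm_srl_epi64(_mm_add_epi64(prod02, vrounding), vshift);
      prod13 = _mm_srl_epi64(_mm_add_epi64(prod13, vrounding), vshift);
      /* results now sit in the low dword of each 64-bit lane */
      return _mm_or_si128(prod02, _mm_slli_epi64(prod13, 32));
    }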
|
/external/flac/src/libFLAC/ |
D | lpc_intrin_sse2.c |
  440  …xmm7 = _mm_mul_epu32(xmm7, xmm5); /* we use _unsigned_ multiplication and discard high dword of th…  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  446  xmm6 = _mm_mul_epu32(xmm6, xmm4);  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  453  xmm6 = _mm_mul_epu32(xmm6, xmm3);  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  460  xmm6 = _mm_mul_epu32(xmm6, xmm2);  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  467  xmm6 = _mm_mul_epu32(xmm6, xmm1);  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  474  xmm6 = _mm_mul_epu32(xmm6, xmm0);  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  500  xmm7 = _mm_mul_epu32(xmm7, xmm5);  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  506  xmm6 = _mm_mul_epu32(xmm6, xmm4);  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  513  xmm6 = _mm_mul_epu32(xmm6, xmm3);  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  520  xmm6 = _mm_mul_epu32(xmm6, xmm2);  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2()
  [all …]
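The truncated comment at line 440 names the trick this file relies on: in two's complement, the low 32 bits of a 32x32 product do not depend on the operands' signedness, so the unsigned _mm_mul_epu32, SSE2's only 32-bit multiply, can stand in for the _mm_mullo_epi32 that SSE4.1 would provide, as long as the high dword of each product is discarded. A sketch of the identity (hypothetical name):

    #include <emmintrin.h>  /* SSE2 */

    /* Hypothetical sketch: low-32-bit products of lanes 0 and 2, valid
       for signed inputs because the low dword of a 32x32 multiply is
       signedness-independent; the high dwords are masked off. */
    static __m128i mullo_lanes02_sketch(__m128i a, __m128i b) {
      const __m128i lo_mask = _mm_set_epi32(0, -1, 0, -1);
      const __m128i prod = _mm_mul_epu32(a, b);  /* 64-bit products */
      return _mm_and_si128(prod, lo_mask);       /* keep low dwords only */
    }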
|
/external/rust/crates/quiche/deps/boringssl/src/crypto/fipsmodule/modes/ |
D | gcm_nohw.c |
  100  _mm_xor_si128(_mm_mul_epu32(a0a0, b0b1), _mm_mul_epu32(a2a2, b2b3));  in gcm_mul32_nohw()
  102  _mm_xor_si128(_mm_mul_epu32(a2a2, b0b1), _mm_mul_epu32(a0a0, b2b3));  in gcm_mul32_nohw()
  113  c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a1a1, b3b0));  in gcm_mul32_nohw()
  114  c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a3a3, b1b2));  in gcm_mul32_nohw()
  115  c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a3a3, b3b0));  in gcm_mul32_nohw()
  116  c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a1a1, b1b2));  in gcm_mul32_nohw()
|
/external/boringssl/src/crypto/fipsmodule/modes/ |
D | gcm_nohw.c |
  100  _mm_xor_si128(_mm_mul_epu32(a0a0, b0b1), _mm_mul_epu32(a2a2, b2b3));  in gcm_mul32_nohw()
  102  _mm_xor_si128(_mm_mul_epu32(a2a2, b0b1), _mm_mul_epu32(a0a0, b2b3));  in gcm_mul32_nohw()
  113  c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a1a1, b3b0));  in gcm_mul32_nohw()
  114  c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a3a3, b1b2));  in gcm_mul32_nohw()
  115  c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a3a3, b3b0));  in gcm_mul32_nohw()
  116  c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a1a1, b1b2));  in gcm_mul32_nohw()
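gcm_nohw.c (present twice above through vendoring) builds GHASH's carryless multiplication out of integer multiplies for targets without pclmulqdq: each operand is split into four sparse copies keeping every fourth bit, paired so that, within each XOR group of products, integer carries cannot reach the bit positions retained by the final masks. The SSE2 code above runs two such 32x32 multiplies per _mm_mul_epu32 call; a scalar analogue of the construction (the well-known constant-time bmul32 pattern; the name is hypothetical):

    #include <stdint.h>

    /* Hypothetical scalar sketch of gcm_mul32_nohw: 32x32 -> 64-bit
       carryless (binary-polynomial) multiply from integer multiplies.
       z_k collects products x_i * y_j with (i + j) mod 4 == k, and the
       final masks keep only the carry-free bit positions of each z_k. */
    static uint64_t bmul32_sketch(uint32_t x, uint32_t y) {
      const uint32_t x0 = x & 0x11111111, x1 = x & 0x22222222,
                     x2 = x & 0x44444444, x3 = x & 0x88888888;
      const uint32_t y0 = y & 0x11111111, y1 = y & 0x22222222,
                     y2 = y & 0x44444444, y3 = y & 0x88888888;
      const uint64_t z0 = ((uint64_t)x0 * y0) ^ ((uint64_t)x1 * y3) ^
                          ((uint64_t)x2 * y2) ^ ((uint64_t)x3 * y1);
      const uint64_t z1 = ((uint64_t)x0 * y1) ^ ((uint64_t)x1 * y0) ^
                          ((uint64_t)x2 * y3) ^ ((uint64_t)x3 * y2);
      const uint64_t z2 = ((uint64_t)x0 * y2) ^ ((uint64_t)x1 * y1) ^
                          ((uint64_t)x2 * y0) ^ ((uint64_t)x3 * y3);
      const uint64_t z3 = ((uint64_t)x0 * y3) ^ ((uint64_t)x1 * y2) ^
                          ((uint64_t)x2 * y1) ^ ((uint64_t)x3 * y0);
      return (z0 & UINT64_C(0x1111111111111111)) |
             (z1 & UINT64_C(0x2222222222222222)) |
             (z2 & UINT64_C(0x4444444444444444)) |
             (z3 & UINT64_C(0x8888888888888888));
    }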
|