/external/libgav1/libgav1/src/dsp/x86/ |
D | transpose_sse4.h | 42 const __m128i a1 = _mm_unpackhi_epi16(in[0], in[1]); in Transpose2x16_U16() 44 const __m128i a3 = _mm_unpackhi_epi16(in[2], in[3]); in Transpose2x16_U16() 50 const __m128i b1 = _mm_unpackhi_epi16(a0, a1); in Transpose2x16_U16() 52 const __m128i b3 = _mm_unpackhi_epi16(a2, a3); in Transpose2x16_U16() 58 out[1] = _mm_unpackhi_epi16(b0, b1); in Transpose2x16_U16() 60 out[3] = _mm_unpackhi_epi16(b2, b3); in Transpose2x16_U16() 107 const __m128i b2 = _mm_unpackhi_epi16(a0, a1); in Transpose8x8To4x16_U8() 108 const __m128i b3 = _mm_unpackhi_epi16(a2, a3); in Transpose8x8To4x16_U8() 204 const __m128i a4 = _mm_unpackhi_epi16(in[0], in[1]); in Transpose8x4To4x8_U16() 205 const __m128i a5 = _mm_unpackhi_epi16(in[2], in[3]); in Transpose8x4To4x8_U16() [all …]
|
/external/libavc/common/x86/ |
D | ih264_iquant_itrans_recon_ssse3.c | 141 …temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bi… in ih264_iquant_itrans_recon_4x4_ssse3() 143 …temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bi… in ih264_iquant_itrans_recon_4x4_ssse3() 146 src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long in ih264_iquant_itrans_recon_4x4_ssse3() 148 src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long in ih264_iquant_itrans_recon_4x4_ssse3() 416 src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long in ih264_iquant_itrans_recon_8x8_ssse3() 419 …scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- … in ih264_iquant_itrans_recon_8x8_ssse3() 439 src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long in ih264_iquant_itrans_recon_8x8_ssse3() 442 …scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- … in ih264_iquant_itrans_recon_8x8_ssse3() 460 src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long in ih264_iquant_itrans_recon_8x8_ssse3() 463 …scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- … in ih264_iquant_itrans_recon_8x8_ssse3() [all …]
|
/external/libaom/libaom/aom_dsp/x86/ |
D | lpf_common_sse2.h | 47 w3 = _mm_unpackhi_epi16(*x0, *x1); // 04 14 05 15 xx xx xx xx in highbd_transpose6x6_sse2() 48 w4 = _mm_unpackhi_epi16(*x2, *x3); // 24 34 25 35 xx xx xx xx in highbd_transpose6x6_sse2() 49 w5 = _mm_unpackhi_epi16(*x4, *x5); // 44 54 45 55 xx xx xx xx in highbd_transpose6x6_sse2() 85 w0 = _mm_unpackhi_epi16(*x0, *x1); // 04 14 05 15 06 16 07 17 in highbd_transpose4x8_8x4_high_sse2() 86 w1 = _mm_unpackhi_epi16(*x2, *x3); // 24 34 25 35 26 36 27 37 in highbd_transpose4x8_8x4_high_sse2() 172 w0 = _mm_unpackhi_epi16(*x0, *x1); // 04 14 05 15 06 16 07 17 in highbd_transpose8x8_high_sse2() 173 w1 = _mm_unpackhi_epi16(*x2, *x3); // 24 34 25 35 26 36 27 37 in highbd_transpose8x8_high_sse2() 174 w2 = _mm_unpackhi_epi16(*x4, *x5); // 44 54 45 55 46 56 47 57 in highbd_transpose8x8_high_sse2() 175 w3 = _mm_unpackhi_epi16(*x6, *x7); // 64 74 65 75 66 76 67 77 in highbd_transpose8x8_high_sse2() 278 ww1 = _mm_unpackhi_epi16( in transpose4x8_8x4_sse2() [all …]
|
D | highbd_convolve_ssse3.c | 65 s[4] = _mm_unpackhi_epi16(s0, s1); in av1_highbd_convolve_y_sr_ssse3() 66 s[5] = _mm_unpackhi_epi16(s2, s3); in av1_highbd_convolve_y_sr_ssse3() 67 s[6] = _mm_unpackhi_epi16(s4, s5); in av1_highbd_convolve_y_sr_ssse3() 73 s[4 + 8] = _mm_unpackhi_epi16(s1, s2); in av1_highbd_convolve_y_sr_ssse3() 74 s[5 + 8] = _mm_unpackhi_epi16(s3, s4); in av1_highbd_convolve_y_sr_ssse3() 75 s[6 + 8] = _mm_unpackhi_epi16(s5, s6); in av1_highbd_convolve_y_sr_ssse3() 84 s[7] = _mm_unpackhi_epi16(s6, s7); in av1_highbd_convolve_y_sr_ssse3() 87 s[7 + 8] = _mm_unpackhi_epi16(s7, s8); in av1_highbd_convolve_y_sr_ssse3()
|
D | transpose_sse2.h | 63 const __m128i b1 = _mm_unpackhi_epi16(a0, a1); in transpose_8bit_8x8() 65 const __m128i b3 = _mm_unpackhi_epi16(a2, a3); in transpose_8bit_8x8() 177 const __m128i a4 = _mm_unpackhi_epi16(in[0], in[1]); in transpose_16bit_8x4() 178 const __m128i a5 = _mm_unpackhi_epi16(in[2], in[3]); in transpose_16bit_8x4() 234 const __m128i a4 = _mm_unpackhi_epi16(in[0], in[1]); in transpose_16bit_8x8() 235 const __m128i a5 = _mm_unpackhi_epi16(in[2], in[3]); in transpose_16bit_8x8() 236 const __m128i a6 = _mm_unpackhi_epi16(in[4], in[5]); in transpose_16bit_8x8() 237 const __m128i a7 = _mm_unpackhi_epi16(in[6], in[7]); in transpose_16bit_8x8()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up24x9-minmax-sse41-mul16.c | 114 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 116 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 118 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 145 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 147 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 172 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 174 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 176 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 201 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() [all …]
|
D | up16x9-minmax-sse41-mul16.c | 106 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 108 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 129 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 150 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 169 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 171 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 190 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 192 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() [all …]
|
D | up8x9-minmax-sse41-mul16.c | 98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 150 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 163 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 189 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 202 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 265 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() [all …]
|
D | up16x9-minmax-ssse3-mul16.c | 106 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 108 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 129 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 150 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 169 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 171 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 190 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 192 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() [all …]
|
D | up16x9-minmax-sse2-mul16.c | 106 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 108 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 129 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 150 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 169 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 171 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 190 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 192 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() [all …]
|
D | up24x9-minmax-ssse3-mul16.c | 114 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 116 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 118 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 145 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 147 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 172 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 174 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 176 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 201 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() [all …]
|
D | up24x9-minmax-sse2-mul16.c | 114 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 116 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 118 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 145 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 147 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 172 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 174 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 176 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 201 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() [all …]
|
D | up8x9-minmax-sse2-mul16.c | 98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 150 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 163 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 189 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 202 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 286 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() [all …]
|
D | up8x9-minmax-ssse3-mul16.c | 98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 150 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 163 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 189 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 202 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 286 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() [all …]
|
/external/XNNPACK/src/qu8-dwconv/ |
D | up8x9-minmax-sse2.c | 82 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod0_odd, vprod0_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 91 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod1_odd, vprod1_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 100 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod2_odd, vprod2_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 109 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod3_odd, vprod3_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 118 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod4_odd, vprod4_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 127 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod5_odd, vprod5_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 136 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod6_odd, vprod6_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 145 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod7_odd, vprod7_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 154 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod8_odd, vprod8_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 234 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod0_odd, vprod0_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() [all …]
|
/external/libaom/libaom/av1/common/x86/ |
D | highbd_convolve_2d_ssse3.c | 110 s[4] = _mm_unpackhi_epi16(s0, s1); in av1_highbd_convolve_2d_sr_ssse3() 111 s[5] = _mm_unpackhi_epi16(s2, s3); in av1_highbd_convolve_2d_sr_ssse3() 112 s[6] = _mm_unpackhi_epi16(s4, s5); in av1_highbd_convolve_2d_sr_ssse3() 118 s[4 + 8] = _mm_unpackhi_epi16(s1, s2); in av1_highbd_convolve_2d_sr_ssse3() 119 s[5 + 8] = _mm_unpackhi_epi16(s3, s4); in av1_highbd_convolve_2d_sr_ssse3() 120 s[6 + 8] = _mm_unpackhi_epi16(s5, s6); in av1_highbd_convolve_2d_sr_ssse3() 129 s[7] = _mm_unpackhi_epi16(s6, s7); in av1_highbd_convolve_2d_sr_ssse3() 132 s[7 + 8] = _mm_unpackhi_epi16(s7, s8); in av1_highbd_convolve_2d_sr_ssse3()
|
D | highbd_jnt_convolve_sse4.c | 77 s[4] = _mm_unpackhi_epi16(s0, s1); in av1_highbd_dist_wtd_convolve_y_sse4_1() 78 s[5] = _mm_unpackhi_epi16(s2, s3); in av1_highbd_dist_wtd_convolve_y_sse4_1() 79 s[6] = _mm_unpackhi_epi16(s4, s5); in av1_highbd_dist_wtd_convolve_y_sse4_1() 85 s[4 + 8] = _mm_unpackhi_epi16(s1, s2); in av1_highbd_dist_wtd_convolve_y_sse4_1() 86 s[5 + 8] = _mm_unpackhi_epi16(s3, s4); in av1_highbd_dist_wtd_convolve_y_sse4_1() 87 s[6 + 8] = _mm_unpackhi_epi16(s5, s6); in av1_highbd_dist_wtd_convolve_y_sse4_1() 96 s[7] = _mm_unpackhi_epi16(s6, s7); in av1_highbd_dist_wtd_convolve_y_sse4_1() 99 s[7 + 8] = _mm_unpackhi_epi16(s7, s8); in av1_highbd_dist_wtd_convolve_y_sse4_1() 186 const __m128i data_ref_0_hi_0 = _mm_unpackhi_epi16(data_0, zero); in av1_highbd_dist_wtd_convolve_y_sse4_1() 187 const __m128i data_ref_0_hi_1 = _mm_unpackhi_epi16(data_1, zero); in av1_highbd_dist_wtd_convolve_y_sse4_1() [all …]
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | post_proc_sse2.c | 56 sumsq_1 = _mm_unpackhi_epi16(tmp_0, tmp_1); in vpx_mbpost_proc_down_sse2() 65 sumsq_1 = _mm_add_epi32(sumsq_1, _mm_unpackhi_epi16(a, zero)); in vpx_mbpost_proc_down_sse2() 95 sumsq_1 = _mm_sub_epi32(sumsq_1, _mm_unpackhi_epi16(above_sq, zero)); in vpx_mbpost_proc_down_sse2() 99 sumsq_1 = _mm_add_epi32(sumsq_1, _mm_unpackhi_epi16(below_sq, zero)); in vpx_mbpost_proc_down_sse2() 111 mask_1 = _mm_sub_epi32(mask_1, _mm_unpackhi_epi16(multmp_0, multmp_1)); in vpx_mbpost_proc_down_sse2()
|
D | inv_txfm_sse2.c | 20 const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]); in transpose_16bit_4() 23 res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1); in transpose_16bit_4() 92 u[1] = _mm_unpackhi_epi16(in[0], in[1]); in idct4_sse2() 256 s[1] = _mm_unpackhi_epi16(in[7], in[0]); in iadst8_sse2() 258 s[3] = _mm_unpackhi_epi16(in[5], in[2]); in iadst8_sse2() 260 s[5] = _mm_unpackhi_epi16(in[3], in[4]); in iadst8_sse2() 262 s[7] = _mm_unpackhi_epi16(in[1], in[6]); in iadst8_sse2() 333 u[1] = _mm_unpackhi_epi16(in[4], in[5]); in iadst8_sse2() 335 u[3] = _mm_unpackhi_epi16(in[6], in[7]); in iadst8_sse2() 372 u[1] = _mm_unpackhi_epi16(s[2], s[3]); in iadst8_sse2() [all …]
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-sse41-mul16-ld64-x32.c | 85 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 87 …__m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 89 …__m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 91 …__m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 94 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 96 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vyprod89ABCDEFlo, vyprod89ABCDEFhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 98 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vyprodGHIJKLMNlo, vyprodGHIJKLMNhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 100 vaccSTUV = _mm_add_epi32(vaccSTUV, _mm_unpackhi_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 162 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 165 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
|
D | minmax-sse2-mul16-ld64-x32.c | 93 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 95 …__m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 97 …__m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 99 …__m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 102 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 104 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vyprod89ABCDEFlo, vyprod89ABCDEFhi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 106 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vyprodGHIJKLMNlo, vyprodGHIJKLMNhi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 108 vaccSTUV = _mm_add_epi32(vaccSTUV, _mm_unpackhi_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 172 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 175 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
|
D | minmax-sse41-mul16-ld64-x24.c | 75 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 77 …__m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 79 …__m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 82 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 84 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vyprod89ABCDEFlo, vyprod89ABCDEFhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 86 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vyprodGHIJKLMNlo, vyprodGHIJKLMNhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 141 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24() 144 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
|
/external/libvpx/libvpx/vp9/encoder/x86/ |
D | vp9_dct_intrin_sse2.c | 61 const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]); in transpose_4x4() 303 v1 = _mm_unpackhi_epi16(u0, u1); in fdct8_sse2() 305 v3 = _mm_unpackhi_epi16(u2, u3); in fdct8_sse2() 343 u1 = _mm_unpackhi_epi16(s6, s5); in fdct8_sse2() 371 u1 = _mm_unpackhi_epi16(s0, s3); in fdct8_sse2() 373 u3 = _mm_unpackhi_epi16(s1, s2); in fdct8_sse2() 450 s1 = _mm_unpackhi_epi16(in0, in1); in fadst8_sse2() 452 s3 = _mm_unpackhi_epi16(in2, in3); in fadst8_sse2() 454 s5 = _mm_unpackhi_epi16(in4, in5); in fadst8_sse2() 456 s7 = _mm_unpackhi_epi16(in6, in7); in fadst8_sse2() [all …]
|
/external/libaom/libaom/av1/encoder/x86/ |
D | wedge_utils_sse2.c | 52 const __m128i v_rd0h_w = _mm_unpackhi_epi16(v_d0_w, v_r0_w); in av1_wedge_sse_from_residuals_sse2() 54 const __m128i v_rd1h_w = _mm_unpackhi_epi16(v_d1_w, v_r1_w); in av1_wedge_sse_from_residuals_sse2() 59 const __m128i v_m0h_w = _mm_unpackhi_epi16(v_m0_w, v_mask_max_w); in av1_wedge_sse_from_residuals_sse2() 61 const __m128i v_m1h_w = _mm_unpackhi_epi16(v_m1_w, v_mask_max_w); in av1_wedge_sse_from_residuals_sse2() 212 const __m128i v_ab0h_w = _mm_unpackhi_epi16(v_a0_w, v_b0_w); in av1_wedge_compute_delta_squares_sse2() 214 const __m128i v_ab1h_w = _mm_unpackhi_epi16(v_a1_w, v_b1_w); in av1_wedge_compute_delta_squares_sse2() 216 const __m128i v_ab2h_w = _mm_unpackhi_epi16(v_a2_w, v_b2_w); in av1_wedge_compute_delta_squares_sse2() 218 const __m128i v_ab3h_w = _mm_unpackhi_epi16(v_a3_w, v_b3_w); in av1_wedge_compute_delta_squares_sse2()
|
/external/XNNPACK/src/x8-zip/ |
D | x4-sse2.c | 40 const __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo); in xnn_x8_zip_x4_ukernel__sse2() 42 const __m128i vxyzw3 = _mm_unpackhi_epi16(vxy_hi, vzw_hi); in xnn_x8_zip_x4_ukernel__sse2() 61 const __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo); in xnn_x8_zip_x4_ukernel__sse2() 63 const __m128i vxyzw3 = _mm_unpackhi_epi16(vxy_hi, vzw_hi); in xnn_x8_zip_x4_ukernel__sse2()
|