/external/libgav1/libgav1/src/dsp/x86/ |
D | transpose_sse4.h | 41 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in Transpose2x16_U16() 43 const __m128i a2 = _mm_unpacklo_epi16(in[2], in[3]); in Transpose2x16_U16() 49 const __m128i b0 = _mm_unpacklo_epi16(a0, a1); in Transpose2x16_U16() 51 const __m128i b2 = _mm_unpacklo_epi16(a2, a3); in Transpose2x16_U16() 57 out[0] = _mm_unpacklo_epi16(b0, b1); in Transpose2x16_U16() 59 out[2] = _mm_unpacklo_epi16(b2, b3); in Transpose2x16_U16() 77 return _mm_unpacklo_epi16(a0, a1); in Transpose4x4_U8() 105 const __m128i b0 = _mm_unpacklo_epi16(a0, a1); in Transpose8x8To4x16_U8() 106 const __m128i b1 = _mm_unpacklo_epi16(a2, a3); in Transpose8x8To4x16_U8() 129 const __m128i ba = _mm_unpacklo_epi16(in[0], in[1]); in Transpose4x4_U16() [all …]
|
/external/libavc/common/x86/ |
D | ih264_iquant_itrans_recon_ssse3.c | 140 …temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bi… in ih264_iquant_itrans_recon_4x4_ssse3() 142 …temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bi… in ih264_iquant_itrans_recon_4x4_ssse3() 145 src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long in ih264_iquant_itrans_recon_4x4_ssse3() 147 src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long in ih264_iquant_itrans_recon_4x4_ssse3() 249 pred_r0 = _mm_unpacklo_epi16(pred_r0, zero_8x16b); //p00 p01 p02 p03 -- 32 bits sign extended in ih264_iquant_itrans_recon_4x4_ssse3() 250 pred_r1 = _mm_unpacklo_epi16(pred_r1, zero_8x16b); //p10 p11 p12 p13 -- 32 bits sign extended in ih264_iquant_itrans_recon_4x4_ssse3() 251 pred_r2 = _mm_unpacklo_epi16(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- 32 bits sign extended in ih264_iquant_itrans_recon_4x4_ssse3() 252 pred_r3 = _mm_unpacklo_epi16(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- 32 bits sign extended in ih264_iquant_itrans_recon_4x4_ssse3() 415 src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long in ih264_iquant_itrans_recon_8x8_ssse3() 418 …scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- … in ih264_iquant_itrans_recon_8x8_ssse3() [all …]
|
/external/libaom/libaom/aom_dsp/x86/ |
D | transpose_sse2.h | 33 return _mm_unpacklo_epi16(a0, a1); in transpose_8bit_4x4() 62 const __m128i b0 = _mm_unpacklo_epi16(a0, a1); in transpose_8bit_8x8() 64 const __m128i b2 = _mm_unpacklo_epi16(a2, a3); in transpose_8bit_8x8() 106 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_4x4() 107 const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); in transpose_16bit_4x4() 136 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_4x8() 137 const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); in transpose_16bit_4x8() 138 const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]); in transpose_16bit_4x8() 139 const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]); in transpose_16bit_4x8() 175 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_8x4() [all …]
|
D | lpf_common_sse2.h | 34 w0 = _mm_unpacklo_epi16(*x0, *x1); // 00 10 01 11 02 12 03 13 in highbd_transpose6x6_sse2() 35 w1 = _mm_unpacklo_epi16(*x2, *x3); // 20 30 21 31 22 32 23 33 in highbd_transpose6x6_sse2() 36 w2 = _mm_unpacklo_epi16(*x4, *x5); // 40 50 41 51 42 52 43 53 in highbd_transpose6x6_sse2() 66 w0 = _mm_unpacklo_epi16(*x0, *x1); // 00 10 01 11 02 12 03 13 in highbd_transpose4x8_8x4_low_sse2() 67 w1 = _mm_unpacklo_epi16(*x2, *x3); // 20 30 21 31 22 32 23 33 in highbd_transpose4x8_8x4_low_sse2() 139 w0 = _mm_unpacklo_epi16(*x0, *x1); // 00 10 01 11 02 12 03 13 in highbd_transpose8x8_low_sse2() 140 w1 = _mm_unpacklo_epi16(*x2, *x3); // 20 30 21 31 22 32 23 33 in highbd_transpose8x8_low_sse2() 141 w2 = _mm_unpacklo_epi16(*x4, *x5); // 40 50 41 51 42 52 43 53 in highbd_transpose8x8_low_sse2() 142 w3 = _mm_unpacklo_epi16(*x6, *x7); // 60 70 61 71 62 72 63 73 in highbd_transpose8x8_low_sse2() 238 *d0 = _mm_unpacklo_epi16( in transpose4x8_8x4_low_sse2() [all …]
|
D | highbd_convolve_ssse3.c | 61 s[0] = _mm_unpacklo_epi16(s0, s1); in av1_highbd_convolve_y_sr_ssse3() 62 s[1] = _mm_unpacklo_epi16(s2, s3); in av1_highbd_convolve_y_sr_ssse3() 63 s[2] = _mm_unpacklo_epi16(s4, s5); in av1_highbd_convolve_y_sr_ssse3() 69 s[0 + 8] = _mm_unpacklo_epi16(s1, s2); in av1_highbd_convolve_y_sr_ssse3() 70 s[1 + 8] = _mm_unpacklo_epi16(s3, s4); in av1_highbd_convolve_y_sr_ssse3() 71 s[2 + 8] = _mm_unpacklo_epi16(s5, s6); in av1_highbd_convolve_y_sr_ssse3() 83 s[3] = _mm_unpacklo_epi16(s6, s7); in av1_highbd_convolve_y_sr_ssse3() 86 s[3 + 8] = _mm_unpacklo_epi16(s7, s8); in av1_highbd_convolve_y_sr_ssse3() 236 __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1); in av1_highbd_convolve_x_sr_ssse3()
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | transpose_sse2.h | 32 return _mm_unpacklo_epi16(a0, a1); in transpose_8bit_4x4() 61 const __m128i b0 = _mm_unpacklo_epi16(a0, a1); in transpose_8bit_8x8() 63 const __m128i b2 = _mm_unpacklo_epi16(a2, a3); in transpose_8bit_8x8() 105 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_4x4() 106 const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); in transpose_16bit_4x4() 131 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_4x8() 132 const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); in transpose_16bit_4x8() 133 const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]); in transpose_16bit_4x8() 134 const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]); in transpose_16bit_4x8() 177 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_8x8() [all …]
|
D | post_proc_sse2.c | 55 sumsq_0 = _mm_unpacklo_epi16(tmp_0, tmp_1); in vpx_mbpost_proc_down_sse2() 64 sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(a, zero)); in vpx_mbpost_proc_down_sse2() 94 sumsq_0 = _mm_sub_epi32(sumsq_0, _mm_unpacklo_epi16(above_sq, zero)); in vpx_mbpost_proc_down_sse2() 98 sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(below_sq, zero)); in vpx_mbpost_proc_down_sse2() 110 mask_0 = _mm_sub_epi32(mask_0, _mm_unpacklo_epi16(multmp_0, multmp_1)); in vpx_mbpost_proc_down_sse2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up24x9-minmax-sse41-mul16.c | 113 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 115 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 117 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 144 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 146 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 171 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 173 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 175 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 200 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() [all …]
|
D | up16x9-minmax-sse41-mul16.c | 105 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 107 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 128 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 149 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 168 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 170 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 189 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 191 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() [all …]
|
D | up8x9-minmax-sse41-mul16.c | 97 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 110 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 123 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 136 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 149 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 162 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 188 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 201 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 264 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() [all …]
|
D | up16x9-minmax-ssse3-mul16.c | 105 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 107 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 128 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 149 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 168 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 170 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 189 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 191 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() [all …]
|
D | up16x9-minmax-sse2-mul16.c | 105 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 107 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 128 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 149 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 168 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 170 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 189 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 191 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() [all …]
|
D | up24x9-minmax-ssse3-mul16.c | 113 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 115 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 117 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 144 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 146 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 171 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 173 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 175 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 200 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() [all …]
|
D | up24x9-minmax-sse2-mul16.c | 113 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 115 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 117 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 144 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 146 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 171 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 173 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 175 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 200 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() [all …]
|
D | up8x9-minmax-sse2-mul16.c | 97 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 110 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 123 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 136 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 149 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 162 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 188 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 201 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 285 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() [all …]
|
D | up8x9-minmax-ssse3-mul16.c | 97 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 110 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 123 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 136 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 149 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 162 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 175 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 188 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 201 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 285 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() [all …]
|
/external/libhevc/common/x86/ |
D | ihevc_itrans_recon_ssse3_intr.c | 172 m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_0, m_ge_zero16b_flag_row0); in ihevc_itrans_recon_4x4_ttype1_ssse3() 173 m_temp_reg_1 = _mm_unpacklo_epi16(m_temp_reg_1, m_ge_zero16b_flag_row1); in ihevc_itrans_recon_4x4_ttype1_ssse3() 174 m_temp_reg_2 = _mm_unpacklo_epi16(m_temp_reg_2, m_ge_zero16b_flag_row2); in ihevc_itrans_recon_4x4_ttype1_ssse3() 175 m_temp_reg_3 = _mm_unpacklo_epi16(m_temp_reg_3, m_ge_zero16b_flag_row3); in ihevc_itrans_recon_4x4_ttype1_ssse3() 295 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_ttype1_ssse3() 296 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_ssse3() 317 m_temp_reg_20 = _mm_unpacklo_epi16(m_temp_reg_30, m_ge_zero16b_flag_row0); in ihevc_itrans_recon_4x4_ttype1_ssse3() 318 m_temp_reg_21 = _mm_unpacklo_epi16(m_temp_reg_31, m_ge_zero16b_flag_row1); in ihevc_itrans_recon_4x4_ttype1_ssse3() 433 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_ttype1_ssse3() 434 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_ssse3() [all …]
|
D | ihevc_itrans_recon_sse42_intr.c | 246 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_ttype1_sse42() 247 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_sse42() 337 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_ttype1_sse42() 338 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_sse42() 572 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_sse42() 573 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_sse42() 664 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_sse42() 665 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_sse42() 885 m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); in ihevc_itrans_recon_8x8_sse42() 903 m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); in ihevc_itrans_recon_8x8_sse42() [all …]
|
/external/libaom/libaom/av1/common/x86/ |
D | highbd_convolve_2d_ssse3.c | 91 __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1); in av1_highbd_convolve_2d_sr_ssse3() 106 s[0] = _mm_unpacklo_epi16(s0, s1); in av1_highbd_convolve_2d_sr_ssse3() 107 s[1] = _mm_unpacklo_epi16(s2, s3); in av1_highbd_convolve_2d_sr_ssse3() 108 s[2] = _mm_unpacklo_epi16(s4, s5); in av1_highbd_convolve_2d_sr_ssse3() 114 s[0 + 8] = _mm_unpacklo_epi16(s1, s2); in av1_highbd_convolve_2d_sr_ssse3() 115 s[1 + 8] = _mm_unpacklo_epi16(s3, s4); in av1_highbd_convolve_2d_sr_ssse3() 116 s[2 + 8] = _mm_unpacklo_epi16(s5, s6); in av1_highbd_convolve_2d_sr_ssse3() 128 s[3] = _mm_unpacklo_epi16(s6, s7); in av1_highbd_convolve_2d_sr_ssse3() 131 s[3 + 8] = _mm_unpacklo_epi16(s7, s8); in av1_highbd_convolve_2d_sr_ssse3()
|
D | highbd_jnt_convolve_sse4.c | 73 s[0] = _mm_unpacklo_epi16(s0, s1); in av1_highbd_dist_wtd_convolve_y_sse4_1() 74 s[1] = _mm_unpacklo_epi16(s2, s3); in av1_highbd_dist_wtd_convolve_y_sse4_1() 75 s[2] = _mm_unpacklo_epi16(s4, s5); in av1_highbd_dist_wtd_convolve_y_sse4_1() 81 s[0 + 8] = _mm_unpacklo_epi16(s1, s2); in av1_highbd_dist_wtd_convolve_y_sse4_1() 82 s[1 + 8] = _mm_unpacklo_epi16(s3, s4); in av1_highbd_dist_wtd_convolve_y_sse4_1() 83 s[2 + 8] = _mm_unpacklo_epi16(s5, s6); in av1_highbd_dist_wtd_convolve_y_sse4_1() 95 s[3] = _mm_unpacklo_epi16(s6, s7); in av1_highbd_dist_wtd_convolve_y_sse4_1() 98 s[3 + 8] = _mm_unpacklo_epi16(s7, s8); in av1_highbd_dist_wtd_convolve_y_sse4_1() 121 const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero); in av1_highbd_dist_wtd_convolve_y_sse4_1() 122 const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero); in av1_highbd_dist_wtd_convolve_y_sse4_1() [all …]
|
D | cfl_sse2.c | 40 sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros), in subtract_average_sse2() 41 _mm_unpacklo_epi16(l1, zeros))); in subtract_average_sse2() 49 sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros), in subtract_average_sse2() 53 sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros), in subtract_average_sse2()
|
/external/libvpx/libvpx/vp9/encoder/x86/ |
D | vp9_dct_intrin_sse2.c | 60 const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]); in transpose_4x4() 83 u[0] = _mm_unpacklo_epi16(in[0], in[1]); in fdct4_sse2() 84 u[1] = _mm_unpacklo_epi16(in[3], in[2]); in fdct4_sse2() 119 u[0] = _mm_unpacklo_epi16(in[0], in[1]); in fadst4_sse2() 120 u[1] = _mm_unpacklo_epi16(in[2], in[3]); in fadst4_sse2() 121 u[2] = _mm_unpacklo_epi16(in7, kZero); in fadst4_sse2() 122 u[3] = _mm_unpacklo_epi16(in[2], kZero); in fadst4_sse2() 123 u[4] = _mm_unpacklo_epi16(in[3], kZero); in fadst4_sse2() 302 v0 = _mm_unpacklo_epi16(u0, u1); in fdct8_sse2() 304 v2 = _mm_unpacklo_epi16(u2, u3); in fdct8_sse2() [all …]
|
/external/XNNPACK/src/qu8-dwconv/ |
D | up8x9-minmax-sse2.c | 81 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod0_odd, vprod0_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 90 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod1_odd, vprod1_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 99 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod2_odd, vprod2_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 108 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod3_odd, vprod3_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 117 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod4_odd, vprod4_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 126 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod5_odd, vprod5_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 135 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod6_odd, vprod6_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 144 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod7_odd, vprod7_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 153 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod8_odd, vprod8_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 233 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod0_odd, vprod0_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() [all …]
|
/external/libmpeg2/common/x86/ |
D | impeg2_idct_recon_sse42_intr.c | 223 m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); in impeg2_idct_recon_sse42() 241 m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); in impeg2_idct_recon_sse42() 277 m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73); in impeg2_idct_recon_sse42() 399 m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51); in impeg2_idct_recon_sse42() 400 m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53); in impeg2_idct_recon_sse42() 404 m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55); in impeg2_idct_recon_sse42() 405 m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57); in impeg2_idct_recon_sse42() 431 m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); in impeg2_idct_recon_sse42() 449 m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); in impeg2_idct_recon_sse42() 485 m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73); in impeg2_idct_recon_sse42() [all …]
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-sse41-mul16-ld64-x32.c | 84 …__m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 86 …__m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 88 …__m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 90 …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 93 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 95 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vyprod89ABCDEFlo, vyprod89ABCDEFhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 97 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vyprodGHIJKLMNlo, vyprodGHIJKLMNhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 99 vaccOPQR = _mm_add_epi32(vaccOPQR, _mm_unpacklo_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 161 …__m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32() 164 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
|