Home
last modified time | relevance | path

Searched refs:_mm_unpacklo_epi16 (Results 1 – 25 of 220) sorted by relevance

123456789

/external/libgav1/libgav1/src/dsp/x86/
Dtranspose_sse4.h41 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in Transpose2x16_U16()
43 const __m128i a2 = _mm_unpacklo_epi16(in[2], in[3]); in Transpose2x16_U16()
49 const __m128i b0 = _mm_unpacklo_epi16(a0, a1); in Transpose2x16_U16()
51 const __m128i b2 = _mm_unpacklo_epi16(a2, a3); in Transpose2x16_U16()
57 out[0] = _mm_unpacklo_epi16(b0, b1); in Transpose2x16_U16()
59 out[2] = _mm_unpacklo_epi16(b2, b3); in Transpose2x16_U16()
77 return _mm_unpacklo_epi16(a0, a1); in Transpose4x4_U8()
105 const __m128i b0 = _mm_unpacklo_epi16(a0, a1); in Transpose8x8To4x16_U8()
106 const __m128i b1 = _mm_unpacklo_epi16(a2, a3); in Transpose8x8To4x16_U8()
129 const __m128i ba = _mm_unpacklo_epi16(in[0], in[1]); in Transpose4x4_U16()
[all …]
/external/libavc/common/x86/
Dih264_iquant_itrans_recon_ssse3.c140 …temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bi… in ih264_iquant_itrans_recon_4x4_ssse3()
142 …temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bi… in ih264_iquant_itrans_recon_4x4_ssse3()
145 src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long in ih264_iquant_itrans_recon_4x4_ssse3()
147 src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long in ih264_iquant_itrans_recon_4x4_ssse3()
249 pred_r0 = _mm_unpacklo_epi16(pred_r0, zero_8x16b); //p00 p01 p02 p03 -- 32 bits sign extended in ih264_iquant_itrans_recon_4x4_ssse3()
250 pred_r1 = _mm_unpacklo_epi16(pred_r1, zero_8x16b); //p10 p11 p12 p13 -- 32 bits sign extended in ih264_iquant_itrans_recon_4x4_ssse3()
251 pred_r2 = _mm_unpacklo_epi16(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- 32 bits sign extended in ih264_iquant_itrans_recon_4x4_ssse3()
252 pred_r3 = _mm_unpacklo_epi16(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- 32 bits sign extended in ih264_iquant_itrans_recon_4x4_ssse3()
415 src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long in ih264_iquant_itrans_recon_8x8_ssse3()
418 …scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- … in ih264_iquant_itrans_recon_8x8_ssse3()
[all …]
/external/libaom/libaom/aom_dsp/x86/
Dtranspose_sse2.h33 return _mm_unpacklo_epi16(a0, a1); in transpose_8bit_4x4()
62 const __m128i b0 = _mm_unpacklo_epi16(a0, a1); in transpose_8bit_8x8()
64 const __m128i b2 = _mm_unpacklo_epi16(a2, a3); in transpose_8bit_8x8()
106 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_4x4()
107 const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); in transpose_16bit_4x4()
136 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_4x8()
137 const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); in transpose_16bit_4x8()
138 const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]); in transpose_16bit_4x8()
139 const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]); in transpose_16bit_4x8()
175 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_8x4()
[all …]
Dlpf_common_sse2.h34 w0 = _mm_unpacklo_epi16(*x0, *x1); // 00 10 01 11 02 12 03 13 in highbd_transpose6x6_sse2()
35 w1 = _mm_unpacklo_epi16(*x2, *x3); // 20 30 21 31 22 32 23 33 in highbd_transpose6x6_sse2()
36 w2 = _mm_unpacklo_epi16(*x4, *x5); // 40 50 41 51 42 52 43 53 in highbd_transpose6x6_sse2()
66 w0 = _mm_unpacklo_epi16(*x0, *x1); // 00 10 01 11 02 12 03 13 in highbd_transpose4x8_8x4_low_sse2()
67 w1 = _mm_unpacklo_epi16(*x2, *x3); // 20 30 21 31 22 32 23 33 in highbd_transpose4x8_8x4_low_sse2()
139 w0 = _mm_unpacklo_epi16(*x0, *x1); // 00 10 01 11 02 12 03 13 in highbd_transpose8x8_low_sse2()
140 w1 = _mm_unpacklo_epi16(*x2, *x3); // 20 30 21 31 22 32 23 33 in highbd_transpose8x8_low_sse2()
141 w2 = _mm_unpacklo_epi16(*x4, *x5); // 40 50 41 51 42 52 43 53 in highbd_transpose8x8_low_sse2()
142 w3 = _mm_unpacklo_epi16(*x6, *x7); // 60 70 61 71 62 72 63 73 in highbd_transpose8x8_low_sse2()
238 *d0 = _mm_unpacklo_epi16( in transpose4x8_8x4_low_sse2()
[all …]
Dhighbd_convolve_ssse3.c61 s[0] = _mm_unpacklo_epi16(s0, s1); in av1_highbd_convolve_y_sr_ssse3()
62 s[1] = _mm_unpacklo_epi16(s2, s3); in av1_highbd_convolve_y_sr_ssse3()
63 s[2] = _mm_unpacklo_epi16(s4, s5); in av1_highbd_convolve_y_sr_ssse3()
69 s[0 + 8] = _mm_unpacklo_epi16(s1, s2); in av1_highbd_convolve_y_sr_ssse3()
70 s[1 + 8] = _mm_unpacklo_epi16(s3, s4); in av1_highbd_convolve_y_sr_ssse3()
71 s[2 + 8] = _mm_unpacklo_epi16(s5, s6); in av1_highbd_convolve_y_sr_ssse3()
83 s[3] = _mm_unpacklo_epi16(s6, s7); in av1_highbd_convolve_y_sr_ssse3()
86 s[3 + 8] = _mm_unpacklo_epi16(s7, s8); in av1_highbd_convolve_y_sr_ssse3()
236 __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1); in av1_highbd_convolve_x_sr_ssse3()
/external/libvpx/libvpx/vpx_dsp/x86/
Dtranspose_sse2.h32 return _mm_unpacklo_epi16(a0, a1); in transpose_8bit_4x4()
61 const __m128i b0 = _mm_unpacklo_epi16(a0, a1); in transpose_8bit_8x8()
63 const __m128i b2 = _mm_unpacklo_epi16(a2, a3); in transpose_8bit_8x8()
105 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_4x4()
106 const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); in transpose_16bit_4x4()
131 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_4x8()
132 const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); in transpose_16bit_4x8()
133 const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]); in transpose_16bit_4x8()
134 const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]); in transpose_16bit_4x8()
177 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); in transpose_16bit_8x8()
[all …]
Dpost_proc_sse2.c55 sumsq_0 = _mm_unpacklo_epi16(tmp_0, tmp_1); in vpx_mbpost_proc_down_sse2()
64 sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(a, zero)); in vpx_mbpost_proc_down_sse2()
94 sumsq_0 = _mm_sub_epi32(sumsq_0, _mm_unpacklo_epi16(above_sq, zero)); in vpx_mbpost_proc_down_sse2()
98 sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(below_sq, zero)); in vpx_mbpost_proc_down_sse2()
110 mask_0 = _mm_sub_epi32(mask_0, _mm_unpacklo_epi16(multmp_0, multmp_1)); in vpx_mbpost_proc_down_sse2()
/external/XNNPACK/src/qs8-dwconv/gen/
Dup24x9-minmax-sse41-mul16.c113 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
115 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
117 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
144 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
146 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
171 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
173 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
175 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
200 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
[all …]
Dup16x9-minmax-sse41-mul16.c105 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
107 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
128 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
149 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
168 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
170 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
189 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
191 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
[all …]
Dup8x9-minmax-sse41-mul16.c97 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
110 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
123 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
136 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
149 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
162 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
175 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
188 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
201 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
264 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
[all …]
Dup16x9-minmax-ssse3-mul16.c105 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
107 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
128 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
149 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
168 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
170 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
189 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
191 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
[all …]
Dup16x9-minmax-sse2-mul16.c105 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
107 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
126 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
128 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
147 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
149 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
168 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
170 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
189 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
191 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
[all …]
Dup24x9-minmax-ssse3-mul16.c113 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
115 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
117 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
144 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
146 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
171 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
173 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
175 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
200 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
[all …]
Dup24x9-minmax-sse2-mul16.c113 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
115 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
117 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
142 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
144 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
146 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
171 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
173 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
175 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
200 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
[all …]
Dup8x9-minmax-sse2-mul16.c97 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
110 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
123 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
136 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
149 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
162 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
175 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
188 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
201 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
285 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
[all …]
Dup8x9-minmax-ssse3-mul16.c97 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
110 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
123 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
136 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
149 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
162 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
175 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
188 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
201 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
285 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
[all …]
/external/libhevc/common/x86/
Dihevc_itrans_recon_ssse3_intr.c172 m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_0, m_ge_zero16b_flag_row0); in ihevc_itrans_recon_4x4_ttype1_ssse3()
173 m_temp_reg_1 = _mm_unpacklo_epi16(m_temp_reg_1, m_ge_zero16b_flag_row1); in ihevc_itrans_recon_4x4_ttype1_ssse3()
174 m_temp_reg_2 = _mm_unpacklo_epi16(m_temp_reg_2, m_ge_zero16b_flag_row2); in ihevc_itrans_recon_4x4_ttype1_ssse3()
175 m_temp_reg_3 = _mm_unpacklo_epi16(m_temp_reg_3, m_ge_zero16b_flag_row3); in ihevc_itrans_recon_4x4_ttype1_ssse3()
295 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_ttype1_ssse3()
296 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_ssse3()
317 m_temp_reg_20 = _mm_unpacklo_epi16(m_temp_reg_30, m_ge_zero16b_flag_row0); in ihevc_itrans_recon_4x4_ttype1_ssse3()
318 m_temp_reg_21 = _mm_unpacklo_epi16(m_temp_reg_31, m_ge_zero16b_flag_row1); in ihevc_itrans_recon_4x4_ttype1_ssse3()
433 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_ttype1_ssse3()
434 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_ssse3()
[all …]
Dihevc_itrans_recon_sse42_intr.c246 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_ttype1_sse42()
247 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_sse42()
337 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_ttype1_sse42()
338 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_ttype1_sse42()
572 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_sse42()
573 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_sse42()
664 m_temp_reg_24 = _mm_unpacklo_epi16(m_temp_reg_20, m_temp_reg_22); in ihevc_itrans_recon_4x4_sse42()
665 m_temp_reg_25 = _mm_unpacklo_epi16(m_temp_reg_21, m_temp_reg_23); in ihevc_itrans_recon_4x4_sse42()
885 m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); in ihevc_itrans_recon_8x8_sse42()
903 m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); in ihevc_itrans_recon_8x8_sse42()
[all …]
/external/libaom/libaom/av1/common/x86/
Dhighbd_convolve_2d_ssse3.c91 __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1); in av1_highbd_convolve_2d_sr_ssse3()
106 s[0] = _mm_unpacklo_epi16(s0, s1); in av1_highbd_convolve_2d_sr_ssse3()
107 s[1] = _mm_unpacklo_epi16(s2, s3); in av1_highbd_convolve_2d_sr_ssse3()
108 s[2] = _mm_unpacklo_epi16(s4, s5); in av1_highbd_convolve_2d_sr_ssse3()
114 s[0 + 8] = _mm_unpacklo_epi16(s1, s2); in av1_highbd_convolve_2d_sr_ssse3()
115 s[1 + 8] = _mm_unpacklo_epi16(s3, s4); in av1_highbd_convolve_2d_sr_ssse3()
116 s[2 + 8] = _mm_unpacklo_epi16(s5, s6); in av1_highbd_convolve_2d_sr_ssse3()
128 s[3] = _mm_unpacklo_epi16(s6, s7); in av1_highbd_convolve_2d_sr_ssse3()
131 s[3 + 8] = _mm_unpacklo_epi16(s7, s8); in av1_highbd_convolve_2d_sr_ssse3()
Dhighbd_jnt_convolve_sse4.c73 s[0] = _mm_unpacklo_epi16(s0, s1); in av1_highbd_dist_wtd_convolve_y_sse4_1()
74 s[1] = _mm_unpacklo_epi16(s2, s3); in av1_highbd_dist_wtd_convolve_y_sse4_1()
75 s[2] = _mm_unpacklo_epi16(s4, s5); in av1_highbd_dist_wtd_convolve_y_sse4_1()
81 s[0 + 8] = _mm_unpacklo_epi16(s1, s2); in av1_highbd_dist_wtd_convolve_y_sse4_1()
82 s[1 + 8] = _mm_unpacklo_epi16(s3, s4); in av1_highbd_dist_wtd_convolve_y_sse4_1()
83 s[2 + 8] = _mm_unpacklo_epi16(s5, s6); in av1_highbd_dist_wtd_convolve_y_sse4_1()
95 s[3] = _mm_unpacklo_epi16(s6, s7); in av1_highbd_dist_wtd_convolve_y_sse4_1()
98 s[3 + 8] = _mm_unpacklo_epi16(s7, s8); in av1_highbd_dist_wtd_convolve_y_sse4_1()
121 const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero); in av1_highbd_dist_wtd_convolve_y_sse4_1()
122 const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero); in av1_highbd_dist_wtd_convolve_y_sse4_1()
[all …]
Dcfl_sse2.c40 sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros), in subtract_average_sse2()
41 _mm_unpacklo_epi16(l1, zeros))); in subtract_average_sse2()
49 sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros), in subtract_average_sse2()
53 sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros), in subtract_average_sse2()
/external/libvpx/libvpx/vp9/encoder/x86/
Dvp9_dct_intrin_sse2.c60 const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]); in transpose_4x4()
83 u[0] = _mm_unpacklo_epi16(in[0], in[1]); in fdct4_sse2()
84 u[1] = _mm_unpacklo_epi16(in[3], in[2]); in fdct4_sse2()
119 u[0] = _mm_unpacklo_epi16(in[0], in[1]); in fadst4_sse2()
120 u[1] = _mm_unpacklo_epi16(in[2], in[3]); in fadst4_sse2()
121 u[2] = _mm_unpacklo_epi16(in7, kZero); in fadst4_sse2()
122 u[3] = _mm_unpacklo_epi16(in[2], kZero); in fadst4_sse2()
123 u[4] = _mm_unpacklo_epi16(in[3], kZero); in fadst4_sse2()
302 v0 = _mm_unpacklo_epi16(u0, u1); in fdct8_sse2()
304 v2 = _mm_unpacklo_epi16(u2, u3); in fdct8_sse2()
[all …]
/external/XNNPACK/src/qu8-dwconv/
Dup8x9-minmax-sse2.c81 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod0_odd, vprod0_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
90 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod1_odd, vprod1_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
99 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod2_odd, vprod2_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
108 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod3_odd, vprod3_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
117 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod4_odd, vprod4_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
126 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod5_odd, vprod5_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
135 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod6_odd, vprod6_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
144 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod7_odd, vprod7_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
153 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod8_odd, vprod8_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
233 vacc_lo = _mm_add_epi32(vacc_lo, _mm_unpacklo_epi16(vprod0_odd, vprod0_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
[all …]
/external/libmpeg2/common/x86/
Dimpeg2_idct_recon_sse42_intr.c223 m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); in impeg2_idct_recon_sse42()
241 m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); in impeg2_idct_recon_sse42()
277 m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73); in impeg2_idct_recon_sse42()
399 m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51); in impeg2_idct_recon_sse42()
400 m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53); in impeg2_idct_recon_sse42()
404 m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55); in impeg2_idct_recon_sse42()
405 m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57); in impeg2_idct_recon_sse42()
431 m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74); in impeg2_idct_recon_sse42()
449 m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76); in impeg2_idct_recon_sse42()
485 m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73); in impeg2_idct_recon_sse42()
[all …]
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-sse41-mul16-ld64-x32.c84 …__m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
86 …__m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
88 …__m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
90 …__m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
93 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
95 vacc89AB = _mm_add_epi32(vacc89AB, _mm_unpacklo_epi16(vyprod89ABCDEFlo, vyprod89ABCDEFhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
97 vaccGHIJ = _mm_add_epi32(vaccGHIJ, _mm_unpacklo_epi16(vyprodGHIJKLMNlo, vyprodGHIJKLMNhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
99 vaccOPQR = _mm_add_epi32(vaccOPQR, _mm_unpacklo_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
161 …__m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_unpacklo_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
164 vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()

123456789