Home
last modified time | relevance | path

Searched refs:_mm_unpackhi_epi16 (Results 1 – 25 of 210) sorted by relevance

123456789

/external/libgav1/libgav1/src/dsp/x86/
Dtranspose_sse4.h42 const __m128i a1 = _mm_unpackhi_epi16(in[0], in[1]); in Transpose2x16_U16()
44 const __m128i a3 = _mm_unpackhi_epi16(in[2], in[3]); in Transpose2x16_U16()
50 const __m128i b1 = _mm_unpackhi_epi16(a0, a1); in Transpose2x16_U16()
52 const __m128i b3 = _mm_unpackhi_epi16(a2, a3); in Transpose2x16_U16()
58 out[1] = _mm_unpackhi_epi16(b0, b1); in Transpose2x16_U16()
60 out[3] = _mm_unpackhi_epi16(b2, b3); in Transpose2x16_U16()
107 const __m128i b2 = _mm_unpackhi_epi16(a0, a1); in Transpose8x8To4x16_U8()
108 const __m128i b3 = _mm_unpackhi_epi16(a2, a3); in Transpose8x8To4x16_U8()
204 const __m128i a4 = _mm_unpackhi_epi16(in[0], in[1]); in Transpose8x4To4x8_U16()
205 const __m128i a5 = _mm_unpackhi_epi16(in[2], in[3]); in Transpose8x4To4x8_U16()
[all …]
/external/libavc/common/x86/
Dih264_iquant_itrans_recon_ssse3.c141 …temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bi… in ih264_iquant_itrans_recon_4x4_ssse3()
143 …temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bi… in ih264_iquant_itrans_recon_4x4_ssse3()
146 src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long in ih264_iquant_itrans_recon_4x4_ssse3()
148 src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long in ih264_iquant_itrans_recon_4x4_ssse3()
416 src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long in ih264_iquant_itrans_recon_8x8_ssse3()
419 …scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- … in ih264_iquant_itrans_recon_8x8_ssse3()
439 src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long in ih264_iquant_itrans_recon_8x8_ssse3()
442 …scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- … in ih264_iquant_itrans_recon_8x8_ssse3()
460 src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long in ih264_iquant_itrans_recon_8x8_ssse3()
463 …scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- … in ih264_iquant_itrans_recon_8x8_ssse3()
[all …]
/external/libaom/libaom/aom_dsp/x86/
Dlpf_common_sse2.h47 w3 = _mm_unpackhi_epi16(*x0, *x1); // 04 14 05 15 xx xx xx xx in highbd_transpose6x6_sse2()
48 w4 = _mm_unpackhi_epi16(*x2, *x3); // 24 34 25 35 xx xx xx xx in highbd_transpose6x6_sse2()
49 w5 = _mm_unpackhi_epi16(*x4, *x5); // 44 54 45 55 xx xx xx xx in highbd_transpose6x6_sse2()
85 w0 = _mm_unpackhi_epi16(*x0, *x1); // 04 14 05 15 06 16 07 17 in highbd_transpose4x8_8x4_high_sse2()
86 w1 = _mm_unpackhi_epi16(*x2, *x3); // 24 34 25 35 26 36 27 37 in highbd_transpose4x8_8x4_high_sse2()
172 w0 = _mm_unpackhi_epi16(*x0, *x1); // 04 14 05 15 06 16 07 17 in highbd_transpose8x8_high_sse2()
173 w1 = _mm_unpackhi_epi16(*x2, *x3); // 24 34 25 35 26 36 27 37 in highbd_transpose8x8_high_sse2()
174 w2 = _mm_unpackhi_epi16(*x4, *x5); // 44 54 45 55 46 56 47 57 in highbd_transpose8x8_high_sse2()
175 w3 = _mm_unpackhi_epi16(*x6, *x7); // 64 74 65 75 66 76 67 77 in highbd_transpose8x8_high_sse2()
278 ww1 = _mm_unpackhi_epi16( in transpose4x8_8x4_sse2()
[all …]
Dhighbd_convolve_ssse3.c65 s[4] = _mm_unpackhi_epi16(s0, s1); in av1_highbd_convolve_y_sr_ssse3()
66 s[5] = _mm_unpackhi_epi16(s2, s3); in av1_highbd_convolve_y_sr_ssse3()
67 s[6] = _mm_unpackhi_epi16(s4, s5); in av1_highbd_convolve_y_sr_ssse3()
73 s[4 + 8] = _mm_unpackhi_epi16(s1, s2); in av1_highbd_convolve_y_sr_ssse3()
74 s[5 + 8] = _mm_unpackhi_epi16(s3, s4); in av1_highbd_convolve_y_sr_ssse3()
75 s[6 + 8] = _mm_unpackhi_epi16(s5, s6); in av1_highbd_convolve_y_sr_ssse3()
84 s[7] = _mm_unpackhi_epi16(s6, s7); in av1_highbd_convolve_y_sr_ssse3()
87 s[7 + 8] = _mm_unpackhi_epi16(s7, s8); in av1_highbd_convolve_y_sr_ssse3()
Dtranspose_sse2.h63 const __m128i b1 = _mm_unpackhi_epi16(a0, a1); in transpose_8bit_8x8()
65 const __m128i b3 = _mm_unpackhi_epi16(a2, a3); in transpose_8bit_8x8()
177 const __m128i a4 = _mm_unpackhi_epi16(in[0], in[1]); in transpose_16bit_8x4()
178 const __m128i a5 = _mm_unpackhi_epi16(in[2], in[3]); in transpose_16bit_8x4()
234 const __m128i a4 = _mm_unpackhi_epi16(in[0], in[1]); in transpose_16bit_8x8()
235 const __m128i a5 = _mm_unpackhi_epi16(in[2], in[3]); in transpose_16bit_8x8()
236 const __m128i a6 = _mm_unpackhi_epi16(in[4], in[5]); in transpose_16bit_8x8()
237 const __m128i a7 = _mm_unpackhi_epi16(in[6], in[7]); in transpose_16bit_8x8()
/external/XNNPACK/src/qs8-dwconv/gen/
Dup24x9-minmax-sse41-mul16.c114 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
116 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
118 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
145 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
147 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
172 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
174 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
176 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
201 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
[all …]
Dup16x9-minmax-sse41-mul16.c106 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
108 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
129 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
150 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
169 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
171 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
190 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
192 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
[all …]
Dup8x9-minmax-sse41-mul16.c98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
150 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
163 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
189 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
202 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
265 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
[all …]
Dup16x9-minmax-ssse3-mul16.c106 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
108 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
129 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
150 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
169 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
171 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
190 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
192 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
[all …]
Dup16x9-minmax-sse2-mul16.c106 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
108 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
127 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
129 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
148 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
150 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
169 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
171 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp3x89ABCDEFlo, vp3x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
190 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
192 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp4x89ABCDEFlo, vp4x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
[all …]
Dup24x9-minmax-ssse3-mul16.c114 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
116 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
118 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
145 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
147 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
172 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
174 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
176 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
201 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
[all …]
Dup24x9-minmax-sse2-mul16.c114 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
116 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp0x89ABCDEFlo, vp0x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
118 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp0xGHIJKLMNlo, vp0xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
143 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
145 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp1x89ABCDEFlo, vp1x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
147 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp1xGHIJKLMNlo, vp1xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
172 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
174 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vp2x89ABCDEFlo, vp2x89ABCDEFhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
176 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vp2xGHIJKLMNlo, vp2xGHIJKLMNhi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
201 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
[all …]
Dup8x9-minmax-sse2-mul16.c98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
150 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
163 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
189 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
202 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
286 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
[all …]
Dup8x9-minmax-ssse3-mul16.c98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
150 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
163 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
189 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
202 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
286 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
[all …]
/external/XNNPACK/src/qu8-dwconv/
Dup8x9-minmax-sse2.c82 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod0_odd, vprod0_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
91 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod1_odd, vprod1_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
100 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod2_odd, vprod2_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
109 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod3_odd, vprod3_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
118 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod4_odd, vprod4_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
127 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod5_odd, vprod5_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
136 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod6_odd, vprod6_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
145 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod7_odd, vprod7_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
154 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod8_odd, vprod8_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
234 vacc_hi = _mm_add_epi32(vacc_hi, _mm_unpackhi_epi16(vprod0_odd, vprod0_even)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
[all …]
/external/libaom/libaom/av1/common/x86/
Dhighbd_convolve_2d_ssse3.c110 s[4] = _mm_unpackhi_epi16(s0, s1); in av1_highbd_convolve_2d_sr_ssse3()
111 s[5] = _mm_unpackhi_epi16(s2, s3); in av1_highbd_convolve_2d_sr_ssse3()
112 s[6] = _mm_unpackhi_epi16(s4, s5); in av1_highbd_convolve_2d_sr_ssse3()
118 s[4 + 8] = _mm_unpackhi_epi16(s1, s2); in av1_highbd_convolve_2d_sr_ssse3()
119 s[5 + 8] = _mm_unpackhi_epi16(s3, s4); in av1_highbd_convolve_2d_sr_ssse3()
120 s[6 + 8] = _mm_unpackhi_epi16(s5, s6); in av1_highbd_convolve_2d_sr_ssse3()
129 s[7] = _mm_unpackhi_epi16(s6, s7); in av1_highbd_convolve_2d_sr_ssse3()
132 s[7 + 8] = _mm_unpackhi_epi16(s7, s8); in av1_highbd_convolve_2d_sr_ssse3()
Dhighbd_jnt_convolve_sse4.c77 s[4] = _mm_unpackhi_epi16(s0, s1); in av1_highbd_dist_wtd_convolve_y_sse4_1()
78 s[5] = _mm_unpackhi_epi16(s2, s3); in av1_highbd_dist_wtd_convolve_y_sse4_1()
79 s[6] = _mm_unpackhi_epi16(s4, s5); in av1_highbd_dist_wtd_convolve_y_sse4_1()
85 s[4 + 8] = _mm_unpackhi_epi16(s1, s2); in av1_highbd_dist_wtd_convolve_y_sse4_1()
86 s[5 + 8] = _mm_unpackhi_epi16(s3, s4); in av1_highbd_dist_wtd_convolve_y_sse4_1()
87 s[6 + 8] = _mm_unpackhi_epi16(s5, s6); in av1_highbd_dist_wtd_convolve_y_sse4_1()
96 s[7] = _mm_unpackhi_epi16(s6, s7); in av1_highbd_dist_wtd_convolve_y_sse4_1()
99 s[7 + 8] = _mm_unpackhi_epi16(s7, s8); in av1_highbd_dist_wtd_convolve_y_sse4_1()
186 const __m128i data_ref_0_hi_0 = _mm_unpackhi_epi16(data_0, zero); in av1_highbd_dist_wtd_convolve_y_sse4_1()
187 const __m128i data_ref_0_hi_1 = _mm_unpackhi_epi16(data_1, zero); in av1_highbd_dist_wtd_convolve_y_sse4_1()
[all …]
/external/libvpx/libvpx/vpx_dsp/x86/
Dpost_proc_sse2.c56 sumsq_1 = _mm_unpackhi_epi16(tmp_0, tmp_1); in vpx_mbpost_proc_down_sse2()
65 sumsq_1 = _mm_add_epi32(sumsq_1, _mm_unpackhi_epi16(a, zero)); in vpx_mbpost_proc_down_sse2()
95 sumsq_1 = _mm_sub_epi32(sumsq_1, _mm_unpackhi_epi16(above_sq, zero)); in vpx_mbpost_proc_down_sse2()
99 sumsq_1 = _mm_add_epi32(sumsq_1, _mm_unpackhi_epi16(below_sq, zero)); in vpx_mbpost_proc_down_sse2()
111 mask_1 = _mm_sub_epi32(mask_1, _mm_unpackhi_epi16(multmp_0, multmp_1)); in vpx_mbpost_proc_down_sse2()
Dinv_txfm_sse2.c20 const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]); in transpose_16bit_4()
23 res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1); in transpose_16bit_4()
92 u[1] = _mm_unpackhi_epi16(in[0], in[1]); in idct4_sse2()
256 s[1] = _mm_unpackhi_epi16(in[7], in[0]); in iadst8_sse2()
258 s[3] = _mm_unpackhi_epi16(in[5], in[2]); in iadst8_sse2()
260 s[5] = _mm_unpackhi_epi16(in[3], in[4]); in iadst8_sse2()
262 s[7] = _mm_unpackhi_epi16(in[1], in[6]); in iadst8_sse2()
333 u[1] = _mm_unpackhi_epi16(in[4], in[5]); in iadst8_sse2()
335 u[3] = _mm_unpackhi_epi16(in[6], in[7]); in iadst8_sse2()
372 u[1] = _mm_unpackhi_epi16(s[2], s[3]); in iadst8_sse2()
[all …]
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-sse41-mul16-ld64-x32.c85 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
87 …__m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
89 …__m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
91 …__m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
94 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
96 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vyprod89ABCDEFlo, vyprod89ABCDEFhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
98 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vyprodGHIJKLMNlo, vyprodGHIJKLMNhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
100 vaccSTUV = _mm_add_epi32(vaccSTUV, _mm_unpackhi_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
162 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
165 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
Dminmax-sse2-mul16-ld64-x32.c93 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
95 …__m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
97 …__m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
99 …__m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodOPQRSTUVlo, vxprodO… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
102 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
104 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vyprod89ABCDEFlo, vyprod89ABCDEFhi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
106 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vyprodGHIJKLMNlo, vyprodGHIJKLMNhi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
108 vaccSTUV = _mm_add_epi32(vaccSTUV, _mm_unpackhi_epi16(vyprodOPQRSTUVlo, vyprodOPQRSTUVhi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
172 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
175 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
Dminmax-sse41-mul16-ld64-x24.c75 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
77 …__m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod89ABCDEFlo, vxprod8… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
79 …__m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprodGHIJKLMNlo, vxprodG… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
82 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
84 vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_unpackhi_epi16(vyprod89ABCDEFlo, vyprod89ABCDEFhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
86 vaccKLMN = _mm_add_epi32(vaccKLMN, _mm_unpackhi_epi16(vyprodGHIJKLMNlo, vyprodGHIJKLMNhi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
141 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
144 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
/external/libvpx/libvpx/vp9/encoder/x86/
Dvp9_dct_intrin_sse2.c61 const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]); in transpose_4x4()
303 v1 = _mm_unpackhi_epi16(u0, u1); in fdct8_sse2()
305 v3 = _mm_unpackhi_epi16(u2, u3); in fdct8_sse2()
343 u1 = _mm_unpackhi_epi16(s6, s5); in fdct8_sse2()
371 u1 = _mm_unpackhi_epi16(s0, s3); in fdct8_sse2()
373 u3 = _mm_unpackhi_epi16(s1, s2); in fdct8_sse2()
450 s1 = _mm_unpackhi_epi16(in0, in1); in fadst8_sse2()
452 s3 = _mm_unpackhi_epi16(in2, in3); in fadst8_sse2()
454 s5 = _mm_unpackhi_epi16(in4, in5); in fadst8_sse2()
456 s7 = _mm_unpackhi_epi16(in6, in7); in fadst8_sse2()
[all …]
/external/libaom/libaom/av1/encoder/x86/
Dwedge_utils_sse2.c52 const __m128i v_rd0h_w = _mm_unpackhi_epi16(v_d0_w, v_r0_w); in av1_wedge_sse_from_residuals_sse2()
54 const __m128i v_rd1h_w = _mm_unpackhi_epi16(v_d1_w, v_r1_w); in av1_wedge_sse_from_residuals_sse2()
59 const __m128i v_m0h_w = _mm_unpackhi_epi16(v_m0_w, v_mask_max_w); in av1_wedge_sse_from_residuals_sse2()
61 const __m128i v_m1h_w = _mm_unpackhi_epi16(v_m1_w, v_mask_max_w); in av1_wedge_sse_from_residuals_sse2()
212 const __m128i v_ab0h_w = _mm_unpackhi_epi16(v_a0_w, v_b0_w); in av1_wedge_compute_delta_squares_sse2()
214 const __m128i v_ab1h_w = _mm_unpackhi_epi16(v_a1_w, v_b1_w); in av1_wedge_compute_delta_squares_sse2()
216 const __m128i v_ab2h_w = _mm_unpackhi_epi16(v_a2_w, v_b2_w); in av1_wedge_compute_delta_squares_sse2()
218 const __m128i v_ab3h_w = _mm_unpackhi_epi16(v_a3_w, v_b3_w); in av1_wedge_compute_delta_squares_sse2()
/external/XNNPACK/src/x8-zip/
Dx4-sse2.c40 const __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo); in xnn_x8_zip_x4_ukernel__sse2()
42 const __m128i vxyzw3 = _mm_unpackhi_epi16(vxy_hi, vzw_hi); in xnn_x8_zip_x4_ukernel__sse2()
61 const __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo); in xnn_x8_zip_x4_ukernel__sse2()
63 const __m128i vxyzw3 = _mm_unpackhi_epi16(vxy_hi, vzw_hi); in xnn_x8_zip_x4_ukernel__sse2()

123456789