Home
last modified time | relevance | path

Searched refs:_mm_unpacklo_epi32 (Results 1 – 25 of 180) sorted by relevance

12345678

/external/libaom/libaom/aom_dsp/x86/
Dtranspose_sse2.h72 const __m128i c0 = _mm_unpacklo_epi32(b0, b2); in transpose_8bit_8x8()
74 const __m128i c2 = _mm_unpacklo_epi32(b1, b3); in transpose_8bit_8x8()
114 out[0] = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_4x4()
146 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_4x8()
147 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in transpose_16bit_4x8()
185 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_8x4()
186 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in transpose_16bit_8x4()
248 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_8x8()
249 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in transpose_16bit_8x8()
250 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in transpose_16bit_8x8()
[all …]
Dlpf_common_sse2.h38 ww0 = _mm_unpacklo_epi32(w0, w1); // 00 10 20 30 01 11 21 31 in highbd_transpose6x6_sse2()
53 ww0 = _mm_unpacklo_epi32(w3, w4); // 04 14 24 34 05 15 25 35 in highbd_transpose6x6_sse2()
69 ww0 = _mm_unpacklo_epi32(w0, w1); // 00 10 20 30 01 11 21 31 in highbd_transpose4x8_8x4_low_sse2()
88 ww2 = _mm_unpacklo_epi32(w0, w1); // 04 14 24 34 05 15 25 35 in highbd_transpose4x8_8x4_high_sse2()
144 ww0 = _mm_unpacklo_epi32(w0, w1); // 00 10 20 30 01 11 21 31 in highbd_transpose8x8_low_sse2()
145 ww1 = _mm_unpacklo_epi32(w2, w3); // 40 50 60 70 41 51 61 71 in highbd_transpose8x8_low_sse2()
177 ww0 = _mm_unpacklo_epi32(w0, w1); // 04 14 24 34 05 15 25 35 in highbd_transpose8x8_high_sse2()
178 ww1 = _mm_unpacklo_epi32(w2, w3); // 44 54 64 74 45 55 65 75 in highbd_transpose8x8_high_sse2()
337 *d0 = _mm_unpacklo_epi32( in transpose8x8_low_sse2()
376 *d0d1 = _mm_unpacklo_epi32( in transpose8x8_sse2()
[all …]
Dmasked_sad4d_ssse3.c81 res0 = _mm_add_epi32(_mm_unpacklo_epi32(res0, res1), in masked_sadx4d_ssse3()
83 res2 = _mm_add_epi32(_mm_unpacklo_epi32(res2, res3), in masked_sadx4d_ssse3()
148 res0 = _mm_add_epi32(_mm_unpacklo_epi32(res0, res1), in aom_masked_sad8xhx4d_ssse3()
150 res2 = _mm_add_epi32(_mm_unpacklo_epi32(res2, res3), in aom_masked_sad8xhx4d_ssse3()
157 a = _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)ref##idx), \
185 const __m128i src = _mm_unpacklo_epi32( in aom_masked_sad4xhx4d_ssse3()
189 _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)b_ptr), in aom_masked_sad4xhx4d_ssse3()
192 _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)m_ptr), in aom_masked_sad4xhx4d_ssse3()
212 res0 = _mm_unpacklo_epi32(res0, res1); in aom_masked_sad4xhx4d_ssse3()
213 res2 = _mm_unpacklo_epi32(res2, res3); in aom_masked_sad4xhx4d_ssse3()
Dloopfilter_sse2.c54 *q7p7 = _mm_unpacklo_epi32( in transpose_pq_14_sse2()
65 *q4p4 = _mm_unpacklo_epi32( in transpose_pq_14_sse2()
68 *q3p3 = _mm_unpacklo_epi32( in transpose_pq_14_sse2()
79 *q0p0 = _mm_unpacklo_epi32( in transpose_pq_14_sse2()
111 d0 = _mm_unpacklo_epi32( in transpose_pq_14_inv_sse2()
130 d1 = _mm_unpacklo_epi32( in transpose_pq_14_inv_sse2()
163 filter = _mm_unpacklo_epi32(filter, filter); in filter4_sse2()
179 filter = _mm_unpacklo_epi32(filter, filter); in filter4_sse2()
181 filter2filter1 = _mm_unpacklo_epi32(filter2filter1, filter); in filter4_sse2()
252 q1p1 = _mm_unpacklo_epi32(*p1, *q1); in lpf_internal_4_sse2()
[all …]
/external/libvpx/libvpx/vpx_dsp/x86/
Dtranspose_sse2.h71 const __m128i c0 = _mm_unpacklo_epi32(b0, b2); in transpose_8bit_8x8()
73 const __m128i c2 = _mm_unpacklo_epi32(b1, b3); in transpose_8bit_8x8()
111 out[0] = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_4x4()
141 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_4x8()
142 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in transpose_16bit_4x8()
195 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_8x8()
196 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in transpose_16bit_8x8()
197 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in transpose_16bit_8x8()
198 const __m128i b3 = _mm_unpacklo_epi32(a6, a7); in transpose_16bit_8x8()
255 const __m128i a0 = _mm_unpacklo_epi32(in[0], in[1]); in transpose_32bit_4x4()
[all …]
/external/XNNPACK/src/f32-sigmoid/gen/
Dsse2-lut64-p2-div-x20.c76 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
78 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
85 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
87 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
94 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
96 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
103 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
105 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
112 const __m128i vlGH = _mm_unpacklo_epi32(vlG, vlH); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
114 const __m128i vlIJ = _mm_unpacklo_epi32(vlI, vlJ); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
[all …]
Dsse2-lut64-p2-div-x24.c81 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
83 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
90 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
92 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
99 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
101 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
108 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
110 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
117 const __m128i vlGH = _mm_unpacklo_epi32(vlG, vlH); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
119 const __m128i vlIJ = _mm_unpacklo_epi32(vlI, vlJ); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
[all …]
Dsse2-lut64-p2-div-x12.c66 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
68 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
75 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
77 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
84 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
86 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
96 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
98 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
107 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
109 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
[all …]
Dsse2-lut64-p2-div-x16.c71 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
73 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
80 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
82 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
89 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
91 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
98 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
100 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
110 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
112 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
[all …]
Dsse2-lut64-p2-div-x8.c61 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
63 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
70 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
72 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
82 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
84 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
93 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
95 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
152 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
154 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
[all …]
Dsse2-lut64-p2-div-x4.c54 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
56 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
61 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
63 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
101 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
103 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
108 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
110 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
/external/libgav1/libgav1/src/dsp/x86/
Dtranspose_sse4.h114 out[0] = _mm_unpacklo_epi32(b0, b1); in Transpose8x8To4x16_U8()
116 out[2] = _mm_unpacklo_epi32(b2, b3); in Transpose8x8To4x16_U8()
134 const __m128i dcba_lo = _mm_unpacklo_epi32(ba, dc); in Transpose4x4_U16()
173 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in Transpose4x8To8x4_U16()
174 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in Transpose4x8To8x4_U16()
212 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in Transpose8x4To4x8_U16()
213 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in Transpose8x4To4x8_U16()
275 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in Transpose8x8_U16()
276 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in Transpose8x8_U16()
277 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in Transpose8x8_U16()
[all …]
Dloop_filter_sse4.cc114 const __m128i _hev = _mm_unpacklo_epi32(hev, hev); in Filter4()
122 a = _mm_unpacklo_epi32(a, a); in Filter4()
130 _mm_unpacklo_epi32(t1, _mm_cmpeq_epi8(t1, t1)); in Filter4()
157 const __m128i qp1 = _mm_unpacklo_epi32(p1, q1); in Horizontal4()
158 const __m128i qp0 = _mm_unpacklo_epi32(p0, q0); in Horizontal4()
159 const __m128i q1q0 = _mm_unpacklo_epi32(q0, q1); in Horizontal4()
160 const __m128i p1p0 = _mm_unpacklo_epi32(p0, p1); in Horizontal4()
335 const __m128i qp2 = _mm_unpacklo_epi32(p2, q2); in Horizontal6()
336 const __m128i qp1 = _mm_unpacklo_epi32(p1, q1); in Horizontal6()
337 const __m128i qp0 = _mm_unpacklo_epi32(p0, q0); in Horizontal6()
[all …]
/external/XNNPACK/src/f32-velu/gen/
Dvelu-sse2-rr2-lut16-p3-x12.c72 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
74 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
81 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
83 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
90 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
92 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
102 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
104 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
113 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
115 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
[all …]
Dvelu-sse2-rr2-lut16-p3-x16.c77 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
79 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
86 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
88 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
95 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
97 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
104 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
106 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
116 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
118 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
[all …]
Dvelu-sse2-rr2-lut16-p3-x20.c82 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
84 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
91 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
93 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
100 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
102 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
109 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
111 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
118 const __m128i vlGH = _mm_unpacklo_epi32(vlG, vlH); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
120 const __m128i vlIJ = _mm_unpacklo_epi32(vlI, vlJ); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
[all …]
Dvelu-sse2-rr2-lut16-p3-x24.c87 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
89 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
96 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
98 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
105 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
107 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
114 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
116 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
123 const __m128i vlGH = _mm_unpacklo_epi32(vlG, vlH); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
125 const __m128i vlIJ = _mm_unpacklo_epi32(vlI, vlJ); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
[all …]
Dvelu-sse2-rr2-lut16-p3-x8.c67 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
69 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
76 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
78 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
88 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
90 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
99 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
101 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
161 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
163 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
[all …]
Dvelu-sse2-rr2-lut16-p3-x4.c61 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
63 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
68 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
70 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
109 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
111 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
116 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
118 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
/external/XNNPACK/src/x32-zip/
Dx4-sse2.c37 const __m128i vxy_lo = _mm_unpacklo_epi32(vx, vy); in xnn_x32_zip_x4_ukernel__sse2()
39 const __m128i vzw_lo = _mm_unpacklo_epi32(vz, vw); in xnn_x32_zip_x4_ukernel__sse2()
65 const __m128i vxy = _mm_unpacklo_epi32(vx, vy); in xnn_x32_zip_x4_ukernel__sse2()
66 const __m128i vzw = _mm_unpacklo_epi32(vz, vw); in xnn_x32_zip_x4_ukernel__sse2()
/external/XNNPACK/src/qs8-gemm/gen/
D3x4c8-minmax-sse2-ld64.c111 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
112 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
113 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
114 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
115 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
116 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
118 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
119 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
120 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
D3x4c8-minmax-sse2-ld128.c109 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
110 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
111 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
112 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
113 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
114 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
116 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
117 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
118 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
D3x4c8-xw-minmax-sse2.c107 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
108 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
109 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
110 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
111 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
112 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
114 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
115 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
116 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
/external/XNNPACK/src/qs8-igemm/gen/
D3x4c8-minmax-sse2-ld128.c126 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
127 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
128 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
129 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
130 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
131 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
133 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
134 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
135 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
D3x4c8-minmax-sse2-ld64.c128 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
129 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
130 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
131 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
132 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
133 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
135 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
136 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
137 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()

12345678