/external/libaom/libaom/aom_dsp/x86/ |
D | transpose_sse2.h | 72 const __m128i c0 = _mm_unpacklo_epi32(b0, b2); in transpose_8bit_8x8() 74 const __m128i c2 = _mm_unpacklo_epi32(b1, b3); in transpose_8bit_8x8() 114 out[0] = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_4x4() 146 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_4x8() 147 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in transpose_16bit_4x8() 185 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_8x4() 186 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in transpose_16bit_8x4() 248 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_8x8() 249 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in transpose_16bit_8x8() 250 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in transpose_16bit_8x8() [all …]
|
D | lpf_common_sse2.h | 38 ww0 = _mm_unpacklo_epi32(w0, w1); // 00 10 20 30 01 11 21 31 in highbd_transpose6x6_sse2() 53 ww0 = _mm_unpacklo_epi32(w3, w4); // 04 14 24 34 05 15 25 35 in highbd_transpose6x6_sse2() 69 ww0 = _mm_unpacklo_epi32(w0, w1); // 00 10 20 30 01 11 21 31 in highbd_transpose4x8_8x4_low_sse2() 88 ww2 = _mm_unpacklo_epi32(w0, w1); // 04 14 24 34 05 15 25 35 in highbd_transpose4x8_8x4_high_sse2() 144 ww0 = _mm_unpacklo_epi32(w0, w1); // 00 10 20 30 01 11 21 31 in highbd_transpose8x8_low_sse2() 145 ww1 = _mm_unpacklo_epi32(w2, w3); // 40 50 60 70 41 51 61 71 in highbd_transpose8x8_low_sse2() 177 ww0 = _mm_unpacklo_epi32(w0, w1); // 04 14 24 34 05 15 25 35 in highbd_transpose8x8_high_sse2() 178 ww1 = _mm_unpacklo_epi32(w2, w3); // 44 54 64 74 45 55 65 75 in highbd_transpose8x8_high_sse2() 337 *d0 = _mm_unpacklo_epi32( in transpose8x8_low_sse2() 376 *d0d1 = _mm_unpacklo_epi32( in transpose8x8_sse2() [all …]
|
D | masked_sad4d_ssse3.c | 81 res0 = _mm_add_epi32(_mm_unpacklo_epi32(res0, res1), in masked_sadx4d_ssse3() 83 res2 = _mm_add_epi32(_mm_unpacklo_epi32(res2, res3), in masked_sadx4d_ssse3() 148 res0 = _mm_add_epi32(_mm_unpacklo_epi32(res0, res1), in aom_masked_sad8xhx4d_ssse3() 150 res2 = _mm_add_epi32(_mm_unpacklo_epi32(res2, res3), in aom_masked_sad8xhx4d_ssse3() 157 a = _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)ref##idx), \ 185 const __m128i src = _mm_unpacklo_epi32( in aom_masked_sad4xhx4d_ssse3() 189 _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)b_ptr), in aom_masked_sad4xhx4d_ssse3() 192 _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)m_ptr), in aom_masked_sad4xhx4d_ssse3() 212 res0 = _mm_unpacklo_epi32(res0, res1); in aom_masked_sad4xhx4d_ssse3() 213 res2 = _mm_unpacklo_epi32(res2, res3); in aom_masked_sad4xhx4d_ssse3()
|
D | loopfilter_sse2.c | 54 *q7p7 = _mm_unpacklo_epi32( in transpose_pq_14_sse2() 65 *q4p4 = _mm_unpacklo_epi32( in transpose_pq_14_sse2() 68 *q3p3 = _mm_unpacklo_epi32( in transpose_pq_14_sse2() 79 *q0p0 = _mm_unpacklo_epi32( in transpose_pq_14_sse2() 111 d0 = _mm_unpacklo_epi32( in transpose_pq_14_inv_sse2() 130 d1 = _mm_unpacklo_epi32( in transpose_pq_14_inv_sse2() 163 filter = _mm_unpacklo_epi32(filter, filter); in filter4_sse2() 179 filter = _mm_unpacklo_epi32(filter, filter); in filter4_sse2() 181 filter2filter1 = _mm_unpacklo_epi32(filter2filter1, filter); in filter4_sse2() 252 q1p1 = _mm_unpacklo_epi32(*p1, *q1); in lpf_internal_4_sse2() [all …]
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | transpose_sse2.h | 71 const __m128i c0 = _mm_unpacklo_epi32(b0, b2); in transpose_8bit_8x8() 73 const __m128i c2 = _mm_unpacklo_epi32(b1, b3); in transpose_8bit_8x8() 111 out[0] = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_4x4() 141 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_4x8() 142 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in transpose_16bit_4x8() 195 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in transpose_16bit_8x8() 196 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in transpose_16bit_8x8() 197 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in transpose_16bit_8x8() 198 const __m128i b3 = _mm_unpacklo_epi32(a6, a7); in transpose_16bit_8x8() 255 const __m128i a0 = _mm_unpacklo_epi32(in[0], in[1]); in transpose_32bit_4x4() [all …]
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse2-lut64-p2-div-x20.c | 76 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 78 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 85 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 87 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 94 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 96 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 103 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 105 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 112 const __m128i vlGH = _mm_unpacklo_epi32(vlG, vlH); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() 114 const __m128i vlIJ = _mm_unpacklo_epi32(vlI, vlJ); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20() [all …]
|
D | sse2-lut64-p2-div-x24.c | 81 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 83 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 90 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 92 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 99 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 101 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 108 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 110 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 117 const __m128i vlGH = _mm_unpacklo_epi32(vlG, vlH); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() 119 const __m128i vlIJ = _mm_unpacklo_epi32(vlI, vlJ); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() [all …]
|
D | sse2-lut64-p2-div-x12.c | 66 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 68 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 75 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 77 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 84 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 86 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 96 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 98 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 107 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() 109 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12() [all …]
|
D | sse2-lut64-p2-div-x16.c | 71 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 73 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 80 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 82 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 89 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 91 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 98 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 100 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 110 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() 112 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16() [all …]
|
D | sse2-lut64-p2-div-x8.c | 61 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 63 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 70 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 72 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 82 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 84 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 93 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 95 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 152 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() 154 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8() [all …]
|
D | sse2-lut64-p2-div-x4.c | 54 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 56 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 61 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 63 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 101 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 103 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 108 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() 110 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
|
/external/libgav1/libgav1/src/dsp/x86/ |
D | transpose_sse4.h | 114 out[0] = _mm_unpacklo_epi32(b0, b1); in Transpose8x8To4x16_U8() 116 out[2] = _mm_unpacklo_epi32(b2, b3); in Transpose8x8To4x16_U8() 134 const __m128i dcba_lo = _mm_unpacklo_epi32(ba, dc); in Transpose4x4_U16() 173 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in Transpose4x8To8x4_U16() 174 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in Transpose4x8To8x4_U16() 212 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in Transpose8x4To4x8_U16() 213 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in Transpose8x4To4x8_U16() 275 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); in Transpose8x8_U16() 276 const __m128i b1 = _mm_unpacklo_epi32(a2, a3); in Transpose8x8_U16() 277 const __m128i b2 = _mm_unpacklo_epi32(a4, a5); in Transpose8x8_U16() [all …]
|
D | loop_filter_sse4.cc | 114 const __m128i _hev = _mm_unpacklo_epi32(hev, hev); in Filter4() 122 a = _mm_unpacklo_epi32(a, a); in Filter4() 130 _mm_unpacklo_epi32(t1, _mm_cmpeq_epi8(t1, t1)); in Filter4() 157 const __m128i qp1 = _mm_unpacklo_epi32(p1, q1); in Horizontal4() 158 const __m128i qp0 = _mm_unpacklo_epi32(p0, q0); in Horizontal4() 159 const __m128i q1q0 = _mm_unpacklo_epi32(q0, q1); in Horizontal4() 160 const __m128i p1p0 = _mm_unpacklo_epi32(p0, p1); in Horizontal4() 335 const __m128i qp2 = _mm_unpacklo_epi32(p2, q2); in Horizontal6() 336 const __m128i qp1 = _mm_unpacklo_epi32(p1, q1); in Horizontal6() 337 const __m128i qp0 = _mm_unpacklo_epi32(p0, q0); in Horizontal6() [all …]
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-sse2-rr2-lut16-p3-x12.c | 72 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 74 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 81 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 83 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 90 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 92 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 102 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 104 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 113 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 115 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() [all …]
|
D | velu-sse2-rr2-lut16-p3-x16.c | 77 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 79 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 86 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 88 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 95 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 97 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 104 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 106 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 116 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 118 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() [all …]
|
D | velu-sse2-rr2-lut16-p3-x20.c | 82 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 84 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 91 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 93 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 100 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 102 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 109 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 111 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 118 const __m128i vlGH = _mm_unpacklo_epi32(vlG, vlH); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 120 const __m128i vlIJ = _mm_unpacklo_epi32(vlI, vlJ); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() [all …]
|
D | velu-sse2-rr2-lut16-p3-x24.c | 87 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 89 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 96 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 98 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 105 const __m128i vl89 = _mm_unpacklo_epi32(vl8, vl9); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 107 const __m128i vlAB = _mm_unpacklo_epi32(vlA, vlB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 114 const __m128i vlCD = _mm_unpacklo_epi32(vlC, vlD); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 116 const __m128i vlEF = _mm_unpacklo_epi32(vlE, vlF); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 123 const __m128i vlGH = _mm_unpacklo_epi32(vlG, vlH); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 125 const __m128i vlIJ = _mm_unpacklo_epi32(vlI, vlJ); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() [all …]
|
D | velu-sse2-rr2-lut16-p3-x8.c | 67 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 69 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 76 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 78 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 88 const __m128i vl01 = _mm_unpacklo_epi32(vl0, vl1); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 90 const __m128i vl23 = _mm_unpacklo_epi32(vl2, vl3); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 99 const __m128i vl45 = _mm_unpacklo_epi32(vl4, vl5); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 101 const __m128i vl67 = _mm_unpacklo_epi32(vl6, vl7); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 161 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 163 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() [all …]
|
D | velu-sse2-rr2-lut16-p3-x4.c | 61 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 63 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 68 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 70 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 109 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 111 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 116 const __m128i vl_lo = _mm_unpacklo_epi32(vl_ll, vl_lh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() 118 const __m128i vl_hi = _mm_unpacklo_epi32(vl_hl, vl_hh); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
|
/external/XNNPACK/src/x32-zip/ |
D | x4-sse2.c | 37 const __m128i vxy_lo = _mm_unpacklo_epi32(vx, vy); in xnn_x32_zip_x4_ukernel__sse2() 39 const __m128i vzw_lo = _mm_unpacklo_epi32(vz, vw); in xnn_x32_zip_x4_ukernel__sse2() 65 const __m128i vxy = _mm_unpacklo_epi32(vx, vy); in xnn_x32_zip_x4_ukernel__sse2() 66 const __m128i vzw = _mm_unpacklo_epi32(vz, vw); in xnn_x32_zip_x4_ukernel__sse2()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x4c8-minmax-sse2-ld64.c | 111 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() 112 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() 113 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() 114 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() 115 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() 116 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() 118 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() 119 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() 120 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
|
D | 3x4c8-minmax-sse2-ld128.c | 109 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() 110 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() 111 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() 112 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() 113 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() 114 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() 116 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() 117 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() 118 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
|
D | 3x4c8-xw-minmax-sse2.c | 107 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() 108 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() 109 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() 110 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() 111 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() 112 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() 114 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() 115 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() 116 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x4c8-minmax-sse2-ld128.c | 126 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() 127 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() 128 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() 129 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() 130 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() 131 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() 133 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() 134 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() 135 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
|
D | 3x4c8-minmax-sse2-ld64.c | 128 …const __m128i vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() 129 …const __m128i vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() 130 …const __m128i vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() 131 …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() 132 …const __m128i vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() 133 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() 135 …__m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() 136 …__m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() 137 …__m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
|