Home
last modified time | relevance | path

Searched refs:_mm_madd_epi16 (Results 1 – 25 of 210) sorted by relevance

123456789

/external/libhevc/common/x86/
Dihevc_32x32_itrans_recon_sse42_intr.c295 m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_1, m_coeff3); in ihevc_itrans_recon_32x32_sse42()
318 m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_1, m_coeff3); in ihevc_itrans_recon_32x32_sse42()
358 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1); in ihevc_itrans_recon_32x32_sse42()
374 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff1); in ihevc_itrans_recon_32x32_sse42()
387 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff2); in ihevc_itrans_recon_32x32_sse42()
401 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff2); in ihevc_itrans_recon_32x32_sse42()
415 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3); in ihevc_itrans_recon_32x32_sse42()
429 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff3); in ihevc_itrans_recon_32x32_sse42()
446 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff4); in ihevc_itrans_recon_32x32_sse42()
460 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff4); in ihevc_itrans_recon_32x32_sse42()
[all …]
Dihevc_itrans_recon_32x32_ssse3_intr.c304 m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_1, m_coeff3); in ihevc_itrans_recon_32x32_ssse3()
327 m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_1, m_coeff3); in ihevc_itrans_recon_32x32_ssse3()
368 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1); in ihevc_itrans_recon_32x32_ssse3()
385 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff1); in ihevc_itrans_recon_32x32_ssse3()
398 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff2); in ihevc_itrans_recon_32x32_ssse3()
412 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff2); in ihevc_itrans_recon_32x32_ssse3()
426 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3); in ihevc_itrans_recon_32x32_ssse3()
440 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff3); in ihevc_itrans_recon_32x32_ssse3()
457 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff4); in ihevc_itrans_recon_32x32_ssse3()
471 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff4); in ihevc_itrans_recon_32x32_ssse3()
[all …]
Dihevc_16x16_itrans_recon_sse42_intr.c256 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1); in ihevc_itrans_recon_16x16_sse42()
275 m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff1); in ihevc_itrans_recon_16x16_sse42()
294 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3); in ihevc_itrans_recon_16x16_sse42()
311 m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff3); in ihevc_itrans_recon_16x16_sse42()
331 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1); in ihevc_itrans_recon_16x16_sse42()
347 m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff1); in ihevc_itrans_recon_16x16_sse42()
365 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3); in ihevc_itrans_recon_16x16_sse42()
380 m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff3); in ihevc_itrans_recon_16x16_sse42()
406 m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); in ihevc_itrans_recon_16x16_sse42()
407 m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); in ihevc_itrans_recon_16x16_sse42()
[all …]
Dihevc_itrans_recon_16x16_ssse3_intr.c262 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1); in ihevc_itrans_recon_16x16_ssse3()
281 m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff1); in ihevc_itrans_recon_16x16_ssse3()
300 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3); in ihevc_itrans_recon_16x16_ssse3()
317 m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff3); in ihevc_itrans_recon_16x16_ssse3()
337 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1); in ihevc_itrans_recon_16x16_ssse3()
353 m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff1); in ihevc_itrans_recon_16x16_ssse3()
371 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3); in ihevc_itrans_recon_16x16_ssse3()
386 m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff3); in ihevc_itrans_recon_16x16_ssse3()
412 m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); in ihevc_itrans_recon_16x16_ssse3()
413 m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); in ihevc_itrans_recon_16x16_ssse3()
[all …]
Dihevc_itrans_recon_sse42_intr.c887 m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); in ihevc_itrans_recon_8x8_sse42()
888 m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); in ihevc_itrans_recon_8x8_sse42()
905 m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1); in ihevc_itrans_recon_8x8_sse42()
906 m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2); in ihevc_itrans_recon_8x8_sse42()
941 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); in ihevc_itrans_recon_8x8_sse42()
967 m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); in ihevc_itrans_recon_8x8_sse42()
996 m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); in ihevc_itrans_recon_8x8_sse42()
1026 m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); in ihevc_itrans_recon_8x8_sse42()
1095 m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); in ihevc_itrans_recon_8x8_sse42()
1096 m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); in ihevc_itrans_recon_8x8_sse42()
[all …]
/external/libvpx/libvpx/vp9/encoder/x86/
Dvp9_dct_intrin_sse2.c89 u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16); // 0 in fdct4_sse2()
90 u[1] = _mm_madd_epi16(v[0], k__cospi_p16_m16); // 2 in fdct4_sse2()
91 u[2] = _mm_madd_epi16(v[1], k__cospi_p08_p24); // 1 in fdct4_sse2()
92 u[3] = _mm_madd_epi16(v[1], k__cospi_p24_m08); // 3 in fdct4_sse2()
125 v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p02); // s0 + s2 in fadst4_sse2()
126 v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p04); // s4 + s5 in fadst4_sse2()
127 v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03); // x1 in fadst4_sse2()
128 v[3] = _mm_madd_epi16(u[0], k__sinpi_p04_m01); // s1 - s3 in fadst4_sse2()
129 v[4] = _mm_madd_epi16(u[1], k__sinpi_m03_p02); // -s4 + s6 in fadst4_sse2()
130 v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s4 in fadst4_sse2()
[all …]
/external/libaom/libaom/aom_dsp/x86/
Daom_subpixel_8t_intrin_sse2.c50 __m128i d1 = _mm_madd_epi16(ss_1_1, secondFilters); in aom_filter_block1d16_h4_sse2()
51 __m128i d2 = _mm_madd_epi16(ss_2_1, thirdFilters); in aom_filter_block1d16_h4_sse2()
58 d1 = _mm_madd_epi16(ss_1_2, secondFilters); in aom_filter_block1d16_h4_sse2()
59 d2 = _mm_madd_epi16(ss_2_2, thirdFilters); in aom_filter_block1d16_h4_sse2()
74 d1 = _mm_madd_epi16(ss_1_1, secondFilters); in aom_filter_block1d16_h4_sse2()
75 d2 = _mm_madd_epi16(ss_2_1, thirdFilters); in aom_filter_block1d16_h4_sse2()
82 d1 = _mm_madd_epi16(ss_1_2, secondFilters); in aom_filter_block1d16_h4_sse2()
83 d2 = _mm_madd_epi16(ss_2_2, thirdFilters); in aom_filter_block1d16_h4_sse2()
171 tmp_0 = _mm_madd_epi16(resReg23_lo_1, secondFilters); in aom_filter_block1d16_v4_sse2()
172 tmp_1 = _mm_madd_epi16(resReg23_lo_2, secondFilters); in aom_filter_block1d16_v4_sse2()
[all …]
/external/libaom/libaom/av1/encoder/x86/
Dwedge_utils_sse2.c63 const __m128i v_t0l_d = _mm_madd_epi16(v_rd0l_w, v_m0l_w); in av1_wedge_sse_from_residuals_sse2()
64 const __m128i v_t0h_d = _mm_madd_epi16(v_rd0h_w, v_m0h_w); in av1_wedge_sse_from_residuals_sse2()
65 const __m128i v_t1l_d = _mm_madd_epi16(v_rd1l_w, v_m1l_w); in av1_wedge_sse_from_residuals_sse2()
66 const __m128i v_t1h_d = _mm_madd_epi16(v_rd1h_w, v_m1h_w); in av1_wedge_sse_from_residuals_sse2()
71 const __m128i v_sq0_d = _mm_madd_epi16(v_t0_w, v_t0_w); in av1_wedge_sse_from_residuals_sse2()
72 const __m128i v_sq1_d = _mm_madd_epi16(v_t1_w, v_t1_w); in av1_wedge_sse_from_residuals_sse2()
139 const __m128i v_p0_d = _mm_madd_epi16(v_d0_w, v_m0_w); in av1_wedge_sign_from_residuals_sse2()
140 const __m128i v_p1_d = _mm_madd_epi16(v_d1_w, v_m1_w); in av1_wedge_sign_from_residuals_sse2()
141 const __m128i v_p2_d = _mm_madd_epi16(v_d2_w, v_m2_w); in av1_wedge_sign_from_residuals_sse2()
142 const __m128i v_p3_d = _mm_madd_epi16(v_d3_w, v_m3_w); in av1_wedge_sign_from_residuals_sse2()
[all …]
/external/libvpx/libvpx/vpx_dsp/x86/
Dinv_txfm_sse2.c121 v[0] = _mm_madd_epi16(in[0], k__sinpi_1_3); // s_1 * x0 + s_3 * x1 in iadst4_sse2()
122 v[1] = _mm_madd_epi16(in[1], k__sinpi_4_2); // s_4 * x2 + s_2 * x3 in iadst4_sse2()
123 v[2] = _mm_madd_epi16(in[0], k__sinpi_2_3); // s_2 * x0 + s_3 * x1 in iadst4_sse2()
124 v[3] = _mm_madd_epi16(in[1], k__sinpi_1_4); // s_1 * x2 + s_4 * x3 in iadst4_sse2()
125 v[4] = _mm_madd_epi16(in[0], k__sinpi_12_n3); // (s_1 + s_2) * x0 - s_3 * x1 in iadst4_sse2()
133 u[2] = _mm_madd_epi16(in[0], k__sinpi_1_3); in iadst4_sse2()
264 u[0] = _mm_madd_epi16(s[0], k__cospi_p02_p30); in iadst8_sse2()
265 u[1] = _mm_madd_epi16(s[1], k__cospi_p02_p30); in iadst8_sse2()
266 u[2] = _mm_madd_epi16(s[0], k__cospi_p30_m02); in iadst8_sse2()
267 u[3] = _mm_madd_epi16(s[1], k__cospi_p30_m02); in iadst8_sse2()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D4x4c2-xw-minmax-sse41.c86 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
88 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
90 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
92 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
96 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
98 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
100 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
102 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
106 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
108 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41()
[all …]
D4x4c2-minmax-sse41-ld128.c89 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
91 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
93 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
95 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
98 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
100 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
102 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
104 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
111 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
113 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
[all …]
D4x4c2-minmax-sse41-ld64.c87 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
89 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
91 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
93 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
98 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
100 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
102 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
104 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
109 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
111 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
[all …]
D4x4c2-xw-minmax-sse2.c86 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
88 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
90 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
92 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
96 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
98 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
100 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
102 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
106 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
108 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2()
[all …]
D4x4c2-minmax-sse2-ld128.c89 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
91 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
93 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
95 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
98 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
100 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
102 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
104 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
111 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
113 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128()
[all …]
D4x4c2-minmax-sse2-ld64.c87 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
89 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
91 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
93 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
98 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
100 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
102 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
104 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
109 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
111 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64()
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
D4x4c2-minmax-sse41-ld128.c106 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
108 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
110 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
112 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
115 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
117 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
119 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
121 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
128 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
130 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128()
[all …]
D4x4c2-minmax-sse41-ld64.c104 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
106 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
108 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
110 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
115 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
117 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
119 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
121 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
126 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
128 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
[all …]
D4x4c2-minmax-sse2-ld128.c106 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
108 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
110 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
112 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
115 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
117 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
119 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
121 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
128 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
130 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128()
[all …]
D4x4c2-minmax-ssse3-ld64.c104 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
106 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
108 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
110 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
115 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
117 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
119 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
121 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
126 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
128 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64()
[all …]
D4x4c2-minmax-ssse3-ld128.c106 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
108 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
110 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
112 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
115 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
117 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
119 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
121 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
128 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
130 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128()
[all …]
/external/flac/src/libFLAC/
Dlpc_intrin_sse2.c83 summ = _mm_madd_epi16(q11, _mm_loadu_si128((const __m128i*)(data+i-12))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
84 …mull = _mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(sum… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
85 …mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
86 …mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
87 …mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
88 …mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
89 …mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
90 …mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
91 …mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
92 …mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ,… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()
[all …]
/external/libmpeg2/common/x86/
Dimpeg2_idct_recon_sse42_intr.c225 m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); in impeg2_idct_recon_sse42()
226 m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); in impeg2_idct_recon_sse42()
243 m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1); in impeg2_idct_recon_sse42()
244 m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2); in impeg2_idct_recon_sse42()
279 m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); in impeg2_idct_recon_sse42()
305 m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); in impeg2_idct_recon_sse42()
334 m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_60, m_coeff1); in impeg2_idct_recon_sse42()
364 m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_60, m_coeff3); in impeg2_idct_recon_sse42()
433 m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1); in impeg2_idct_recon_sse42()
434 m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2); in impeg2_idct_recon_sse42()
[all …]
/external/libaom/libaom/av1/common/x86/
Dhighbd_wiener_convolve_ssse3.c76 const __m128i res_0 = _mm_madd_epi16(data, coeff_01); in av1_highbd_wiener_convolve_add_src_ssse3()
78 _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23); in av1_highbd_wiener_convolve_add_src_ssse3()
80 _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45); in av1_highbd_wiener_convolve_add_src_ssse3()
82 _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67); in av1_highbd_wiener_convolve_add_src_ssse3()
91 _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01); in av1_highbd_wiener_convolve_add_src_ssse3()
93 _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23); in av1_highbd_wiener_convolve_add_src_ssse3()
95 _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45); in av1_highbd_wiener_convolve_add_src_ssse3()
97 _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67); in av1_highbd_wiener_convolve_add_src_ssse3()
154 const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01); in av1_highbd_wiener_convolve_add_src_ssse3()
155 const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); in av1_highbd_wiener_convolve_add_src_ssse3()
[all …]
Dwiener_convolve_sse2.c74 const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01); in av1_wiener_convolve_add_src_sse2()
76 const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); in av1_wiener_convolve_add_src_sse2()
78 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_wiener_convolve_add_src_sse2()
80 const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67); in av1_wiener_convolve_add_src_sse2()
89 const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01); in av1_wiener_convolve_add_src_sse2()
91 const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23); in av1_wiener_convolve_add_src_sse2()
93 const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45); in av1_wiener_convolve_add_src_sse2()
95 const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67); in av1_wiener_convolve_add_src_sse2()
152 const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01); in av1_wiener_convolve_add_src_sse2()
153 const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23); in av1_wiener_convolve_add_src_sse2()
[all …]
/external/XNNPACK/src/qu8-gemm/
D4x4c2-minmax-sse2.c89 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
91 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
93 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
95 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
101 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
103 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
105 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
107 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
113 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
115 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
[all …]

123456789