/external/libhevc/common/x86/
ihevc_32x32_itrans_recon_sse42_intr.c  (all matches in ihevc_itrans_recon_32x32_sse42())
    295  m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_1, m_coeff3);
    318  m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_1, m_coeff3);
    358  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1);
    374  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff1);
    387  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff2);
    401  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff2);
    415  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3);
    429  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff3);
    446  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff4);
    460  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff4);
    [all …]

ihevc_itrans_recon_32x32_ssse3_intr.c  (all matches in ihevc_itrans_recon_32x32_ssse3())
    304  m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_1, m_coeff3);
    327  m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_1, m_coeff3);
    368  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1);
    385  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff1);
    398  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff2);
    412  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff2);
    426  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3);
    440  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff3);
    457  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff4);
    471  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_14, m_coeff4);
    [all …]

ihevc_16x16_itrans_recon_sse42_intr.c  (all matches in ihevc_itrans_recon_16x16_sse42())
    256  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1);
    275  m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff1);
    294  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3);
    311  m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff3);
    331  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1);
    347  m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff1);
    365  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3);
    380  m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff3);
    406  m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
    407  m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
    [all …]

ihevc_itrans_recon_16x16_ssse3_intr.c  (all matches in ihevc_itrans_recon_16x16_ssse3())
    262  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1);
    281  m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff1);
    300  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3);
    317  m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff3);
    337  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff1);
    353  m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff1);
    371  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_10, m_coeff3);
    386  m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_11, m_coeff3);
    412  m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
    413  m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
    [all …]

ihevc_itrans_recon_sse42_intr.c  (all matches in ihevc_itrans_recon_8x8_sse42())
     887  m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
     888  m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
     905  m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1);
     906  m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2);
     941  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
     967  m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
     996  m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
    1026  m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
    1095  m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
    1096  m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
    [all …]

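All of the libhevc transform kernels above use _mm_madd_epi16 (pmaddwd) the same way: two rows of 16-bit data are interleaved so that each 32-bit lane holds a (row0, row1) pair, and one multiply-add against a register of repeated coefficient pairs produces four 32-bit partial sums of the inverse-transform butterfly. A minimal sketch of that step, with illustrative names rather than the libhevc registers:

    #include <emmintrin.h> /* SSE2 */

    /* Sketch only: computes c0*row0[i] + c1*row1[i] for i = 0..3 as four
     * 32-bit lanes, the building block behind the m_temp_reg_NN =
     * _mm_madd_epi16(...) lines listed above. */
    static __m128i itrans_pair_sum(__m128i row0, __m128i row1,
                                   short c0, short c1)
    {
        /* even 16-bit lanes hold c0, odd lanes hold c1 */
        const __m128i coeff = _mm_set_epi16(c1, c0, c1, c0, c1, c0, c1, c0);
        /* interleave: 32-bit lane i becomes the pair (row0[i], row1[i]) */
        const __m128i pairs = _mm_unpacklo_epi16(row0, row1);
        return _mm_madd_epi16(pairs, coeff);
    }
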
/external/libvpx/libvpx/vp9/encoder/x86/
vp9_dct_intrin_sse2.c
    in fdct4_sse2():
         89  u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16);  // 0
         90  u[1] = _mm_madd_epi16(v[0], k__cospi_p16_m16);  // 2
         91  u[2] = _mm_madd_epi16(v[1], k__cospi_p08_p24);  // 1
         92  u[3] = _mm_madd_epi16(v[1], k__cospi_p24_m08);  // 3
    in fadst4_sse2():
        125  v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p02);  // s0 + s2
        126  v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p04);  // s4 + s5
        127  v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03);  // x1
        128  v[3] = _mm_madd_epi16(u[0], k__sinpi_p04_m01);  // s1 - s3
        129  v[4] = _mm_madd_epi16(u[1], k__sinpi_m03_p02);  // -s4 + s6
        130  v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03);  // s4
    [all …]

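The k__cospi_* and k__sinpi_* operands are vectors of interleaved coefficient pairs in the libvpx Q14 fixed-point convention (DCT_CONST_BITS == 14), so each pmaddwd evaluates one rotation term on four (x0, x1) pairs at once. A sketch of the butterfly the fdct4_sse2 lines implement, assuming that convention:

    #include <emmintrin.h>

    /* Sketch: y[i] = round(c0*x0[i] + c1*x1[i]) >> 14, with x packed as
     * interleaved (x0, x1) 16-bit pairs, as in fdct4_sse2 above. */
    static __m128i dct_butterfly(__m128i x_pairs, short c0, short c1)
    {
        const __m128i k = _mm_set_epi16(c1, c0, c1, c0, c1, c0, c1, c0);
        const __m128i k_rounding = _mm_set1_epi32(1 << 13); /* DCT_CONST_ROUNDING */
        __m128i y = _mm_madd_epi16(x_pairs, k);
        y = _mm_add_epi32(y, k_rounding);
        return _mm_srai_epi32(y, 14); /* DCT_CONST_BITS */
    }
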
/external/libaom/libaom/aom_dsp/x86/
aom_subpixel_8t_intrin_sse2.c
    in aom_filter_block1d16_h4_sse2():
         50  __m128i d1 = _mm_madd_epi16(ss_1_1, secondFilters);
         51  __m128i d2 = _mm_madd_epi16(ss_2_1, thirdFilters);
         58  d1 = _mm_madd_epi16(ss_1_2, secondFilters);
         59  d2 = _mm_madd_epi16(ss_2_2, thirdFilters);
         74  d1 = _mm_madd_epi16(ss_1_1, secondFilters);
         75  d2 = _mm_madd_epi16(ss_2_1, thirdFilters);
         82  d1 = _mm_madd_epi16(ss_1_2, secondFilters);
         83  d2 = _mm_madd_epi16(ss_2_2, thirdFilters);
    in aom_filter_block1d16_v4_sse2():
        171  tmp_0 = _mm_madd_epi16(resReg23_lo_1, secondFilters);
        172  tmp_1 = _mm_madd_epi16(resReg23_lo_2, secondFilters);
    [all …]

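Here pmaddwd evaluates a 4-tap subpixel filter two taps at a time: secondFilters and thirdFilters each hold one duplicated tap pair, (f2, f3) and (f4, f5), and the two 32-bit partial sums are added and rounded. A sketch of that combination step; the helper name and the FILTER_BITS == 7 rounding are assumptions following the usual libaom convention:

    #include <emmintrin.h>

    /* Sketch: px23/px45 hold interleaved (p[k], p[k+1]) pixel pairs. */
    static __m128i filter4_tap_pairs(__m128i px23, __m128i px45,
                                     __m128i secondFilters, __m128i thirdFilters)
    {
        const __m128i d1 = _mm_madd_epi16(px23, secondFilters); /* f2*p2 + f3*p3 */
        const __m128i d2 = _mm_madd_epi16(px45, thirdFilters);  /* f4*p4 + f5*p5 */
        const __m128i sum = _mm_add_epi32(d1, d2);
        const __m128i rnd = _mm_set1_epi32(1 << 6);
        return _mm_srai_epi32(_mm_add_epi32(sum, rnd), 7);
    }
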
/external/libaom/libaom/av1/encoder/x86/
wedge_utils_sse2.c
    in av1_wedge_sse_from_residuals_sse2():
         63  const __m128i v_t0l_d = _mm_madd_epi16(v_rd0l_w, v_m0l_w);
         64  const __m128i v_t0h_d = _mm_madd_epi16(v_rd0h_w, v_m0h_w);
         65  const __m128i v_t1l_d = _mm_madd_epi16(v_rd1l_w, v_m1l_w);
         66  const __m128i v_t1h_d = _mm_madd_epi16(v_rd1h_w, v_m1h_w);
         71  const __m128i v_sq0_d = _mm_madd_epi16(v_t0_w, v_t0_w);
         72  const __m128i v_sq1_d = _mm_madd_epi16(v_t1_w, v_t1_w);
    in av1_wedge_sign_from_residuals_sse2():
        139  const __m128i v_p0_d = _mm_madd_epi16(v_d0_w, v_m0_w);
        140  const __m128i v_p1_d = _mm_madd_epi16(v_d1_w, v_m1_w);
        141  const __m128i v_p2_d = _mm_madd_epi16(v_d2_w, v_m2_w);
        142  const __m128i v_p3_d = _mm_madd_epi16(v_d3_w, v_m3_w);
    [all …]

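wedge_utils_sse2.c uses pmaddwd twice in a row: once to apply 6-bit wedge mask weights to 16-bit residual pairs, and once more on the repacked result against itself, since pmaddwd(x, x) yields x0^2 + x1^2 per 32-bit lane. A sketch of the idea; the exact rounding and operand layout of the real kernel is simplified here:

    #include <emmintrin.h>

    static __m128i wedge_weighted_sq_sum(__m128i resid_pairs, __m128i mask_pairs)
    {
        /* weighted residuals: 32-bit lanes of r0*m0 + r1*m1 */
        __m128i t = _mm_madd_epi16(resid_pairs, mask_pairs);
        /* scale back by the 6-bit mask precision and repack to 16 bits */
        t = _mm_srai_epi32(_mm_add_epi32(t, _mm_set1_epi32(1 << 5)), 6);
        const __m128i t16 = _mm_packs_epi32(t, t);
        /* self-madd: per-lane sum of squares */
        return _mm_madd_epi16(t16, t16);
    }
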
/external/libvpx/libvpx/vpx_dsp/x86/
inv_txfm_sse2.c
    in iadst4_sse2():
        121  v[0] = _mm_madd_epi16(in[0], k__sinpi_1_3);    // s_1 * x0 + s_3 * x1
        122  v[1] = _mm_madd_epi16(in[1], k__sinpi_4_2);    // s_4 * x2 + s_2 * x3
        123  v[2] = _mm_madd_epi16(in[0], k__sinpi_2_3);    // s_2 * x0 + s_3 * x1
        124  v[3] = _mm_madd_epi16(in[1], k__sinpi_1_4);    // s_1 * x2 + s_4 * x3
        125  v[4] = _mm_madd_epi16(in[0], k__sinpi_12_n3);  // (s_1 + s_2) * x0 - s_3 * x1
        133  u[2] = _mm_madd_epi16(in[0], k__sinpi_1_3);
    in iadst8_sse2():
        264  u[0] = _mm_madd_epi16(s[0], k__cospi_p02_p30);
        265  u[1] = _mm_madd_epi16(s[1], k__cospi_p02_p30);
        266  u[2] = _mm_madd_epi16(s[0], k__cospi_p30_m02);
        267  u[3] = _mm_madd_epi16(s[1], k__cospi_p30_m02);
    [all …]

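The iadst4_sse2 comments above show a small trick: constants such as k__sinpi_12_n3 pre-add two sines into one 16-bit lane, so a single pmaddwd evaluates (s_1 + s_2)*x0 - s_3*x1 without an extra vector add. Sketched under the same Q14 convention, with the constant built inline for clarity:

    #include <emmintrin.h>

    /* Sketch: fold the three-term combination into one madd. */
    static __m128i iadst_fold(__m128i x01_pairs, short s1, short s2, short s3)
    {
        const short a = (short)(s1 + s2);
        const short b = (short)-s3;
        const __m128i k = _mm_set_epi16(b, a, b, a, b, a, b, a);
        return _mm_madd_epi16(x01_pairs, k); /* (s1 + s2)*x0 - s3*x1 */
    }
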
/external/XNNPACK/src/qs8-gemm/gen/
4x4c2-xw-minmax-sse41.c  (all matches in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41(); one match per listed line, vxa0..vxa3 in order)
     86, 88, 90, 92    _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
     96, 98, 100, 102  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    106, 108           _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

4x4c2-minmax-sse41-ld128.c  (all matches in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128(); vxa0..vxa3 in order)
     89, 91, 93, 95    _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
     98, 100, 102, 104 _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    111, 113           _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

4x4c2-minmax-sse41-ld64.c  (all matches in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64(); vxa0..vxa3 in order)
     87, 89, 91, 93    _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
     98, 100, 102, 104 _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    109, 111           _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

4x4c2-xw-minmax-sse2.c  (all matches in xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2(); vxa0..vxa3 in order)
     86, 88, 90, 92    _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
     96, 98, 100, 102  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    106, 108           _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

4x4c2-minmax-sse2-ld128.c  (all matches in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128(); vxa0..vxa3 in order)
     89, 91, 93, 95    _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
     98, 100, 102, 104 _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    111, 113           _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

4x4c2-minmax-sse2-ld64.c  (all matches in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64(); vxa0..vxa3 in order)
     87, 89, 91, 93    _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
     98, 100, 102, 104 _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    109, 111           _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

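All six generated qs8 GEMM microkernels share one inner step: vxa holds eight sign-extended int8 values of A (four k-pairs), vxb holds the matching 2-deep slice of B for four output columns, and broadcasting one 32-bit k-pair of A with _mm_shuffle_epi32 before the madd accumulates a[k]*b[k][n] + a[k+1]*b[k+1][n] into the four int32 accumulators. That is the "c2" in 4x4c2. A sketch of one such step, with an assumed helper name:

    #include <emmintrin.h>

    static __m128i gemm_4x4c2_step(__m128i vacc, __m128i vxa, __m128i vxb0)
    {
        /* broadcast A's k-pair 0 to all four 32-bit lanes */
        const __m128i va_pair = _mm_shuffle_epi32(vxa, _MM_SHUFFLE(0, 0, 0, 0));
        /* per column n: vacc[n] += a[0]*b[0][n] + a[1]*b[1][n] */
        return _mm_add_epi32(vacc, _mm_madd_epi16(va_pair, vxb0));
    }
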
/external/XNNPACK/src/qs8-igemm/gen/
4x4c2-minmax-sse41-ld128.c  (all matches in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128(); vxa0..vxa3 in order)
    106, 108, 110, 112  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
    115, 117, 119, 121  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    128, 130            _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

4x4c2-minmax-sse41-ld64.c  (all matches in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64(); vxa0..vxa3 in order)
    104, 106, 108, 110  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
    115, 117, 119, 121  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    126, 128            _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

4x4c2-minmax-sse2-ld128.c  (all matches in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128(); vxa0..vxa3 in order)
    106, 108, 110, 112  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
    115, 117, 119, 121  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    128, 130            _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

4x4c2-minmax-ssse3-ld64.c  (all matches in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64(); vxa0..vxa3 in order)
    104, 106, 108, 110  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
    115, 117, 119, 121  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    126, 128            _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

4x4c2-minmax-ssse3-ld128.c  (all matches in xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128(); vxa0..vxa3 in order)
    106, 108, 110, 112  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
    115, 117, 119, 121  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    128, 130            _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

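The igemm variants differ from the gemm kernels above only in how rows of A are addressed (through an indirection buffer); the pmaddwd step is identical. What every SSE2/SSSE3 build needs before that step is int8 to int16 widening, since pmaddwd consumes 16-bit lanes; SSE4.1 builds can use _mm_cvtepi8_epi16 directly. A sketch of the SSE2 fallback idiom:

    #include <emmintrin.h>

    /* Sign-extend the low 8 bytes of v8 to 8 x int16 (SSE2 idiom). */
    static __m128i widen_s8_sse2(__m128i v8)
    {
        const __m128i vsign = _mm_cmpgt_epi8(_mm_setzero_si128(), v8);
        return _mm_unpacklo_epi8(v8, vsign);
    }
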
/external/flac/src/libFLAC/
lpc_intrin_sse2.c  (all matches in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2())
     83  summ = _mm_madd_epi16(q11, _mm_loadu_si128((const __m128i*)(data+i-12)));
     84  mull = _mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(sum…
     85  mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ…
     86  mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ,…
     87  mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ,…
     88  mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ,…
     89  mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ,…
     90  mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ,…
     91  mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ,…
     92  mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ,…
    [all …]

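The FLAC 16-bit residual kernel relies on a different property of pmaddwd: the samples are stored as int32 but known to fit in 16 bits, and each q register keeps one quantized LPC coefficient in the low 16 bits of every 32-bit lane (high half zero), so one madd multiplies four int32 samples by one coefficient. Sliding the load window one sample per tap and accumulating, as in lines 83-92 above, builds the order-12 prediction. A sketch of a single tap, with an assumed helper name:

    #include <emmintrin.h>

    /* Sketch: summ[j] += coeff * data_window[j] for j = 0..3, valid when
     * both coeff and the samples fit in 16 bits. */
    static __m128i lpc_tap(__m128i summ, const int *data_window, int coeff)
    {
        const __m128i q = _mm_set1_epi32(0xffff & coeff); /* high 16 bits zero */
        const __m128i d = _mm_loadu_si128((const __m128i *)data_window);
        return _mm_add_epi32(summ, _mm_madd_epi16(q, d));
    }
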
/external/libmpeg2/common/x86/
impeg2_idct_recon_sse42_intr.c  (all matches in impeg2_idct_recon_sse42())
    225  m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
    226  m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
    243  m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1);
    244  m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2);
    279  m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
    305  m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
    334  m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
    364  m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
    433  m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
    434  m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
    [all …]

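The impeg2 8x8 IDCT uses the same even/odd pmaddwd decomposition as the libhevc kernels at the top of this list. In kernels of this shape, each madd stage is followed by a butterfly, a rounding shift, and a saturating repack; a generic sketch of that tail (not the impeg2 source, and the stage shift is illustrative):

    #include <emmintrin.h>

    #define IDCT_SHIFT 12 /* illustrative stage precision */

    /* Sketch: combine even/odd 32-bit partial sums into the pair of
     * butterfly outputs, rounded and saturated to int16. */
    static __m128i idct_stage_out(__m128i even_sum, __m128i odd_sum)
    {
        const __m128i rnd = _mm_set1_epi32(1 << (IDCT_SHIFT - 1));
        const __m128i a = _mm_srai_epi32(
            _mm_add_epi32(_mm_add_epi32(even_sum, odd_sum), rnd), IDCT_SHIFT);
        const __m128i b = _mm_srai_epi32(
            _mm_add_epi32(_mm_sub_epi32(even_sum, odd_sum), rnd), IDCT_SHIFT);
        return _mm_packs_epi32(a, b);
    }
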
/external/libaom/libaom/av1/common/x86/
highbd_wiener_convolve_ssse3.c  (all matches in av1_highbd_wiener_convolve_add_src_ssse3())
     76  const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
     78  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
     80  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
     82  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
     91  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
     93  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
     95  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
     97  _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
    154  const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
    155  const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
    [all …]

wiener_convolve_sse2.c  (all matches in av1_wiener_convolve_add_src_sse2())
     74  const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
     76  const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
     78  const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
     80  const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
     89  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
     91  const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
     93  const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
     95  const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
    152  const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
    153  const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
    [all …]

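Both Wiener kernels evaluate the 8-tap filter as four pmaddwd terms on (f[k], f[k+1]) coefficient pairs. The highbd/SSSE3 version listed above produces the shifted pixel pairs with _mm_alignr_epi8 instead of separate unpacks; a sketch mirroring those lines, with names as in the listing:

    #include <tmmintrin.h> /* SSSE3 for _mm_alignr_epi8 */

    /* Sketch of the even-pixel horizontal pass: data/data2 are consecutive
     * 8-lane windows of 16-bit pixels; each alignr advances by 2 pixels. */
    static __m128i wiener_h_even(__m128i data, __m128i data2,
                                 __m128i coeff_01, __m128i coeff_23,
                                 __m128i coeff_45, __m128i coeff_67)
    {
        const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
        const __m128i res_2 = _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
        const __m128i res_4 = _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
        const __m128i res_6 = _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
        return _mm_add_epi32(_mm_add_epi32(res_0, res_2),
                             _mm_add_epi32(res_4, res_6));
    }
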
/external/XNNPACK/src/qu8-gemm/
4x4c2-minmax-sse2.c  (all matches in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2(); vxa0..vxa3 in order)
     89, 91, 93, 95     _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(0, 0, 0, 0)), vxb0));
    101, 103, 105, 107  _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(1, 1, 1, 1)), vxb1));
    113, 115            _mm_madd_epi16(_mm_shuffle_epi32(vxaN, _MM_SHUFFLE(2, 2, 2, 2)), vxb2));  (N = 0, 1)
    [all …]

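The qu8 kernel runs the same 4x4c2 madd step as the qs8 kernels above; the difference is operand preparation. uint8 inputs are zero-extended rather than sign-extended, and a zero point is subtracted up front so the 16-bit lanes stay in the range where pmaddwd cannot overflow. A sketch of that widening; names are illustrative, and which operands get which zero point follows the kernel's quantization scheme:

    #include <emmintrin.h>

    /* Zero-extend 8 uint8 values to int16 and recenter by a zero point. */
    static __m128i widen_u8_minus_zp(__m128i v8, __m128i vzero_point)
    {
        const __m128i vzero = _mm_setzero_si128();
        return _mm_sub_epi16(_mm_unpacklo_epi8(v8, vzero), vzero_point);
    }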