/external/XNNPACK/src/qs8-vadd/gen/

D | minmax-sse41-mul32-ld32-x32.c
  in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32():
    55  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    56  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
    57  __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier));
    58  __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier));
    59  __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier));
    60  __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier));
    61  __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier));
    62  __m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxSTUV, vx_multiplier));
    64  vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier));
    65  vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier));
  [all …]

D | minmax-sse41-mul32-ld32-x24.c
  in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24():
    51  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    52  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
    53  __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier));
    54  __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier));
    55  __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier));
    56  __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier));
    58  vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier));
    59  vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier));
    60  vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vy89AB, vy_multiplier));
    61  vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vyCDEF, vy_multiplier));
  [all …]

D | minmax-sse41-mul32-ld32-x16.c
  in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16():
    47  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    48  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
    49  __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier));
    50  __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier));
    52  vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier));
    53  vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier));
    54  vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vy89AB, vy_multiplier));
    55  vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vyCDEF, vy_multiplier));
    90  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    91  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
  [all …]

D | minmax-sse41-mul32-ld32-x8.c
  in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8():
    43  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    44  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
    46  vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier));
    47  vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier));
    73  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    74  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
    76  vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier));
    77  vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier));
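All four of these generated kernels repeat the same per-group step, just unrolled for 8, 16, 24 or 32 elements: each group of four int32 lanes is scaled by its requantization multiplier with _mm_mullo_epi32 and accumulated onto the precomputed zero-point product. A minimal sketch of that step (the helper name and free-standing form are mine, not XNNPACK's):

    #include <smmintrin.h>  // SSE4.1: _mm_mullo_epi32

    // acc = zero_point_product + x*x_multiplier + y*y_multiplier, 4 int32 lanes at a time.
    static inline __m128i qs8_vadd_acc4(__m128i vzero_point_product,
                                        __m128i vx, __m128i vx_multiplier,
                                        __m128i vy, __m128i vy_multiplier) {
      __m128i vacc = _mm_add_epi32(vzero_point_product,
                                   _mm_mullo_epi32(vx, vx_multiplier));
      vacc = _mm_add_epi32(vacc, _mm_mullo_epi32(vy, vy_multiplier));
      return vacc;  // the real kernels still shift, requantize and clamp this value
    }
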
/external/libhevc/common/x86/

D | ihevc_weighted_pred_sse42_intr.c
  in ihevc_weighted_pred_uni_sse42():
    182  src_temp0_4x32b = _mm_mullo_epi32(src_temp0_4x32b, wgt0_4x32b);
    183  src_temp1_4x32b = _mm_mullo_epi32(src_temp1_4x32b, wgt0_4x32b);
    184  src_temp2_4x32b = _mm_mullo_epi32(src_temp2_4x32b, wgt0_4x32b);
    185  src_temp3_4x32b = _mm_mullo_epi32(src_temp3_4x32b, wgt0_4x32b);
    200  src_temp4_4x32b = _mm_mullo_epi32(src_temp4_4x32b, wgt0_4x32b);
    201  src_temp5_4x32b = _mm_mullo_epi32(src_temp5_4x32b, wgt0_4x32b);
    202  src_temp6_4x32b = _mm_mullo_epi32(src_temp6_4x32b, wgt0_4x32b);
    203  src_temp7_4x32b = _mm_mullo_epi32(src_temp7_4x32b, wgt0_4x32b);
    298  src_temp0_4x32b = _mm_mullo_epi32(src_temp0_4x32b, wgt0_4x32b);
    299  src_temp1_4x32b = _mm_mullo_epi32(src_temp1_4x32b, wgt0_4x32b);
  [all …]

D | ihevc_itrans_recon_sse42_intr.c
  in ihevc_itrans_recon_4x4_ttype1_sse42():
    177  m_temp_reg_13 = _mm_mullo_epi32(m_temp_reg_1, m_coeff3);
    206  m_temp_reg_30 = _mm_mullo_epi32(m_temp_reg_10, m_coeff1);  //29*c0
    207  m_temp_reg_31 = _mm_mullo_epi32(m_temp_reg_11, m_coeff2);  //55*c1
    211  m_temp_reg_32 = _mm_mullo_epi32(m_temp_reg_11, m_coeff1);  //29*c1
    212  m_temp_reg_33 = _mm_mullo_epi32(m_temp_reg_12, m_coeff2);  //55*c2
    216  m_temp_reg_34 = _mm_mullo_epi32(m_temp_reg_10, m_coeff2);  //55*c0
    217  m_temp_reg_35 = _mm_mullo_epi32(m_temp_reg_12, m_coeff1);  //29*c2
    218  m_temp_reg_36 = _mm_mullo_epi32(m_temp_reg_14, m_coeff3);  //74*c4
    272  m_temp_reg_3 = _mm_mullo_epi32(m_temp_reg_22, m_coeff3);
    297  m_temp_reg_30 = _mm_mullo_epi32(m_temp_reg_0, m_coeff1);  //29*c0
  [all …]
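Both HEVC files use the same lane-wise scaling: the weighted-prediction code multiplies four widened samples by the weight vector, and the 4x4 transform code multiplies coefficient vectors by the DST constants 29/55/74 visible in the comments. A rough sketch of the weighting step only (names and the standalone form are illustrative, not the libhevc interface):

    #include <smmintrin.h>

    // src * wgt0 per 32-bit lane, plus rounding and shift; offset and clamping follow in the real code.
    static inline __m128i weight_4_samples(__m128i src_4x32b, __m128i wgt0_4x32b,
                                           __m128i round_4x32b, int shift) {
      __m128i v = _mm_mullo_epi32(src_4x32b, wgt0_4x32b);
      v = _mm_add_epi32(v, round_4x32b);
      return _mm_sra_epi32(v, _mm_cvtsi32_si128(shift));
    }
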
/external/flac/src/libFLAC/

D | lpc_intrin_sse41.c
  in FLAC__lpc_restore_signal_intrin_sse41():
     979  summ = _mm_mullo_epi32(dat[2], qlp[2]);
     980  summ = _mm_add_epi32(summ, _mm_mullo_epi32(dat[1], qlp[1]));
     981  summ = _mm_add_epi32(summ, _mm_mullo_epi32(dat[0], qlp[0]));
    1010  summ = _mm_add_epi32(_mm_mullo_epi32(dat[1], qlp[1]), _mm_mullo_epi32(dat[0], qlp[0]));
  in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41():
    1167  summ = _mm_mullo_epi32(q11, _mm_loadu_si128((const __m128i*)(data+i-12)));
    1168  …mull = _mm_mullo_epi32(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(su…
    1169  …mull = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(sum…
    1170  …mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ…
    1171  …mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ…
    1172  …mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ…
  [all …]
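These matches show the usual vectorized LPC dot product: each quantized coefficient is broadcast into a qN register, multiplied against four consecutive history samples, and the products are accumulated into summ before the quantization shift. One step of that loop, written as an assumed standalone helper rather than a FLAC API:

    #include <smmintrin.h>
    #include <stdint.h>

    // summ += q[j] * data[i-lag .. i-lag+3], where qj already holds q[j] in all four lanes.
    static inline __m128i lpc_accumulate_tap(__m128i summ, __m128i qj,
                                             const int32_t *data, int i, int lag) {
      const __m128i hist = _mm_loadu_si128((const __m128i *)(data + i - lag));
      return _mm_add_epi32(summ, _mm_mullo_epi32(qj, hist));
    }
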
/external/libaom/libaom/av1/common/x86/

D | highbd_inv_txfm_sse4.c
  in idct4x4_sse4_1():
    413  x = _mm_mullo_epi32(u0, cospi32);
    414  y = _mm_mullo_epi32(u2, cospi32);
    423  x = _mm_mullo_epi32(u1, cospi48);
    424  y = _mm_mullo_epi32(u3, cospim16);
    429  x = _mm_mullo_epi32(u1, cospi16);
    430  y = _mm_mullo_epi32(u3, cospi48);
  in iadst4x4_sse4_1():
    478  s0 = _mm_mullo_epi32(x0, sinpi1);
    479  s1 = _mm_mullo_epi32(x0, sinpi2);
    480  s2 = _mm_mullo_epi32(x1, sinpi3);
    481  s3 = _mm_mullo_epi32(x2, sinpi4);
  [all …]

D | selfguided_sse4.c
  in compute_p():
    159  an = _mm_max_epi32(_mm_mullo_epi32(a, _mm_set1_epi32(n)), bb);
    162  an = _mm_mullo_epi32(sum2, _mm_set1_epi32(n));
  in calc_ab():
    213  _mm_srli_epi32(_mm_add_epi32(_mm_mullo_epi32(p, s), rnd_z),
    234  const __m128i b_int = _mm_mullo_epi32(a_comp_over_n, sum1);
  in calc_ab_fast():
    354  _mm_srli_epi32(_mm_add_epi32(_mm_mullo_epi32(p, s), rnd_z),
    375  const __m128i b_int = _mm_mullo_epi32(a_comp_over_n, sum1);
  in av1_apply_selfguided_restoration_sse4_1():
    627  v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq0, f1_0));
    630  v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq0, f1_1));
    635  v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq1, f2_0));
    638  v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq1, f2_1));

D | av1_txfm_sse4.h
  in av1_round_shift_rect_array_32_sse4_1():
    55  const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);
    62  const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);

D | highbd_txfm_utility_sse4.h
  in half_btf_sse4_1():
    103  x = _mm_mullo_epi32(*w0, *n0);
    104  y = _mm_mullo_epi32(*w1, *n1);
  in half_btf_0_sse4_1():
    115  x = _mm_mullo_epi32(*w0, *n0);
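The common thread in these decoder files is the "half butterfly" from highbd_txfm_utility_sse4.h: out = (w0*in0 + w1*in1 + rounding) >> bit, with both products formed lane-wise by _mm_mullo_epi32 (half_btf_0_sse4_1 drops the second product). Restated here by value rather than through the pointer arguments the real helper takes:

    #include <smmintrin.h>

    // (w0*in0 + w1*in1 + (1 << (bit-1))) >> bit, per 32-bit lane.
    static inline __m128i half_btf_sketch(__m128i w0, __m128i in0,
                                          __m128i w1, __m128i in1, int bit) {
      __m128i x = _mm_add_epi32(_mm_mullo_epi32(w0, in0), _mm_mullo_epi32(w1, in1));
      x = _mm_add_epi32(x, _mm_set1_epi32(1 << (bit - 1)));
      return _mm_sra_epi32(x, _mm_cvtsi32_si128(bit));
    }
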
/external/libaom/libaom/av1/encoder/x86/

D | highbd_fwd_txfm_sse4.c
  in fdct4x4_sse4_1():
     81  u0 = _mm_mullo_epi32(s0, cospi32);
     82  u1 = _mm_mullo_epi32(s1, cospi32);
     93  v0 = _mm_mullo_epi32(s2, cospi48);
     94  v1 = _mm_mullo_epi32(s3, cospi16);
    100  v0 = _mm_mullo_epi32(s2, cospi16);
    101  v1 = _mm_mullo_epi32(s3, cospi48);
  in fadst4x4_sse4_1():
    143  s0 = _mm_mullo_epi32(in[idx], sinpi1);
    144  s1 = _mm_mullo_epi32(in[idx], sinpi4);
    147  s2 = _mm_mullo_epi32(in[idx], sinpi2);
    148  s3 = _mm_mullo_epi32(in[idx], sinpi1);
  [all …]

D | av1_txfm1d_sse4.h
    100  const __m128i in0_w0 = _mm_mullo_epi32(in0, ww0); \
    101  const __m128i in1_w1 = _mm_mullo_epi32(in1, ww1); \
    104  const __m128i in0_w1 = _mm_mullo_epi32(in0, ww1); \
    105  const __m128i in1_w0 = _mm_mullo_epi32(in1, ww0); \
    121  const __m128i in0_w0 = _mm_mullo_epi32(in0, ww0); \
    122  const __m128i in1_w1 = _mm_mullo_epi32(in1, ww1); \
    126  const __m128i in0_w1 = _mm_mullo_epi32(in0, ww1); \
    127  const __m128i in1_w0 = _mm_mullo_epi32(in1, ww0); \
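The forward-transform kernels and the butterfly macro in av1_txfm1d_sse4.h form the two cross products of a butterfly: one output accumulates in0*w0 + in1*w1 while its partner combines in0*w1 and in1*w0 with opposite sign (the exact sign convention depends on the macro variant), each followed by round-and-shift. A by-value sketch of that pair, with hypothetical names:

    #include <smmintrin.h>

    static inline void btf_pair_sketch(__m128i in0, __m128i in1,
                                       __m128i ww0, __m128i ww1,
                                       __m128i rounding, int bit,
                                       __m128i *out0, __m128i *out1) {
      const __m128i in0_w0 = _mm_mullo_epi32(in0, ww0);
      const __m128i in1_w1 = _mm_mullo_epi32(in1, ww1);
      const __m128i in0_w1 = _mm_mullo_epi32(in0, ww1);
      const __m128i in1_w0 = _mm_mullo_epi32(in1, ww0);
      __m128i t0 = _mm_add_epi32(_mm_add_epi32(in0_w0, in1_w1), rounding);
      __m128i t1 = _mm_add_epi32(_mm_sub_epi32(in0_w1, in1_w0), rounding);
      *out0 = _mm_sra_epi32(t0, _mm_cvtsi32_si128(bit));
      *out1 = _mm_sra_epi32(t1, _mm_cvtsi32_si128(bit));
    }
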
/external/XNNPACK/src/qs8-vaddc/gen/

D | minmax-sse41-mul32-ld32-x32.c
  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32():
     48  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
     49  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
     50  __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier));
     51  __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier));
     52  __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier));
     53  __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier));
     54  __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier));
     55  __m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxSTUV, vx_multiplier));
    103  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    104  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));

D | minmax-sse41-mul32-ld32-x24.c
  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24():
    46  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    47  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
    48  __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier));
    49  __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier));
    50  __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxGHIJ, vx_multiplier));
    51  __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxKLMN, vx_multiplier));
    92  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    93  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));

D | minmax-sse41-mul32-ld32-x16.c
  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16():
    44  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    45  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
    46  __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier));
    47  __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier));
    79  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    80  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));

D | minmax-sse41-mul32-ld32-x8.c
  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8():
    42  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    43  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
    67  __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
    68  __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
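The vaddc ("add a constant") kernels mirror the qs8-vadd ones above, but since the second operand is a scalar its y*y_multiplier contribution can be folded into vzero_point_product once before the loop, which is why only the vx multiply appears in these matches. The per-group step then reduces to the following (hypothetical helper, not XNNPACK's own):

    #include <smmintrin.h>

    // acc = (zero_point_product with the constant term folded in) + x * x_multiplier, 4 lanes at a time.
    static inline __m128i qs8_vaddc_acc4(__m128i vzero_point_product,
                                         __m128i vx, __m128i vx_multiplier) {
      return _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx, vx_multiplier));
    }
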
/external/libaom/libaom/aom_dsp/x86/

D | convolve_sse4_1.h
  in mult_add_store():
    26  d = _mm_add_epi32(_mm_mullo_epi32(d, *wt0), _mm_mullo_epi32(*res, *wt1));
  in highbd_comp_avg_sse4_1():
    41  const __m128i wt0_res = _mm_mullo_epi32(*data_ref_0, *wt0);
    42  const __m128i wt1_res = _mm_mullo_epi32(*res_unsigned, *wt1);
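Both helpers blend two 32-bit predictions as d*wt0 + res*wt1 ahead of the compound rounding. A by-value restatement (the real functions take pointer arguments and also apply a rounding offset):

    #include <smmintrin.h>

    static inline __m128i comp_avg4_sketch(__m128i data_ref_0, __m128i res_unsigned,
                                           __m128i wt0, __m128i wt1, int shift) {
      const __m128i sum = _mm_add_epi32(_mm_mullo_epi32(data_ref_0, wt0),
                                        _mm_mullo_epi32(res_unsigned, wt1));
      return _mm_sra_epi32(sum, _mm_cvtsi32_si128(shift));
    }
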
/external/libavc/common/x86/

D | ih264_ihadamard_scaling_sse42.c
  in ih264_ihadamard_scaling_4x4_sse42():
    173  src_r0 = _mm_mullo_epi32(src_r0, mult_val);
    174  src_r1 = _mm_mullo_epi32(src_r1, mult_val);
    175  src_r2 = _mm_mullo_epi32(src_r2, mult_val);
    176  src_r3 = _mm_mullo_epi32(src_r3, mult_val);
  in ih264_ihadamard_scaling_2x2_uv_sse42():
    234  …temp0 = _mm_mullo_epi32(scale_val, plane_0);  //multiply by pu2_iscal_mat[0] * pu2_weigh_mat…
    235  …temp1 = _mm_mullo_epi32(scale_val, plane_1);  //multiply by pu2_iscal_mat[0] * pu2_weigh_mat…

D | ih264_resi_trans_quant_sse42.c
  in ih264_resi_trans_quant_4x4_sse42():
    265  temp0 = _mm_mullo_epi32(temp0, src_r0);
    266  temp1 = _mm_mullo_epi32(temp1, src_r1);
    267  temp2 = _mm_mullo_epi32(temp2, src_r2);
    268  temp3 = _mm_mullo_epi32(temp3, src_r3);
  in ih264_resi_trans_quant_chroma_4x4_sse42():
    548  temp0 = _mm_mullo_epi32(temp0, src_r0);
    549  temp1 = _mm_mullo_epi32(temp1, src_r1);
    550  temp2 = _mm_mullo_epi32(temp2, src_r2);
    551  temp3 = _mm_mullo_epi32(temp3, src_r3);
  in ih264_hadamard_quant_4x4_sse42():
    779  temp0 = _mm_mullo_epi32(scale_val, src_r0);  //multiply by pu2_scale_matrix[0]
    780  temp1 = _mm_mullo_epi32(scale_val, src_r1);
  [all …]
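In both H.264 files the pattern is a row-by-row rescale of a 4x4 block: each row of four 32-bit coefficients is multiplied lane-wise by a scale drawn from the scaling/quantization matrices. A minimal illustration with invented names:

    #include <smmintrin.h>

    // coeff row * scale, four 32-bit lanes per row, applied to all four rows of a 4x4 block.
    static inline void scale_4x4_rows(__m128i src_r[4], __m128i mult_val) {
      for (int i = 0; i < 4; ++i) {
        src_r[i] = _mm_mullo_epi32(src_r[i], mult_val);
      }
    }
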
/external/libopus/celt/x86/

D | pitch_sse4_1.c
  in celt_inner_prod_sse4_1():
     92  inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210);
  in xcorr_kernel_sse4_1():
    169  sum0 = _mm_mullo_epi32(vecX0, vecY0);
    170  sum1 = _mm_mullo_epi32(vecX1, vecY1);
    171  sum2 = _mm_mullo_epi32(vecX2, vecY2);
    172  sum3 = _mm_mullo_epi32(vecX3, vecY3);
    187  sum0 = _mm_mullo_epi32(vecX0, vecY0);
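Here _mm_mullo_epi32 provides the element-wise products of a fixed-point inner product / cross-correlation kernel; the four partial products are then reduced to a scalar. A self-contained 4-sample inner product in the same spirit (the real Opus kernels work on its fixed-point sample types and reduce differently):

    #include <smmintrin.h>
    #include <stdint.h>

    static inline int32_t inner_prod4_sketch(const int32_t *x, const int32_t *y) {
      __m128i prod = _mm_mullo_epi32(_mm_loadu_si128((const __m128i *)x),
                                     _mm_loadu_si128((const __m128i *)y));
      prod = _mm_add_epi32(prod, _mm_srli_si128(prod, 8));  // fold upper two lanes onto lower two
      prod = _mm_add_epi32(prod, _mm_srli_si128(prod, 4));  // fold lane 1 onto lane 0
      return _mm_cvtsi128_si32(prod);
    }
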
/external/XNNPACK/src/qs8-vadd/

D | sse-mul32-ld32.c.in
     60  …__m128i vacc${ABC[N:N+4]} = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx${ABC[N:N+4]}, vx…
     63  …vacc${ABC[N:N+4]} = _mm_add_epi32(vacc${ABC[N:N+4]}, _mm_mullo_epi32(vy${ABC[N:N+4]}, vy_multiplie…
    114  …__m128i vacc${ABC[0:4]} = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx${ABC[0:4]}, vx_mul…
    115  …__m128i vacc${ABC[4:8]} = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx${ABC[4:8]}, vx_mul…
    117  … vacc${ABC[0:4]} = _mm_add_epi32(vacc${ABC[0:4]}, _mm_mullo_epi32(vy${ABC[0:4]}, vy_multiplier));
    118  … vacc${ABC[4:8]} = _mm_add_epi32(vacc${ABC[4:8]}, _mm_mullo_epi32(vy${ABC[4:8]}, vy_multiplier));
/external/libgav1/libgav1/src/dsp/x86/

D | distance_weighted_blend_sse4.cc
  in ComputeWeightedAverage8():
    239  __m128i mult0 = _mm_mullo_epi32(prediction0, weight0);
    241  __m128i mult1 = _mm_mullo_epi32(prediction1, weight1);
    247  mult0 = _mm_mullo_epi32(prediction0, weight0);
    249  mult1 = _mm_mullo_epi32(prediction1, weight1);
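This is the same weighted-blend shape as the convolve helper earlier: each prediction is scaled by its distance weight and the two halves are summed before normalization and packing. Illustrative fragment only:

    #include <smmintrin.h>

    static inline __m128i weighted_blend4_sketch(__m128i prediction0, __m128i weight0,
                                                 __m128i prediction1, __m128i weight1) {
      const __m128i mult0 = _mm_mullo_epi32(prediction0, weight0);
      const __m128i mult1 = _mm_mullo_epi32(prediction1, weight1);
      return _mm_add_epi32(mult0, mult1);  // rounding, shifting and packing follow in the real code
    }
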
/external/libopus/silk/fixed/x86/

D | burg_modified_FIX_sse4_1.c
  in silk_burg_modified_sse4_1():
    174  T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 );
    175  T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 );
    184  PTR_3210 = _mm_mullo_epi32( ATMP_3210, PTR_3210 );
    185  SUBFR_3210 = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 );
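In the Burg recursion the same coefficient register (ATMP_3210) rescales two 4-lane vectors of running terms in one step, while the T1/T2 lines form lane-wise products of sample vectors. A generic restatement of the rescale step, with made-up names:

    #include <smmintrin.h>

    // v0 *= coef and v1 *= coef, per 32-bit lane, with the same coefficient vector.
    static inline void rescale_two_vectors(__m128i coef, __m128i *v0, __m128i *v1) {
      *v0 = _mm_mullo_epi32(coef, *v0);
      *v1 = _mm_mullo_epi32(coef, *v1);
    }
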
/external/XNNPACK/src/qs8-vaddc/

D | sse-mul32-ld32.c.in
     56  …__m128i vacc${ABC[N:N+4]} = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx${ABC[N:N+4]}, vx…
    101  …__m128i vacc${ABC[0:4]} = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx${ABC[0:4]}, vx_mul…
    102  …__m128i vacc${ABC[4:8]} = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx${ABC[4:8]}, vx_mul…