/external/libaom/libaom/av1/encoder/x86/
D | wedge_utils_avx2.c
      56  const __m256i v_t0l_d = _mm256_madd_epi16(v_rd0l_w, v_m0l_w);   in av1_wedge_sse_from_residuals_avx2()
      57  const __m256i v_t0h_d = _mm256_madd_epi16(v_rd0h_w, v_m0h_w);   in av1_wedge_sse_from_residuals_avx2()
      61  const __m256i v_sq0_d = _mm256_madd_epi16(v_t0_w, v_t0_w);   in av1_wedge_sse_from_residuals_avx2()
     116  const __m256i v_p0_d = _mm256_madd_epi16(v_d0_w, v_m0_w);   in av1_wedge_sign_from_residuals_avx2()
     117  const __m256i v_p1_d = _mm256_madd_epi16(v_d1_w, v_m1_w);   in av1_wedge_sign_from_residuals_avx2()
     118  const __m256i v_p2_d = _mm256_madd_epi16(v_d2_w, v_m2_w);   in av1_wedge_sign_from_residuals_avx2()
     119  const __m256i v_p3_d = _mm256_madd_epi16(v_d3_w, v_m3_w);   in av1_wedge_sign_from_residuals_avx2()
     191  const __m256i v_r0l_w = _mm256_madd_epi16(v_ab0l_w, v_abl0n_w);   in av1_wedge_compute_delta_squares_avx2()
     192  const __m256i v_r0h_w = _mm256_madd_epi16(v_ab0h_w, v_abh0n_w);   in av1_wedge_compute_delta_squares_avx2()
     193  const __m256i v_r1l_w = _mm256_madd_epi16(v_ab1l_w, v_abl1n_w);   in av1_wedge_compute_delta_squares_avx2()
     [all …]
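Every hit in this index is a use of the same primitive: _mm256_madd_epi16 multiplies sixteen pairs of signed 16-bit lanes and adds adjacent products into eight signed 32-bit lanes. A minimal, self-contained sketch of a dot product built on it (illustrative names, not code from the files listed here):

    #include <immintrin.h>
    #include <stdint.h>

    /* Dot product of two int16 buffers, n a multiple of 16.  The 32-bit
     * accumulator is fine for short buffers; long ones need 64-bit widening
     * (see the block-error entry below). */
    static int32_t dotp_s16_avx2(const int16_t *a, const int16_t *b, int n) {
      __m256i acc = _mm256_setzero_si256();
      for (int i = 0; i < n; i += 16) {
        const __m256i va = _mm256_loadu_si256((const __m256i *)(a + i));
        const __m256i vb = _mm256_loadu_si256((const __m256i *)(b + i));
        acc = _mm256_add_epi32(acc, _mm256_madd_epi16(va, vb));
      }
      /* Horizontal reduction of the eight 32-bit lanes. */
      __m128i s = _mm_add_epi32(_mm256_castsi256_si128(acc),
                                _mm256_extracti128_si256(acc, 1));
      s = _mm_hadd_epi32(s, s);
      s = _mm_hadd_epi32(s, s);
      return _mm_cvtsi128_si32(s);
    }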
|
D | rdopt_avx2.c
      41  const __m256i madd_xy = _mm256_madd_epi16(pixels, slli);   in horver_correlation_4x4()
      49  const __m256i madd_xz = _mm256_madd_epi16(slli, perm);   in horver_correlation_4x4()
      54  const __m256i madd1_slli = _mm256_madd_epi16(slli, _mm256_set1_epi16(1));   in horver_correlation_4x4()
      58  const __m256i madd_slli = _mm256_madd_epi16(slli, slli);   in horver_correlation_4x4()
|
D | pickrst_avx2.c
      24  const __m256i d0 = _mm256_madd_epi16(*kl, _mm256_cvtepu8_epi16(s));   in acc_stat_avx2()
     142  const __m256i d0 = _mm256_madd_epi16(*dgd_ijkl, s1);   in acc_stat_highbd_avx2()
     522  const __m256i v0 = _mm256_madd_epi16(   in av1_lowbd_pixel_proj_error_avx2()
     524  const __m256i v1 = _mm256_madd_epi16(   in av1_lowbd_pixel_proj_error_avx2()
     532  const __m256i err0 = _mm256_madd_epi16(e0, e0);   in av1_lowbd_pixel_proj_error_avx2()
     568  _mm256_madd_epi16(xq_coeff, _mm256_unpacklo_epi16(flt_16b, d0));   in av1_lowbd_pixel_proj_error_avx2()
     570  _mm256_madd_epi16(xq_coeff, _mm256_unpackhi_epi16(flt_16b, d0));   in av1_lowbd_pixel_proj_error_avx2()
     577  const __m256i err0 = _mm256_madd_epi16(e0, e0);   in av1_lowbd_pixel_proj_error_avx2()
     603  const __m256i err0 = _mm256_madd_epi16(diff0, diff0);   in av1_lowbd_pixel_proj_error_avx2()
     697  const __m256i err0 = _mm256_madd_epi16(e0, e0);   in av1_highbd_pixel_proj_error_avx2()
     [all …]
|
D | corner_match_avx2.c
      54  sumsq2_vec = _mm256_add_epi32(sumsq2_vec, _mm256_madd_epi16(v2_1, v2_1));   in compute_cross_correlation_avx2()
      57  cross_vec = _mm256_add_epi32(cross_vec, _mm256_madd_epi16(v1_1, v2_1));   in compute_cross_correlation_avx2()
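compute_cross_correlation_avx2 folds two reductions into one pass: the same widened pixel vector feeds madd twice, once against itself for the sum of squares and once against the other patch for the cross term. A simplified sketch of one 16-pixel accumulation step (an illustration, not the libaom function):

    #include <immintrin.h>
    #include <stdint.h>

    /* Widen 16 bytes of each patch to int16, then accumulate sum(v2*v2) and
     * sum(v1*v2) into running 32-bit lane totals. */
    static void acc_sumsq_cross_16(const uint8_t *p1, const uint8_t *p2,
                                   __m256i *sumsq2, __m256i *cross) {
      const __m256i v1 = _mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)p1));
      const __m256i v2 = _mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)p2));
      *sumsq2 = _mm256_add_epi32(*sumsq2, _mm256_madd_epi16(v2, v2));
      *cross  = _mm256_add_epi32(*cross,  _mm256_madd_epi16(v1, v2));
    }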
|
D | error_intrin_avx2.c
      53  dqcoeff_reg = _mm256_madd_epi16(dqcoeff_reg, dqcoeff_reg);   in av1_block_error_avx2()
      55  coeff_reg = _mm256_madd_epi16(coeff_reg, coeff_reg);   in av1_block_error_avx2()
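The block-error kernels square 16-bit coefficient lanes with madd and then widen the 32-bit partial sums before accumulating, so large blocks cannot overflow. A sketch of that pattern, assuming the inputs and their differences fit in int16 (not av1_block_error_avx2 itself):

    #include <immintrin.h>
    #include <stdint.h>

    /* Sum of squared differences of two int16 buffers (n a multiple of 16).
     * The squared madd results are non-negative, so zero-extending them to
     * 64 bits with an unpack against zero is safe. */
    static int64_t block_sse_avx2(const int16_t *a, const int16_t *b, int n) {
      const __m256i zero = _mm256_setzero_si256();
      __m256i acc64 = zero;
      for (int i = 0; i < n; i += 16) {
        const __m256i va = _mm256_loadu_si256((const __m256i *)(a + i));
        const __m256i vb = _mm256_loadu_si256((const __m256i *)(b + i));
        const __m256i d = _mm256_sub_epi16(va, vb);   /* assumes no int16 wrap */
        const __m256i sq = _mm256_madd_epi16(d, d);   /* eight int32 partial sums */
        acc64 = _mm256_add_epi64(acc64, _mm256_unpacklo_epi32(sq, zero));
        acc64 = _mm256_add_epi64(acc64, _mm256_unpackhi_epi32(sq, zero));
      }
      int64_t out[4];
      _mm256_storeu_si256((__m256i *)out, acc64);
      return out[0] + out[1] + out[2] + out[3];
    }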
|
/external/libvpx/libvpx/vpx_dsp/x86/
D | fwd_dct32x32_impl_avx2.h
     365  const __m256i s2_20_2 = _mm256_madd_epi16(s2_20_0, k__cospi_p16_m16);   in FDCT32x32_2D_AVX2()
     366  const __m256i s2_20_3 = _mm256_madd_epi16(s2_20_1, k__cospi_p16_m16);   in FDCT32x32_2D_AVX2()
     367  const __m256i s2_21_2 = _mm256_madd_epi16(s2_21_0, k__cospi_p16_m16);   in FDCT32x32_2D_AVX2()
     368  const __m256i s2_21_3 = _mm256_madd_epi16(s2_21_1, k__cospi_p16_m16);   in FDCT32x32_2D_AVX2()
     369  const __m256i s2_22_2 = _mm256_madd_epi16(s2_22_0, k__cospi_p16_m16);   in FDCT32x32_2D_AVX2()
     370  const __m256i s2_22_3 = _mm256_madd_epi16(s2_22_1, k__cospi_p16_m16);   in FDCT32x32_2D_AVX2()
     371  const __m256i s2_23_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_m16);   in FDCT32x32_2D_AVX2()
     372  const __m256i s2_23_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_m16);   in FDCT32x32_2D_AVX2()
     373  const __m256i s2_24_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_p16);   in FDCT32x32_2D_AVX2()
     374  const __m256i s2_24_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_p16);   in FDCT32x32_2D_AVX2()
     [all …]
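The forward DCT uses madd for its butterfly rotations: the two 16-bit inputs are interleaved so each 32-bit lane holds a pair (x, y), and the constant vector packs a cosine pair such as (cospi_16, -cospi_16), so a single madd evaluates cos·x ± cos·y. A hedged sketch of the general rotation step (constant packing and rounding precision are illustrative, not the transform's own names):

    #include <immintrin.h>
    #include <stdint.h>

    #define BTF_BITS 14  /* illustrative rounding precision */

    /* Pack the pair (a, b) into every 32-bit lane: a in the low half, b high. */
    static __m256i pair_w16(int16_t a, int16_t b) {
      return _mm256_set1_epi32((int32_t)(((uint32_t)(uint16_t)b << 16) |
                                         (uint16_t)a));
    }

    /* Butterfly rotation on 16-bit lanes:
     *   *out0 = round(c0*x + c1*y),  *out1 = round(c1*x - c0*y). */
    static void butterfly_w16(__m256i x, __m256i y, int16_t c0, int16_t c1,
                              __m256i *out0, __m256i *out1) {
      const __m256i w0 = pair_w16(c0, c1);
      const __m256i w1 = pair_w16(c1, (int16_t)-c0);
      const __m256i rnd = _mm256_set1_epi32(1 << (BTF_BITS - 1));
      const __m256i t0 = _mm256_unpacklo_epi16(x, y);  /* lanes hold (x_i, y_i) */
      const __m256i t1 = _mm256_unpackhi_epi16(x, y);
      const __m256i u0 = _mm256_srai_epi32(
          _mm256_add_epi32(_mm256_madd_epi16(t0, w0), rnd), BTF_BITS);
      const __m256i u1 = _mm256_srai_epi32(
          _mm256_add_epi32(_mm256_madd_epi16(t1, w0), rnd), BTF_BITS);
      const __m256i v0 = _mm256_srai_epi32(
          _mm256_add_epi32(_mm256_madd_epi16(t0, w1), rnd), BTF_BITS);
      const __m256i v1 = _mm256_srai_epi32(
          _mm256_add_epi32(_mm256_madd_epi16(t1, w1), rnd), BTF_BITS);
      *out0 = _mm256_packs_epi32(u0, u1);  /* back to 16-bit lanes */
      *out1 = _mm256_packs_epi32(v0, v1);
    }

The same interleave/madd/round/pack shape is what btf_16_w16_avx2 in txfm_common_avx2.h (further down this list) spells out with its t0/t1 and w0/w1 operands.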
|
D | convolve_avx2.h
     155  const __m256i tmp_0 = _mm256_madd_epi16(*src_0, *ker_0);   in mm256_madd_add_epi32()
     156  const __m256i tmp_1 = _mm256_madd_epi16(*src_1, *ker_1);   in mm256_madd_add_epi32()
|
D | highbd_convolve_avx2.c
     292  a0 = _mm256_madd_epi16(fil[0], sig[0]);   in filter_8x1_pixels()
     293  a1 = _mm256_madd_epi16(fil[3], sig[3]);   in filter_8x1_pixels()
     296  a0 = _mm256_madd_epi16(fil[1], sig[1]);   in filter_8x1_pixels()
     297  a1 = _mm256_madd_epi16(fil[2], sig[2]);   in filter_8x1_pixels()
     443  __m256i x0 = _mm256_madd_epi16(sig[0], *f);   in filter_16_2t_pixels()
     444  __m256i x1 = _mm256_madd_epi16(sig[1], *f);   in filter_16_2t_pixels()
     454  __m256i x0 = _mm256_madd_epi16(sig[0], *f);   in filter_8x1_2t_pixels()
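filter_8x1_pixels evaluates an 8-tap filter as four madds: fil[k] holds the coefficient pair (f[2k], f[2k+1]) and sig[k] the matching interleaved sample pair, so each madd contributes two taps. The combine step, sketched with the signal preparation omitted:

    #include <immintrin.h>

    /* Combine step of an 8-tap filter: four madds and three adds per vector
     * of 32-bit filter sums.  sig[] is assumed to already hold the sample
     * pairs lined up with the coefficient pairs in fil[]. */
    static __m256i filter_8tap_combine(const __m256i sig[4], const __m256i fil[4]) {
      const __m256i a01 = _mm256_madd_epi16(sig[0], fil[0]);
      const __m256i a23 = _mm256_madd_epi16(sig[1], fil[1]);
      const __m256i a45 = _mm256_madd_epi16(sig[2], fil[2]);
      const __m256i a67 = _mm256_madd_epi16(sig[3], fil[3]);
      return _mm256_add_epi32(_mm256_add_epi32(a01, a23),
                              _mm256_add_epi32(a45, a67));
    }

The 2-tap (bilinear) variants further down the entry need only a single madd per vector, which is why filter_16_2t_pixels and filter_8x1_2t_pixels reuse one coefficient register *f.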
|
D | variance_avx2.c
      53  const __m256i madd0 = _mm256_madd_epi16(diff0, diff0);   in variance_kernel_avx2()
      54  const __m256i madd1 = _mm256_madd_epi16(diff1, diff1);   in variance_kernel_avx2()
     197  exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo); \
     199  exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi); \
|
/external/libaom/libaom/av1/common/x86/
D | highbd_wiener_convolve_avx2.c
     105  const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);   in av1_highbd_wiener_convolve_add_src_avx2()
     106  const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);   in av1_highbd_wiener_convolve_add_src_avx2()
     107  const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);   in av1_highbd_wiener_convolve_add_src_avx2()
     108  const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);   in av1_highbd_wiener_convolve_add_src_avx2()
     109  const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);   in av1_highbd_wiener_convolve_add_src_avx2()
     110  const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);   in av1_highbd_wiener_convolve_add_src_avx2()
     111  const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);   in av1_highbd_wiener_convolve_add_src_avx2()
     112  const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);   in av1_highbd_wiener_convolve_add_src_avx2()
     196  const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);   in av1_highbd_wiener_convolve_add_src_avx2()
     197  const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);   in av1_highbd_wiener_convolve_add_src_avx2()
     [all …]
|
D | wiener_convolve_avx2.c
     113  const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);   in av1_wiener_convolve_add_src_avx2()
     114  const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);   in av1_wiener_convolve_add_src_avx2()
     115  const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);   in av1_wiener_convolve_add_src_avx2()
     116  const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);   in av1_wiener_convolve_add_src_avx2()
     117  const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);   in av1_wiener_convolve_add_src_avx2()
     118  const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);   in av1_wiener_convolve_add_src_avx2()
     119  const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);   in av1_wiener_convolve_add_src_avx2()
     120  const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);   in av1_wiener_convolve_add_src_avx2()
     202  const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);   in av1_wiener_convolve_add_src_avx2()
     203  const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);   in av1_wiener_convolve_add_src_avx2()
     [all …]
|
D | selfguided_avx2.c
     108  const __m256i x2 = _mm256_madd_epi16(x1, x1);   in integral_images()
     156  const __m256i x2 = _mm256_madd_epi16(x1, x1);   in integral_images_highbd()
     207  bb = _mm256_madd_epi16(b, b);   in compute_p()
     210  bb = _mm256_madd_epi16(sum1, sum1);   in compute_p()
     276  const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n);   in calc_ab()
     343  __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b);   in final_filter()
     413  const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n);   in calc_ab_fast()
     521  __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b);   in final_filter_fast()
     538  __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b);   in final_filter_fast()
|
/external/flac/libFLAC/
D | lpc_intrin_avx2.c
      80  summ = _mm256_madd_epi16(q11, _mm256_loadu_si256((const __m256i*)(data+i-12)));   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
      81  …mull = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); summ = _mm256_add_…   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
      82  …mull = _mm256_madd_epi16(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_…   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
      83  …mull = _mm256_madd_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_…   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
      84  …mull = _mm256_madd_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_…   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
      85  …mull = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_…   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
      86  …mull = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_…   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
      87  …mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_…   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
      88  …mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_…   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
      89  …mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_…   in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
     [all …]
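The FLAC 16-bit residual path appears to use madd as a plain 16x16→32 multiply on 32-bit sample lanes: the coefficient vectors keep their high half-words zero and the samples fit in 16 bits, so the pairwise sum collapses to a single product per lane. A sketch of that idiom (an assumption about the libFLAC layout, not its code):

    #include <immintrin.h>
    #include <stdint.h>

    /* Multiply every 32-bit lane of x (whose values are assumed to fit in
     * int16) by a 16-bit constant c.  The constant vector keeps its high
     * half-words zero, so the madd pair-sum is just c * low16(x) per lane. */
    static __m256i mul_lanes_by_s16(__m256i x, int16_t c) {
      const __m256i vc = _mm256_set1_epi32((uint16_t)c);  /* (low = c, high = 0) */
      return _mm256_madd_epi16(x, vc);
    }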
|
/external/libvpx/libvpx/vp9/encoder/x86/
D | vp9_error_avx2.c
      37  dqcoeff_256 = _mm256_madd_epi16(dqcoeff_256, dqcoeff_256);   in vp9_block_error_avx2()
      39  coeff_256 = _mm256_madd_epi16(coeff_256, coeff_256);   in vp9_block_error_avx2()
      66  dqcoeff_0 = _mm256_madd_epi16(dqcoeff_0, dqcoeff_0);   in vp9_block_error_avx2()
      67  dqcoeff_1 = _mm256_madd_epi16(dqcoeff_1, dqcoeff_1);   in vp9_block_error_avx2()
      69  coeff_0 = _mm256_madd_epi16(coeff_0, coeff_0);   in vp9_block_error_avx2()
      70  coeff_1 = _mm256_madd_epi16(coeff_1, coeff_1);   in vp9_block_error_avx2()
     125  const __m256i error_lo = _mm256_madd_epi16(diff, diff);   in vp9_block_error_fp_avx2()
     138  const __m256i error = _mm256_madd_epi16(diff, diff);   in vp9_block_error_fp_avx2()
|
/external/libaom/libaom/aom_dsp/x86/
D | sum_squares_avx2.c
      38  const __m256i v_sq_0_d = _mm256_madd_epi16(v_val_0_w, v_val_0_w);   in aom_sum_squares_2d_i16_nxn_avx2()
      39  const __m256i v_sq_1_d = _mm256_madd_epi16(v_val_1_w, v_val_1_w);   in aom_sum_squares_2d_i16_nxn_avx2()
      40  const __m256i v_sq_2_d = _mm256_madd_epi16(v_val_2_w, v_val_2_w);   in aom_sum_squares_2d_i16_nxn_avx2()
      41  const __m256i v_sq_3_d = _mm256_madd_epi16(v_val_3_w, v_val_3_w);   in aom_sum_squares_2d_i16_nxn_avx2()
|
D | sse_avx2.c
      31  *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d00_w, v_d00_w));   in sse_w32_avx2()
      32  *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d01_w, v_d01_w));   in sse_w32_avx2()
      84  *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w));   in sse_w4x4_avx2()
      95  *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w));   in sse_w8x2_avx2()
     139  _mm256_add_epi32(_mm256_madd_epi16(v_asub, v_asub),   in aom_sse_avx2()
     140  _mm256_madd_epi16(v_bsub, v_bsub));   in aom_sse_avx2()
     219  *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w));   in highbd_sse_w16_avx2()
     238  *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w));   in highbd_sse_w4x4_avx2()
     247  *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w));   in highbd_sse_w8x2_avx2()
|
D | obmc_variance_avx2.c
      53  const __m256i v_pm_d = _mm256_madd_epi16(v_p0_d, v_m_d);   in obmc_variance_w8n()
     113  const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d);   in obmc_variance_w16n()
     114  const __m256i v_pm1_d = _mm256_madd_epi16(v_p1_d, v_m1_d);   in obmc_variance_w16n()
     132  const __m256i v_sqrdiff_d = _mm256_madd_epi16(v_rdiff01_w, v_rdiff01_w);   in obmc_variance_w16n()
|
D | convolve_avx2.h
     319  const __m256i res_0 = _mm256_madd_epi16(s[0], coeffs[0]);   in convolve()
     320  const __m256i res_1 = _mm256_madd_epi16(s[1], coeffs[1]);   in convolve()
     321  const __m256i res_2 = _mm256_madd_epi16(s[2], coeffs[2]);   in convolve()
     322  const __m256i res_3 = _mm256_madd_epi16(s[3], coeffs[3]);   in convolve()
     332  const __m256i res_1 = _mm256_madd_epi16(s[0], coeffs[0]);   in convolve_4tap()
     333  const __m256i res_2 = _mm256_madd_epi16(s[1], coeffs[1]);   in convolve_4tap()
     386  const __m256i wt_res_lo = _mm256_madd_epi16(data_lo, *wt);   in comp_avg()
     387  const __m256i wt_res_hi = _mm256_madd_epi16(data_hi, *wt);   in comp_avg()
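The comp_avg hits show madd used as a two-tap blend rather than a filter: the two predictions are interleaved so each 32-bit lane holds (p0, p1), and the weight register packs (w0, w1), so one madd computes the weighted sum before the precision is shifted back out. A simplified sketch (WEIGHT_BITS and the omitted rounding are illustrative, not the codec's constants):

    #include <immintrin.h>
    #include <stdint.h>

    #define WEIGHT_BITS 4  /* assume w0 + w1 == 1 << WEIGHT_BITS */

    /* Weighted average of two vectors of 16-bit predictions. */
    static __m256i weighted_blend(__m256i p0, __m256i p1, int16_t w0, int16_t w1) {
      const __m256i wt = _mm256_set1_epi32(
          (int32_t)(((uint32_t)(uint16_t)w1 << 16) | (uint16_t)w0));
      const __m256i lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(p0, p1), wt);
      const __m256i hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(p0, p1), wt);
      return _mm256_packs_epi32(_mm256_srai_epi32(lo, WEIGHT_BITS),
                                _mm256_srai_epi32(hi, WEIGHT_BITS));
    }

highbd_comp_mask_pred_line_avx2 in variance_avx2.c (below) is the same shape with a per-pixel mask pair (m, 64-m) in place of a fixed weight pair.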
|
D | obmc_sad_avx2.c
      49  const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d);   in obmc_sad_w4_avx2()
      90  const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d);   in obmc_sad_w8n_avx2()
     171  const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d);   in hbd_obmc_sad_w4_avx2()
     216  const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d);   in hbd_obmc_sad_w8n_avx2()
|
D | masked_sad_intrin_avx2.c
     228  __m256i pred_l = _mm256_madd_epi16(data_l, mask_l);   in highbd_masked_sad8xh_avx2()
     234  __m256i pred_r = _mm256_madd_epi16(data_r, mask_r);   in highbd_masked_sad8xh_avx2()
     245  res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one));   in highbd_masked_sad8xh_avx2()
     285  __m256i pred_l = _mm256_madd_epi16(data_l, mask_l);   in highbd_masked_sad16xh_avx2()
     291  __m256i pred_r = _mm256_madd_epi16(data_r, mask_r);   in highbd_masked_sad16xh_avx2()
     302  res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one));   in highbd_masked_sad16xh_avx2()
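The res = add(res, madd(diff, one)) lines at 245 and 302 use the multiply-by-one idiom: madd against a vector of ones pair-adds adjacent 16-bit lanes and widens them to 32 bits, which keeps long SAD accumulations from overflowing 16-bit lanes. The same trick is exposed as v256_padd_s16 at the end of this list. A minimal sketch:

    #include <immintrin.h>

    /* Pair-add and widen: each output int32 lane is the sum of two adjacent
     * int16 lanes of v16. */
    static __m256i pairwise_widen_add(__m256i v16) {
      return _mm256_madd_epi16(v16, _mm256_set1_epi16(1));
    }

Accumulation then takes the shape res = _mm256_add_epi32(res, pairwise_widen_add(diff)), which is exactly what the matches above do with the absolute differences.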
|
D | highbd_variance_avx2.c
      36  const __m256i v_sqrdiff = _mm256_madd_epi16(v_diff, v_diff);   in aom_highbd_calc8x8var_avx2()
      66  const __m256i v_sqrdiff = _mm256_madd_epi16(v_diff, v_diff);   in aom_highbd_calc16x16var_avx2()
      72  __m256i v_sum0 = _mm256_madd_epi16(v_sum_d, one);   in aom_highbd_calc16x16var_avx2()
|
D | txfm_common_avx2.h
      33  __m256i u0 = _mm256_madd_epi16(t0, w0);   in btf_16_w16_avx2()
      34  __m256i u1 = _mm256_madd_epi16(t1, w0);   in btf_16_w16_avx2()
      35  __m256i v0 = _mm256_madd_epi16(t0, w1);   in btf_16_w16_avx2()
      36  __m256i v1 = _mm256_madd_epi16(t1, w1);   in btf_16_w16_avx2()
     272  const __m256i b = _mm256_madd_epi16(a, scale_rounding);   in scale_round_avx2()
|
D | variance_avx2.c
      40  const __m256i madd0 = _mm256_madd_epi16(diff0, diff0);   in variance_kernel_avx2()
      41  const __m256i madd1 = _mm256_madd_epi16(diff1, diff1);   in variance_kernel_avx2()
     421  const __m256i pred_lo = _mm256_madd_epi16(s_lo, a_lo);   in highbd_comp_mask_pred_line_avx2()
     427  const __m256i pred_hi = _mm256_madd_epi16(s_hi, a_hi);   in highbd_comp_mask_pred_line_avx2()
|
D | highbd_convolve_avx2.c
     471  a0 = _mm256_madd_epi16(fil[0], sig[0]);   in filter_8x1_pixels()
     472  a1 = _mm256_madd_epi16(fil[3], sig[3]);   in filter_8x1_pixels()
     475  a0 = _mm256_madd_epi16(fil[1], sig[1]);   in filter_8x1_pixels()
     476  a1 = _mm256_madd_epi16(fil[2], sig[2]);   in filter_8x1_pixels()
     792  __m256i x0 = _mm256_madd_epi16(sig[0], *f);   in filter_16_2t_pixels()
     793  __m256i x1 = _mm256_madd_epi16(sig[1], *f);   in filter_16_2t_pixels()
     803  __m256i x0 = _mm256_madd_epi16(sig[0], *f);   in filter_8x1_2t_pixels()
|
/external/libaom/libaom/aom_dsp/simd/
D | v256_intrinsics_x86.h
     110  return _mm256_madd_epi16(a, _mm256_set1_epi16(1));   in v256_padd_s16()
     379  v256 t1 = _mm256_madd_epi16(v256_unpackhi_s8_s16(a), v256_unpackhi_u8_s16(b));   in v256_dotp_su8()
     380  v256 t2 = _mm256_madd_epi16(v256_unpacklo_s8_s16(a), v256_unpacklo_u8_s16(b));   in v256_dotp_su8()
     390  v256 r = _mm256_madd_epi16(a, b);   in v256_dotp_s16()
     474  v256 rl = _mm256_madd_epi16(l, l);   in v256_ssd_u8()
     475  v256 rh = _mm256_madd_epi16(h, h);   in v256_ssd_u8()
     519  return _mm256_madd_epi16(a, b);   in v256_madd_s16()
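v256_dotp_su8 widens a signed and an unsigned 8-bit vector to 16 bits (both ranges fit in int16), madds the two halves, and reduces. A simplified sketch using the cvt intrinsics instead of the header's unpack helpers:

    #include <immintrin.h>
    #include <stdint.h>

    /* Signed-by-unsigned 8-bit dot product over 32 lanes: after widening,
     * every madd product is exact, and the total fits easily in int32. */
    static int32_t dotp_su8_avx2(__m256i a /* 32 x int8 */,
                                 __m256i b /* 32 x uint8 */) {
      const __m256i a_lo = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(a));
      const __m256i a_hi = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(a, 1));
      const __m256i b_lo = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(b));
      const __m256i b_hi = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(b, 1));
      const __m256i t = _mm256_add_epi32(_mm256_madd_epi16(a_lo, b_lo),
                                         _mm256_madd_epi16(a_hi, b_hi));
      __m128i s = _mm_add_epi32(_mm256_castsi256_si128(t),
                                _mm256_extracti128_si256(t, 1));
      s = _mm_hadd_epi32(s, s);
      s = _mm_hadd_epi32(s, s);
      return _mm_cvtsi128_si32(s);
    }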
|