
Searched refs: _mm256_madd_epi16 (Results 1 – 25 of 33) sorted by relevance
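For context, _mm256_madd_epi16 is the AVX2 intrinsic for VPMADDWD: it multiplies the corresponding signed 16-bit elements of its two operands and horizontally adds each adjacent pair of the 32-bit products, producing eight packed 32-bit sums (dst[i] = a[2i]*b[2i] + a[2i+1]*b[2i+1]). A minimal, self-contained sketch of the operation (not taken from any file below; build with -mavx2):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  const __m256i a = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8,
                                      9, 10, 11, 12, 13, 14, 15, 16);
  const __m256i b = _mm256_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80,
                                      90, 100, 110, 120, 130, 140, 150, 160);
  // Each 32-bit lane i holds a[2i]*b[2i] + a[2i+1]*b[2i+1].
  const __m256i r = _mm256_madd_epi16(a, b);
  int out[8];
  _mm256_storeu_si256((__m256i *)out, r);
  for (int i = 0; i < 8; ++i) printf("%d ", out[i]);  // 50 250 610 1130 1810 2650 3650 4810
  printf("\n");
  return 0;
}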


/external/libaom/libaom/av1/encoder/x86/
wedge_utils_avx2.c
56 const __m256i v_t0l_d = _mm256_madd_epi16(v_rd0l_w, v_m0l_w); in av1_wedge_sse_from_residuals_avx2()
57 const __m256i v_t0h_d = _mm256_madd_epi16(v_rd0h_w, v_m0h_w); in av1_wedge_sse_from_residuals_avx2()
61 const __m256i v_sq0_d = _mm256_madd_epi16(v_t0_w, v_t0_w); in av1_wedge_sse_from_residuals_avx2()
116 const __m256i v_p0_d = _mm256_madd_epi16(v_d0_w, v_m0_w); in av1_wedge_sign_from_residuals_avx2()
117 const __m256i v_p1_d = _mm256_madd_epi16(v_d1_w, v_m1_w); in av1_wedge_sign_from_residuals_avx2()
118 const __m256i v_p2_d = _mm256_madd_epi16(v_d2_w, v_m2_w); in av1_wedge_sign_from_residuals_avx2()
119 const __m256i v_p3_d = _mm256_madd_epi16(v_d3_w, v_m3_w); in av1_wedge_sign_from_residuals_avx2()
191 const __m256i v_r0l_w = _mm256_madd_epi16(v_ab0l_w, v_abl0n_w); in av1_wedge_compute_delta_squares_avx2()
192 const __m256i v_r0h_w = _mm256_madd_epi16(v_ab0h_w, v_abh0n_w); in av1_wedge_compute_delta_squares_avx2()
193 const __m256i v_r1l_w = _mm256_madd_epi16(v_ab1l_w, v_abl1n_w); in av1_wedge_compute_delta_squares_avx2()
[all …]
rdopt_avx2.c
41 const __m256i madd_xy = _mm256_madd_epi16(pixels, slli); in horver_correlation_4x4()
49 const __m256i madd_xz = _mm256_madd_epi16(slli, perm); in horver_correlation_4x4()
54 const __m256i madd1_slli = _mm256_madd_epi16(slli, _mm256_set1_epi16(1)); in horver_correlation_4x4()
58 const __m256i madd_slli = _mm256_madd_epi16(slli, slli); in horver_correlation_4x4()
pickrst_avx2.c
24 const __m256i d0 = _mm256_madd_epi16(*kl, _mm256_cvtepu8_epi16(s)); in acc_stat_avx2()
142 const __m256i d0 = _mm256_madd_epi16(*dgd_ijkl, s1); in acc_stat_highbd_avx2()
522 const __m256i v0 = _mm256_madd_epi16( in av1_lowbd_pixel_proj_error_avx2()
524 const __m256i v1 = _mm256_madd_epi16( in av1_lowbd_pixel_proj_error_avx2()
532 const __m256i err0 = _mm256_madd_epi16(e0, e0); in av1_lowbd_pixel_proj_error_avx2()
568 _mm256_madd_epi16(xq_coeff, _mm256_unpacklo_epi16(flt_16b, d0)); in av1_lowbd_pixel_proj_error_avx2()
570 _mm256_madd_epi16(xq_coeff, _mm256_unpackhi_epi16(flt_16b, d0)); in av1_lowbd_pixel_proj_error_avx2()
577 const __m256i err0 = _mm256_madd_epi16(e0, e0); in av1_lowbd_pixel_proj_error_avx2()
603 const __m256i err0 = _mm256_madd_epi16(diff0, diff0); in av1_lowbd_pixel_proj_error_avx2()
697 const __m256i err0 = _mm256_madd_epi16(e0, e0); in av1_highbd_pixel_proj_error_avx2()
[all …]
corner_match_avx2.c
54 sumsq2_vec = _mm256_add_epi32(sumsq2_vec, _mm256_madd_epi16(v2_1, v2_1)); in compute_cross_correlation_avx2()
57 cross_vec = _mm256_add_epi32(cross_vec, _mm256_madd_epi16(v1_1, v2_1)); in compute_cross_correlation_avx2()
error_intrin_avx2.c
53 dqcoeff_reg = _mm256_madd_epi16(dqcoeff_reg, dqcoeff_reg); in av1_block_error_avx2()
55 coeff_reg = _mm256_madd_epi16(coeff_reg, coeff_reg); in av1_block_error_avx2()
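A pattern that recurs throughout the error/variance hits (error_intrin_avx2.c above, and variance_avx2.c, sse_avx2.c, sum_squares_avx2.c further down) is squaring: passing the same vector as both operands, _mm256_madd_epi16(x, x) yields the sum of squares of each adjacent pair of 16-bit values. A hedged sketch of that idiom (illustrative only, not the libaom/libvpx kernels; a real kernel widens the accumulator to 64 bits periodically to avoid overflow):

#include <immintrin.h>
#include <stdint.h>

// Sum of squares of n signed 16-bit values, n a multiple of 16 (illustrative).
static int64_t sum_squares_i16(const int16_t *buf, int n) {
  __m256i acc = _mm256_setzero_si256();
  for (int i = 0; i < n; i += 16) {
    const __m256i v = _mm256_loadu_si256((const __m256i *)(buf + i));
    // madd(v, v): each 32-bit lane becomes v[2j]^2 + v[2j+1]^2.
    acc = _mm256_add_epi32(acc, _mm256_madd_epi16(v, v));
  }
  // Horizontal reduction of the eight 32-bit partial sums.
  __m128i s = _mm_add_epi32(_mm256_castsi256_si128(acc),
                            _mm256_extracti128_si256(acc, 1));
  s = _mm_add_epi32(s, _mm_srli_si128(s, 8));
  s = _mm_add_epi32(s, _mm_srli_si128(s, 4));
  return (int64_t)_mm_cvtsi128_si32(s);
}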
/external/libvpx/libvpx/vpx_dsp/x86/
fwd_dct32x32_impl_avx2.h
365 const __m256i s2_20_2 = _mm256_madd_epi16(s2_20_0, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
366 const __m256i s2_20_3 = _mm256_madd_epi16(s2_20_1, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
367 const __m256i s2_21_2 = _mm256_madd_epi16(s2_21_0, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
368 const __m256i s2_21_3 = _mm256_madd_epi16(s2_21_1, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
369 const __m256i s2_22_2 = _mm256_madd_epi16(s2_22_0, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
370 const __m256i s2_22_3 = _mm256_madd_epi16(s2_22_1, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
371 const __m256i s2_23_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
372 const __m256i s2_23_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_m16); in FDCT32x32_2D_AVX2()
373 const __m256i s2_24_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_p16); in FDCT32x32_2D_AVX2()
374 const __m256i s2_24_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_p16); in FDCT32x32_2D_AVX2()
[all …]
convolve_avx2.h
155 const __m256i tmp_0 = _mm256_madd_epi16(*src_0, *ker_0); in mm256_madd_add_epi32()
156 const __m256i tmp_1 = _mm256_madd_epi16(*src_1, *ker_1); in mm256_madd_add_epi32()
highbd_convolve_avx2.c
292 a0 = _mm256_madd_epi16(fil[0], sig[0]); in filter_8x1_pixels()
293 a1 = _mm256_madd_epi16(fil[3], sig[3]); in filter_8x1_pixels()
296 a0 = _mm256_madd_epi16(fil[1], sig[1]); in filter_8x1_pixels()
297 a1 = _mm256_madd_epi16(fil[2], sig[2]); in filter_8x1_pixels()
443 __m256i x0 = _mm256_madd_epi16(sig[0], *f); in filter_16_2t_pixels()
444 __m256i x1 = _mm256_madd_epi16(sig[1], *f); in filter_16_2t_pixels()
454 __m256i x0 = _mm256_madd_epi16(sig[0], *f); in filter_8x1_2t_pixels()
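The convolution hits (mm256_madd_add_epi32 and filter_8x1_pixels above, the Wiener filters and convolve() further down) all rely on the same layout trick: samples and filter taps are pre-interleaved in pairs, so a single _mm256_madd_epi16 evaluates two taps per output lane, and an 8-tap filter is four madds plus three 32-bit adds. A hedged sketch of the accumulation step (the argument layout is an assumption patterned on these functions, not the exact libvpx packing; rounding and shifting are omitted):

#include <immintrin.h>

// sig[k] holds interleaved sample pairs for taps (2k, 2k+1); fil[k] holds the
// matching coefficient pairs broadcast across the register. Each output lane
// ends up with the full 8-tap sum of c[t]*s[t] in 32-bit precision.
static inline __m256i eight_tap_madd(const __m256i sig[4], const __m256i fil[4]) {
  const __m256i a0 = _mm256_madd_epi16(sig[0], fil[0]);  // c0*s0 + c1*s1
  const __m256i a1 = _mm256_madd_epi16(sig[1], fil[1]);  // c2*s2 + c3*s3
  const __m256i a2 = _mm256_madd_epi16(sig[2], fil[2]);  // c4*s4 + c5*s5
  const __m256i a3 = _mm256_madd_epi16(sig[3], fil[3]);  // c6*s6 + c7*s7
  return _mm256_add_epi32(_mm256_add_epi32(a0, a1), _mm256_add_epi32(a2, a3));
}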
variance_avx2.c
53 const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); in variance_kernel_avx2()
54 const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); in variance_kernel_avx2()
197 exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo); \
199 exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi); \
/external/libaom/libaom/av1/common/x86/
highbd_wiener_convolve_avx2.c
105 const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); in av1_highbd_wiener_convolve_add_src_avx2()
106 const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01); in av1_highbd_wiener_convolve_add_src_avx2()
107 const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); in av1_highbd_wiener_convolve_add_src_avx2()
108 const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23); in av1_highbd_wiener_convolve_add_src_avx2()
109 const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45); in av1_highbd_wiener_convolve_add_src_avx2()
110 const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45); in av1_highbd_wiener_convolve_add_src_avx2()
111 const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67); in av1_highbd_wiener_convolve_add_src_avx2()
112 const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67); in av1_highbd_wiener_convolve_add_src_avx2()
196 const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); in av1_highbd_wiener_convolve_add_src_avx2()
197 const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); in av1_highbd_wiener_convolve_add_src_avx2()
[all …]
wiener_convolve_avx2.c
113 const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); in av1_wiener_convolve_add_src_avx2()
114 const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01); in av1_wiener_convolve_add_src_avx2()
115 const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); in av1_wiener_convolve_add_src_avx2()
116 const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23); in av1_wiener_convolve_add_src_avx2()
117 const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45); in av1_wiener_convolve_add_src_avx2()
118 const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45); in av1_wiener_convolve_add_src_avx2()
119 const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67); in av1_wiener_convolve_add_src_avx2()
120 const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67); in av1_wiener_convolve_add_src_avx2()
202 const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01); in av1_wiener_convolve_add_src_avx2()
203 const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23); in av1_wiener_convolve_add_src_avx2()
[all …]
selfguided_avx2.c
108 const __m256i x2 = _mm256_madd_epi16(x1, x1); in integral_images()
156 const __m256i x2 = _mm256_madd_epi16(x1, x1); in integral_images_highbd()
207 bb = _mm256_madd_epi16(b, b); in compute_p()
210 bb = _mm256_madd_epi16(sum1, sum1); in compute_p()
276 const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n); in calc_ab()
343 __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b); in final_filter()
413 const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n); in calc_ab_fast()
521 __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b); in final_filter_fast()
538 __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b); in final_filter_fast()
/external/flac/libFLAC/
lpc_intrin_avx2.c
80 summ = _mm256_madd_epi16(q11, _mm256_loadu_si256((const __m256i*)(data+i-12))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
81 …mull = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
82 …mull = _mm256_madd_epi16(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
83 …mull = _mm256_madd_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
84 …mull = _mm256_madd_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
85 …mull = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
86 …mull = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
87 …mull = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
88 …mull = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
89 …mull = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_… in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
[all …]
/external/libvpx/libvpx/vp9/encoder/x86/
vp9_error_avx2.c
37 dqcoeff_256 = _mm256_madd_epi16(dqcoeff_256, dqcoeff_256); in vp9_block_error_avx2()
39 coeff_256 = _mm256_madd_epi16(coeff_256, coeff_256); in vp9_block_error_avx2()
66 dqcoeff_0 = _mm256_madd_epi16(dqcoeff_0, dqcoeff_0); in vp9_block_error_avx2()
67 dqcoeff_1 = _mm256_madd_epi16(dqcoeff_1, dqcoeff_1); in vp9_block_error_avx2()
69 coeff_0 = _mm256_madd_epi16(coeff_0, coeff_0); in vp9_block_error_avx2()
70 coeff_1 = _mm256_madd_epi16(coeff_1, coeff_1); in vp9_block_error_avx2()
125 const __m256i error_lo = _mm256_madd_epi16(diff, diff); in vp9_block_error_fp_avx2()
138 const __m256i error = _mm256_madd_epi16(diff, diff); in vp9_block_error_fp_avx2()
/external/libaom/libaom/aom_dsp/x86/
sum_squares_avx2.c
38 const __m256i v_sq_0_d = _mm256_madd_epi16(v_val_0_w, v_val_0_w); in aom_sum_squares_2d_i16_nxn_avx2()
39 const __m256i v_sq_1_d = _mm256_madd_epi16(v_val_1_w, v_val_1_w); in aom_sum_squares_2d_i16_nxn_avx2()
40 const __m256i v_sq_2_d = _mm256_madd_epi16(v_val_2_w, v_val_2_w); in aom_sum_squares_2d_i16_nxn_avx2()
41 const __m256i v_sq_3_d = _mm256_madd_epi16(v_val_3_w, v_val_3_w); in aom_sum_squares_2d_i16_nxn_avx2()
sse_avx2.c
31 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d00_w, v_d00_w)); in sse_w32_avx2()
32 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d01_w, v_d01_w)); in sse_w32_avx2()
84 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in sse_w4x4_avx2()
95 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in sse_w8x2_avx2()
139 _mm256_add_epi32(_mm256_madd_epi16(v_asub, v_asub), in aom_sse_avx2()
140 _mm256_madd_epi16(v_bsub, v_bsub)); in aom_sse_avx2()
219 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in highbd_sse_w16_avx2()
238 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in highbd_sse_w4x4_avx2()
247 *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w)); in highbd_sse_w8x2_avx2()
obmc_variance_avx2.c
53 const __m256i v_pm_d = _mm256_madd_epi16(v_p0_d, v_m_d); in obmc_variance_w8n()
113 const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d); in obmc_variance_w16n()
114 const __m256i v_pm1_d = _mm256_madd_epi16(v_p1_d, v_m1_d); in obmc_variance_w16n()
132 const __m256i v_sqrdiff_d = _mm256_madd_epi16(v_rdiff01_w, v_rdiff01_w); in obmc_variance_w16n()
convolve_avx2.h
319 const __m256i res_0 = _mm256_madd_epi16(s[0], coeffs[0]); in convolve()
320 const __m256i res_1 = _mm256_madd_epi16(s[1], coeffs[1]); in convolve()
321 const __m256i res_2 = _mm256_madd_epi16(s[2], coeffs[2]); in convolve()
322 const __m256i res_3 = _mm256_madd_epi16(s[3], coeffs[3]); in convolve()
332 const __m256i res_1 = _mm256_madd_epi16(s[0], coeffs[0]); in convolve_4tap()
333 const __m256i res_2 = _mm256_madd_epi16(s[1], coeffs[1]); in convolve_4tap()
386 const __m256i wt_res_lo = _mm256_madd_epi16(data_lo, *wt); in comp_avg()
387 const __m256i wt_res_hi = _mm256_madd_epi16(data_hi, *wt); in comp_avg()
obmc_sad_avx2.c
49 const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d); in obmc_sad_w4_avx2()
90 const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d); in obmc_sad_w8n_avx2()
171 const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d); in hbd_obmc_sad_w4_avx2()
216 const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d); in hbd_obmc_sad_w8n_avx2()
masked_sad_intrin_avx2.c
228 __m256i pred_l = _mm256_madd_epi16(data_l, mask_l); in highbd_masked_sad8xh_avx2()
234 __m256i pred_r = _mm256_madd_epi16(data_r, mask_r); in highbd_masked_sad8xh_avx2()
245 res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one)); in highbd_masked_sad8xh_avx2()
285 __m256i pred_l = _mm256_madd_epi16(data_l, mask_l); in highbd_masked_sad16xh_avx2()
291 __m256i pred_r = _mm256_madd_epi16(data_r, mask_r); in highbd_masked_sad16xh_avx2()
302 res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one)); in highbd_masked_sad16xh_avx2()
highbd_variance_avx2.c
36 const __m256i v_sqrdiff = _mm256_madd_epi16(v_diff, v_diff); in aom_highbd_calc8x8var_avx2()
66 const __m256i v_sqrdiff = _mm256_madd_epi16(v_diff, v_diff); in aom_highbd_calc16x16var_avx2()
72 __m256i v_sum0 = _mm256_madd_epi16(v_sum_d, one); in aom_highbd_calc16x16var_avx2()
txfm_common_avx2.h
33 __m256i u0 = _mm256_madd_epi16(t0, w0); in btf_16_w16_avx2()
34 __m256i u1 = _mm256_madd_epi16(t1, w0); in btf_16_w16_avx2()
35 __m256i v0 = _mm256_madd_epi16(t0, w1); in btf_16_w16_avx2()
36 __m256i v1 = _mm256_madd_epi16(t1, w1); in btf_16_w16_avx2()
272 const __m256i b = _mm256_madd_epi16(a, scale_rounding); in scale_round_avx2()
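The transform hits (btf_16_w16_avx2 above and FDCT32x32_2D_AVX2 earlier) use madd for the butterfly rotation: with the two inputs interleaved as (x, y) pairs and the weight registers packing (cos, sin) and (sin, -cos) pairs, two madds produce cos*x + sin*y and sin*x - cos*y in 32-bit precision. A hedged sketch of that step (the weight packing is an assumption modelled on btf_16_w16_avx2; the rounding shift that follows in the real code is omitted):

#include <immintrin.h>

// t holds interleaved (x, y) 16-bit pairs; w0 packs (c, s) pairs, w1 packs (s, -c).
static inline void butterfly_madd(__m256i t, __m256i w0, __m256i w1,
                                  __m256i *u, __m256i *v) {
  *u = _mm256_madd_epi16(t, w0);  // c*x + s*y per 32-bit lane
  *v = _mm256_madd_epi16(t, w1);  // s*x - c*y per 32-bit lane
}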
variance_avx2.c
40 const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); in variance_kernel_avx2()
41 const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); in variance_kernel_avx2()
421 const __m256i pred_lo = _mm256_madd_epi16(s_lo, a_lo); in highbd_comp_mask_pred_line_avx2()
427 const __m256i pred_hi = _mm256_madd_epi16(s_hi, a_hi); in highbd_comp_mask_pred_line_avx2()
highbd_convolve_avx2.c
471 a0 = _mm256_madd_epi16(fil[0], sig[0]); in filter_8x1_pixels()
472 a1 = _mm256_madd_epi16(fil[3], sig[3]); in filter_8x1_pixels()
475 a0 = _mm256_madd_epi16(fil[1], sig[1]); in filter_8x1_pixels()
476 a1 = _mm256_madd_epi16(fil[2], sig[2]); in filter_8x1_pixels()
792 __m256i x0 = _mm256_madd_epi16(sig[0], *f); in filter_16_2t_pixels()
793 __m256i x1 = _mm256_madd_epi16(sig[1], *f); in filter_16_2t_pixels()
803 __m256i x0 = _mm256_madd_epi16(sig[0], *f); in filter_8x1_2t_pixels()
/external/libaom/libaom/aom_dsp/simd/
v256_intrinsics_x86.h
110 return _mm256_madd_epi16(a, _mm256_set1_epi16(1)); in v256_padd_s16()
379 v256 t1 = _mm256_madd_epi16(v256_unpackhi_s8_s16(a), v256_unpackhi_u8_s16(b)); in v256_dotp_su8()
380 v256 t2 = _mm256_madd_epi16(v256_unpacklo_s8_s16(a), v256_unpacklo_u8_s16(b)); in v256_dotp_su8()
390 v256 r = _mm256_madd_epi16(a, b); in v256_dotp_s16()
474 v256 rl = _mm256_madd_epi16(l, l); in v256_ssd_u8()
475 v256 rh = _mm256_madd_epi16(h, h); in v256_ssd_u8()
519 return _mm256_madd_epi16(a, b); in v256_madd_s16()
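The SIMD-abstraction hits above show the two simplest wrappers: v256_padd_s16 pair-adds 16-bit lanes into 32-bit lanes by multiplying against a vector of ones, and v256_dotp_s16 is a 16-bit dot product built from one madd plus a horizontal reduction. A hedged sketch of both (the reduction code is illustrative, and the real v256_dotp_s16 returns a 64-bit sum to avoid overflow):

#include <immintrin.h>
#include <stdint.h>

// Pairwise add of signed 16-bit lanes into 32-bit lanes: the madd-by-ones idiom.
static inline __m256i padd_s16(__m256i a) {
  return _mm256_madd_epi16(a, _mm256_set1_epi16(1));
}

// Dot product of two vectors of sixteen signed 16-bit values (32-bit result for brevity).
static inline int32_t dotp_s16(__m256i a, __m256i b) {
  const __m256i r = _mm256_madd_epi16(a, b);  // eight 32-bit partial sums
  __m128i s = _mm_add_epi32(_mm256_castsi256_si128(r),
                            _mm256_extracti128_si256(r, 1));
  s = _mm_add_epi32(s, _mm_srli_si128(s, 8));
  s = _mm_add_epi32(s, _mm_srli_si128(s, 4));
  return _mm_cvtsi128_si32(s);
}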
