Searched refs: _mm256_add_epi16 (results 1 – 25 of 28, sorted by relevance)
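
Every hit below is a use of the same AVX2 intrinsic: _mm256_add_epi16(a, b) adds the sixteen 16-bit lanes of two __m256i vectors element-wise, wrapping on overflow (no saturation). A minimal, self-contained sketch of the intrinsic itself, not taken from any of the files listed (array names are illustrative):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  int16_t a[16], b[16], out[16];
  for (int i = 0; i < 16; ++i) {
    a[i] = (int16_t)i;
    b[i] = (int16_t)(100 * i);
  }

  __m256i va = _mm256_loadu_si256((const __m256i *)a);
  __m256i vb = _mm256_loadu_si256((const __m256i *)b);

  // Lane-wise 16-bit addition across all sixteen lanes.
  __m256i vsum = _mm256_add_epi16(va, vb);

  _mm256_storeu_si256((__m256i *)out, vsum);
  for (int i = 0; i < 16; ++i) printf("%d ", out[i]);
  printf("\n");
  return 0;  // build with -mavx2 (or equivalent)
}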

/external/libvpx/libvpx/vpx_dsp/x86/
loopfilter_avx2.c
600  pixelFilter_p = _mm256_add_epi16(_mm256_add_epi16(p256_6, p256_5), in vpx_lpf_horizontal_16_dual_avx2()
601 _mm256_add_epi16(p256_4, p256_3)); in vpx_lpf_horizontal_16_dual_avx2()
602 pixelFilter_q = _mm256_add_epi16(_mm256_add_epi16(q256_6, q256_5), in vpx_lpf_horizontal_16_dual_avx2()
603 _mm256_add_epi16(q256_4, q256_3)); in vpx_lpf_horizontal_16_dual_avx2()
606 _mm256_add_epi16(p256_0, _mm256_add_epi16(p256_2, p256_1)); in vpx_lpf_horizontal_16_dual_avx2()
607 pixelFilter_p = _mm256_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); in vpx_lpf_horizontal_16_dual_avx2()
610 _mm256_add_epi16(q256_0, _mm256_add_epi16(q256_2, q256_1)); in vpx_lpf_horizontal_16_dual_avx2()
611 pixelFilter_q = _mm256_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); in vpx_lpf_horizontal_16_dual_avx2()
613 pixelFilter_p = _mm256_add_epi16( in vpx_lpf_horizontal_16_dual_avx2()
614 eight, _mm256_add_epi16(pixelFilter_p, pixelFilter_q)); in vpx_lpf_horizontal_16_dual_avx2()
[all …]
avg_intrin_avx2.c
231  __m256i b0 = _mm256_add_epi16(a0, a1); in hadamard_col8x2_avx2()
233 __m256i b2 = _mm256_add_epi16(a2, a3); in hadamard_col8x2_avx2()
235 __m256i b4 = _mm256_add_epi16(a4, a5); in hadamard_col8x2_avx2()
237 __m256i b6 = _mm256_add_epi16(a6, a7); in hadamard_col8x2_avx2()
240 a0 = _mm256_add_epi16(b0, b2); in hadamard_col8x2_avx2()
241 a1 = _mm256_add_epi16(b1, b3); in hadamard_col8x2_avx2()
244 a4 = _mm256_add_epi16(b4, b6); in hadamard_col8x2_avx2()
245 a5 = _mm256_add_epi16(b5, b7); in hadamard_col8x2_avx2()
250 b0 = _mm256_add_epi16(a0, a4); in hadamard_col8x2_avx2()
251 b7 = _mm256_add_epi16(a1, a5); in hadamard_col8x2_avx2()
[all …]
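
The avg_intrin_avx2.c hits above are the add half of a Hadamard butterfly: each _mm256_add_epi16 in hadamard_col8x2_avx2() is paired with a _mm256_sub_epi16 on the same two inputs, which simply does not match this search. A hedged sketch of one such stage, with illustrative names rather than the file's own:

#include <immintrin.h>

// One butterfly stage over 16-bit lanes: sum and difference of a pair.
// The real hadamard_col8x2_avx2() chains several such stages and then
// permutes/interleaves the results.
static inline void butterfly_epi16(__m256i a0, __m256i a1,
                                   __m256i *sum, __m256i *diff) {
  *sum  = _mm256_add_epi16(a0, a1);  // a0 + a1, lane-wise
  *diff = _mm256_sub_epi16(a0, a1);  // a0 - a1, lane-wise
}
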
fwd_dct32x32_impl_avx2.h
131  step1a[0] = _mm256_add_epi16(ina0, inb0); in FDCT32x32_2D_AVX2()
132 step1a[1] = _mm256_add_epi16(ina1, inb1); in FDCT32x32_2D_AVX2()
133 step1a[2] = _mm256_add_epi16(ina2, inb2); in FDCT32x32_2D_AVX2()
134 step1a[3] = _mm256_add_epi16(ina3, inb3); in FDCT32x32_2D_AVX2()
167 step1a[0] = _mm256_add_epi16(ina0, inb0); in FDCT32x32_2D_AVX2()
168 step1a[1] = _mm256_add_epi16(ina1, inb1); in FDCT32x32_2D_AVX2()
169 step1a[2] = _mm256_add_epi16(ina2, inb2); in FDCT32x32_2D_AVX2()
170 step1a[3] = _mm256_add_epi16(ina3, inb3); in FDCT32x32_2D_AVX2()
203 step1a[0] = _mm256_add_epi16(ina0, inb0); in FDCT32x32_2D_AVX2()
204 step1a[1] = _mm256_add_epi16(ina1, inb1); in FDCT32x32_2D_AVX2()
[all …]
convolve_avx2.h
66  sum1 = _mm256_add_epi16(x0, x2); in convolve8_16_avx2()
67 sum2 = _mm256_add_epi16(x1, x3); in convolve8_16_avx2()
69 sum1 = _mm256_add_epi16(sum1, k_64); in convolve8_16_avx2()
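
The k_64 addition in convolve8_16_avx2() is the usual bias-then-shift rounding: add half of the divisor before the arithmetic right shift so the filter output rounds to nearest instead of truncating. A minimal sketch of that idiom, assuming the common FILTER_BITS == 7 case (64 == 1 << 6); names are illustrative, not the header's own:

#include <immintrin.h>

#define FILTER_BITS 7  // assumed; gives the bias constant 64 seen above

// Round a 16-bit fixed-point filter sum to nearest and scale it down.
static inline __m256i round_shift_epi16(__m256i sum) {
  const __m256i half = _mm256_set1_epi16(1 << (FILTER_BITS - 1));  // 64
  sum = _mm256_add_epi16(sum, half);
  return _mm256_srai_epi16(sum, FILTER_BITS);
}
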
variance_avx2.c
57  *sum = _mm256_add_epi16(*sum, _mm256_add_epi16(diff0, diff1)); in variance_kernel_avx2()
181 exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8); \
182 exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8); \
196 *sum_reg = _mm256_add_epi16(*sum_reg, exp_src_lo); \
198 *sum_reg = _mm256_add_epi16(*sum_reg, exp_src_hi); \
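
variance_kernel_avx2() keeps a running 16-bit vector sum of pixel differences. A common way to finish such a sum is to multiply-add it against a vector of ones, which pairwise-adds adjacent 16-bit lanes into 32-bit lanes before the final horizontal reduction. The sketch below shows that widening step; it is an assumption about the follow-up reduction, not code lifted from variance_avx2.c:

#include <immintrin.h>

// Widen sixteen int16 partial sums into eight int32 lanes by multiply-adding
// against 1; a typical first step of a horizontal reduction.
static inline __m256i widen_sum_epi16_to_epi32(__m256i sum_16bit) {
  return _mm256_madd_epi16(sum_16bit, _mm256_set1_epi16(1));
}
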
/external/libaom/libaom/aom_dsp/x86/
avg_intrin_avx2.c
29  __m256i b0 = _mm256_add_epi16(a0, a1); in hadamard_col8x2_avx2()
31 __m256i b2 = _mm256_add_epi16(a2, a3); in hadamard_col8x2_avx2()
33 __m256i b4 = _mm256_add_epi16(a4, a5); in hadamard_col8x2_avx2()
35 __m256i b6 = _mm256_add_epi16(a6, a7); in hadamard_col8x2_avx2()
38 a0 = _mm256_add_epi16(b0, b2); in hadamard_col8x2_avx2()
39 a1 = _mm256_add_epi16(b1, b3); in hadamard_col8x2_avx2()
42 a4 = _mm256_add_epi16(b4, b6); in hadamard_col8x2_avx2()
43 a5 = _mm256_add_epi16(b5, b7); in hadamard_col8x2_avx2()
48 b0 = _mm256_add_epi16(a0, a4); in hadamard_col8x2_avx2()
49 b7 = _mm256_add_epi16(a1, a5); in hadamard_col8x2_avx2()
[all …]
convolve_avx2.h
74  _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), round_shift_h); \
83 res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), round_shift_h); \
164 _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), round_shift_h); \
204 const __m256i res_unsigned = _mm256_add_epi16(res_16b, offset_const); \
232 const __m256i res_unsigned = _mm256_add_epi16(res_16b, offset_const); \
325 const __m256i res = _mm256_add_epi16(_mm256_add_epi16(res_01, res_45), in convolve_lowbd()
326 _mm256_add_epi16(res_23, res_67)); in convolve_lowbd()
337 const __m256i res = _mm256_add_epi16(res_45, res_23); in convolve_lowbd_4tap()
419 const __m256i wt_res = _mm256_add_epi16(*data_ref_0, *res_unsigned); in comp_avg()
431 _mm256_add_epi16(res_signed, *round_const), round_shift); in convolve_rounding()
intrapred_avx2.c
28  return _mm256_add_epi16(y0, u0); in dc_sum_64()
38 return _mm256_add_epi16(y, u); in dc_sum_32()
321 sum_left = _mm256_add_epi16(sum_left, sum_above); in aom_dc_predictor_32x32_avx2()
323 sum_left = _mm256_add_epi16(sum_left, thirtytwo); in aom_dc_predictor_32x32_avx2()
337 sum = _mm256_add_epi16(sum, sixteen); in aom_dc_top_predictor_32x32_avx2()
351 sum = _mm256_add_epi16(sum, sixteen); in aom_dc_left_predictor_32x32_avx2()
439 sum_left = _mm256_add_epi16(sum_left, sum_above); in aom_dc_predictor_32x64_avx2()
451 sum_left = _mm256_add_epi16(sum_left, sum_above); in aom_dc_predictor_64x64_avx2()
463 sum_left = _mm256_add_epi16(sum_left, sum_above); in aom_dc_predictor_64x32_avx2()
475 sum_left = _mm256_add_epi16(sum_left, sum_above); in aom_dc_predictor_64x16_avx2()
[all …]
sum_squares_avx2.c
129  v_acc_sum = _mm256_add_epi16(v_acc_sum, vsrc0); in aom_var_2d_u8_avx2()
130 v_acc_sum = _mm256_add_epi16(v_acc_sum, vsrc1); in aom_var_2d_u8_avx2()
150 v_acc_sum = _mm256_add_epi16(v_acc_sum, vsrc0); in aom_var_2d_u8_avx2()
151 v_acc_sum = _mm256_add_epi16(v_acc_sum, vsrc1); in aom_var_2d_u8_avx2()
highbd_variance_avx2.c
37  v_sum_d = _mm256_add_epi16(v_sum_d, v_diff); in aom_highbd_calc8x8var_avx2()
67 v_sum_d = _mm256_add_epi16(v_sum_d, v_diff); in aom_highbd_calc16x16var_avx2()
blend_a64_mask_avx2.c
137  const __m256i m0 = _mm256_srli_epi16(_mm256_add_epi16(m0_acbd, two_w), 2); in lowbd_blend_a64_d16_mask_subw1_subh1_w16_avx2()
167 const __m256i m0 = _mm256_srli_epi16(_mm256_add_epi16(m0_acbd, two_w), 2); in lowbd_blend_a64_d16_mask_subw1_subh1_w32_avx2()
168 const __m256i m1 = _mm256_srli_epi16(_mm256_add_epi16(m1_acbd, two_w), 2); in lowbd_blend_a64_d16_mask_subw1_subh1_w32_avx2()
456 const __m256i v_rsl_w = _mm256_add_epi16(v_rvsal_w, v_rvsbl_w); in blend_a64_mask_sx_sy_w16_avx2()
494 const __m256i v_rsl_w = _mm256_add_epi16(v_rvsal_w, v_rvsbl_w); in blend_a64_mask_sx_sy_w32n_avx2()
495 const __m256i v_rsh_w = _mm256_add_epi16(v_rvsah_w, v_rvsbh_w); in blend_a64_mask_sx_sy_w32n_avx2()
1009 _mm256_srli_epi16(_mm256_add_epi16(adjacent, two_w), 2); in highbd_blend_a64_d16_mask_subw1_subh1_w4_avx2()
1141 _mm256_srli_epi16(_mm256_add_epi16(m0123, two_w), 2); in highbd_blend_a64_d16_mask_subw1_subh1_w8_avx2()
1149 _mm256_srli_epi16(_mm256_add_epi16(m4567, two_w), 2); in highbd_blend_a64_d16_mask_subw1_subh1_w8_avx2()
1281 const __m256i mask0a = _mm256_srli_epi16(_mm256_add_epi16(m01, two_w), 2); in highbd_blend_a64_d16_mask_subw1_subh1_w16_avx2()
[all …]
variance_impl_avx2.c
45  exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8); \
46 exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8); \
78 sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo); \
80 sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi); \
sad_highbd_avx2.c
49  s[0] = _mm256_add_epi16(s[0], s[1]); in highbd_sad16x4_core_avx2()
50 s[0] = _mm256_add_epi16(s[0], s[2]); in highbd_sad16x4_core_avx2()
51 s[0] = _mm256_add_epi16(s[0], s[3]); in highbd_sad16x4_core_avx2()
variance_avx2.c
44  *sum = _mm256_add_epi16(*sum, _mm256_add_epi16(diff0, diff1)); in variance_kernel_avx2()
adaptive_quantize_avx2.c
79  qcoeff = _mm256_add_epi16(tmp, qcoeff); in calculate_qcoeff_avx2()
/external/tensorflow/tensorflow/lite/experimental/ruy/
pack_avx2.cc
231  _mm256_add_epi16(sums_4x4_16bit_lo,
234 _mm256_add_epi16(sums_4x4_16bit_lo,
237 _mm256_add_epi16(sums_4x4_16bit_lo,
240 _mm256_add_epi16(sums_4x4_16bit_lo,
243 _mm256_add_epi16(sums_4x4_16bit_lo,
246 _mm256_add_epi16(sums_4x4_16bit_lo,
249 _mm256_add_epi16(sums_4x4_16bit_lo,
262 sums_4x4_16bit_hi = _mm256_add_epi16(
265 sums_4x4_16bit_hi = _mm256_add_epi16(
268 sums_4x4_16bit_hi = _mm256_add_epi16(
[all …]
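
The pack_avx2.cc hits above accumulate per-column sums in 16-bit lanes while packing 8-bit source data. A hedged sketch of that accumulation pattern, assuming int8 input sign-extended to 16 bits; the names and the widening step are illustrative, and the real kernel eventually folds these running sums into 32-bit accumulators to avoid overflow:

#include <immintrin.h>
#include <stdint.h>

// Add one row of sixteen int8 values into sixteen int16 running sums.
// Only safe for a bounded number of rows, since int16 lanes can overflow.
static inline __m256i accumulate_row_sums(__m256i sums_16bit,
                                          const int8_t *row /* 16 values */) {
  const __m128i row_8bit = _mm_loadu_si128((const __m128i *)row);
  const __m256i row_16bit = _mm256_cvtepi8_epi16(row_8bit);  // sign-extend
  return _mm256_add_epi16(sums_16bit, row_16bit);
}
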
/external/libaom/libaom/av1/common/x86/
convolve_avx2.c
98  _mm256_add_epi16(res_lo, right_shift_const), right_shift); in av1_convolve_y_sr_avx2()
108 _mm256_add_epi16(res_hi, right_shift_const), right_shift); in av1_convolve_y_sr_avx2()
211 _mm256_add_epi16(res_lo, right_shift_const), right_shift); in av1_convolve_y_sr_avx2()
221 _mm256_add_epi16(res_hi, right_shift_const), right_shift); in av1_convolve_y_sr_avx2()
310 res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_0_const), in av1_convolve_x_sr_avx2()
313 res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_const), in av1_convolve_x_sr_avx2()
348 res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_0_const), in av1_convolve_x_sr_avx2()
351 res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_const), in av1_convolve_x_sr_avx2()
383 res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_0_const), in av1_convolve_x_sr_avx2()
386 res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_const), in av1_convolve_x_sr_avx2()
[all …]
jnt_convolve_avx2.c
93  res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const), round_shift); in av1_dist_wtd_convolve_x_avx2()
96 const __m256i res_unsigned = _mm256_add_epi16(res, offset_const); in av1_dist_wtd_convolve_x_avx2()
147 res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const), round_shift); in av1_dist_wtd_convolve_x_avx2()
151 const __m256i res_unsigned = _mm256_add_epi16(res, offset_const); in av1_dist_wtd_convolve_x_avx2()
274 res_lo = _mm256_add_epi16(res_lo, offset_const_1); in av1_dist_wtd_convolve_y_avx2()
292 _mm256_add_epi16(res_lo_round, offset_const_2); in av1_dist_wtd_convolve_y_avx2()
332 res_hi = _mm256_add_epi16(res_hi, offset_const_1); in av1_dist_wtd_convolve_y_avx2()
350 _mm256_add_epi16(res_hi_round, offset_const_2); in av1_dist_wtd_convolve_y_avx2()
453 res_lo = _mm256_add_epi16(res_lo, offset_const_1); in av1_dist_wtd_convolve_y_avx2()
471 _mm256_add_epi16(res_lo_round, offset_const_2); in av1_dist_wtd_convolve_y_avx2()
[all …]
cfl_avx2.c
76  __m256i sum_16x16 = _mm256_add_epi16(top_16x16, bot_16x16); in cfl_luma_subsampling_420_lbd_avx2()
174 __m256i sum = _mm256_add_epi16(top, bot); in cfl_luma_subsampling_420_hbd_avx2()
178 __m256i sum_1 = _mm256_add_epi16(top_1, bot_1); in cfl_luma_subsampling_420_hbd_avx2()
182 hsum = _mm256_add_epi16(hsum, hsum); in cfl_luma_subsampling_420_hbd_avx2()
251 return _mm256_add_epi16(scaled_luma_q0, dc_q0); in predict_unclipped()
423 __m256i l0 = _mm256_add_epi16(_mm256_loadu_si256(src), in subtract_average_avx2()
429 _mm256_add_epi16(_mm256_loadu_si256(src + 1), in subtract_average_avx2()
wiener_convolve_avx2.c
136  _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), round_shift_h); in av1_wiener_convolve_add_src_avx2()
143 res = _mm256_add_epi16(res, data_0); in av1_wiener_convolve_add_src_avx2()
144 res = _mm256_add_epi16(res, round_const_horz); in av1_wiener_convolve_add_src_avx2()
warp_plane_avx2.c
92  const __m256i res_even = _mm256_add_epi16(res_02, res_46); in filter_src_pixels_avx2()
93 const __m256i res_odd = _mm256_add_epi16(res_13, res_57); in filter_src_pixels_avx2()
95 _mm256_add_epi16(_mm256_add_epi16(res_even, res_odd), *round_const); in filter_src_pixels_avx2()
694 res_lo_16 = _mm256_srai_epi16(_mm256_add_epi16(p_16, temp_lo_16), 1); in store_vertical_filter_output_avx2()
696 res_lo_16 = _mm256_add_epi16(res_lo_16, *res_sub_const); in store_vertical_filter_output_avx2()
698 _mm256_add_epi16(res_lo_16, *round_bits_const), round_bits); in store_vertical_filter_output_avx2()
737 res_hi_16 = _mm256_srai_epi16(_mm256_add_epi16(p4_16, temp_hi_16), 1); in store_vertical_filter_output_avx2()
739 res_hi_16 = _mm256_add_epi16(res_hi_16, *res_sub_const); in store_vertical_filter_output_avx2()
741 _mm256_add_epi16(res_hi_16, *round_bits_const), round_bits); in store_vertical_filter_output_avx2()
1056 _mm256_add_epi16(_mm256_sub_epi16(dst_1_16, ref_1_16), dup_255); in av1_calc_frame_error_avx2()
[all …]
reconinter_avx2.c
26  _mm256_add_epi16(mask_base, _mm256_srli_epi16(diff, 4))); in calc_mask_avx2()
544 _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)), in av1_build_compound_diffwtd_mask_highbd_avx2()
564 _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)), in av1_build_compound_diffwtd_mask_highbd_avx2()
586 _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)), in av1_build_compound_diffwtd_mask_highbd_avx2()
606 _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)), in av1_build_compound_diffwtd_mask_highbd_avx2()
convolve_2d_avx2.c
89  res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), in av1_convolve_2d_sr_avx2()
99 _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), round_shift_h); in av1_convolve_2d_sr_avx2()
/external/libaom/libaom/av1/encoder/x86/
corner_match_avx2.c
57  sum_vec = _mm256_add_epi16(sum_vec, _mm256_sad_epu8(v, zero)); in av1_compute_cross_correlation_avx2()
pickrst_avx2.c
539  _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0); in av1_lowbd_pixel_proj_error_avx2()
584 _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0); in av1_lowbd_pixel_proj_error_avx2()
934 const __m256i e0 = _mm256_sub_epi16(_mm256_add_epi16(vr, d0), s0); in av1_highbd_pixel_proj_error_avx2()
1011 const __m256i e0 = _mm256_sub_epi16(_mm256_add_epi16(vr, d0), s0); in av1_highbd_pixel_proj_error_avx2()
