
Searched refs:_mm256_maddubs_epi16 (Results 1 – 23 of 23) sorted by relevance
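Note: _mm256_maddubs_epi16 is the AVX2 vpmaddubsw intrinsic. It multiplies each unsigned 8-bit lane of its first argument by the corresponding signed 8-bit lane of its second argument and adds adjacent pairs of products, saturating each sum into a signed 16-bit lane (16 results per 256-bit vector). The standalone sketch below is not taken from any of the files listed; its data and variable names are illustrative only.

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* 32 unsigned pixels and 32 signed taps, laid out in adjacent pairs. */
  uint8_t pix[32];
  int8_t taps[32];
  for (int i = 0; i < 32; ++i) {
    pix[i] = (uint8_t)(i * 8);   /* sample data */
    taps[i] = (i & 1) ? -1 : 3;  /* pair pattern (3, -1) */
  }

  const __m256i a = _mm256_loadu_si256((const __m256i *)pix);
  const __m256i b = _mm256_loadu_si256((const __m256i *)taps);
  /* out[i] = saturate16(pix[2i] * 3 + pix[2i+1] * (-1)) */
  const __m256i sums = _mm256_maddubs_epi16(a, b);

  int16_t out[16];
  _mm256_storeu_si256((__m256i *)out, sums);
  for (int i = 0; i < 16; ++i) printf("%d ", out[i]);
  printf("\n");
  return 0;
}

This pairwise multiply-add is what the convolution and blending kernels listed below exploit: source bytes are interleaved (e.g. with _mm256_unpacklo_epi8) so that each adjacent pair is weighted by a pair of filter taps or blend weights in a single instruction.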

/external/libaom/aom_dsp/x86/
aom_subpixel_8t_intrin_avx2.c
109 srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters); in aom_filter_block1d4_h4_avx2()
197 srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters); in aom_filter_block1d4_h8_avx2()
203 srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters); in aom_filter_block1d4_h8_avx2()
305 srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); in aom_filter_block1d8_h4_avx2()
306 srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); in aom_filter_block1d8_h4_avx2()
407 srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters); in aom_filter_block1d8_h8_avx2()
408 srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); in aom_filter_block1d8_h8_avx2()
418 srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); in aom_filter_block1d8_h8_avx2()
419 srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); in aom_filter_block1d8_h8_avx2()
531 srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); in aom_filter_block1d16_h4_avx2()
[all …]
convolve_avx2.h
726 const __m256i res_01 = _mm256_maddubs_epi16(s[0], coeffs[0]); in convolve_lowbd()
727 const __m256i res_23 = _mm256_maddubs_epi16(s[1], coeffs[1]); in convolve_lowbd()
728 const __m256i res_45 = _mm256_maddubs_epi16(s[2], coeffs[2]); in convolve_lowbd()
729 const __m256i res_67 = _mm256_maddubs_epi16(s[3], coeffs[3]); in convolve_lowbd()
740 const __m256i res_01 = _mm256_maddubs_epi16(s[0], coeffs[0]); in convolve_lowbd_6tap()
741 const __m256i res_23 = _mm256_maddubs_epi16(s[1], coeffs[1]); in convolve_lowbd_6tap()
742 const __m256i res_45 = _mm256_maddubs_epi16(s[2], coeffs[2]); in convolve_lowbd_6tap()
753 const __m256i res_23 = _mm256_maddubs_epi16(s[0], coeffs[0]); in convolve_lowbd_4tap()
754 const __m256i res_45 = _mm256_maddubs_epi16(s[1], coeffs[1]); in convolve_lowbd_4tap()
subtract_avx2.c
22 diff0 = _mm256_maddubs_epi16(diff0, set_one_minusone); in subtract32_avx2()
23 diff1 = _mm256_maddubs_epi16(diff1, set_one_minusone); in subtract32_avx2()
blend_a64_mask_avx2.c
136 const __m256i m0_acbd = _mm256_maddubs_epi16(m0_ac, one_b); in lowbd_blend_a64_d16_mask_subw1_subh1_w16_avx2()
165 const __m256i m0_acbd = _mm256_maddubs_epi16(m0_ac, one_b); in lowbd_blend_a64_d16_mask_subw1_subh1_w32_avx2()
166 const __m256i m1_acbd = _mm256_maddubs_epi16(m1_ac, one_b); in lowbd_blend_a64_d16_mask_subw1_subh1_w32_avx2()
191 const __m256i m0_ac = _mm256_maddubs_epi16(m_i00, one_b); in lowbd_blend_a64_d16_mask_subw1_subh0_w16_avx2()
216 const __m256i m0_ac = _mm256_maddubs_epi16(m_i00, one_b); in lowbd_blend_a64_d16_mask_subw1_subh0_w32_avx2()
217 const __m256i m1_ac = _mm256_maddubs_epi16(m_i01, one_b); in lowbd_blend_a64_d16_mask_subw1_subh0_w32_avx2()
414 _mm256_maddubs_epi16(_mm256_unpacklo_epi8(v_s0_s_b, v_s1_s_b), in blend_16_u8_avx2()
431 _mm256_maddubs_epi16(_mm256_unpacklo_epi8(v_s0_b, v_s1_b), in blend_32_u8_avx2()
434 _mm256_maddubs_epi16(_mm256_unpackhi_epi8(v_s0_b, v_s1_b), in blend_32_u8_avx2()
1007 const __m256i adjacent = _mm256_maddubs_epi16(addrows, one_b); in highbd_blend_a64_d16_mask_subw1_subh1_w4_avx2()
[all …]
masked_sad_intrin_avx2.c
44 __m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l); in masked_sad32xh_avx2()
49 __m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r); in masked_sad32xh_avx2()
98 __m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l); in masked_sad16xh_avx2()
103 __m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r); in masked_sad16xh_avx2()
variance_avx2.c
39 const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub); in variance_kernel_avx2()
40 const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub); in variance_kernel_avx2()
263 const __m256i blendAL = _mm256_maddubs_epi16(ssAL, aaAL); in comp_mask_pred_line_avx2()
264 const __m256i blendAH = _mm256_maddubs_epi16(ssAH, aaAH); in comp_mask_pred_line_avx2()
726 const __m256i diff0 = _mm256_maddubs_epi16(u_low_256, set_one_minusone); in calc_sum_sse_wd32_avx2()
727 const __m256i diff1 = _mm256_maddubs_epi16(u_high_256, set_one_minusone); in calc_sum_sse_wd32_avx2()
variance_impl_avx2.c
41 exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \
42 exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter); \
/external/libvpx/vpx_dsp/x86/
vpx_subpixel_8t_intrin_avx2.c
360 tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); in vpx_filter_block1d16_h4_avx2()
361 tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); in vpx_filter_block1d16_h4_avx2()
371 tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); in vpx_filter_block1d16_h4_avx2()
372 tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); in vpx_filter_block1d16_h4_avx2()
397 tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); in vpx_filter_block1d16_h4_avx2()
398 tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); in vpx_filter_block1d16_h4_avx2()
482 res_reg_m1001_lo = _mm256_maddubs_epi16(src_reg_m1001_lo, kernel_reg_23); in vpx_filter_block1d16_v4_avx2()
483 res_reg_1223_lo = _mm256_maddubs_epi16(src_reg_1223_lo, kernel_reg_45); in vpx_filter_block1d16_v4_avx2()
487 res_reg_m1001_hi = _mm256_maddubs_epi16(src_reg_m1001_hi, kernel_reg_23); in vpx_filter_block1d16_v4_avx2()
488 res_reg_1223_hi = _mm256_maddubs_epi16(src_reg_1223_hi, kernel_reg_45); in vpx_filter_block1d16_v4_avx2()
[all …]
convolve_avx2.h
57 const __m256i x0 = _mm256_maddubs_epi16(s[0], f[0]); in convolve8_16_avx2()
58 const __m256i x1 = _mm256_maddubs_epi16(s[1], f[1]); in convolve8_16_avx2()
59 const __m256i x2 = _mm256_maddubs_epi16(s[2], f[2]); in convolve8_16_avx2()
60 const __m256i x3 = _mm256_maddubs_epi16(s[3], f[3]); in convolve8_16_avx2()
variance_avx2.c
51 const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub); in variance_kernel_avx2()
52 const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub); in variance_kernel_avx2()
177 exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \
178 exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter); \
/external/libgav1/src/dsp/x86/
convolve_avx2.cc
47 const __m256i v_madd_21 = _mm256_maddubs_epi16(src[0], taps[0]); // k2k1 in SumOnePassTaps()
48 const __m256i v_madd_43 = _mm256_maddubs_epi16(src[1], taps[1]); // k4k3 in SumOnePassTaps()
49 const __m256i v_madd_65 = _mm256_maddubs_epi16(src[2], taps[2]); // k6k5 in SumOnePassTaps()
54 const __m256i v_madd_10 = _mm256_maddubs_epi16(src[0], taps[0]); // k1k0 in SumOnePassTaps()
55 const __m256i v_madd_32 = _mm256_maddubs_epi16(src[1], taps[1]); // k3k2 in SumOnePassTaps()
56 const __m256i v_madd_54 = _mm256_maddubs_epi16(src[2], taps[2]); // k5k4 in SumOnePassTaps()
57 const __m256i v_madd_76 = _mm256_maddubs_epi16(src[3], taps[3]); // k7k6 in SumOnePassTaps()
63 sum = _mm256_maddubs_epi16(src[0], taps[0]); // k4k3 in SumOnePassTaps()
66 const __m256i v_madd_32 = _mm256_maddubs_epi16(src[0], taps[0]); // k3k2 in SumOnePassTaps()
67 const __m256i v_madd_54 = _mm256_maddubs_epi16(src[1], taps[1]); // k5k4 in SumOnePassTaps()
loop_restoration_avx2.cc
70 madds[0] = _mm256_maddubs_epi16(s01, filter[0]); in WienerHorizontalTap7Kernel()
71 madds[1] = _mm256_maddubs_epi16(s23, filter[1]); in WienerHorizontalTap7Kernel()
72 madds[2] = _mm256_maddubs_epi16(s45, filter[2]); in WienerHorizontalTap7Kernel()
73 madds[3] = _mm256_maddubs_epi16(s67, filter[3]); in WienerHorizontalTap7Kernel()
88 madds[0] = _mm256_maddubs_epi16(s01, filter[0]); in WienerHorizontalTap5Kernel()
89 madds[1] = _mm256_maddubs_epi16(s23, filter[1]); in WienerHorizontalTap5Kernel()
90 madds[2] = _mm256_maddubs_epi16(s45, filter[2]); in WienerHorizontalTap5Kernel()
103 madds[0] = _mm256_maddubs_epi16(s01, filter[0]); in WienerHorizontalTap3Kernel()
104 madds[1] = _mm256_maddubs_epi16(s23, filter[1]); in WienerHorizontalTap3Kernel()
1456 _mm256_maddubs_epi16(ma, _mm256_set1_epi16(one_over_n_quarter)); in CalculateB5()
loop_restoration_10bit_avx2.cc
1381 _mm256_maddubs_epi16(ma, _mm256_set1_epi16(one_over_n_quarter)); in CalculateB5()
/external/libaom/av1/common/x86/
cfl_avx2.c
74 __m256i top_16x16 = _mm256_maddubs_epi16(top, twos); in cfl_luma_subsampling_420_lbd_avx2()
75 __m256i bot_16x16 = _mm256_maddubs_epi16(bot, twos); in cfl_luma_subsampling_420_lbd_avx2()
106 __m256i top_16x16 = _mm256_maddubs_epi16(top, fours); in cfl_luma_subsampling_422_lbd_avx2()
warp_plane_avx2.c
87 const __m256i res_02 = _mm256_maddubs_epi16(src_0, coeff[0]); in filter_src_pixels_avx2()
88 const __m256i res_46 = _mm256_maddubs_epi16(src_1, coeff[1]); in filter_src_pixels_avx2()
89 const __m256i res_13 = _mm256_maddubs_epi16(src_2, coeff[2]); in filter_src_pixels_avx2()
90 const __m256i res_57 = _mm256_maddubs_epi16(src_3, coeff[3]); in filter_src_pixels_avx2()
/external/tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/
MatMatProductAVX2.h
1552 P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD0); \
1554 P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH0); \
1561 P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD8); \
1563 P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH8); \
1570 P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD16); \
1572 P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH16); \
1579 P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD24); \
1581 P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH24); \
2094 P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD0); \
2096 P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH0); \
[all …]
/external/rust/crates/libz-sys/src/zlib-ng/arch/x86/
adler32_avx2_tpl.h
84 __m256i v_short_sum2 = _mm256_maddubs_epi16(vbuf, dot2v); // sum 32 uint8s to 16 shorts
/external/skia/src/opts/
SkBitmapProcState_opts.h
94 __m256i lo = _mm256_maddubs_epi16(_mm256_unpacklo_epi8( l, r), in S32_alpha_D32_filter_DX()
96 __m256i hi = _mm256_maddubs_epi16(_mm256_unpackhi_epi8( l, r), in S32_alpha_D32_filter_DX()
/external/libaom/third_party/SVT-AV1/
convolve_avx2.h
466 return _mm256_maddubs_epi16(ss[0], coeffs[0]); in convolve_2tap_avx2()
471 const __m256i res_23 = _mm256_maddubs_epi16(ss[0], coeffs[0]); in convolve_4tap_avx2()
472 const __m256i res_45 = _mm256_maddubs_epi16(ss[1], coeffs[1]); in convolve_4tap_avx2()
478 const __m256i res_01 = _mm256_maddubs_epi16(ss[0], coeffs[0]); in convolve_6tap_avx2()
479 const __m256i res_23 = _mm256_maddubs_epi16(ss[1], coeffs[1]); in convolve_6tap_avx2()
480 const __m256i res_45 = _mm256_maddubs_epi16(ss[2], coeffs[2]); in convolve_6tap_avx2()
487 const __m256i res_01 = _mm256_maddubs_epi16(ss[0], coeffs[0]); in convolve_8tap_avx2()
488 const __m256i res_23 = _mm256_maddubs_epi16(ss[1], coeffs[1]); in convolve_8tap_avx2()
489 const __m256i res_45 = _mm256_maddubs_epi16(ss[2], coeffs[2]); in convolve_8tap_avx2()
490 const __m256i res_67 = _mm256_maddubs_epi16(ss[3], coeffs[3]); in convolve_8tap_avx2()
/external/libaom/aom_dsp/simd/
v256_intrinsics_x86.h
108 return _mm256_maddubs_epi16(a, _mm256_set1_epi8(1)); in v256_padd_u8()
523 return _mm256_maddubs_epi16(a, b); in v256_madd_us8()
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/
sse_tensor_utils.cc
114 __m256i sumprod_16x16 = _mm256_maddubs_epi16(a_16x16, b_16x16); in DotProdInt8x4x8()
/external/clang/test/CodeGen/
avx2-builtins.c
673 return _mm256_maddubs_epi16(a, b); in test_mm256_maddubs_epi16()
/external/clang/lib/Headers/
avx2intrin.h
271 _mm256_maddubs_epi16(__m256i __a, __m256i __b) in _mm256_maddubs_epi16() function