/external/libaom/aom_dsp/x86/
aom_subpixel_8t_intrin_avx2.c
  109  srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);  in aom_filter_block1d4_h4_avx2()
  197  srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);  in aom_filter_block1d4_h8_avx2()
  203  srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);  in aom_filter_block1d4_h8_avx2()
  305  srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);  in aom_filter_block1d8_h4_avx2()
  306  srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);  in aom_filter_block1d8_h4_avx2()
  407  srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);  in aom_filter_block1d8_h8_avx2()
  408  srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);  in aom_filter_block1d8_h8_avx2()
  418  srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);  in aom_filter_block1d8_h8_avx2()
  419  srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);  in aom_filter_block1d8_h8_avx2()
  531  srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);  in aom_filter_block1d16_h4_avx2()
  [all …]
convolve_avx2.h
  726  const __m256i res_01 = _mm256_maddubs_epi16(s[0], coeffs[0]);  in convolve_lowbd()
  727  const __m256i res_23 = _mm256_maddubs_epi16(s[1], coeffs[1]);  in convolve_lowbd()
  728  const __m256i res_45 = _mm256_maddubs_epi16(s[2], coeffs[2]);  in convolve_lowbd()
  729  const __m256i res_67 = _mm256_maddubs_epi16(s[3], coeffs[3]);  in convolve_lowbd()
  740  const __m256i res_01 = _mm256_maddubs_epi16(s[0], coeffs[0]);  in convolve_lowbd_6tap()
  741  const __m256i res_23 = _mm256_maddubs_epi16(s[1], coeffs[1]);  in convolve_lowbd_6tap()
  742  const __m256i res_45 = _mm256_maddubs_epi16(s[2], coeffs[2]);  in convolve_lowbd_6tap()
  753  const __m256i res_23 = _mm256_maddubs_epi16(s[0], coeffs[0]);  in convolve_lowbd_4tap()
  754  const __m256i res_45 = _mm256_maddubs_epi16(s[1], coeffs[1]);  in convolve_lowbd_4tap()
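The convolve_lowbd()/convolve_lowbd_6tap()/convolve_lowbd_4tap() hits above all follow the same pattern: each s[i] register holds interleaved unsigned pixel pairs and each coeffs[i] register holds the matching signed filter-tap pair, so a single vpmaddubsw produces both products of a pair already summed to 16 bits. A minimal sketch of the 8-tap case, with the register layout assumed rather than quoted from the header:

    #include <immintrin.h>

    // ss[i]: unsigned pixel pairs (p[2i], p[2i+1]) per 16-bit lane;
    // coeffs[i]: signed tap pairs (k[2i], k[2i+1]) broadcast across lanes.
    static __m256i convolve8_lowbd_sketch(const __m256i ss[4],
                                          const __m256i coeffs[4]) {
      const __m256i res_01 = _mm256_maddubs_epi16(ss[0], coeffs[0]);  // p0*k0 + p1*k1
      const __m256i res_23 = _mm256_maddubs_epi16(ss[1], coeffs[1]);  // p2*k2 + p3*k3
      const __m256i res_45 = _mm256_maddubs_epi16(ss[2], coeffs[2]);  // p4*k4 + p5*k5
      const __m256i res_67 = _mm256_maddubs_epi16(ss[3], coeffs[3]);  // p6*k6 + p7*k7
      // Pair the small outer-tap sums with the large inner-tap sums so the
      // 16-bit accumulation stays in range for valid filter coefficients.
      return _mm256_add_epi16(_mm256_add_epi16(res_01, res_45),
                              _mm256_add_epi16(res_23, res_67));
    }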
subtract_avx2.c
  22  diff0 = _mm256_maddubs_epi16(diff0, set_one_minusone);  in subtract32_avx2()
  23  diff1 = _mm256_maddubs_epi16(diff1, set_one_minusone);  in subtract32_avx2()
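subtract32_avx2() is the interleave-and-subtract idiom: the two unsigned inputs are interleaved byte by byte and multiplied against the signed byte pattern (+1, -1), so each 16-bit lane comes out as src - pred with no saturation possible (|src - pred| <= 255). A sketch of the idiom with illustrative names; the way set_one_minusone is built in the real file is assumed to be equivalent:

    #include <immintrin.h>

    // Widening byte subtraction: each 16-bit lane becomes src - pred.
    static __m256i sub_u8_widen_lo(__m256i src_u8, __m256i pred_u8) {
      // Bytes (0x01, 0xff) per 16-bit lane, i.e. the signed pair (+1, -1).
      const __m256i one_minusone = _mm256_set1_epi16((short)0xff01);
      const __m256i interleaved = _mm256_unpacklo_epi8(src_u8, pred_u8);
      return _mm256_maddubs_epi16(interleaved, one_minusone);  // src*1 + pred*(-1)
    }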
blend_a64_mask_avx2.c
  136  const __m256i m0_acbd = _mm256_maddubs_epi16(m0_ac, one_b);  in lowbd_blend_a64_d16_mask_subw1_subh1_w16_avx2()
  165  const __m256i m0_acbd = _mm256_maddubs_epi16(m0_ac, one_b);  in lowbd_blend_a64_d16_mask_subw1_subh1_w32_avx2()
  166  const __m256i m1_acbd = _mm256_maddubs_epi16(m1_ac, one_b);  in lowbd_blend_a64_d16_mask_subw1_subh1_w32_avx2()
  191  const __m256i m0_ac = _mm256_maddubs_epi16(m_i00, one_b);  in lowbd_blend_a64_d16_mask_subw1_subh0_w16_avx2()
  216  const __m256i m0_ac = _mm256_maddubs_epi16(m_i00, one_b);  in lowbd_blend_a64_d16_mask_subw1_subh0_w32_avx2()
  217  const __m256i m1_ac = _mm256_maddubs_epi16(m_i01, one_b);  in lowbd_blend_a64_d16_mask_subw1_subh0_w32_avx2()
  414  _mm256_maddubs_epi16(_mm256_unpacklo_epi8(v_s0_s_b, v_s1_s_b),  in blend_16_u8_avx2()
  431  _mm256_maddubs_epi16(_mm256_unpacklo_epi8(v_s0_b, v_s1_b),  in blend_32_u8_avx2()
  434  _mm256_maddubs_epi16(_mm256_unpackhi_epi8(v_s0_b, v_s1_b),  in blend_32_u8_avx2()
  1007  const __m256i adjacent = _mm256_maddubs_epi16(addrows, one_b);  in highbd_blend_a64_d16_mask_subw1_subh1_w4_avx2()
  [all …]
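blend_16_u8_avx2()/blend_32_u8_avx2() use the same interleave trick for alpha blending: the two source rows are interleaved and multiplied by the per-pixel weight pair (m, 64 - m), since AOM_BLEND_A64 masks sum to 64, giving m*s0 + (64 - m)*s1 per 16-bit lane in one instruction. A hedged sketch with illustrative names (the mask layout is an assumption, not the file's exact code):

    #include <immintrin.h>

    // v_m_b holds interleaved byte weights (m, 64 - m) for each pixel.
    static __m256i blend_a64_lo_sketch(__m256i v_s0_b, __m256i v_s1_b,
                                       __m256i v_m_b) {
      const __m256i v_s_b = _mm256_unpacklo_epi8(v_s0_b, v_s1_b);
      // m*s0 + (64 - m)*s1; at most 64*255 = 16320, well inside the s16 range.
      const __m256i v_sum_w = _mm256_maddubs_epi16(v_s_b, v_m_b);
      // A real implementation would then round and shift right by 6 to divide by 64.
      return v_sum_w;
    }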
masked_sad_intrin_avx2.c
  44  __m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l);  in masked_sad32xh_avx2()
  49  __m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r);  in masked_sad32xh_avx2()
  98  __m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l);  in masked_sad16xh_avx2()
  103  __m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r);  in masked_sad16xh_avx2()
variance_avx2.c
  39  const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub);  in variance_kernel_avx2()
  40  const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub);  in variance_kernel_avx2()
  263  const __m256i blendAL = _mm256_maddubs_epi16(ssAL, aaAL);  in comp_mask_pred_line_avx2()
  264  const __m256i blendAH = _mm256_maddubs_epi16(ssAH, aaAH);  in comp_mask_pred_line_avx2()
  726  const __m256i diff0 = _mm256_maddubs_epi16(u_low_256, set_one_minusone);  in calc_sum_sse_wd32_avx2()
  727  const __m256i diff1 = _mm256_maddubs_epi16(u_high_256, set_one_minusone);  in calc_sum_sse_wd32_avx2()
variance_impl_avx2.c
  41  exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \
  42  exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter); \
/external/libvpx/vpx_dsp/x86/
vpx_subpixel_8t_intrin_avx2.c
  360  tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23);  in vpx_filter_block1d16_h4_avx2()
  361  tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45);  in vpx_filter_block1d16_h4_avx2()
  371  tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23);  in vpx_filter_block1d16_h4_avx2()
  372  tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45);  in vpx_filter_block1d16_h4_avx2()
  397  tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23);  in vpx_filter_block1d16_h4_avx2()
  398  tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45);  in vpx_filter_block1d16_h4_avx2()
  482  res_reg_m1001_lo = _mm256_maddubs_epi16(src_reg_m1001_lo, kernel_reg_23);  in vpx_filter_block1d16_v4_avx2()
  483  res_reg_1223_lo = _mm256_maddubs_epi16(src_reg_1223_lo, kernel_reg_45);  in vpx_filter_block1d16_v4_avx2()
  487  res_reg_m1001_hi = _mm256_maddubs_epi16(src_reg_m1001_hi, kernel_reg_23);  in vpx_filter_block1d16_v4_avx2()
  488  res_reg_1223_hi = _mm256_maddubs_epi16(src_reg_1223_hi, kernel_reg_45);  in vpx_filter_block1d16_v4_avx2()
  [all …]
convolve_avx2.h
  57  const __m256i x0 = _mm256_maddubs_epi16(s[0], f[0]);  in convolve8_16_avx2()
  58  const __m256i x1 = _mm256_maddubs_epi16(s[1], f[1]);  in convolve8_16_avx2()
  59  const __m256i x2 = _mm256_maddubs_epi16(s[2], f[2]);  in convolve8_16_avx2()
  60  const __m256i x3 = _mm256_maddubs_epi16(s[3], f[3]);  in convolve8_16_avx2()
variance_avx2.c
  51  const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub);  in variance_kernel_avx2()
  52  const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub);  in variance_kernel_avx2()
  177  exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \
  178  exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter); \
/external/libgav1/src/dsp/x86/
convolve_avx2.cc
  47  const __m256i v_madd_21 = _mm256_maddubs_epi16(src[0], taps[0]);  // k2k1  in SumOnePassTaps()
  48  const __m256i v_madd_43 = _mm256_maddubs_epi16(src[1], taps[1]);  // k4k3  in SumOnePassTaps()
  49  const __m256i v_madd_65 = _mm256_maddubs_epi16(src[2], taps[2]);  // k6k5  in SumOnePassTaps()
  54  const __m256i v_madd_10 = _mm256_maddubs_epi16(src[0], taps[0]);  // k1k0  in SumOnePassTaps()
  55  const __m256i v_madd_32 = _mm256_maddubs_epi16(src[1], taps[1]);  // k3k2  in SumOnePassTaps()
  56  const __m256i v_madd_54 = _mm256_maddubs_epi16(src[2], taps[2]);  // k5k4  in SumOnePassTaps()
  57  const __m256i v_madd_76 = _mm256_maddubs_epi16(src[3], taps[3]);  // k7k6  in SumOnePassTaps()
  63  sum = _mm256_maddubs_epi16(src[0], taps[0]);  // k4k3  in SumOnePassTaps()
  66  const __m256i v_madd_32 = _mm256_maddubs_epi16(src[0], taps[0]);  // k3k2  in SumOnePassTaps()
  67  const __m256i v_madd_54 = _mm256_maddubs_epi16(src[1], taps[1]);  // k5k4  in SumOnePassTaps()
loop_restoration_avx2.cc
  70  madds[0] = _mm256_maddubs_epi16(s01, filter[0]);  in WienerHorizontalTap7Kernel()
  71  madds[1] = _mm256_maddubs_epi16(s23, filter[1]);  in WienerHorizontalTap7Kernel()
  72  madds[2] = _mm256_maddubs_epi16(s45, filter[2]);  in WienerHorizontalTap7Kernel()
  73  madds[3] = _mm256_maddubs_epi16(s67, filter[3]);  in WienerHorizontalTap7Kernel()
  88  madds[0] = _mm256_maddubs_epi16(s01, filter[0]);  in WienerHorizontalTap5Kernel()
  89  madds[1] = _mm256_maddubs_epi16(s23, filter[1]);  in WienerHorizontalTap5Kernel()
  90  madds[2] = _mm256_maddubs_epi16(s45, filter[2]);  in WienerHorizontalTap5Kernel()
  103  madds[0] = _mm256_maddubs_epi16(s01, filter[0]);  in WienerHorizontalTap3Kernel()
  104  madds[1] = _mm256_maddubs_epi16(s23, filter[1]);  in WienerHorizontalTap3Kernel()
  1456  _mm256_maddubs_epi16(ma, _mm256_set1_epi16(one_over_n_quarter));  in CalculateB5()
loop_restoration_10bit_avx2.cc
  1381  _mm256_maddubs_epi16(ma, _mm256_set1_epi16(one_over_n_quarter));  in CalculateB5()
/external/libaom/av1/common/x86/
cfl_avx2.c
  74  __m256i top_16x16 = _mm256_maddubs_epi16(top, twos);  in cfl_luma_subsampling_420_lbd_avx2()
  75  __m256i bot_16x16 = _mm256_maddubs_epi16(bot, twos);  in cfl_luma_subsampling_420_lbd_avx2()
  106  __m256i top_16x16 = _mm256_maddubs_epi16(top, fours);  in cfl_luma_subsampling_422_lbd_avx2()
warp_plane_avx2.c
  87  const __m256i res_02 = _mm256_maddubs_epi16(src_0, coeff[0]);  in filter_src_pixels_avx2()
  88  const __m256i res_46 = _mm256_maddubs_epi16(src_1, coeff[1]);  in filter_src_pixels_avx2()
  89  const __m256i res_13 = _mm256_maddubs_epi16(src_2, coeff[2]);  in filter_src_pixels_avx2()
  90  const __m256i res_57 = _mm256_maddubs_epi16(src_3, coeff[3]);  in filter_src_pixels_avx2()
/external/tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/
MatMatProductAVX2.h
  1552  P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD0); \
  1554  P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH0); \
  1561  P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD8); \
  1563  P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH8); \
  1570  P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD16); \
  1572  P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH16); \
  1579  P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD24); \
  1581  P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH24); \
  2094  P_16_A = _mm256_maddubs_epi16(R_INPUT_A, L_AD0); \
  2096  P_16_B = _mm256_maddubs_epi16(R_INPUT_B, L_EH0); \
  [all …]
/external/rust/crates/libz-sys/src/zlib-ng/arch/x86/
adler32_avx2_tpl.h
  84  __m256i v_short_sum2 = _mm256_maddubs_epi16(vbuf, dot2v);  // sum 32 uint8s to 16 shorts
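This hit uses maddubs as a weighted byte reduction: Adler-32's sum2 term needs every byte scaled by its distance from the end of the block, so the 32 input bytes are multiplied by a descending weight vector and pair-summed into 16 shorts in one step. A sketch under the assumption of a simple 32..1 weight layout; the zlib-ng template parameterizes the actual layout:

    #include <immintrin.h>

    static __m256i weighted_byte_sums(__m256i vbuf) {
      const __m256i dot2v = _mm256_setr_epi8(
          32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
          16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
      // Each 16-bit lane becomes w[2i]*buf[2i] + w[2i+1]*buf[2i+1];
      // the maximum 63*255 = 16065 stays well below s16 saturation.
      return _mm256_maddubs_epi16(vbuf, dot2v);
    }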
/external/skia/src/opts/
SkBitmapProcState_opts.h
  94  __m256i lo = _mm256_maddubs_epi16(_mm256_unpacklo_epi8( l, r),  in S32_alpha_D32_filter_DX()
  96  __m256i hi = _mm256_maddubs_epi16(_mm256_unpackhi_epi8( l, r),  in S32_alpha_D32_filter_DX()
/external/libaom/third_party/SVT-AV1/
convolve_avx2.h
  466  return _mm256_maddubs_epi16(ss[0], coeffs[0]);  in convolve_2tap_avx2()
  471  const __m256i res_23 = _mm256_maddubs_epi16(ss[0], coeffs[0]);  in convolve_4tap_avx2()
  472  const __m256i res_45 = _mm256_maddubs_epi16(ss[1], coeffs[1]);  in convolve_4tap_avx2()
  478  const __m256i res_01 = _mm256_maddubs_epi16(ss[0], coeffs[0]);  in convolve_6tap_avx2()
  479  const __m256i res_23 = _mm256_maddubs_epi16(ss[1], coeffs[1]);  in convolve_6tap_avx2()
  480  const __m256i res_45 = _mm256_maddubs_epi16(ss[2], coeffs[2]);  in convolve_6tap_avx2()
  487  const __m256i res_01 = _mm256_maddubs_epi16(ss[0], coeffs[0]);  in convolve_8tap_avx2()
  488  const __m256i res_23 = _mm256_maddubs_epi16(ss[1], coeffs[1]);  in convolve_8tap_avx2()
  489  const __m256i res_45 = _mm256_maddubs_epi16(ss[2], coeffs[2]);  in convolve_8tap_avx2()
  490  const __m256i res_67 = _mm256_maddubs_epi16(ss[3], coeffs[3]);  in convolve_8tap_avx2()
/external/libaom/aom_dsp/simd/
v256_intrinsics_x86.h
  108  return _mm256_maddubs_epi16(a, _mm256_set1_epi8(1));  in v256_padd_u8()
  523  return _mm256_maddubs_epi16(a, b);  in v256_madd_us8()
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/
sse_tensor_utils.cc
  114  __m256i sumprod_16x16 = _mm256_maddubs_epi16(a_16x16, b_16x16);  in DotProdInt8x4x8()
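DotProdInt8x4x8() points at the standard dot-product widening idiom: maddubs pair-sums u8 x s8 products into s16 lanes, and a follow-up pmaddwd against ones widens those pairs into 32-bit accumulators before further accumulation could saturate. A sketch of that step; the helper name is illustrative, not the TF Lite symbol:

    #include <immintrin.h>

    // Per 32-bit lane: a0*b0 + a1*b1 + a2*b2 + a3*b3, a unsigned and b signed.
    // Note the maddubs step itself can saturate if both inputs use their full
    // ranges; quantized kernels keep magnitudes small enough to avoid that.
    static __m256i dot_u8s8_to_s32(__m256i a_u8, __m256i b_s8) {
      const __m256i pair_s16 = _mm256_maddubs_epi16(a_u8, b_s8);
      return _mm256_madd_epi16(pair_s16, _mm256_set1_epi16(1));
    }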
/external/clang/test/CodeGen/
avx2-builtins.c
  673  return _mm256_maddubs_epi16(a, b);  in test_mm256_maddubs_epi16()
/external/clang/lib/Headers/
avx2intrin.h
  271  _mm256_maddubs_epi16(__m256i __a, __m256i __b)  in _mm256_maddubs_epi16()  (function definition)
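The header entry is the intrinsic's definition (it maps to VPMADDUBSW). For reference, one 16-bit output lane behaves like this scalar model: the first operand's bytes are read as unsigned, the second operand's as signed, and the two adjacent products are summed with signed saturation:

    #include <stdint.h>

    static int16_t maddubs_lane(uint8_t a0, uint8_t a1, int8_t b0, int8_t b1) {
      int32_t sum = (int32_t)a0 * b0 + (int32_t)a1 * b1;
      if (sum > INT16_MAX) sum = INT16_MAX;  // signed saturation
      if (sum < INT16_MIN) sum = INT16_MIN;
      return (int16_t)sum;
    }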