/external/libgav1/libgav1/src/dsp/x86/ |
D | intra_edge_sse4.cc | 140 __m128i sum_hi = _mm_add_epi16(source2_hi, _mm_srli_si128(source2_hi, 8)); in ComputeKernel3Store8() local 142 sum_hi = _mm_add_epi16(sum_hi, source4_hi); in ComputeKernel3Store8() 144 sum_hi = _mm_add_epi16(sum_hi, _mm_srli_si128(source4_hi, 2)); in ComputeKernel3Store8() 146 sum_hi = _mm_add_epi16(sum_hi, _mm_srli_si128(source4_hi, 4)); in ComputeKernel3Store8() 150 sum = _mm_alignr_epi8(sum_hi, _mm_slli_si128(sum, 8), 8); in ComputeKernel3Store8() 233 __m128i sum_hi = in IntraEdgeUpsampler_SSE4_1() local 235 sum_hi = _mm_add_epi16(sum_hi, _mm_alignr_epi8(src9_hi_extra, src9_hi, 4)); in IntraEdgeUpsampler_SSE4_1() 236 sum_hi = _mm_sub_epi16(sum_hi, _mm_alignr_epi8(src_hi_extra, src_hi, 6)); in IntraEdgeUpsampler_SSE4_1() 237 sum_hi = RightShiftWithRounding_S16(sum_hi, 4); in IntraEdgeUpsampler_SSE4_1() 239 _mm_unpacklo_epi8(_mm_packus_epi16(sum_hi, sum_hi), LoadLo8(temp + 10)); in IntraEdgeUpsampler_SSE4_1()
|
D | convolve_sse4.cc | 1003 __m128i sum_hi = _mm_madd_epi16(src_hi_01, taps[0]); in Sum2DVerticalTaps() local 1008 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_23, taps[1])); in Sum2DVerticalTaps() 1014 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_45, taps[2])); in Sum2DVerticalTaps() 1020 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_67, taps[3])); in Sum2DVerticalTaps() 1025 RightShiftWithRounding_S32(sum_hi, in Sum2DVerticalTaps() 1030 RightShiftWithRounding_S32(sum_hi, kInterRoundBitsVertical - 1)); in Sum2DVerticalTaps() 1041 __m128i sum_hi = _mm_madd_epi16(src_hi_01, taps_hi[0]); in Sum2DVerticalTaps4x2() local 1046 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_23, taps_hi[1])); in Sum2DVerticalTaps4x2() 1052 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_45, taps_hi[2])); in Sum2DVerticalTaps4x2() 1058 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_67, taps_hi[3])); in Sum2DVerticalTaps4x2() [all …]
|
D | convolve_sse4.inc | 165 __m128i sum_hi = _mm_madd_epi16(_mm_unpackhi_epi16(src[0], src[1]), taps[0]); 172 sum_hi = _mm_add_epi32(sum_hi, madd_hi); 177 sum_hi = _mm_add_epi32(sum_hi, madd_hi); 182 sum_hi = _mm_add_epi32(sum_hi, madd_hi); 190 RightShiftWithRounding_S32(sum_hi, 196 RightShiftWithRounding_S32(sum_hi, kInterRoundBitsVertical - 1));
|
D | convolve_avx2.cc | 430 __m256i sum_hi = in SimpleSum2DVerticalTaps() local 438 sum_hi = _mm256_add_epi32(sum_hi, madd_hi); in SimpleSum2DVerticalTaps() 445 sum_hi = _mm256_add_epi32(sum_hi, madd_hi); in SimpleSum2DVerticalTaps() 452 sum_hi = _mm256_add_epi32(sum_hi, madd_hi); in SimpleSum2DVerticalTaps() 460 RightShiftWithRounding_S32(sum_hi, in SimpleSum2DVerticalTaps() 466 RightShiftWithRounding_S32(sum_hi, kInterRoundBitsVertical - 1)); in SimpleSum2DVerticalTaps()
|
D | loop_restoration_10bit_avx2.cc | 1317 const __m128i sum_hi = _mm_unpackhi_epi16(b, _mm_setzero_si128()); in CalculateMa() local 1319 const __m128i z1 = CalculateMa<n>(sum_hi, VrshrU32(sum_sq[1], 4), scale); in CalculateMa() 1348 const __m256i sum_hi = _mm256_unpackhi_epi16(b, _mm256_setzero_si256()); in CalculateMa() local 1350 const __m256i z1 = CalculateMa<n>(sum_hi, VrshrU32(sum_sq[1], 4), scale); in CalculateMa()
|
D | loop_restoration_avx2.cc | 1392 const __m128i sum_hi = _mm_unpackhi_epi16(sum, _mm_setzero_si128()); in CalculateMa() local 1394 const __m128i z1 = CalculateMa<n>(sum_hi, sum_sq[1], scale); in CalculateMa() 1422 const __m256i sum_hi = _mm256_unpackhi_epi16(sum, _mm256_setzero_si256()); in CalculateMa() local 1424 const __m256i z1 = CalculateMa<n>(sum_hi, sum_sq[1], scale); in CalculateMa()
|
D | loop_restoration_10bit_sse4.cc | 1018 const __m128i sum_hi = _mm_unpackhi_epi16(b, _mm_setzero_si128()); in CalculateMa() local 1020 const __m128i z1 = CalculateMa<n>(sum_hi, VrshrU32(sum_sq[1], 4), scale); in CalculateMa()
|
D | loop_restoration_sse4.cc | 1159 const __m128i sum_hi = _mm_unpackhi_epi16(sum, _mm_setzero_si128()); in CalculateMa() local 1161 const __m128i z1 = CalculateMa<n>(sum_hi, sum_sq[1], scale); in CalculateMa()
|
/external/libgav1/libgav1/src/dsp/arm/ |
D | intra_edge_neon.cc | 81 uint16x8_t sum_hi = vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_2)); in IntraEdgeFilter_NEON() local 82 sum_hi = vmulq_n_u16(sum_hi, kKernelsNEON[kernel_index][0]); in IntraEdgeFilter_NEON() 83 sum_hi = vmlal_u8(sum_hi, vget_high_u8(src_1), krn1); in IntraEdgeFilter_NEON() 86 vcombine_u8(vrshrn_n_u16(sum_lo, 4), vrshrn_n_u16(sum_hi, 4)); in IntraEdgeFilter_NEON() 105 uint16x8_t sum_hi = vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_2)); in IntraEdgeFilter_NEON() local 106 sum_hi = vmulq_n_u16(sum_hi, kKernelsNEON[kernel_index][0]); in IntraEdgeFilter_NEON() 107 sum_hi = vmlal_u8(sum_hi, vget_high_u8(src_1), krn1); in IntraEdgeFilter_NEON() 110 vcombine_u8(vrshrn_n_u16(sum_lo, 4), vrshrn_n_u16(sum_hi, 4)); in IntraEdgeFilter_NEON() 164 uint16x8_t sum_hi = in IntraEdgeFilter_NEON() local 169 sum_hi = vaddq_u16(sum_hi, vshlq_n_u16(sum_123_hi, 2)); in IntraEdgeFilter_NEON() [all …]
|
D | convolve_neon.cc | 425 int32x4_t sum_lo, sum_hi; in SimpleSum2DVerticalTaps() local 428 sum_hi = vmull_lane_s16(vget_high_s16(src[0]), taps_lo, 0); in SimpleSum2DVerticalTaps() 430 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[1]), taps_lo, 1); in SimpleSum2DVerticalTaps() 432 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[2]), taps_lo, 2); in SimpleSum2DVerticalTaps() 434 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[3]), taps_lo, 3); in SimpleSum2DVerticalTaps() 437 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[4]), taps_hi, 0); in SimpleSum2DVerticalTaps() 439 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[5]), taps_hi, 1); in SimpleSum2DVerticalTaps() 441 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[6]), taps_hi, 2); in SimpleSum2DVerticalTaps() 443 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[7]), taps_hi, 3); in SimpleSum2DVerticalTaps() 446 sum_hi = vmull_lane_s16(vget_high_s16(src[0]), taps_lo, 1); in SimpleSum2DVerticalTaps() [all …]
|
/external/libvpx/libvpx/vp8/common/x86/ |
D | bilinear_filter_sse2.c | 57 const __m128i sum_hi = _mm_add_epi16(a_hi_filtered, b_hi_filtered); in horizontal_16x16() local 60 const __m128i compensated_hi = _mm_add_epi16(sum_hi, round_factor); in horizontal_16x16() 110 const __m128i sum_hi = in vertical_16x16() local 114 const __m128i compensated_hi = _mm_add_epi16(sum_hi, round_factor); in vertical_16x16()
|
/external/libvpx/libvpx/vp9/common/arm/neon/ |
D | vp9_highbd_iht16x16_add_neon.c | 123 const int64x2x2_t sum_hi = vaddq_s64_dual(in0[1], in1[1]); in highbd_add_dct_const_round_shift_low_8() local 128 out_hi.val[0] = vrshrn_n_s64(sum_hi.val[0], DCT_CONST_BITS); in highbd_add_dct_const_round_shift_low_8() 129 out_hi.val[1] = vrshrn_n_s64(sum_hi.val[1], DCT_CONST_BITS); in highbd_add_dct_const_round_shift_low_8()
|
D | vp9_highbd_iht8x8_add_neon.c | 73 const int64x2_t sum_hi = vaddq_s64(in0[1], in1[1]); in highbd_add_dct_const_round_shift_low_8() local 75 const int32x2_t out_hi = vrshrn_n_s64(sum_hi, DCT_CONST_BITS); in highbd_add_dct_const_round_shift_low_8()
|
/external/libaom/libaom/aom_dsp/x86/ |
D | variance_avx2.c | 87 const __m256i sum_hi = in sum_to_32bit_avx2() local 89 return _mm256_add_epi32(sum_lo, sum_hi); in sum_to_32bit_avx2()
|
D | variance_sse2.c | 65 const __m128i sum_hi = _mm_srai_epi32(_mm_unpackhi_epi16(sum, sum), 16); in sum_to_32bit_sse2() local 66 return _mm_add_epi32(sum_lo, sum_hi); in sum_to_32bit_sse2()
|
D | highbd_intrapred_sse2.c | 427 const __m128i sum_hi = dc_sum_8(ref + 8); in dc_sum_16() local 428 return _mm_add_epi16(sum_lo, sum_hi); in dc_sum_16()
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | highbd_intrapred_intrin_sse2.c | 271 const __m128i sum_hi = dc_sum_8(ref + 8); in dc_sum_16() local 272 return _mm_add_epi16(sum_lo, sum_hi); in dc_sum_16()
|
D | variance_sse2.c | 93 const __m128i sum_hi = _mm_srai_epi32(_mm_unpackhi_epi16(sum, sum), 16); in sum_to_32bit_sse2() local 94 return _mm_add_epi32(sum_lo, sum_hi); in sum_to_32bit_sse2()
|
D | variance_avx2.c | 96 const __m256i sum_hi = in sum_to_32bit_avx2() local 98 return _mm256_add_epi32(sum_lo, sum_hi); in sum_to_32bit_avx2()
|
/external/libvpx/libvpx/vp9/encoder/x86/ |
D | highbd_temporal_filter_sse4.c | 81 const __m128i sum_hi = _mm_unpackhi_epi32(*sum, zero); in highbd_average_4() local 87 const __m128i mul_hi = _mm_mul_epu32(sum_hi, const_hi); in highbd_average_4()
|
/external/libaom/libaom/av1/encoder/x86/ |
D | temporal_filter_sse4.c | 1113 const __m128i sum_hi = _mm_unpackhi_epi32(*sum, zero); in highbd_average_4() local 1119 const __m128i mul_hi = _mm_mul_epu32(sum_hi, const_hi); in highbd_average_4()
|