Home
last modified time | relevance | path

Searched refs:sum_hi (Results 1 – 21 of 21) sorted by relevance

/external/libgav1/libgav1/src/dsp/x86/
Dintra_edge_sse4.cc140 __m128i sum_hi = _mm_add_epi16(source2_hi, _mm_srli_si128(source2_hi, 8)); in ComputeKernel3Store8() local
142 sum_hi = _mm_add_epi16(sum_hi, source4_hi); in ComputeKernel3Store8()
144 sum_hi = _mm_add_epi16(sum_hi, _mm_srli_si128(source4_hi, 2)); in ComputeKernel3Store8()
146 sum_hi = _mm_add_epi16(sum_hi, _mm_srli_si128(source4_hi, 4)); in ComputeKernel3Store8()
150 sum = _mm_alignr_epi8(sum_hi, _mm_slli_si128(sum, 8), 8); in ComputeKernel3Store8()
233 __m128i sum_hi = in IntraEdgeUpsampler_SSE4_1() local
235 sum_hi = _mm_add_epi16(sum_hi, _mm_alignr_epi8(src9_hi_extra, src9_hi, 4)); in IntraEdgeUpsampler_SSE4_1()
236 sum_hi = _mm_sub_epi16(sum_hi, _mm_alignr_epi8(src_hi_extra, src_hi, 6)); in IntraEdgeUpsampler_SSE4_1()
237 sum_hi = RightShiftWithRounding_S16(sum_hi, 4); in IntraEdgeUpsampler_SSE4_1()
239 _mm_unpacklo_epi8(_mm_packus_epi16(sum_hi, sum_hi), LoadLo8(temp + 10)); in IntraEdgeUpsampler_SSE4_1()
Dconvolve_sse4.cc1003 __m128i sum_hi = _mm_madd_epi16(src_hi_01, taps[0]); in Sum2DVerticalTaps() local
1008 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_23, taps[1])); in Sum2DVerticalTaps()
1014 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_45, taps[2])); in Sum2DVerticalTaps()
1020 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_67, taps[3])); in Sum2DVerticalTaps()
1025 RightShiftWithRounding_S32(sum_hi, in Sum2DVerticalTaps()
1030 RightShiftWithRounding_S32(sum_hi, kInterRoundBitsVertical - 1)); in Sum2DVerticalTaps()
1041 __m128i sum_hi = _mm_madd_epi16(src_hi_01, taps_hi[0]); in Sum2DVerticalTaps4x2() local
1046 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_23, taps_hi[1])); in Sum2DVerticalTaps4x2()
1052 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_45, taps_hi[2])); in Sum2DVerticalTaps4x2()
1058 sum_hi = _mm_add_epi32(sum_hi, _mm_madd_epi16(src_hi_67, taps_hi[3])); in Sum2DVerticalTaps4x2()
[all …]
Dconvolve_sse4.inc165 __m128i sum_hi = _mm_madd_epi16(_mm_unpackhi_epi16(src[0], src[1]), taps[0]);
172 sum_hi = _mm_add_epi32(sum_hi, madd_hi);
177 sum_hi = _mm_add_epi32(sum_hi, madd_hi);
182 sum_hi = _mm_add_epi32(sum_hi, madd_hi);
190 RightShiftWithRounding_S32(sum_hi,
196 RightShiftWithRounding_S32(sum_hi, kInterRoundBitsVertical - 1));
Dconvolve_avx2.cc430 __m256i sum_hi = in SimpleSum2DVerticalTaps() local
438 sum_hi = _mm256_add_epi32(sum_hi, madd_hi); in SimpleSum2DVerticalTaps()
445 sum_hi = _mm256_add_epi32(sum_hi, madd_hi); in SimpleSum2DVerticalTaps()
452 sum_hi = _mm256_add_epi32(sum_hi, madd_hi); in SimpleSum2DVerticalTaps()
460 RightShiftWithRounding_S32(sum_hi, in SimpleSum2DVerticalTaps()
466 RightShiftWithRounding_S32(sum_hi, kInterRoundBitsVertical - 1)); in SimpleSum2DVerticalTaps()
Dloop_restoration_10bit_avx2.cc1317 const __m128i sum_hi = _mm_unpackhi_epi16(b, _mm_setzero_si128()); in CalculateMa() local
1319 const __m128i z1 = CalculateMa<n>(sum_hi, VrshrU32(sum_sq[1], 4), scale); in CalculateMa()
1348 const __m256i sum_hi = _mm256_unpackhi_epi16(b, _mm256_setzero_si256()); in CalculateMa() local
1350 const __m256i z1 = CalculateMa<n>(sum_hi, VrshrU32(sum_sq[1], 4), scale); in CalculateMa()
Dloop_restoration_avx2.cc1392 const __m128i sum_hi = _mm_unpackhi_epi16(sum, _mm_setzero_si128()); in CalculateMa() local
1394 const __m128i z1 = CalculateMa<n>(sum_hi, sum_sq[1], scale); in CalculateMa()
1422 const __m256i sum_hi = _mm256_unpackhi_epi16(sum, _mm256_setzero_si256()); in CalculateMa() local
1424 const __m256i z1 = CalculateMa<n>(sum_hi, sum_sq[1], scale); in CalculateMa()
Dloop_restoration_10bit_sse4.cc1018 const __m128i sum_hi = _mm_unpackhi_epi16(b, _mm_setzero_si128()); in CalculateMa() local
1020 const __m128i z1 = CalculateMa<n>(sum_hi, VrshrU32(sum_sq[1], 4), scale); in CalculateMa()
Dloop_restoration_sse4.cc1159 const __m128i sum_hi = _mm_unpackhi_epi16(sum, _mm_setzero_si128()); in CalculateMa() local
1161 const __m128i z1 = CalculateMa<n>(sum_hi, sum_sq[1], scale); in CalculateMa()
/external/libgav1/libgav1/src/dsp/arm/
Dintra_edge_neon.cc81 uint16x8_t sum_hi = vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_2)); in IntraEdgeFilter_NEON() local
82 sum_hi = vmulq_n_u16(sum_hi, kKernelsNEON[kernel_index][0]); in IntraEdgeFilter_NEON()
83 sum_hi = vmlal_u8(sum_hi, vget_high_u8(src_1), krn1); in IntraEdgeFilter_NEON()
86 vcombine_u8(vrshrn_n_u16(sum_lo, 4), vrshrn_n_u16(sum_hi, 4)); in IntraEdgeFilter_NEON()
105 uint16x8_t sum_hi = vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_2)); in IntraEdgeFilter_NEON() local
106 sum_hi = vmulq_n_u16(sum_hi, kKernelsNEON[kernel_index][0]); in IntraEdgeFilter_NEON()
107 sum_hi = vmlal_u8(sum_hi, vget_high_u8(src_1), krn1); in IntraEdgeFilter_NEON()
110 vcombine_u8(vrshrn_n_u16(sum_lo, 4), vrshrn_n_u16(sum_hi, 4)); in IntraEdgeFilter_NEON()
164 uint16x8_t sum_hi = in IntraEdgeFilter_NEON() local
169 sum_hi = vaddq_u16(sum_hi, vshlq_n_u16(sum_123_hi, 2)); in IntraEdgeFilter_NEON()
[all …]
Dconvolve_neon.cc425 int32x4_t sum_lo, sum_hi; in SimpleSum2DVerticalTaps() local
428 sum_hi = vmull_lane_s16(vget_high_s16(src[0]), taps_lo, 0); in SimpleSum2DVerticalTaps()
430 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[1]), taps_lo, 1); in SimpleSum2DVerticalTaps()
432 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[2]), taps_lo, 2); in SimpleSum2DVerticalTaps()
434 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[3]), taps_lo, 3); in SimpleSum2DVerticalTaps()
437 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[4]), taps_hi, 0); in SimpleSum2DVerticalTaps()
439 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[5]), taps_hi, 1); in SimpleSum2DVerticalTaps()
441 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[6]), taps_hi, 2); in SimpleSum2DVerticalTaps()
443 sum_hi = vmlal_lane_s16(sum_hi, vget_high_s16(src[7]), taps_hi, 3); in SimpleSum2DVerticalTaps()
446 sum_hi = vmull_lane_s16(vget_high_s16(src[0]), taps_lo, 1); in SimpleSum2DVerticalTaps()
[all …]
/external/libvpx/libvpx/vp8/common/x86/
Dbilinear_filter_sse2.c57 const __m128i sum_hi = _mm_add_epi16(a_hi_filtered, b_hi_filtered); in horizontal_16x16() local
60 const __m128i compensated_hi = _mm_add_epi16(sum_hi, round_factor); in horizontal_16x16()
110 const __m128i sum_hi = in vertical_16x16() local
114 const __m128i compensated_hi = _mm_add_epi16(sum_hi, round_factor); in vertical_16x16()
/external/libvpx/libvpx/vp9/common/arm/neon/
Dvp9_highbd_iht16x16_add_neon.c123 const int64x2x2_t sum_hi = vaddq_s64_dual(in0[1], in1[1]); in highbd_add_dct_const_round_shift_low_8() local
128 out_hi.val[0] = vrshrn_n_s64(sum_hi.val[0], DCT_CONST_BITS); in highbd_add_dct_const_round_shift_low_8()
129 out_hi.val[1] = vrshrn_n_s64(sum_hi.val[1], DCT_CONST_BITS); in highbd_add_dct_const_round_shift_low_8()
Dvp9_highbd_iht8x8_add_neon.c73 const int64x2_t sum_hi = vaddq_s64(in0[1], in1[1]); in highbd_add_dct_const_round_shift_low_8() local
75 const int32x2_t out_hi = vrshrn_n_s64(sum_hi, DCT_CONST_BITS); in highbd_add_dct_const_round_shift_low_8()
/external/libaom/libaom/aom_dsp/x86/
Dvariance_avx2.c87 const __m256i sum_hi = in sum_to_32bit_avx2() local
89 return _mm256_add_epi32(sum_lo, sum_hi); in sum_to_32bit_avx2()
Dvariance_sse2.c65 const __m128i sum_hi = _mm_srai_epi32(_mm_unpackhi_epi16(sum, sum), 16); in sum_to_32bit_sse2() local
66 return _mm_add_epi32(sum_lo, sum_hi); in sum_to_32bit_sse2()
Dhighbd_intrapred_sse2.c427 const __m128i sum_hi = dc_sum_8(ref + 8); in dc_sum_16() local
428 return _mm_add_epi16(sum_lo, sum_hi); in dc_sum_16()
/external/libvpx/libvpx/vpx_dsp/x86/
Dhighbd_intrapred_intrin_sse2.c271 const __m128i sum_hi = dc_sum_8(ref + 8); in dc_sum_16() local
272 return _mm_add_epi16(sum_lo, sum_hi); in dc_sum_16()
Dvariance_sse2.c93 const __m128i sum_hi = _mm_srai_epi32(_mm_unpackhi_epi16(sum, sum), 16); in sum_to_32bit_sse2() local
94 return _mm_add_epi32(sum_lo, sum_hi); in sum_to_32bit_sse2()
Dvariance_avx2.c96 const __m256i sum_hi = in sum_to_32bit_avx2() local
98 return _mm256_add_epi32(sum_lo, sum_hi); in sum_to_32bit_avx2()
/external/libvpx/libvpx/vp9/encoder/x86/
Dhighbd_temporal_filter_sse4.c81 const __m128i sum_hi = _mm_unpackhi_epi32(*sum, zero); in highbd_average_4() local
87 const __m128i mul_hi = _mm_mul_epu32(sum_hi, const_hi); in highbd_average_4()
/external/libaom/libaom/av1/encoder/x86/
Dtemporal_filter_sse4.c1113 const __m128i sum_hi = _mm_unpackhi_epi32(*sum, zero); in highbd_average_4() local
1119 const __m128i mul_hi = _mm_mul_epu32(sum_hi, const_hi); in highbd_average_4()