/external/libaom/libaom/av1/common/arm/
warp_plane_neon.c
  202  uint8x8_t src_1, int16x4_t *res) {  in convolve() argument
  214  pix_1 = vreinterpretq_s16_u16(vmovl_u8(src_1));  in convolve()
  220  static INLINE void horizontal_filter_neon(uint8x16_t src_1, uint8x16_t src_2,  in horizontal_filter_neon() argument
  237  uint8x16_t tmp_0 = vandq_u8(src_1, mask);  in horizontal_filter_neon()
  245  src_1 = vaddq_u8(tmp_0, tmp_2);  in horizontal_filter_neon()
  248  src_1_low = vget_low_u8(src_1);  in horizontal_filter_neon()
  250  src_3_low = vget_low_u8(vextq_u8(src_1, src_1, 4));  in horizontal_filter_neon()
  252  src_5_low = vget_low_u8(vextq_u8(src_1, src_1, 2));  in horizontal_filter_neon()
  253  src_6_low = vget_low_u8(vextq_u8(src_1, src_1, 6));  in horizontal_filter_neon()
  312  int16x4_t src_0, src_1, fltr_0, fltr_1;  in vertical_filter_neon() local
  [all …]
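The horizontal_filter_neon() matches show the standard NEON windowing trick: rather than reloading shifted pixel runs, every window a multi-tap filter needs is carved out of one 16-byte register with vextq_u8. A minimal sketch of the idiom; the helper name and the {1, 3, 3, 1} taps are illustrative, not the libaom kernel.

#include <arm_neon.h>
#include <stdint.h>

// Sketch of the vextq_u8 windowing idiom: one 16-byte load supplies
// every shifted 8-pixel window a multi-tap horizontal filter needs.
static inline uint16x8_t filter4_h(const uint8_t *src) {
  const uint8x16_t s = vld1q_u8(src);                   // pixels x .. x+15
  const uint8x8_t s0 = vget_low_u8(s);                  // x   .. x+7
  const uint8x8_t s1 = vget_low_u8(vextq_u8(s, s, 1));  // x+1 .. x+8
  const uint8x8_t s2 = vget_low_u8(vextq_u8(s, s, 2));  // x+2 .. x+9
  const uint8x8_t s3 = vget_low_u8(vextq_u8(s, s, 3));  // x+3 .. x+10
  uint16x8_t sum = vmull_u8(s0, vdup_n_u8(1));  // widening multiply
  sum = vmlal_u8(sum, s1, vdup_n_u8(3));        // widening MLA per tap
  sum = vmlal_u8(sum, s2, vdup_n_u8(3));
  sum = vmlal_u8(sum, s3, vdup_n_u8(1));
  return sum;  // caller rounds/narrows, e.g. vrshrn_n_u16(sum, 3)
}

Keeping the windows in registers replaces several unaligned reloads per row with one load plus cheap byte extracts.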
/external/libvpx/libvpx/vpx_dsp/x86/
convolve_sse2.h
   35  static INLINE __m128i mm_madd_add_epi8_sse2(const __m128i *const src_1,  in mm_madd_add_epi8_sse2() argument
   39  const __m128i src_1_half = _mm_unpacklo_epi8(*src_1, _mm_setzero_si128());  in mm_madd_add_epi8_sse2()
   49  static INLINE __m128i mm_madd_add_epi16_sse2(const __m128i *const src_1,  in mm_madd_add_epi16_sse2() argument
   53  const __m128i madd_1 = _mm_madd_epi16(*src_1, *ker_1);  in mm_madd_add_epi16_sse2()
   59  const __m128i *const src_1,  in mm_madd_packs_epi16_sse2() argument
   62  const __m128i madd_2 = _mm_madd_epi16(*src_1, *ker);  in mm_madd_packs_epi16_sse2()
   67  static INLINE __m128i mm_zip_epi32_sse2(const __m128i *const src_1,  in mm_zip_epi32_sse2() argument
   69  const __m128i tmp_1 = _mm_unpacklo_epi32(*src_1, *src_2);  in mm_zip_epi32_sse2()
   70  const __m128i tmp_2 = _mm_unpackhi_epi32(*src_1, *src_2);  in mm_zip_epi32_sse2()
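These helpers all wrap _mm_madd_epi16, which multiplies eight 16-bit lanes pairwise and sums adjacent products into four 32-bit lanes, so two madd results added together accumulate four filter taps per output. A sketch of the same add-two-madds shape under assumed names (not the libvpx header itself):

#include <emmintrin.h>  // SSE2

// Interleaved (pixel, pixel) x (tap, tap) inputs yield per-output-pixel
// partial sums; adding two madd results covers four taps at once.
static __m128i madd_add_epi16(const __m128i *const src_1,
                              const __m128i *const src_2,
                              const __m128i *const ker_1,
                              const __m128i *const ker_2) {
  const __m128i madd_1 = _mm_madd_epi16(*src_1, *ker_1);
  const __m128i madd_2 = _mm_madd_epi16(*src_2, *ker_2);
  return _mm_add_epi32(madd_1, madd_2);
}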
variance_avx2.c
  265  const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + sstep));  in spv32_half_zero() local
  266  const __m256i src_avg = _mm256_avg_epu8(src_0, src_1);  in spv32_half_zero()
  316  const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + 1));  in spv32_x4_y4() local
  317  const __m256i src_avg = _mm256_avg_epu8(src_0, src_1);  in spv32_x4_y4()
  354  const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + sstep));  in spv32_bilin_zero() local
  355  exp_src_lo = _mm256_unpacklo_epi8(src_0, src_1);  in spv32_bilin_zero()
  356  exp_src_hi = _mm256_unpackhi_epi8(src_0, src_1);  in spv32_bilin_zero()
  409  const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + 1));  in spv32_x4_yb() local
  410  const __m256i src_avg = _mm256_avg_epu8(src_0, src_1);  in spv32_x4_yb()
  454  const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + 1));  in spv32_xb_y4() local
  [all …]
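Every spv32_* variant above builds its half-pel source the same way: average 32 pixels against the 32 pixels one step away with the rounding byte average _mm256_avg_epu8. A sketch with a hypothetical helper name:

#include <immintrin.h>  // AVX2
#include <stddef.h>
#include <stdint.h>

// Half-pel interpolation as in the spv32_* helpers: step == 1 gives the
// horizontal half-pel (src + 1), step == stride the vertical one
// (src + sstep in the matches above).
static __m256i half_pel_avg32(const uint8_t *src, ptrdiff_t step) {
  const __m256i src_0 = _mm256_loadu_si256((const __m256i *)src);
  const __m256i src_1 = _mm256_loadu_si256((const __m256i *)(src + step));
  return _mm256_avg_epu8(src_0, src_1);  // (a + b + 1) >> 1 per byte
}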
convolve_avx2.h
  152  const __m256i *const src_1,  in mm256_madd_add_epi32() argument
  156  const __m256i tmp_1 = _mm256_madd_epi16(*src_1, *ker_1);  in mm256_madd_add_epi32()
/external/libgav1/libgav1/src/dsp/arm/
intra_edge_neon.cc
   76  const uint8x16_t src_1 = vld1q_u8(dst_buffer + i);  in IntraEdgeFilter_NEON() local
   80  sum_lo = vmlal_u8(sum_lo, vget_low_u8(src_1), krn1);  in IntraEdgeFilter_NEON()
   83  sum_hi = vmlal_u8(sum_hi, vget_high_u8(src_1), krn1);  in IntraEdgeFilter_NEON()
   99  const uint8x16_t src_1 = vld1q_u8(dst_buffer + i);  in IntraEdgeFilter_NEON() local
  104  sum_lo = vmlal_u8(sum_lo, vget_low_u8(src_1), krn1);  in IntraEdgeFilter_NEON()
  107  sum_hi = vmlal_u8(sum_hi, vget_high_u8(src_1), krn1);  in IntraEdgeFilter_NEON()
  114  const uint8x16_t dst_remainder = vbslq_u8(mask, src_1, result);  in IntraEdgeFilter_NEON()
  148  uint8x16_t src_1 = vld1q_u8(dst_buffer);  in IntraEdgeFilter_NEON() local
  161  vaddl_u8(vget_low_u8(src_1), vget_low_u8(src_2)), vget_low_u8(src_3));  in IntraEdgeFilter_NEON()
  167  vaddw_u8(vaddl_u8(vget_high_u8(src_1), vget_high_u8(src_2)),  in IntraEdgeFilter_NEON()
  [all …]
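Besides the widening vmlal_u8 taps, the notable match is line 114: vbslq_u8 writes back only the lanes that lie inside the buffer, so the tail of the edge needs no scalar loop. A sketch of that masked-store idiom; the mask construction here is an assumption, not the libgav1 code:

#include <arm_neon.h>
#include <stdint.h>

// Tail handling as in IntraEdgeFilter_NEON(): compute a full 16-lane
// result, then vbslq_u8 keeps the original bytes wherever `mask` is set,
// so a partial final vector stores without scalar fixup.
static void store_masked16(uint8_t *dst, uint8x16_t result, int valid) {
  static const uint8_t iota[16] = {0, 1, 2,  3,  4,  5,  6,  7,
                                   8, 9, 10, 11, 12, 13, 14, 15};
  const uint8x16_t lane_idx = vld1q_u8(iota);
  // Set mask for lanes >= valid, i.e. lanes that keep their old value.
  const uint8x16_t mask = vcgeq_u8(lane_idx, vdupq_n_u8((uint8_t)valid));
  const uint8x16_t old = vld1q_u8(dst);
  vst1q_u8(dst, vbslq_u8(mask, old, result));
}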
/external/libvpx/libvpx/vpx_dsp/arm/
subpel_variance_neon.c
   37  const uint8x8_t src_1 =  in var_filter_block2d_bil_w4() local
   40  const uint16x8_t b = vmlal_u8(a, src_1, f1);  in var_filter_block2d_bil_w4()
   60  const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]);  in var_filter_block2d_bil_w8() local
   62  const uint16x8_t b = vmlal_u8(a, src_1, f1);  in var_filter_block2d_bil_w8()
   84  const uint8x16_t src_1 = vld1q_u8(&src_ptr[j + pixel_step]);  in var_filter_block2d_bil_w16() local
   86  const uint16x8_t b = vmlal_u8(a, vget_low_u8(src_1), f1);  in var_filter_block2d_bil_w16()
   89  const uint16x8_t d = vmlal_u8(c, vget_high_u8(src_1), f1);  in var_filter_block2d_bil_w16()
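var_filter_block2d_bil_w8() is a two-tap bilinear kernel: widen with vmull_u8, fold in the pixel at pixel_step with vmlal_u8, then round and narrow. A sketch assuming the vpx convention that the two taps sum to 128, which makes the shift by 7 renormalize:

#include <arm_neon.h>
#include <stdint.h>

// Bilinear tap pattern from var_filter_block2d_bil_w8(): pixel_step is 1
// for horizontal filtering, the stride for vertical. Assumes f0 + f1 ==
// 128, so vrshrn_n_u16(., 7) computes (p0*f0 + p1*f1 + 64) >> 7.
static void bilinear_w8(const uint8_t *src_ptr, uint8_t *out,
                        int pixel_step, uint8_t f0, uint8_t f1) {
  const uint8x8_t src_0 = vld1_u8(src_ptr);
  const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]);
  const uint16x8_t a = vmull_u8(src_0, vdup_n_u8(f0));
  const uint16x8_t b = vmlal_u8(a, src_1, vdup_n_u8(f1));
  vst1_u8(out, vrshrn_n_u16(b, 7));  // round and narrow back to bytes
}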
/external/libaom/libaom/aom_dsp/arm/
subpel_variance_neon.c
   35  const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]);  in var_filter_block2d_bil_w8() local
   37  const uint16x8_t b = vmlal_u8(a, src_1, f1);  in var_filter_block2d_bil_w8()
   59  const uint8x16_t src_1 = vld1q_u8(&src_ptr[j + pixel_step]);  in var_filter_block2d_bil_w16() local
   61  const uint16x8_t b = vmlal_u8(a, vget_low_u8(src_1), f1);  in var_filter_block2d_bil_w16()
   64  const uint16x8_t d = vmlal_u8(c, vget_high_u8(src_1), f1);  in var_filter_block2d_bil_w16()
blend_a64_mask_neon.c
   22  static INLINE void blend8x1(int16x8_t mask, int16x8_t src_0, int16x8_t src_1,  in blend8x1() argument
   29  vmlal_s16(im_res_low, vget_low_s16(max_minus_mask), vget_low_s16(src_1));  in blend8x1()
   33  vget_high_s16(src_1));  in blend8x1()
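blend8x1() computes a 6-bit alpha blend, mask * src_0 + (64 - mask) * src_1, in widening 16-to-32-bit arithmetic. A sketch under assumed names; the caller would round-shift the two halves by 6 and narrow:

#include <arm_neon.h>

// Alpha blend on 16-bit samples: widen each half with vmull_s16, fold in
// the complementary-weighted second source with vmlal_s16.
static void blend8(int16x8_t mask, int16x8_t src_0, int16x8_t src_1,
                   int32x4_t *res_lo, int32x4_t *res_hi) {
  const int16x8_t max_minus_mask = vsubq_s16(vdupq_n_s16(64), mask);
  int32x4_t lo = vmull_s16(vget_low_s16(mask), vget_low_s16(src_0));
  lo = vmlal_s16(lo, vget_low_s16(max_minus_mask), vget_low_s16(src_1));
  int32x4_t hi = vmull_s16(vget_high_s16(mask), vget_high_s16(src_0));
  hi = vmlal_s16(hi, vget_high_s16(max_minus_mask), vget_high_s16(src_1));
  *res_lo = lo;
  *res_hi = hi;
}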
/external/libhevc/encoder/arm/
ihevce_copy_neon.c
  158  uint8x16_t src_0, src_1;  in copy_2d_neon() local
  163  src_1 = vld1q_u8(pu1_src + 16);  in copy_2d_neon()
  164  vst1q_u8(pu1_dst + 16, src_1);  in copy_2d_neon()
  180  uint8x16_t src_0, src_1, src_2, src_3;  in copy_2d_neon() local
  186  src_1 = vld1q_u8(src_il + 16);  in copy_2d_neon()
  187  vst1q_u8(dst_il + 16, src_1);  in copy_2d_neon()
ihevce_ssd_calculator_neon.c
  140  uint8x16_t src_0, pred_0, src_1, pred_1, abs_0, abs_1;  in ihevce_1x32_ssd_computer_neon() local
  147  src_1 = vld1q_u8(pu1_src + 16);  in ihevce_1x32_ssd_computer_neon()
  154  src_1 = vld2q_u8(pu1_src + 32).val[chroma_plane];  in ihevce_1x32_ssd_computer_neon()
  158  abs_1 = vabdq_u8(src_1, pred_1);  in ihevce_1x32_ssd_computer_neon()
  179  uint8x16_t src_0, src_1, src_2, src_3;  in ihevce_1x64_ssd_computer_neon() local
  189  src_1 = vld1q_u8(pu1_src + 16);  in ihevce_1x64_ssd_computer_neon()
  200  src_1 = vld2q_u8(pu1_src + 32).val[chroma_plane];  in ihevce_1x64_ssd_computer_neon()
  208  abs_1 = vabdq_u8(src_1, pred_1);  in ihevce_1x64_ssd_computer_neon()
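The SSD computers take absolute differences with vabdq_u8 and then square-accumulate them. One way to express the per-16-pixel step, squaring with a widening self-multiply and folding into 32-bit lanes with vpadalq_u16 (a sketch, not the exact ihevce reduction):

#include <arm_neon.h>
#include <stdint.h>

// SSD step: |src - pred| per byte, squared via widening self-multiply
// (max 255^2 fits in 16 bits), then pairwise widen-added into `acc`.
static uint32x4_t ssd16(const uint8_t *src, const uint8_t *pred,
                        uint32x4_t acc) {
  const uint8x16_t s = vld1q_u8(src);
  const uint8x16_t p = vld1q_u8(pred);
  const uint8x16_t diff = vabdq_u8(s, p);
  const uint16x8_t sq_lo = vmull_u8(vget_low_u8(diff), vget_low_u8(diff));
  const uint16x8_t sq_hi = vmull_u8(vget_high_u8(diff), vget_high_u8(diff));
  acc = vpadalq_u16(acc, sq_lo);  // pairwise widen-add into 32-bit lanes
  acc = vpadalq_u16(acc, sq_hi);
  return acc;
}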
ihevce_sad_compute_neon.c
  145  const uint8x16_t src_1 = vld1q_u8(pu1_src + 16);  in ihevce_32xn_sad_computer_neon() local
  150  abs_1 = vabal_u8(abs_1, vget_low_u8(src_1), vget_low_u8(pred_1));  in ihevce_32xn_sad_computer_neon()
  151  abs_1 = vabal_u8(abs_1, vget_high_u8(src_1), vget_high_u8(pred_1));  in ihevce_32xn_sad_computer_neon()
  179  const uint8x16_t src_1 = vld1q_u8(pu1_src + 16);  in ihevce_64xn_sad_computer_neon() local
  188  abs_0 = vabal_u8(abs_0, vget_low_u8(src_1), vget_low_u8(pred_1));  in ihevce_64xn_sad_computer_neon()
  189  abs_0 = vabal_u8(abs_0, vget_high_u8(src_1), vget_high_u8(pred_1));  in ihevce_64xn_sad_computer_neon()
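The SAD computers lean on vabal_u8, which fuses |a - b| with a widening accumulate, so a 32-pixel row costs four instructions. A sketch of one row under assumed names; a uint16x8_t accumulator bounds how many rows can be folded before a wider reduction (e.g. vaddlvq_u16 on AArch64):

#include <arm_neon.h>
#include <stdint.h>

// One 32-pixel SAD row: absolute-difference-and-accumulate per 8 lanes.
static uint16x8_t sad_row32(const uint8_t *src, const uint8_t *pred,
                            uint16x8_t acc) {
  const uint8x16_t src_0 = vld1q_u8(src);
  const uint8x16_t src_1 = vld1q_u8(src + 16);
  const uint8x16_t pred_0 = vld1q_u8(pred);
  const uint8x16_t pred_1 = vld1q_u8(pred + 16);
  acc = vabal_u8(acc, vget_low_u8(src_0), vget_low_u8(pred_0));
  acc = vabal_u8(acc, vget_high_u8(src_0), vget_high_u8(pred_0));
  acc = vabal_u8(acc, vget_low_u8(src_1), vget_low_u8(pred_1));
  acc = vabal_u8(acc, vget_high_u8(src_1), vget_high_u8(pred_1));
  return acc;  // caller reduces the eight 16-bit lanes to the final SAD
}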
ihevce_ssd_and_sad_calculator_neon.c
  187  const uint8x16_t src_1 = vld1q_u8(pu1_src + 16);  in ihevce_ssd_and_sad_calculator_neon() local
  201  abs_l = vabd_u8(vget_low_u8(src_1), vget_low_u8(pred_1));  in ihevce_ssd_and_sad_calculator_neon()
  202  abs_h = vabd_u8(vget_high_u8(src_1), vget_high_u8(pred_1));  in ihevce_ssd_and_sad_calculator_neon()
  246  const uint8x16_t src_1 = vld1q_u8(pu1_src + 16);  in ihevce_ssd_and_sad_calculator_neon() local
  264  abs_l = vabd_u8(vget_low_u8(src_1), vget_low_u8(pred_1));  in ihevce_ssd_and_sad_calculator_neon()
  265  abs_h = vabd_u8(vget_high_u8(src_1), vget_high_u8(pred_1));  in ihevce_ssd_and_sad_calculator_neon()
/external/libgav1/libgav1/src/dsp/x86/
distance_weighted_blend_sse4.cc
   69  __m128i src_1 = LoadHi8(src_10, pred_1);  in DistanceWeightedBlend4xH_SSE4_1() local
   72  const __m128i res0 = ComputeWeightedAverage8(src_0, src_1, weights);  in DistanceWeightedBlend4xH_SSE4_1()
   79  src_1 = LoadHi8(src_11, pred_1);  in DistanceWeightedBlend4xH_SSE4_1()
   82  const __m128i res1 = ComputeWeightedAverage8(src_0, src_1, weights);  in DistanceWeightedBlend4xH_SSE4_1()
  273  __m128i src_1 = LoadHi8(src_10, pred_1);  in DistanceWeightedBlend4xH_SSE4_1() local
  277  ComputeWeightedAverage8(src_0, src_1, weight0, weight1);  in DistanceWeightedBlend4xH_SSE4_1()
  284  src_1 = LoadHi8(src_11, pred_1);  in DistanceWeightedBlend4xH_SSE4_1()
  288  ComputeWeightedAverage8(src_0, src_1, weight0, weight1);  in DistanceWeightedBlend4xH_SSE4_1()
/external/libaom/libaom/av1/common/x86/
highbd_warp_plane_sse4.c
  118  const __m128i src_1 = *src;  in highbd_filter_src_pixels() local
  124  const __m128i res_0 = _mm_madd_epi16(src_1, coeff[0]);  in highbd_filter_src_pixels()
  126  _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 4), coeff[2]);  in highbd_filter_src_pixels()
  128  _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 8), coeff[4]);  in highbd_filter_src_pixels()
  130  _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 12), coeff[6]);  in highbd_filter_src_pixels()
  138  _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 2), coeff[1]);  in highbd_filter_src_pixels()
  140  _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 6), coeff[3]);  in highbd_filter_src_pixels()
  142  _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 10), coeff[5]);  in highbd_filter_src_pixels()
  144  _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 14), coeff[7]);  in highbd_filter_src_pixels()
  496  const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]);  in av1_highbd_warp_affine_sse4_1() local
  [all …]
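highbd_filter_src_pixels() builds the shifted windows of 16-bit pixels with _mm_alignr_epi8 across a register pair (the byte shift is 2 per pixel), feeding each window to _mm_madd_epi16. A reduced sketch of the even-tap half; the coeff layout is an assumption:

#include <smmintrin.h>  // SSE4.1 target; _mm_alignr_epi8 itself is SSSE3

// Windows at pixel offsets 0, 2, 4, 6 are carved from (src_1, src2_1)
// with byte shifts 0, 4, 8, 12, then multiplied against interleaved
// coefficient pairs. The summation here is simplified relative to the
// full libaom rounding flow.
static __m128i highbd_window_madd(const __m128i src_1, const __m128i src2_1,
                                  const __m128i *coeff /* 4 even-tap pairs */) {
  const __m128i res_0 = _mm_madd_epi16(src_1, coeff[0]);
  const __m128i res_2 =
      _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 4), coeff[1]);
  const __m128i res_4 =
      _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 8), coeff[2]);
  const __m128i res_6 =
      _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 12), coeff[3]);
  return _mm_add_epi32(_mm_add_epi32(res_0, res_2),
                       _mm_add_epi32(res_4, res_6));
}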
wiener_convolve_sse2.c
   88  const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);  in av1_wiener_convolve_add_src_sse2() local
   89  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);  in av1_wiener_convolve_add_src_sse2()
  161  const __m128i src_1 =  in av1_wiener_convolve_add_src_sse2() local
  174  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);  in av1_wiener_convolve_add_src_sse2()
highbd_wiener_convolve_avx2.c
   96  const __m256i src_1 = yy_loadu_256(src_ij + 1);  in av1_highbd_wiener_convolve_add_src_avx2() local
  106  const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);  in av1_highbd_wiener_convolve_add_src_avx2()
  205  const __m256i src_1 = _mm256_unpackhi_epi16(data_0, data_1);  in av1_highbd_wiener_convolve_add_src_avx2() local
  210  const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);  in av1_highbd_wiener_convolve_add_src_avx2()
jnt_convolve_ssse3.c
  102  const __m128i src_1 = _mm_alignr_epi8(src_hi, src_lo, 2);  in av1_dist_wtd_convolve_2d_ssse3() local
  103  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);  in av1_dist_wtd_convolve_2d_ssse3()
  174  const __m128i src_1 =  in av1_dist_wtd_convolve_2d_ssse3() local
  187  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);  in av1_dist_wtd_convolve_2d_ssse3()
warp_plane_avx2.c
   83  const __m256i src_1 = _mm256_shuffle_epi8(src, shuffle_src[1]);  in filter_src_pixels_avx2() local
   88  const __m256i res_46 = _mm256_maddubs_epi16(src_1, coeff[1]);  in filter_src_pixels_avx2()
  313  const __m128i src_1 =  in warp_horizontal_filter_avx2() local
  316  _mm256_inserti128_si256(_mm256_castsi128_si256(src_0), src_1, 0x1);  in warp_horizontal_filter_avx2()
  347  const __m128i src_1 =  in warp_horizontal_filter_alpha0_avx2() local
  350  _mm256_inserti128_si256(_mm256_castsi128_si256(src_0), src_1, 0x1);  in warp_horizontal_filter_alpha0_avx2()
  383  const __m128i src_1 =  in warp_horizontal_filter_beta0_avx2() local
  386  _mm256_inserti128_si256(_mm256_castsi128_si256(src_0), src_1, 0x1);  in warp_horizontal_filter_beta0_avx2()
  789  const __m256i src_1 =  in warp_vertical_filter_avx2() local
  798  src[0] = _mm256_unpacklo_epi16(src_0, src_1);  in warp_vertical_filter_avx2()
  [all …]
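filter_src_pixels_avx2() pairs _mm256_shuffle_epi8 with _mm256_maddubs_epi16: the shuffle gathers the byte pairs each coefficient pair needs, and maddubs multiplies unsigned pixels by signed taps, summing adjacent products into 16-bit lanes. A sketch; the shuffle_src and coeff layouts are assumptions:

#include <immintrin.h>

// Per-tap-pair partial sums as in filter_src_pixels_avx2(): shuffle
// arranges (p[x+k], p[x+k+2]) byte pairs, maddubs multiplies them by the
// matching signed coefficient pair and adds adjacent products.
static void warp_taps_avx2(const __m256i src, const __m256i *shuffle_src,
                           const __m256i *coeff, __m256i *res_02,
                           __m256i *res_46) {
  const __m256i src_0 = _mm256_shuffle_epi8(src, shuffle_src[0]);
  const __m256i src_1 = _mm256_shuffle_epi8(src, shuffle_src[1]);
  *res_02 = _mm256_maddubs_epi16(src_0, coeff[0]);  // taps 0 and 2
  *res_46 = _mm256_maddubs_epi16(src_1, coeff[1]);  // taps 4 and 6
}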
wiener_convolve_avx2.c
  153  __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));  in av1_wiener_convolve_add_src_avx2() local
  160  s[0] = _mm256_unpacklo_epi16(src_0, src_1);  in av1_wiener_convolve_add_src_avx2()
  164  s[4] = _mm256_unpackhi_epi16(src_0, src_1);  in av1_wiener_convolve_add_src_avx2()
convolve_2d_avx2.c
  120  __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));  in av1_convolve_2d_sr_avx2() local
  124  s[0] = _mm256_unpacklo_epi16(src_0, src_1);  in av1_convolve_2d_sr_avx2()
  126  s[3] = _mm256_unpackhi_epi16(src_0, src_1);  in av1_convolve_2d_sr_avx2()
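The vertical stage of av1_convolve_2d_sr_avx2() interleaves two adjacent 16-bit rows with unpacklo/unpackhi so one madd step later applies a vertical tap pair per output pixel. A sketch of the row-pair setup, reusing the im_block/im_stride names from the matches:

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

// Interleave rows i and i+1 of the 16-bit intermediate block so each
// 32-bit lane holds a (row_i, row_i+1) sample pair ready for madd.
static void load_row_pair(const int16_t *im_block, ptrdiff_t im_stride,
                          __m256i s[2]) {
  const __m256i src_0 =
      _mm256_loadu_si256((const __m256i *)(im_block + 0 * im_stride));
  const __m256i src_1 =
      _mm256_loadu_si256((const __m256i *)(im_block + 1 * im_stride));
  s[0] = _mm256_unpacklo_epi16(src_0, src_1);  // pairs, low half
  s[1] = _mm256_unpackhi_epi16(src_0, src_1);  // pairs, high half
}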
convolve_2d_sse2.c
   90  const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);  in av1_convolve_2d_sr_sse2() local
   91  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);  in av1_convolve_2d_sr_sse2()
  166  const __m128i src_1 =  in av1_convolve_2d_sr_sse2() local
  179  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);  in av1_convolve_2d_sr_sse2()
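The 8-bit horizontal stage here makes its shifted windows differently: byte-shift the loaded row with _mm_srli_si128 and zero-extend via unpacklo against zero. Even outputs take shifts 0, 2, 4, ...; odd outputs take 1, 3, 5, ...; both reuse the same interleaved tap-pair register. A sketch of the first tap pair only:

#include <emmintrin.h>  // SSE2

// First tap pair of the even/odd split in av1_convolve_2d_sr_sse2():
// shift 0 feeds the even output pixels, shift 1 the odd ones, both
// against the interleaved (tap0, tap1) coefficients.
static void conv_pair01(const __m128i data, const __m128i coeff_01,
                        __m128i *res_even, __m128i *res_odd) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i src_0 = _mm_unpacklo_epi8(data, zero);
  const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);
  *res_even = _mm_madd_epi16(src_0, coeff_01);
  *res_odd = _mm_madd_epi16(src_1, coeff_01);
}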
highbd_wiener_convolve_ssse3.c
  163  const __m128i src_1 =  in av1_highbd_wiener_convolve_add_src_ssse3() local
  176  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);  in av1_highbd_wiener_convolve_add_src_ssse3()
jnt_convolve_sse2.c
  480  const __m128i src_1 = _mm_or_si128(temp_hi, temp_lo);  in av1_dist_wtd_convolve_2d_sse2() local
  481  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);  in av1_dist_wtd_convolve_2d_sse2()
  558  const __m128i src_1 =  in av1_dist_wtd_convolve_2d_sse2() local
  571  const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);  in av1_dist_wtd_convolve_2d_sse2()
selfguided_avx2.c
  660  __m128i src_0, src_1;  in av1_apply_selfguided_restoration_avx2() local
  663  src_1 = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij + 8));  in av1_apply_selfguided_restoration_avx2()
  665  ep_1 = _mm256_cvtepu16_epi32(src_1);  in av1_apply_selfguided_restoration_avx2()
/external/libaom/libaom/aom_dsp/x86/
convolve_avx2.h
   89  __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); \
   96  s[0] = _mm256_unpacklo_epi16(src_0, src_1); \
  100  s[4] = _mm256_unpackhi_epi16(src_0, src_1); \