/external/libgav1/src/dsp/arm/ |
D | intra_edge_neon.cc | 156 const uint8x16_t src_4 = vld1q_u8(dst_buffer + i + 2); in IntraEdgeFilter_NEON() local 159 vshlq_n_u16(vaddl_u8(vget_low_u8(src_0), vget_low_u8(src_4)), 1); in IntraEdgeFilter_NEON() 165 vshlq_n_u16(vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_4)), 1); in IntraEdgeFilter_NEON() 186 const uint8x16_t src_4 = vld1q_u8(dst_buffer + i + 2); in IntraEdgeFilter_NEON() local 189 vshlq_n_u16(vaddl_u8(vget_low_u8(src_0), vget_low_u8(src_4)), 1); in IntraEdgeFilter_NEON() 195 vshlq_n_u16(vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_4)), 1); in IntraEdgeFilter_NEON() 410 const uint16x8_t src_4 = vld1q_u16(dst_buffer + i + 2); in IntraEdgeFilter_NEON() local 411 const uint16x8_t sum_04 = vshlq_n_u16(vaddq_u16(src_0, src_4), 1); in IntraEdgeFilter_NEON() 429 const uint16x8_t src_4 = vld1q_u16(dst_buffer + i + 2); in IntraEdgeFilter_NEON() local 430 const uint16x8_t sum_04 = vshlq_n_u16(vaddq_u16(src_0, src_4), 1); in IntraEdgeFilter_NEON()
|
/external/libaom/av1/common/x86/ |
D | wiener_convolve_sse2.c | 77 const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero); in av1_wiener_convolve_add_src_sse2() local 78 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_wiener_convolve_add_src_sse2() 145 const __m128i src_4 = in av1_wiener_convolve_add_src_sse2() local 154 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_wiener_convolve_add_src_sse2()
|
D | convolve_2d_sse2.c | 67 const __m128i src_4 = _mm_unpacklo_epi8(data_2, zero); in av1_convolve_2d_sr_12tap_sse2() local 68 const __m128i res_4 = _mm_madd_epi16(src_4, coeffs[2]); in av1_convolve_2d_sr_12tap_sse2() 140 const __m128i src_4 = in av1_convolve_2d_sr_12tap_sse2() local 155 const __m128i res_4 = _mm_madd_epi16(src_4, coeffs[2]); in av1_convolve_2d_sr_12tap_sse2() 292 const __m128i src_4 = in av1_convolve_2d_sr_sse2() local 294 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_convolve_2d_sr_sse2() 369 const __m128i src_4 = in av1_convolve_2d_sr_sse2() local 378 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_convolve_2d_sr_sse2()
|
D | jnt_convolve_ssse3.c | 91 const __m128i src_4 = _mm_alignr_epi8(src_hi, src_lo, 8); in av1_dist_wtd_convolve_2d_ssse3() local 92 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_dist_wtd_convolve_2d_ssse3() 158 const __m128i src_4 = in av1_dist_wtd_convolve_2d_ssse3() local 167 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_dist_wtd_convolve_2d_ssse3()
|
D | highbd_wiener_convolve_avx2.c | 99 const __m256i src_4 = yy_loadu_256(src_ij + 4); in av1_highbd_wiener_convolve_add_src_avx2() local 109 const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45); in av1_highbd_wiener_convolve_add_src_avx2() 193 const __m256i src_4 = _mm256_unpacklo_epi16(data_4, data_5); in av1_highbd_wiener_convolve_add_src_avx2() local 198 const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45); in av1_highbd_wiener_convolve_add_src_avx2()
|
D | wiener_convolve_avx2.c | 156 __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_wiener_convolve_add_src_avx2() local 162 s[2] = _mm256_unpacklo_epi16(src_4, src_5); in av1_wiener_convolve_add_src_avx2() 166 s[6] = _mm256_unpackhi_epi16(src_4, src_5); in av1_wiener_convolve_add_src_avx2()
|
D | warp_plane_avx2.c | 794 const __m256i src_4 = horz_out[2]; in warp_vertical_filter_avx2() local 800 src[4] = _mm256_unpacklo_epi16(src_4, src_5); in warp_vertical_filter_avx2() 804 src[5] = _mm256_unpackhi_epi16(src_4, src_5); in warp_vertical_filter_avx2() 844 const __m256i src_4 = horz_out[2]; in warp_vertical_filter_gamma0_avx2() local 850 src[4] = _mm256_unpacklo_epi16(src_4, src_5); in warp_vertical_filter_gamma0_avx2() 854 src[5] = _mm256_unpackhi_epi16(src_4, src_5); in warp_vertical_filter_gamma0_avx2() 893 const __m256i src_4 = horz_out[2]; in warp_vertical_filter_delta0_avx2() local 899 src[4] = _mm256_unpacklo_epi16(src_4, src_5); in warp_vertical_filter_delta0_avx2() 903 src[5] = _mm256_unpackhi_epi16(src_4, src_5); in warp_vertical_filter_delta0_avx2() 941 const __m256i src_4 = horz_out[2]; in warp_vertical_filter_gamma0_delta0_avx2() local [all …]
|
D | highbd_wiener_convolve_ssse3.c | 147 const __m128i src_4 = in av1_highbd_wiener_convolve_add_src_ssse3() local 156 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_highbd_wiener_convolve_add_src_ssse3()
|
D | jnt_convolve_sse2.c | 457 const __m128i src_4 = _mm_or_si128(temp_hi, temp_lo); in av1_dist_wtd_convolve_2d_sse2() local 458 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_dist_wtd_convolve_2d_sse2() 534 const __m128i src_4 = in av1_dist_wtd_convolve_2d_sse2() local 543 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_dist_wtd_convolve_2d_sse2()
|
D | highbd_convolve_2d_sse4.c | 306 const __m128i src_4 = in av1_highbd_dist_wtd_convolve_2d_sse4_1() local 315 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45); in av1_highbd_dist_wtd_convolve_2d_sse4_1()
|
D | highbd_warp_plane_sse4.c | 462 const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]); in av1_highbd_warp_affine_sse4_1() local 491 const __m128i res_4 = _mm_madd_epi16(src_4, coeff_4); in av1_highbd_warp_affine_sse4_1()
|
D | warp_plane_sse4.c | 551 const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]); in filter_src_pixels_vertical() local 556 const __m128i res_4 = _mm_madd_epi16(src_4, coeffs[2]); in filter_src_pixels_vertical()
|
/external/libaom/av1/common/arm/ |
D | warp_plane_neon.c | 221 uint8x16_t src_3, uint8x16_t src_4, in horizontal_filter_neon() argument 240 uint8x16_t tmp_3 = vandq_u8(src_4, mask); in horizontal_filter_neon() 490 uint8x16_t src_1, src_2, src_3, src_4; in av1_warp_affine_neon() local 598 src_4 = vextq_u8(src_3, src_3, 1); in av1_warp_affine_neon() 600 horizontal_filter_neon(src_1, src_2, src_3, src_4, tmp, sx, alpha, k, in av1_warp_affine_neon() 616 src_4 = vextq_u8(src_3, src_3, 1); in av1_warp_affine_neon() 618 horizontal_filter_neon(src_1, src_2, src_3, src_4, tmp, sx, alpha, k, in av1_warp_affine_neon()
|
/external/libaom/aom_dsp/x86/ |
D | convolve_avx2.h | 259 __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); \ 265 s[2] = _mm256_unpacklo_epi16(src_4, src_5); \ 269 s[6] = _mm256_unpackhi_epi16(src_4, src_5); \ 409 __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); \ 418 s[2] = _mm256_unpacklo_epi16(src_4, src_5); \ 424 s[8] = _mm256_unpackhi_epi16(src_4, src_5); \
|