/external/libaom/libaom/av1/common/x86/ |
D | wiener_convolve_avx2.c | 55 DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + SUBPEL_TAPS) * 8]); in av1_wiener_convolve_add_src_avx2() 58 memset(im_block + (im_h * im_stride), 0, MAX_SB_SIZE); in av1_wiener_convolve_add_src_avx2() 147 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res_clamped); in av1_wiener_convolve_add_src_avx2() 152 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_wiener_convolve_add_src_avx2() 153 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_wiener_convolve_add_src_avx2() 154 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_wiener_convolve_add_src_avx2() 155 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_wiener_convolve_add_src_avx2() 156 __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_wiener_convolve_add_src_avx2() 157 __m256i src_5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); in av1_wiener_convolve_add_src_avx2() 169 const int16_t *data = &im_block[i * im_stride]; in av1_wiener_convolve_add_src_avx2() [all …]
|
D | highbd_convolve_2d_ssse3.c | 27 DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); in av1_highbd_convolve_2d_sr_ssse3() 93 _mm_store_si128((__m128i *)&im_block[i * im_stride], res); in av1_highbd_convolve_2d_sr_ssse3() 98 __m128i s0 = _mm_loadu_si128((__m128i *)(im_block + 0 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 99 __m128i s1 = _mm_loadu_si128((__m128i *)(im_block + 1 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 100 __m128i s2 = _mm_loadu_si128((__m128i *)(im_block + 2 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 101 __m128i s3 = _mm_loadu_si128((__m128i *)(im_block + 3 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 102 __m128i s4 = _mm_loadu_si128((__m128i *)(im_block + 4 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 103 __m128i s5 = _mm_loadu_si128((__m128i *)(im_block + 5 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 104 __m128i s6 = _mm_loadu_si128((__m128i *)(im_block + 6 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 123 const int16_t *data = &im_block[i * im_stride]; in av1_highbd_convolve_2d_sr_ssse3()
|
D | highbd_convolve_2d_avx2.c | 30 DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); in av1_highbd_convolve_2d_sr_avx2() 102 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_highbd_convolve_2d_sr_avx2() 108 __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 109 __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 110 __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 111 __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 112 __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 113 __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 124 const int16_t *data = &im_block[i * im_stride]; in av1_highbd_convolve_2d_sr_avx2()
|
D | convolve_2d_avx2.c | 32 DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); in av1_convolve_2d_sr_avx2() 91 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_convolve_2d_sr_avx2() 100 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_convolve_2d_sr_avx2() 119 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_convolve_2d_sr_avx2() 120 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_convolve_2d_sr_avx2() 121 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_convolve_2d_sr_avx2() 122 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_convolve_2d_sr_avx2() 130 const int16_t *data = &im_block[i * im_stride]; in av1_convolve_2d_sr_avx2()
|
D | highbd_jnt_convolve_avx2.c | 236 DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); in av1_highbd_dist_wtd_convolve_2d_avx2() 321 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_highbd_dist_wtd_convolve_2d_avx2() 327 __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 328 __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 329 __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 330 __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 331 __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 332 __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 343 const int16_t *data = &im_block[i * im_stride]; in av1_highbd_dist_wtd_convolve_2d_avx2()
|
D | jnt_convolve_ssse3.c | 29 im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]); in av1_dist_wtd_convolve_2d_ssse3() 118 _mm_store_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_dist_wtd_convolve_2d_ssse3() 151 const int16_t *data = &im_block[i * im_stride + j]; in av1_dist_wtd_convolve_2d_ssse3()
|
D | jnt_convolve_avx2.c | 605 DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]); in av1_dist_wtd_convolve_2d_avx2() 667 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_dist_wtd_convolve_2d_avx2() 687 __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_dist_wtd_convolve_2d_avx2() 688 __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_dist_wtd_convolve_2d_avx2() 689 __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_dist_wtd_convolve_2d_avx2() 690 __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_dist_wtd_convolve_2d_avx2() 699 const int16_t *data = &im_block[i * im_stride]; in av1_dist_wtd_convolve_2d_avx2()
|
D | highbd_convolve_2d_sse4.c | 177 im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]); in av1_highbd_dist_wtd_convolve_2d_sse4_1() 270 _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_highbd_dist_wtd_convolve_2d_sse4_1() 303 const int16_t *data = &im_block[i * im_stride + j]; in av1_highbd_dist_wtd_convolve_2d_sse4_1()
|
D | convolve_2d_sse2.c | 30 im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]); in av1_convolve_2d_sr_sse2() 106 _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_convolve_2d_sr_sse2() 143 const int16_t *data = &im_block[i * im_stride + j]; in av1_convolve_2d_sr_sse2()
|
D | jnt_convolve_sse2.c | 398 im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]); in av1_dist_wtd_convolve_2d_sse2() 502 _mm_store_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_dist_wtd_convolve_2d_sse2() 535 const int16_t *data = &im_block[i * im_stride + j]; in av1_dist_wtd_convolve_2d_sse2()
|
/external/libaom/libaom/aom_dsp/x86/ |
D | convolve_avx2.h | 75 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); \ 85 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); 88 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); \ 89 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); \ 90 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); \ 91 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); \ 92 __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); \ 93 __m256i src_5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); \ 105 const int16_t *data = &im_block[i * im_stride]; \ 166 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); \ [all …]
|
/external/libaom/libaom/av1/common/ |
D | convolve.c | 79 int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; in av1_convolve_2d_sobel_y_c() local 97 im_block[y * im_stride + x] = sum; in av1_convolve_2d_sobel_y_c() 102 int16_t *src_vert = im_block + fo_vert * im_stride; in av1_convolve_2d_sobel_y_c() 121 int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; in av1_convolve_2d_sr_c() local 142 im_block[y * im_stride + x] = in av1_convolve_2d_sr_c() 148 int16_t *src_vert = im_block + fo_vert * im_stride; in av1_convolve_2d_sr_c() 254 int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; in av1_dist_wtd_convolve_2d_c() local 274 im_block[y * im_stride + x] = in av1_dist_wtd_convolve_2d_c() 280 int16_t *src_vert = im_block + fo_vert * im_stride; in av1_dist_wtd_convolve_2d_c() 462 int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]; in av1_convolve_2d_scale_c() local [all …]
|
/external/libaom/libaom/av1/common/arm/ |
D | jnt_convolve_neon.c | 319 const uint8_t *src, int src_stride, int16_t *im_block, const int im_stride, in dist_wtd_convolve_2d_horiz_neon() argument 327 dst_ptr = im_block; in dist_wtd_convolve_2d_horiz_neon() 563 int16_t *im_block, const int im_stride, uint8_t *dst8, int dst8_stride, in dist_wtd_convolve_2d_vert_neon() argument 599 src_ptr = im_block; in dist_wtd_convolve_2d_vert_neon() 726 im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]); in av1_dist_wtd_convolve_2d_neon() 747 dist_wtd_convolve_2d_horiz_neon(src_ptr, src_stride, im_block, im_stride, in av1_dist_wtd_convolve_2d_neon() 750 dist_wtd_convolve_2d_vert_neon(im_block, im_stride, dst8, dst8_stride, in av1_dist_wtd_convolve_2d_neon()
|
D | convolve_neon.c | 954 im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]); in av1_convolve_2d_sr_neon() 966 dst_ptr = im_block; in av1_convolve_2d_sr_neon() 1297 v_src_ptr = im_block; in av1_convolve_2d_sr_neon()
|