/external/libaom/libaom/av1/common/x86/ |
D | jnt_convolve_ssse3.c | 31 int im_stride = MAX_SB_SIZE; in av1_dist_wtd_convolve_2d_ssse3() local 118 _mm_store_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_dist_wtd_convolve_2d_ssse3() 151 const int16_t *data = &im_block[i * im_stride + j]; in av1_dist_wtd_convolve_2d_ssse3() 153 _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), in av1_dist_wtd_convolve_2d_ssse3() 154 *(__m128i *)(data + 1 * im_stride)); in av1_dist_wtd_convolve_2d_ssse3() 156 _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), in av1_dist_wtd_convolve_2d_ssse3() 157 *(__m128i *)(data + 3 * im_stride)); in av1_dist_wtd_convolve_2d_ssse3() 159 _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), in av1_dist_wtd_convolve_2d_ssse3() 160 *(__m128i *)(data + 5 * im_stride)); in av1_dist_wtd_convolve_2d_ssse3() 162 _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), in av1_dist_wtd_convolve_2d_ssse3() [all …]
|
D | wiener_convolve_avx2.c | 57 int im_stride = 8; in av1_wiener_convolve_add_src_avx2() local 58 memset(im_block + (im_h * im_stride), 0, MAX_SB_SIZE); in av1_wiener_convolve_add_src_avx2() 147 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res_clamped); in av1_wiener_convolve_add_src_avx2() 152 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_wiener_convolve_add_src_avx2() 153 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_wiener_convolve_add_src_avx2() 154 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_wiener_convolve_add_src_avx2() 155 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_wiener_convolve_add_src_avx2() 156 __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_wiener_convolve_add_src_avx2() 157 __m256i src_5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); in av1_wiener_convolve_add_src_avx2() 169 const int16_t *data = &im_block[i * im_stride]; in av1_wiener_convolve_add_src_avx2() [all …]
|
D | highbd_convolve_2d_ssse3.c | 29 int im_stride = 8; in av1_highbd_convolve_2d_sr_ssse3() local 93 _mm_store_si128((__m128i *)&im_block[i * im_stride], res); in av1_highbd_convolve_2d_sr_ssse3() 98 __m128i s0 = _mm_loadu_si128((__m128i *)(im_block + 0 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 99 __m128i s1 = _mm_loadu_si128((__m128i *)(im_block + 1 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 100 __m128i s2 = _mm_loadu_si128((__m128i *)(im_block + 2 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 101 __m128i s3 = _mm_loadu_si128((__m128i *)(im_block + 3 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 102 __m128i s4 = _mm_loadu_si128((__m128i *)(im_block + 4 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 103 __m128i s5 = _mm_loadu_si128((__m128i *)(im_block + 5 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 104 __m128i s6 = _mm_loadu_si128((__m128i *)(im_block + 6 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3() 123 const int16_t *data = &im_block[i * im_stride]; in av1_highbd_convolve_2d_sr_ssse3() [all …]
|
D | highbd_convolve_2d_sse4.c | 181 int im_stride = MAX_SB_SIZE; in av1_highbd_dist_wtd_convolve_2d_sse4_1() local 270 _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_highbd_dist_wtd_convolve_2d_sse4_1() 303 const int16_t *data = &im_block[i * im_stride + j]; in av1_highbd_dist_wtd_convolve_2d_sse4_1() 305 _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), in av1_highbd_dist_wtd_convolve_2d_sse4_1() 306 *(__m128i *)(data + 1 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_sse4_1() 308 _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), in av1_highbd_dist_wtd_convolve_2d_sse4_1() 309 *(__m128i *)(data + 3 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_sse4_1() 311 _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), in av1_highbd_dist_wtd_convolve_2d_sse4_1() 312 *(__m128i *)(data + 5 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_sse4_1() 314 _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), in av1_highbd_dist_wtd_convolve_2d_sse4_1() [all …]
|
D | convolve_2d_sse2.c | 32 int im_stride = MAX_SB_SIZE; in av1_convolve_2d_sr_sse2() local 106 _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_convolve_2d_sr_sse2() 143 const int16_t *data = &im_block[i * im_stride + j]; in av1_convolve_2d_sr_sse2() 145 _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), in av1_convolve_2d_sr_sse2() 146 *(__m128i *)(data + 1 * im_stride)); in av1_convolve_2d_sr_sse2() 148 _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), in av1_convolve_2d_sr_sse2() 149 *(__m128i *)(data + 3 * im_stride)); in av1_convolve_2d_sr_sse2() 151 _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), in av1_convolve_2d_sr_sse2() 152 *(__m128i *)(data + 5 * im_stride)); in av1_convolve_2d_sr_sse2() 154 _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), in av1_convolve_2d_sr_sse2() [all …]
|
D | highbd_convolve_2d_avx2.c | 32 int im_stride = 8; in av1_highbd_convolve_2d_sr_avx2() local 102 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_highbd_convolve_2d_sr_avx2() 108 __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 109 __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 110 __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 111 __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 112 __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 113 __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() 124 const int16_t *data = &im_block[i * im_stride]; in av1_highbd_convolve_2d_sr_avx2() 127 _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); in av1_highbd_convolve_2d_sr_avx2() [all …]
|
D | convolve_2d_avx2.c | 30 int im_stride = 8; in av1_convolve_2d_sr_avx2() local 91 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_convolve_2d_sr_avx2() 100 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_convolve_2d_sr_avx2() 119 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_convolve_2d_sr_avx2() 120 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_convolve_2d_sr_avx2() 121 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_convolve_2d_sr_avx2() 122 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_convolve_2d_sr_avx2() 130 const int16_t *data = &im_block[i * im_stride]; in av1_convolve_2d_sr_avx2() 133 _mm256_loadu_si256((__m256i *)(data + 4 * im_stride)); in av1_convolve_2d_sr_avx2() 135 _mm256_loadu_si256((__m256i *)(data + 5 * im_stride)); in av1_convolve_2d_sr_avx2()
|
D | jnt_convolve_sse2.c | 400 int im_stride = MAX_SB_SIZE; in av1_dist_wtd_convolve_2d_sse2() local 502 _mm_store_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_dist_wtd_convolve_2d_sse2() 535 const int16_t *data = &im_block[i * im_stride + j]; in av1_dist_wtd_convolve_2d_sse2() 537 _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), in av1_dist_wtd_convolve_2d_sse2() 538 *(__m128i *)(data + 1 * im_stride)); in av1_dist_wtd_convolve_2d_sse2() 540 _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), in av1_dist_wtd_convolve_2d_sse2() 541 *(__m128i *)(data + 3 * im_stride)); in av1_dist_wtd_convolve_2d_sse2() 543 _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), in av1_dist_wtd_convolve_2d_sse2() 544 *(__m128i *)(data + 5 * im_stride)); in av1_dist_wtd_convolve_2d_sse2() 546 _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), in av1_dist_wtd_convolve_2d_sse2() [all …]
|
D | highbd_jnt_convolve_avx2.c | 240 int im_stride = 8; in av1_highbd_dist_wtd_convolve_2d_avx2() local 321 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_highbd_dist_wtd_convolve_2d_avx2() 327 __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 328 __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 329 __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 330 __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 331 __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 332 __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() 343 const int16_t *data = &im_block[i * im_stride]; in av1_highbd_dist_wtd_convolve_2d_avx2() 346 _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2() [all …]
|
D | jnt_convolve_avx2.c | 607 int im_stride = 8; in av1_dist_wtd_convolve_2d_avx2() local 667 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_dist_wtd_convolve_2d_avx2() 687 __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_dist_wtd_convolve_2d_avx2() 688 __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_dist_wtd_convolve_2d_avx2() 689 __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_dist_wtd_convolve_2d_avx2() 690 __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_dist_wtd_convolve_2d_avx2() 699 const int16_t *data = &im_block[i * im_stride]; in av1_dist_wtd_convolve_2d_avx2() 702 _mm256_loadu_si256((__m256i *)(data + 4 * im_stride)); in av1_dist_wtd_convolve_2d_avx2() 704 _mm256_loadu_si256((__m256i *)(data + 5 * im_stride)); in av1_dist_wtd_convolve_2d_avx2()
|
/external/libaom/libaom/aom_dsp/x86/ |
D | convolve_avx2.h | 75 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); \ 85 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); 88 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); \ 89 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); \ 90 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); \ 91 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); \ 92 __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); \ 93 __m256i src_5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); \ 105 const int16_t *data = &im_block[i * im_stride]; \ 107 const __m256i s6 = _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); \ [all …]
|
/external/libaom/libaom/av1/common/ |
D | convolve.c | 84 int im_stride = w; in av1_convolve_2d_sobel_y_c() local 97 im_block[y * im_stride + x] = sum; in av1_convolve_2d_sobel_y_c() 102 int16_t *src_vert = im_block + fo_vert * im_stride; in av1_convolve_2d_sobel_y_c() 108 sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; in av1_convolve_2d_sobel_y_c() 123 int im_stride = w; in av1_convolve_2d_sr_c() local 142 im_block[y * im_stride + x] = in av1_convolve_2d_sr_c() 148 int16_t *src_vert = im_block + fo_vert * im_stride; in av1_convolve_2d_sr_c() 156 sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; in av1_convolve_2d_sr_c() 256 int im_stride = w; in av1_dist_wtd_convolve_2d_c() local 274 im_block[y * im_stride + x] = in av1_dist_wtd_convolve_2d_c() [all …]
|
/external/libaom/libaom/av1/common/arm/ |
D | convolve_neon.c | 958 const int im_stride = MAX_SB_SIZE; in av1_convolve_2d_sr_neon() local 967 im_dst_stride = im_stride; in av1_convolve_2d_sr_neon() 1270 horiz_filter_w8_single_row(src_ptr, src_stride, dst_ptr, im_stride, w, in av1_convolve_2d_sr_neon() 1276 horiz_filter_w8_single_row(src_ptr, src_stride, dst_ptr, im_stride, w, in av1_convolve_2d_sr_neon() 1296 src_stride = im_stride; in av1_convolve_2d_sr_neon() 1319 __builtin_prefetch(v_s + 0 * im_stride); in av1_convolve_2d_sr_neon() 1320 __builtin_prefetch(v_s + 1 * im_stride); in av1_convolve_2d_sr_neon() 1321 __builtin_prefetch(v_s + 2 * im_stride); in av1_convolve_2d_sr_neon() 1322 __builtin_prefetch(v_s + 3 * im_stride); in av1_convolve_2d_sr_neon() 1323 __builtin_prefetch(v_s + 4 * im_stride); in av1_convolve_2d_sr_neon() [all …]
|
D | jnt_convolve_neon.c | 319 const uint8_t *src, int src_stride, int16_t *im_block, const int im_stride, in dist_wtd_convolve_2d_horiz_neon() argument 328 dst_stride = im_stride; in dist_wtd_convolve_2d_horiz_neon() 563 int16_t *im_block, const int im_stride, uint8_t *dst8, int dst8_stride, in dist_wtd_convolve_2d_vert_neon() argument 610 __builtin_prefetch(s + 0 * im_stride); in dist_wtd_convolve_2d_vert_neon() 611 __builtin_prefetch(s + 1 * im_stride); in dist_wtd_convolve_2d_vert_neon() 612 __builtin_prefetch(s + 2 * im_stride); in dist_wtd_convolve_2d_vert_neon() 613 __builtin_prefetch(s + 3 * im_stride); in dist_wtd_convolve_2d_vert_neon() 614 __builtin_prefetch(s + 4 * im_stride); in dist_wtd_convolve_2d_vert_neon() 615 __builtin_prefetch(s + 5 * im_stride); in dist_wtd_convolve_2d_vert_neon() 616 __builtin_prefetch(s + 6 * im_stride); in dist_wtd_convolve_2d_vert_neon() [all …]
|