Home
last modified time | relevance | path

Searched refs:im_stride (Results 1 – 14 of 14) sorted by relevance

/external/libaom/libaom/av1/common/x86/
Djnt_convolve_ssse3.c31 int im_stride = MAX_SB_SIZE; in av1_dist_wtd_convolve_2d_ssse3() local
118 _mm_store_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_dist_wtd_convolve_2d_ssse3()
151 const int16_t *data = &im_block[i * im_stride + j]; in av1_dist_wtd_convolve_2d_ssse3()
153 _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), in av1_dist_wtd_convolve_2d_ssse3()
154 *(__m128i *)(data + 1 * im_stride)); in av1_dist_wtd_convolve_2d_ssse3()
156 _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), in av1_dist_wtd_convolve_2d_ssse3()
157 *(__m128i *)(data + 3 * im_stride)); in av1_dist_wtd_convolve_2d_ssse3()
159 _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), in av1_dist_wtd_convolve_2d_ssse3()
160 *(__m128i *)(data + 5 * im_stride)); in av1_dist_wtd_convolve_2d_ssse3()
162 _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), in av1_dist_wtd_convolve_2d_ssse3()
[all …]
Dwiener_convolve_avx2.c57 int im_stride = 8; in av1_wiener_convolve_add_src_avx2() local
58 memset(im_block + (im_h * im_stride), 0, MAX_SB_SIZE); in av1_wiener_convolve_add_src_avx2()
147 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res_clamped); in av1_wiener_convolve_add_src_avx2()
152 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_wiener_convolve_add_src_avx2()
153 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_wiener_convolve_add_src_avx2()
154 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_wiener_convolve_add_src_avx2()
155 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_wiener_convolve_add_src_avx2()
156 __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_wiener_convolve_add_src_avx2()
157 __m256i src_5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); in av1_wiener_convolve_add_src_avx2()
169 const int16_t *data = &im_block[i * im_stride]; in av1_wiener_convolve_add_src_avx2()
[all …]
Dhighbd_convolve_2d_ssse3.c29 int im_stride = 8; in av1_highbd_convolve_2d_sr_ssse3() local
93 _mm_store_si128((__m128i *)&im_block[i * im_stride], res); in av1_highbd_convolve_2d_sr_ssse3()
98 __m128i s0 = _mm_loadu_si128((__m128i *)(im_block + 0 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3()
99 __m128i s1 = _mm_loadu_si128((__m128i *)(im_block + 1 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3()
100 __m128i s2 = _mm_loadu_si128((__m128i *)(im_block + 2 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3()
101 __m128i s3 = _mm_loadu_si128((__m128i *)(im_block + 3 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3()
102 __m128i s4 = _mm_loadu_si128((__m128i *)(im_block + 4 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3()
103 __m128i s5 = _mm_loadu_si128((__m128i *)(im_block + 5 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3()
104 __m128i s6 = _mm_loadu_si128((__m128i *)(im_block + 6 * im_stride)); in av1_highbd_convolve_2d_sr_ssse3()
123 const int16_t *data = &im_block[i * im_stride]; in av1_highbd_convolve_2d_sr_ssse3()
[all …]
Dhighbd_convolve_2d_sse4.c181 int im_stride = MAX_SB_SIZE; in av1_highbd_dist_wtd_convolve_2d_sse4_1() local
270 _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_highbd_dist_wtd_convolve_2d_sse4_1()
303 const int16_t *data = &im_block[i * im_stride + j]; in av1_highbd_dist_wtd_convolve_2d_sse4_1()
305 _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), in av1_highbd_dist_wtd_convolve_2d_sse4_1()
306 *(__m128i *)(data + 1 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_sse4_1()
308 _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), in av1_highbd_dist_wtd_convolve_2d_sse4_1()
309 *(__m128i *)(data + 3 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_sse4_1()
311 _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), in av1_highbd_dist_wtd_convolve_2d_sse4_1()
312 *(__m128i *)(data + 5 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_sse4_1()
314 _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), in av1_highbd_dist_wtd_convolve_2d_sse4_1()
[all …]
Dconvolve_2d_sse2.c32 int im_stride = MAX_SB_SIZE; in av1_convolve_2d_sr_sse2() local
106 _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_convolve_2d_sr_sse2()
143 const int16_t *data = &im_block[i * im_stride + j]; in av1_convolve_2d_sr_sse2()
145 _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), in av1_convolve_2d_sr_sse2()
146 *(__m128i *)(data + 1 * im_stride)); in av1_convolve_2d_sr_sse2()
148 _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), in av1_convolve_2d_sr_sse2()
149 *(__m128i *)(data + 3 * im_stride)); in av1_convolve_2d_sr_sse2()
151 _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), in av1_convolve_2d_sr_sse2()
152 *(__m128i *)(data + 5 * im_stride)); in av1_convolve_2d_sr_sse2()
154 _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), in av1_convolve_2d_sr_sse2()
[all …]
Dhighbd_convolve_2d_avx2.c32 int im_stride = 8; in av1_highbd_convolve_2d_sr_avx2() local
102 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_highbd_convolve_2d_sr_avx2()
108 __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_highbd_convolve_2d_sr_avx2()
109 __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_highbd_convolve_2d_sr_avx2()
110 __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_highbd_convolve_2d_sr_avx2()
111 __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_highbd_convolve_2d_sr_avx2()
112 __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_highbd_convolve_2d_sr_avx2()
113 __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); in av1_highbd_convolve_2d_sr_avx2()
124 const int16_t *data = &im_block[i * im_stride]; in av1_highbd_convolve_2d_sr_avx2()
127 _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); in av1_highbd_convolve_2d_sr_avx2()
[all …]
Dconvolve_2d_avx2.c30 int im_stride = 8; in av1_convolve_2d_sr_avx2() local
91 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_convolve_2d_sr_avx2()
100 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_convolve_2d_sr_avx2()
119 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_convolve_2d_sr_avx2()
120 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_convolve_2d_sr_avx2()
121 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_convolve_2d_sr_avx2()
122 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_convolve_2d_sr_avx2()
130 const int16_t *data = &im_block[i * im_stride]; in av1_convolve_2d_sr_avx2()
133 _mm256_loadu_si256((__m256i *)(data + 4 * im_stride)); in av1_convolve_2d_sr_avx2()
135 _mm256_loadu_si256((__m256i *)(data + 5 * im_stride)); in av1_convolve_2d_sr_avx2()
Djnt_convolve_sse2.c400 int im_stride = MAX_SB_SIZE; in av1_dist_wtd_convolve_2d_sse2() local
502 _mm_store_si128((__m128i *)&im_block[i * im_stride + j], res); in av1_dist_wtd_convolve_2d_sse2()
535 const int16_t *data = &im_block[i * im_stride + j]; in av1_dist_wtd_convolve_2d_sse2()
537 _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride), in av1_dist_wtd_convolve_2d_sse2()
538 *(__m128i *)(data + 1 * im_stride)); in av1_dist_wtd_convolve_2d_sse2()
540 _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride), in av1_dist_wtd_convolve_2d_sse2()
541 *(__m128i *)(data + 3 * im_stride)); in av1_dist_wtd_convolve_2d_sse2()
543 _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride), in av1_dist_wtd_convolve_2d_sse2()
544 *(__m128i *)(data + 5 * im_stride)); in av1_dist_wtd_convolve_2d_sse2()
546 _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride), in av1_dist_wtd_convolve_2d_sse2()
[all …]
Dhighbd_jnt_convolve_avx2.c240 int im_stride = 8; in av1_highbd_dist_wtd_convolve_2d_avx2() local
321 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_highbd_dist_wtd_convolve_2d_avx2()
327 __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2()
328 __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2()
329 __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2()
330 __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2()
331 __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2()
332 __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2()
343 const int16_t *data = &im_block[i * im_stride]; in av1_highbd_dist_wtd_convolve_2d_avx2()
346 _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); in av1_highbd_dist_wtd_convolve_2d_avx2()
[all …]
Djnt_convolve_avx2.c607 int im_stride = 8; in av1_dist_wtd_convolve_2d_avx2() local
667 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); in av1_dist_wtd_convolve_2d_avx2()
687 __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); in av1_dist_wtd_convolve_2d_avx2()
688 __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); in av1_dist_wtd_convolve_2d_avx2()
689 __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); in av1_dist_wtd_convolve_2d_avx2()
690 __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); in av1_dist_wtd_convolve_2d_avx2()
699 const int16_t *data = &im_block[i * im_stride]; in av1_dist_wtd_convolve_2d_avx2()
702 _mm256_loadu_si256((__m256i *)(data + 4 * im_stride)); in av1_dist_wtd_convolve_2d_avx2()
704 _mm256_loadu_si256((__m256i *)(data + 5 * im_stride)); in av1_dist_wtd_convolve_2d_avx2()
/external/libaom/libaom/aom_dsp/x86/
Dconvolve_avx2.h75 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); \
85 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
88 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); \
89 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); \
90 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); \
91 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); \
92 __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride)); \
93 __m256i src_5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride)); \
105 const int16_t *data = &im_block[i * im_stride]; \
107 const __m256i s6 = _mm256_loadu_si256((__m256i *)(data + 6 * im_stride)); \
[all …]
/external/libaom/libaom/av1/common/
Dconvolve.c84 int im_stride = w; in av1_convolve_2d_sobel_y_c() local
97 im_block[y * im_stride + x] = sum; in av1_convolve_2d_sobel_y_c()
102 int16_t *src_vert = im_block + fo_vert * im_stride; in av1_convolve_2d_sobel_y_c()
108 sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; in av1_convolve_2d_sobel_y_c()
123 int im_stride = w; in av1_convolve_2d_sr_c() local
142 im_block[y * im_stride + x] = in av1_convolve_2d_sr_c()
148 int16_t *src_vert = im_block + fo_vert * im_stride; in av1_convolve_2d_sr_c()
156 sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; in av1_convolve_2d_sr_c()
256 int im_stride = w; in av1_dist_wtd_convolve_2d_c() local
274 im_block[y * im_stride + x] = in av1_dist_wtd_convolve_2d_c()
[all …]
/external/libaom/libaom/av1/common/arm/
Dconvolve_neon.c958 const int im_stride = MAX_SB_SIZE; in av1_convolve_2d_sr_neon() local
967 im_dst_stride = im_stride; in av1_convolve_2d_sr_neon()
1270 horiz_filter_w8_single_row(src_ptr, src_stride, dst_ptr, im_stride, w, in av1_convolve_2d_sr_neon()
1276 horiz_filter_w8_single_row(src_ptr, src_stride, dst_ptr, im_stride, w, in av1_convolve_2d_sr_neon()
1296 src_stride = im_stride; in av1_convolve_2d_sr_neon()
1319 __builtin_prefetch(v_s + 0 * im_stride); in av1_convolve_2d_sr_neon()
1320 __builtin_prefetch(v_s + 1 * im_stride); in av1_convolve_2d_sr_neon()
1321 __builtin_prefetch(v_s + 2 * im_stride); in av1_convolve_2d_sr_neon()
1322 __builtin_prefetch(v_s + 3 * im_stride); in av1_convolve_2d_sr_neon()
1323 __builtin_prefetch(v_s + 4 * im_stride); in av1_convolve_2d_sr_neon()
[all …]
Djnt_convolve_neon.c319 const uint8_t *src, int src_stride, int16_t *im_block, const int im_stride, in dist_wtd_convolve_2d_horiz_neon() argument
328 dst_stride = im_stride; in dist_wtd_convolve_2d_horiz_neon()
563 int16_t *im_block, const int im_stride, uint8_t *dst8, int dst8_stride, in dist_wtd_convolve_2d_vert_neon() argument
610 __builtin_prefetch(s + 0 * im_stride); in dist_wtd_convolve_2d_vert_neon()
611 __builtin_prefetch(s + 1 * im_stride); in dist_wtd_convolve_2d_vert_neon()
612 __builtin_prefetch(s + 2 * im_stride); in dist_wtd_convolve_2d_vert_neon()
613 __builtin_prefetch(s + 3 * im_stride); in dist_wtd_convolve_2d_vert_neon()
614 __builtin_prefetch(s + 4 * im_stride); in dist_wtd_convolve_2d_vert_neon()
615 __builtin_prefetch(s + 5 * im_stride); in dist_wtd_convolve_2d_vert_neon()
616 __builtin_prefetch(s + 6 * im_stride); in dist_wtd_convolve_2d_vert_neon()
[all …]