/external/libaom/libaom/av1/common/x86/ |
D | wiener_convolve_avx2.c | 189 const __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round); in av1_wiener_convolve_add_src_avx2() local 191 const __m256i res_8b = _mm256_packus_epi16(res_16bit, res_16bit); in av1_wiener_convolve_add_src_avx2() 233 const __m128i res_16bit = _mm_packus_epi32(reslo, reshi); in av1_wiener_convolve_add_src_avx2() local 236 const __m128i res_8b = _mm_packus_epi16(res_16bit, res_16bit); in av1_wiener_convolve_add_src_avx2()
|
D | highbd_convolve_2d_avx2.c | 149 __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round); in av1_highbd_convolve_2d_sr_avx2() local 150 res_16bit = _mm256_min_epi16(res_16bit, clip_pixel); in av1_highbd_convolve_2d_sr_avx2() 151 res_16bit = _mm256_max_epi16(res_16bit, zero); in av1_highbd_convolve_2d_sr_avx2() 154 _mm256_castsi256_si128(res_16bit)); in av1_highbd_convolve_2d_sr_avx2() 156 _mm256_extracti128_si256(res_16bit, 1)); in av1_highbd_convolve_2d_sr_avx2()
|
D | highbd_wiener_convolve_ssse3.c | 194 __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round); in av1_highbd_wiener_convolve_add_src_ssse3() local 195 res_16bit = _mm_min_epi16(_mm_max_epi16(res_16bit, zero), maxval); in av1_highbd_wiener_convolve_add_src_ssse3() 198 _mm_storeu_si128(p, res_16bit); in av1_highbd_wiener_convolve_add_src_ssse3()
|
D | wiener_convolve_sse2.c | 191 const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round); in av1_wiener_convolve_add_src_sse2() local 192 __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit); in av1_wiener_convolve_add_src_sse2()
|
D | convolve_2d_avx2.c | 156 const __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round); in av1_convolve_2d_sr_avx2() local 158 const __m256i res_8b = _mm256_packus_epi16(res_16bit, res_16bit); in av1_convolve_2d_sr_avx2()
|
D | highbd_warp_plane_sse4.c | 611 __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round); in av1_highbd_warp_affine_sse4_1() local 615 res_16bit = _mm_max_epi16(_mm_min_epi16(res_16bit, max_val), zero); in av1_highbd_warp_affine_sse4_1() 624 _mm_storel_epi64(p, res_16bit); in av1_highbd_warp_affine_sse4_1() 626 _mm_storeu_si128(p, res_16bit); in av1_highbd_warp_affine_sse4_1()
|
D | highbd_wiener_convolve_avx2.c | 235 const __m256i res_16bit = in av1_highbd_wiener_convolve_add_src_avx2() local 238 _mm256_max_epi16(res_16bit, clamp_low), clamp_high); in av1_highbd_wiener_convolve_add_src_avx2()
|
D | warp_plane_sse4.c | 660 const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round); in store_vertical_filter_output() local 661 __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit); in store_vertical_filter_output()
|
D | warp_plane_avx2.c | 760 const __m256i res_16bit = _mm256_packs_epi32(res_lo_round, res_hi_round); in store_vertical_filter_output_avx2() local 761 const __m256i res_8bit = _mm256_packus_epi16(res_16bit, res_16bit); in store_vertical_filter_output_avx2()
|
/external/libaom/libaom/aom_dsp/x86/ |
D | highbd_convolve_avx2.c | 223 __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round); in av1_highbd_convolve_y_sr_avx2() local 224 res_16bit = _mm256_min_epi16(res_16bit, clip_pixel); in av1_highbd_convolve_y_sr_avx2() 225 res_16bit = _mm256_max_epi16(res_16bit, zero); in av1_highbd_convolve_y_sr_avx2() 228 _mm256_castsi256_si128(res_16bit)); in av1_highbd_convolve_y_sr_avx2() 230 _mm256_extracti128_si256(res_16bit, 1)); in av1_highbd_convolve_y_sr_avx2() 1112 __m256i res_16bit = _mm256_min_epi32(res_a_round, clip_pixel); in aom_highbd_filter_block1d4_v4_avx2() local 1113 res_16bit = _mm256_max_epi32(res_16bit, zero); in aom_highbd_filter_block1d4_v4_avx2() 1114 res_16bit = _mm256_packs_epi32(res_16bit, res_16bit); in aom_highbd_filter_block1d4_v4_avx2() 1117 _mm256_castsi256_si128(res_16bit)); in aom_highbd_filter_block1d4_v4_avx2() 1119 _mm256_extracti128_si256(res_16bit, 1)); in aom_highbd_filter_block1d4_v4_avx2() [all …]
|
D | convolve_avx2.h | 126 const __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round); \ 127 const __m256i res_8b = _mm256_packus_epi16(res_16bit, res_16bit); \
|