/external/clang/test/CodeGen/ |
D | avx2-builtins.c |
    11  __m256i test_mm256_abs_epi8(__m256i a) {   in test_mm256_abs_epi8()
    17  __m256i test_mm256_abs_epi16(__m256i a) {   in test_mm256_abs_epi16()
    23  __m256i test_mm256_abs_epi32(__m256i a) {   in test_mm256_abs_epi32()
    29  __m256i test_mm256_add_epi8(__m256i a, __m256i b) {   in test_mm256_add_epi8()
    35  __m256i test_mm256_add_epi16(__m256i a, __m256i b) {   in test_mm256_add_epi16()
    41  __m256i test_mm256_add_epi32(__m256i a, __m256i b) {   in test_mm256_add_epi32()
    47  __m256i test_mm256_add_epi64(__m256i a, __m256i b) {   in test_mm256_add_epi64()
    53  __m256i test_mm256_adds_epi8(__m256i a, __m256i b) {   in test_mm256_adds_epi8()
    59  __m256i test_mm256_adds_epi16(__m256i a, __m256i b) {   in test_mm256_adds_epi16()
    65  __m256i test_mm256_adds_epu8(__m256i a, __m256i b) {   in test_mm256_adds_epu8()
    [all …]
|
D | avx512vlbw-builtins.c |
     9  __mmask32 test_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {   in test_mm256_cmpeq_epi8_mask()
    15  __mmask32 test_mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {   in test_mm256_mask_cmpeq_epi8_mask()
    35  __mmask16 test_mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) {   in test_mm256_cmpeq_epi16_mask()
    41  __mmask16 test_mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {   in test_mm256_mask_cmpeq_epi16_mask()
    61  __mmask32 test_mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) {   in test_mm256_cmpgt_epi8_mask()
    67  __mmask32 test_mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {   in test_mm256_mask_cmpgt_epi8_mask()
    87  __mmask16 test_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) {   in test_mm256_cmpgt_epi16_mask()
    93  __mmask16 test_mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {   in test_mm256_mask_cmpgt_epi16_mask()
   139  __mmask32 test_mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) {   in test_mm256_cmpeq_epu8_mask()
   145  __mmask32 test_mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {   in test_mm256_mask_cmpeq_epu8_mask()
    [all …]
|
/external/clang/lib/Headers/ |
D | avx2intrin.h |
    36    (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
    37                                       (__v32qi)(__m256i)(Y), (int)(M))
    39  static __inline__ __m256i __DEFAULT_FN_ATTRS
    40  _mm256_abs_epi8(__m256i __a)   in _mm256_abs_epi8()
    42    return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);   in _mm256_abs_epi8()
    45  static __inline__ __m256i __DEFAULT_FN_ATTRS
    46  _mm256_abs_epi16(__m256i __a)   in _mm256_abs_epi16()
    48    return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);   in _mm256_abs_epi16()
    51  static __inline__ __m256i __DEFAULT_FN_ATTRS
    52  _mm256_abs_epi32(__m256i __a)   in _mm256_abs_epi32()
    [all …]
|
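The avx2intrin.h entries above are the public wrappers around the pabs* builtins. As a minimal usage sketch (not taken from the listing; hypothetical example, compile with -mavx2):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i v8 = _mm256_set1_epi8(-7);                       /* 32 signed bytes */
  __m256i abs8 = _mm256_abs_epi8(v8);                      /* |x| per byte    */
  __m256i abs16 = _mm256_abs_epi16(_mm256_set1_epi16(-300));
  __m256i abs32 = _mm256_abs_epi32(_mm256_set1_epi32(-70000));

  signed char out[32];
  _mm256_storeu_si256((__m256i *)out, abs8);
  printf("%d\n", out[0]);                                  /* prints 7 */
  (void)abs16;
  (void)abs32;
  return 0;
}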
D | avx512vlbwintrin.h |
    66  _mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {   in _mm256_cmpeq_epi8_mask()
    72  _mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {   in _mm256_mask_cmpeq_epi8_mask()
    78  _mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) {   in _mm256_cmpeq_epu8_mask()
    84  _mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {   in _mm256_mask_cmpeq_epu8_mask()
   114  _mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) {   in _mm256_cmpeq_epi16_mask()
   120  _mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {   in _mm256_mask_cmpeq_epi16_mask()
   126  _mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) {   in _mm256_cmpeq_epu16_mask()
   132  _mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {   in _mm256_mask_cmpeq_epu16_mask()
   162  _mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) {   in _mm256_cmpge_epi8_mask()
   168  _mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {   in _mm256_mask_cmpge_epi8_mask()
    [all …]
|
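Unlike the AVX2 compares, these avx512vlbwintrin.h functions return a bitmask (__mmask32 / __mmask16) rather than a vector. A small illustrative sketch of how the unmasked and masked variants relate (the helper name is made up; needs -mavx512bw -mavx512vl):

#include <immintrin.h>

/* Count the bytes of a that equal the corresponding byte of b,
   considering only lanes whose bit is set in the incoming mask u. */
static int count_equal_bytes(__m256i a, __m256i b, __mmask32 u) {
  __mmask32 eq = _mm256_cmpeq_epi8_mask(a, b);                 /* one bit per equal byte */
  __mmask32 eq_masked = _mm256_mask_cmpeq_epi8_mask(u, a, b);  /* same result ANDed with u */
  (void)eq;
  return __builtin_popcount(eq_masked);
}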
/external/libaom/libaom/av1/encoder/x86/ |
D | wedge_utils_avx2.c |
    33    const __m256i v_mask_max_w = _mm256_set1_epi16(MAX_MASK_VALUE);   in av1_wedge_sse_from_residuals_avx2()
    34    const __m256i v_zext_q = yy_set1_64_from_32i(0xffffffff);   in av1_wedge_sse_from_residuals_avx2()
    36    __m256i v_acc0_q = _mm256_setzero_si256();   in av1_wedge_sse_from_residuals_avx2()
    45      const __m256i v_r0_w = _mm256_lddqu_si256((__m256i *)(r1 + n));   in av1_wedge_sse_from_residuals_avx2()
    46      const __m256i v_d0_w = _mm256_lddqu_si256((__m256i *)(d + n));   in av1_wedge_sse_from_residuals_avx2()
    49      const __m256i v_rd0l_w = _mm256_unpacklo_epi16(v_d0_w, v_r0_w);   in av1_wedge_sse_from_residuals_avx2()
    50      const __m256i v_rd0h_w = _mm256_unpackhi_epi16(v_d0_w, v_r0_w);   in av1_wedge_sse_from_residuals_avx2()
    51      const __m256i v_m0_w = _mm256_cvtepu8_epi16(v_m01_b);   in av1_wedge_sse_from_residuals_avx2()
    53      const __m256i v_m0l_w = _mm256_unpacklo_epi16(v_m0_w, v_mask_max_w);   in av1_wedge_sse_from_residuals_avx2()
    54      const __m256i v_m0h_w = _mm256_unpackhi_epi16(v_m0_w, v_mask_max_w);   in av1_wedge_sse_from_residuals_avx2()
    [all …]
|
D | av1_fwd_txfm_avx2.h |
    19                                        __m256i *in0, __m256i *in1,   in btf_32_avx2_type0()
    20                                        const __m256i _r, const int32_t cos_bit) {   in btf_32_avx2_type0()
    21    __m256i _in0 = *in0;   in btf_32_avx2_type0()
    22    __m256i _in1 = *in1;   in btf_32_avx2_type0()
    23    const __m256i ww0 = _mm256_set1_epi32(w0);   in btf_32_avx2_type0()
    24    const __m256i ww1 = _mm256_set1_epi32(w1);   in btf_32_avx2_type0()
    25    const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);   in btf_32_avx2_type0()
    26    const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);   in btf_32_avx2_type0()
    27    __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);   in btf_32_avx2_type0()
    30    const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);   in btf_32_avx2_type0()
    [all …]
|
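btf_32_avx2_type0 above is one 32-bit transform butterfly step: both outputs are weighted sums of the two inputs, rounded and shifted by cos_bit. A self-contained sketch of that pattern, reconstructed from the visible lines rather than copied from the header (helper name is made up):

#include <immintrin.h>
#include <stdint.h>

/* out0 = round((w0*in0 + w1*in1) >> cos_bit)
   out1 = round((w1*in0 - w0*in1) >> cos_bit), over 8 lanes of int32. */
static void btf32_sketch(int32_t w0, int32_t w1, __m256i *in0, __m256i *in1,
                         int32_t cos_bit) {
  const __m256i ww0 = _mm256_set1_epi32(w0);
  const __m256i ww1 = _mm256_set1_epi32(w1);
  const __m256i r = _mm256_set1_epi32(1 << (cos_bit - 1));  /* rounding term */

  const __m256i a = _mm256_add_epi32(
      _mm256_add_epi32(_mm256_mullo_epi32(*in0, ww0),
                       _mm256_mullo_epi32(*in1, ww1)), r);
  const __m256i b = _mm256_add_epi32(
      _mm256_sub_epi32(_mm256_mullo_epi32(*in0, ww1),
                       _mm256_mullo_epi32(*in1, ww0)), r);

  *in0 = _mm256_srai_epi32(a, cos_bit);
  *in1 = _mm256_srai_epi32(b, cos_bit);
}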
D | av1_quantize_avx2.c |
    19  static INLINE void read_coeff(const tran_low_t *coeff, __m256i *c) {   in read_coeff()
    21      const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff);   in read_coeff()
    22      const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1);   in read_coeff()
    26      *c = _mm256_loadu_si256((const __m256i *)coeff);   in read_coeff()
    31    const __m256i zero = _mm256_setzero_si256();   in write_zero()
    33      _mm256_storeu_si256((__m256i *)qcoeff, zero);   in write_zero()
    34      _mm256_storeu_si256((__m256i *)qcoeff + 1, zero);   in write_zero()
    36      _mm256_storeu_si256((__m256i *)qcoeff, zero);   in write_zero()
    40  static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {   in init_one_qp()
    47                            __m256i *thr, __m256i *qp) {   in init_qp()
    [all …]
|
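read_coeff narrows the coefficients to 16-bit lanes before the SIMD quantizer uses them; when tran_low_t is stored as 32 bits it loads two vectors and packs them. The usual AVX2 narrowing idiom looks like the sketch below (hypothetical helper name, and it assumes the values fit in int16_t; the real file handles the layout choice with its own guards):

#include <immintrin.h>
#include <stdint.h>

/* Pack 16 int32 coefficients (two 256-bit loads) into one vector of 16 int16. */
static __m256i load_coeff_32to16(const int32_t *coeff) {
  const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff);      /* c0..c7  */
  const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1);  /* c8..c15 */
  __m256i y = _mm256_packs_epi32(x0, x1);  /* saturating pack, per 128-bit lane */
  /* packs interleaves the lanes, so restore c0..c15 order across lanes. */
  return _mm256_permute4x64_epi64(y, 0xD8);
}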
/external/libaom/libaom/aom_dsp/x86/ |
D | convolve_avx2.h |
    39      __m256i data = _mm256_castsi128_si256( \
    47      __m256i res = convolve_lowbd_x(data, coeffs_h, filt); \
    50      _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); \
    53      __m256i data_1 = _mm256_castsi128_si256( \
    56      __m256i res = convolve_lowbd_x(data_1, coeffs_h, filt); \
    60      _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
    63    __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); \
    64    __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); \
    65    __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); \
    66    __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); \
    [all …]
|
D | txfm_common_avx2.h |
    23  static INLINE __m256i pair_set_w16_epi16(int16_t a, int16_t b) {   in pair_set_w16_epi16()
    28  static INLINE void btf_16_w16_avx2(const __m256i w0, const __m256i w1,   in btf_16_w16_avx2()
    29                                     __m256i *in0, __m256i *in1, const __m256i _r,   in btf_16_w16_avx2()
    31    __m256i t0 = _mm256_unpacklo_epi16(*in0, *in1);   in btf_16_w16_avx2()
    32    __m256i t1 = _mm256_unpackhi_epi16(*in0, *in1);   in btf_16_w16_avx2()
    33    __m256i u0 = _mm256_madd_epi16(t0, w0);   in btf_16_w16_avx2()
    34    __m256i u1 = _mm256_madd_epi16(t1, w0);   in btf_16_w16_avx2()
    35    __m256i v0 = _mm256_madd_epi16(t0, w1);   in btf_16_w16_avx2()
    36    __m256i v1 = _mm256_madd_epi16(t1, w1);   in btf_16_w16_avx2()
    38    __m256i a0 = _mm256_add_epi32(u0, _r);   in btf_16_w16_avx2()
    [all …]
|
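btf_16_w16_avx2 is the 16-bit version of the same butterfly: the two inputs are interleaved and multiplied against packed (w0, w1) pairs with vpmaddwd, so each 32-bit product already holds w0*x + w1*y. A standalone reconstruction of that idea, with an assumed rounding/shift convention and made-up helper names, not the header's verbatim code:

#include <immintrin.h>
#include <stdint.h>

/* Broadcast the 16-bit pair (a, b) into every 32-bit lane. */
static __m256i pair_epi16(int16_t a, int16_t b) {
  return _mm256_set1_epi32((int32_t)(uint16_t)a | ((int32_t)(uint16_t)b << 16));
}

/* in0' = (c*in0 + s*in1 + r) >> bit,  in1' = (-s*in0 + c*in1 + r) >> bit */
static void btf16_sketch(int16_t c, int16_t s, __m256i *in0, __m256i *in1, int bit) {
  const __m256i w0 = pair_epi16(c, s);    /* weights for the first output  */
  const __m256i w1 = pair_epi16(-s, c);   /* weights for the second output */
  const __m256i r = _mm256_set1_epi32(1 << (bit - 1));

  const __m256i t0 = _mm256_unpacklo_epi16(*in0, *in1);  /* pairs (x_i, y_i) */
  const __m256i t1 = _mm256_unpackhi_epi16(*in0, *in1);

  /* madd: x*w.lo + y*w.hi per 32-bit lane, then round and shift. */
  const __m256i u0 = _mm256_srai_epi32(_mm256_add_epi32(_mm256_madd_epi16(t0, w0), r), bit);
  const __m256i u1 = _mm256_srai_epi32(_mm256_add_epi32(_mm256_madd_epi16(t1, w0), r), bit);
  const __m256i v0 = _mm256_srai_epi32(_mm256_add_epi32(_mm256_madd_epi16(t0, w1), r), bit);
  const __m256i v1 = _mm256_srai_epi32(_mm256_add_epi32(_mm256_madd_epi16(t1, w1), r), bit);

  *in0 = _mm256_packs_epi32(u0, u1);  /* back to 16 lanes of int16 */
  *in1 = _mm256_packs_epi32(v0, v1);
}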
D | highbd_convolve_avx2.c |
    47      const __m256i p0 = _mm256_loadu_si256((const __m256i *)src);   in aom_highbd_convolve_copy_avx2()
    48      const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));   in aom_highbd_convolve_copy_avx2()
    49      const __m256i p2 = _mm256_loadu_si256((const __m256i *)(src + 32));   in aom_highbd_convolve_copy_avx2()
    50      const __m256i p3 = _mm256_loadu_si256((const __m256i *)(src + 48));   in aom_highbd_convolve_copy_avx2()
    52      _mm256_storeu_si256((__m256i *)dst, p0);   in aom_highbd_convolve_copy_avx2()
    53      _mm256_storeu_si256((__m256i *)(dst + 16), p1);   in aom_highbd_convolve_copy_avx2()
    54      _mm256_storeu_si256((__m256i *)(dst + 32), p2);   in aom_highbd_convolve_copy_avx2()
    55      _mm256_storeu_si256((__m256i *)(dst + 48), p3);   in aom_highbd_convolve_copy_avx2()
    61      const __m256i p0 = _mm256_loadu_si256((const __m256i *)src);   in aom_highbd_convolve_copy_avx2()
    62      const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));   in aom_highbd_convolve_copy_avx2()
    [all …]
|
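aom_highbd_convolve_copy_avx2 is a pure copy specialized by width; for a 64-pixel row of 16-bit samples each line is moved with four unaligned 256-bit loads and stores. A minimal sketch of that inner step (illustrative helper, assuming 16-bit pixels):

#include <immintrin.h>
#include <stdint.h>

/* Copy one 64-pixel row of 16-bit samples. */
static void copy_row_w64(const uint16_t *src, uint16_t *dst) {
  const __m256i p0 = _mm256_loadu_si256((const __m256i *)(src + 0));
  const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));
  const __m256i p2 = _mm256_loadu_si256((const __m256i *)(src + 32));
  const __m256i p3 = _mm256_loadu_si256((const __m256i *)(src + 48));
  _mm256_storeu_si256((__m256i *)(dst + 0), p0);
  _mm256_storeu_si256((__m256i *)(dst + 16), p1);
  _mm256_storeu_si256((__m256i *)(dst + 32), p2);
  _mm256_storeu_si256((__m256i *)(dst + 48), p3);
}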
D | variance_avx2.c |
    18  static INLINE __m128i mm256_add_hi_lo_epi16(const __m256i val) {   in mm256_add_hi_lo_epi16()
    23  static INLINE __m128i mm256_add_hi_lo_epi32(const __m256i val) {   in mm256_add_hi_lo_epi32()
    28  static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref,   in variance_kernel_avx2()
    29                                          __m256i *const sse,   in variance_kernel_avx2()
    30                                          __m256i *const sum) {   in variance_kernel_avx2()
    31    const __m256i adj_sub = _mm256_set1_epi16(0xff01);  // (1,-1)   in variance_kernel_avx2()
    34    const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref);   in variance_kernel_avx2()
    35    const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref);   in variance_kernel_avx2()
    38    const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub);   in variance_kernel_avx2()
    39    const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub);   in variance_kernel_avx2()
    [all …]
|
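variance_kernel_avx2 gets both the sum and the sum of squares of src-ref differences out of two multiplies: interleaving src and ref bytes and multiplying by the (+1, -1) pattern with vpmaddubsw yields 16-bit differences, and vpmaddwd then squares and pairwise-adds them. A standalone sketch of that trick (my reconstruction; accumulator handling is simplified and the name is made up):

#include <immintrin.h>

/* Accumulate sum (16-bit lanes) and sse (32-bit lanes) for 32 pixel pairs. */
static void variance_kernel_sketch(__m256i src, __m256i ref,
                                   __m256i *sse, __m256i *sum) {
  const __m256i adj_sub = _mm256_set1_epi16((short)0xff01);  /* bytes (+1, -1) */

  /* Interleave so each byte pair is (src_i, ref_i). */
  const __m256i sr0 = _mm256_unpacklo_epi8(src, ref);
  const __m256i sr1 = _mm256_unpackhi_epi8(src, ref);

  /* maddubs: unsigned src_i * (+1) + unsigned ref_i * (-1) = 16-bit difference. */
  const __m256i d0 = _mm256_maddubs_epi16(sr0, adj_sub);
  const __m256i d1 = _mm256_maddubs_epi16(sr1, adj_sub);

  /* madd: diff*diff summed in pairs gives 32-bit squared errors. */
  const __m256i sq0 = _mm256_madd_epi16(d0, d0);
  const __m256i sq1 = _mm256_madd_epi16(d1, d1);

  *sum = _mm256_add_epi16(*sum, _mm256_add_epi16(d0, d1));
  *sse = _mm256_add_epi32(*sse, _mm256_add_epi32(sq0, sq1));
}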
D | blend_a64_mask_avx2.c |
    30      const __m256i *m0, const __m256i *v_round_offset, const __m256i *v_maxval,   in blend_a64_d16_mask_w16_avx2()
    32    const __m256i max_minus_m0 = _mm256_sub_epi16(*v_maxval, *m0);   in blend_a64_d16_mask_w16_avx2()
    33    const __m256i s0_0 = yy_loadu_256(src0);   in blend_a64_d16_mask_w16_avx2()
    34    const __m256i s1_0 = yy_loadu_256(src1);   in blend_a64_d16_mask_w16_avx2()
    35    __m256i res0_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0_0, s1_0),   in blend_a64_d16_mask_w16_avx2()
    37    __m256i res0_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0_0, s1_0),   in blend_a64_d16_mask_w16_avx2()
    43    const __m256i res0 = _mm256_packs_epi32(res0_lo, res0_hi);   in blend_a64_d16_mask_w16_avx2()
    44    __m256i res = _mm256_packus_epi16(res0, res0);   in blend_a64_d16_mask_w16_avx2()
    51      const __m256i *m0, const __m256i *m1, const __m256i *v_round_offset,   in blend_a64_d16_mask_w32_avx2()
    52      const __m256i *v_maxval, int shift) {   in blend_a64_d16_mask_w32_avx2()
    [all …]
|
D | avg_intrin_avx2.c |
    19  static void hadamard_col8x2_avx2(__m256i *in, int iter) {   in hadamard_col8x2_avx2()
    20    __m256i a0 = in[0];   in hadamard_col8x2_avx2()
    21    __m256i a1 = in[1];   in hadamard_col8x2_avx2()
    22    __m256i a2 = in[2];   in hadamard_col8x2_avx2()
    23    __m256i a3 = in[3];   in hadamard_col8x2_avx2()
    24    __m256i a4 = in[4];   in hadamard_col8x2_avx2()
    25    __m256i a5 = in[5];   in hadamard_col8x2_avx2()
    26    __m256i a6 = in[6];   in hadamard_col8x2_avx2()
    27    __m256i a7 = in[7];   in hadamard_col8x2_avx2()
    29    __m256i b0 = _mm256_add_epi16(a0, a1);   in hadamard_col8x2_avx2()
    [all …]
|
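hadamard_col8x2_avx2 runs the classic in-register Hadamard butterflies on 16-bit residuals: each stage replaces a pair of vectors with their sum and difference. One such stage as a sketch (indices follow the excerpt; the permutation and transpose steps of the real routine are omitted):

#include <immintrin.h>

/* One butterfly stage over eight vectors of 16-bit values. */
static void hadamard_stage_sketch(const __m256i in[8], __m256i out[8]) {
  out[0] = _mm256_add_epi16(in[0], in[1]);
  out[1] = _mm256_sub_epi16(in[0], in[1]);
  out[2] = _mm256_add_epi16(in[2], in[3]);
  out[3] = _mm256_sub_epi16(in[2], in[3]);
  out[4] = _mm256_add_epi16(in[4], in[5]);
  out[5] = _mm256_sub_epi16(in[4], in[5]);
  out[6] = _mm256_add_epi16(in[6], in[7]);
  out[7] = _mm256_sub_epi16(in[6], in[7]);
}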
/external/libaom/libaom/av1/common/x86/ |
D | reconinter_avx2.c |
    22  static INLINE __m256i calc_mask_avx2(const __m256i mask_base, const __m256i s0,   in calc_mask_avx2()
    23                                       const __m256i s1) {   in calc_mask_avx2()
    24    const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(s0, s1));   in calc_mask_avx2()
    35    const __m256i y_mask_base = _mm256_set1_epi16(38 - mb);   in av1_build_compound_diffwtd_mask_avx2()
    46        const __m256i s0ABCD_w = _mm256_cvtepu8_epi16(s0ABCD);   in av1_build_compound_diffwtd_mask_avx2()
    55        const __m256i s1ABCD_w = _mm256_cvtepu8_epi16(s1ABCD);   in av1_build_compound_diffwtd_mask_avx2()
    56        const __m256i m16 = calc_mask_avx2(y_mask_base, s0ABCD_w, s1ABCD_w);   in av1_build_compound_diffwtd_mask_avx2()
    57        const __m256i m8 = _mm256_packus_epi16(m16, _mm256_setzero_si256());   in av1_build_compound_diffwtd_mask_avx2()
    72        const __m256i s0AC_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0A, s0C));   in av1_build_compound_diffwtd_mask_avx2()
    73        const __m256i s0BD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0B, s0D));   in av1_build_compound_diffwtd_mask_avx2()
    [all …]
|
D | cfl_avx2.c |
    66    const __m256i twos = _mm256_set1_epi8(2);  // Thirty two twos   in cfl_luma_subsampling_420_lbd_avx2()
    68    __m256i *row = (__m256i *)pred_buf_q3;   in cfl_luma_subsampling_420_lbd_avx2()
    69    const __m256i *row_end = row + (height >> 1) * CFL_BUF_LINE_I256;   in cfl_luma_subsampling_420_lbd_avx2()
    71      __m256i top = _mm256_loadu_si256((__m256i *)input);   in cfl_luma_subsampling_420_lbd_avx2()
    72      __m256i bot = _mm256_loadu_si256((__m256i *)(input + input_stride));   in cfl_luma_subsampling_420_lbd_avx2()
    74      __m256i top_16x16 = _mm256_maddubs_epi16(top, twos);   in cfl_luma_subsampling_420_lbd_avx2()
    75      __m256i bot_16x16 = _mm256_maddubs_epi16(bot, twos);   in cfl_luma_subsampling_420_lbd_avx2()
    76      __m256i sum_16x16 = _mm256_add_epi16(top_16x16, bot_16x16);   in cfl_luma_subsampling_420_lbd_avx2()
   101    const __m256i fours = _mm256_set1_epi8(4);  // Thirty two fours   in cfl_luma_subsampling_422_lbd_avx2()
   102    __m256i *row = (__m256i *)pred_buf_q3;   in cfl_luma_subsampling_422_lbd_avx2()
    [all …]
|
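cfl_luma_subsampling_420_lbd_avx2 folds the 2x2 down-sampling and the Q3 scaling into one vpmaddubsw against a vector of twos: each 16-bit result is 2*(l0 + l1), and adding the row below gives the 2x2 sum times two, i.e. the block average in Q3. A sketch of one output row under that reading (assumed buffer layout, not the file's exact loop):

#include <immintrin.h>
#include <stdint.h>

/* Produce 16 Q3 luma values from a 32x2 block of 8-bit luma. */
static void cfl_420_row_sketch(const uint8_t *input, int input_stride,
                               int16_t *pred_buf_q3) {
  const __m256i twos = _mm256_set1_epi8(2);
  const __m256i top = _mm256_loadu_si256((const __m256i *)input);
  const __m256i bot = _mm256_loadu_si256((const __m256i *)(input + input_stride));
  /* 2*l0 + 2*l1 for each horizontal pair, per row. */
  const __m256i top_16 = _mm256_maddubs_epi16(top, twos);
  const __m256i bot_16 = _mm256_maddubs_epi16(bot, twos);
  /* (l00 + l01 + l10 + l11) * 2 == the 2x2 average scaled by 8 (Q3). */
  const __m256i sum_16 = _mm256_add_epi16(top_16, bot_16);
  _mm256_storeu_si256((__m256i *)pred_buf_q3, sum_16);
}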
D | highbd_jnt_convolve_avx2.c |
    44    const __m256i wt0 = _mm256_set1_epi32(w0);   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    45    const __m256i wt1 = _mm256_set1_epi32(w1);   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    46    const __m256i zero = _mm256_setzero_si256();   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    52    const __m256i offset_const = _mm256_set1_epi32(offset);   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    53    const __m256i offset_const_16b = _mm256_set1_epi16(offset);   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    56    const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1);   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    57    const __m256i clip_pixel_to_bd =   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    65        const __m256i src_16bit =   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    66            _mm256_loadu_si256((__m256i *)(&src[i * src_stride + j]));   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    68        const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);   in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    [all …]
|
D | highbd_wiener_convolve_avx2.c |
    49    const __m256i zero_256 = _mm256_setzero_si256();   in av1_highbd_wiener_convolve_add_src_avx2()
    54    const __m256i clamp_low = zero_256;   in av1_highbd_wiener_convolve_add_src_avx2()
    58    const __m256i clamp_high_ep =   in av1_highbd_wiener_convolve_add_src_avx2()
    79      const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);   in av1_highbd_wiener_convolve_add_src_avx2()
    81      const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);   in av1_highbd_wiener_convolve_add_src_avx2()
    83      const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);   in av1_highbd_wiener_convolve_add_src_avx2()
    85      const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);   in av1_highbd_wiener_convolve_add_src_avx2()
    87      const __m256i round_const = _mm256_set1_epi32(   in av1_highbd_wiener_convolve_add_src_avx2()
    95          const __m256i src_0 = yy_loadu_256(src_ij + 0);   in av1_highbd_wiener_convolve_add_src_avx2()
    96          const __m256i src_1 = yy_loadu_256(src_ij + 1);   in av1_highbd_wiener_convolve_add_src_avx2()
    [all …]
|
D | wiener_convolve_avx2.c |
    48    const __m256i zero_256 = _mm256_setzero_si256();   in av1_wiener_convolve_add_src_avx2()
    53    const __m256i clamp_low = zero_256;   in av1_wiener_convolve_add_src_avx2()
    54    const __m256i clamp_high =   in av1_wiener_convolve_add_src_avx2()
    77      const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);   in av1_wiener_convolve_add_src_avx2()
    79      const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);   in av1_wiener_convolve_add_src_avx2()
    81      const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);   in av1_wiener_convolve_add_src_avx2()
    83      const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);   in av1_wiener_convolve_add_src_avx2()
    85      const __m256i round_const = _mm256_set1_epi32(   in av1_wiener_convolve_add_src_avx2()
   103          const __m256i src_0 = _mm256_cvtepu8_epi16(data_0);   in av1_wiener_convolve_add_src_avx2()
   104          const __m256i src_1 = _mm256_cvtepu8_epi16(data_1);   in av1_wiener_convolve_add_src_avx2()
    [all …]
|
D | selfguided_avx2.c |
    23  static __m256i yy256_load_extend_8_32(const void *p) {   in yy256_load_extend_8_32()
    29  static __m256i yy256_load_extend_16_32(const void *p) {   in yy256_load_extend_16_32()
    51  static __m256i scan_32(__m256i x) {   in scan_32()
    52    const __m256i x01 = _mm256_slli_si256(x, 4);   in scan_32()
    53    const __m256i x02 = _mm256_add_epi32(x, x01);   in scan_32()
    54    const __m256i x03 = _mm256_slli_si256(x02, 8);   in scan_32()
    55    const __m256i x04 = _mm256_add_epi32(x02, x03);   in scan_32()
    58    const __m256i s02 = _mm256_insertf128_si256(_mm256_setzero_si256(), s01, 1);   in scan_32()
    69  static void *memset_zero_avx(int32_t *dest, const __m256i *zero, size_t count) {   in memset_zero_avx()
    72      _mm256_storeu_si256((__m256i *)(dest + i), *zero);   in memset_zero_avx()
    [all …]
|
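scan_32 builds a running (prefix) sum of eight 32-bit values with shift-and-add steps; because _mm256_slli_si256 only shifts within each 128-bit lane, the low lane's total still has to be added into the high lane at the end. A hedged reconstruction of that idea (illustrative name; the real file feeds this into its integral-image computation):

#include <immintrin.h>
#include <stdint.h>

/* Inclusive prefix sum of the eight int32 lanes of x. */
static __m256i prefix_sum_sketch(__m256i x) {
  /* Per-128-bit-lane scan: afterwards each lane holds the sum of the lanes
     at or below it within its own half. */
  __m256i s = _mm256_add_epi32(x, _mm256_slli_si256(x, 4));
  s = _mm256_add_epi32(s, _mm256_slli_si256(s, 8));

  /* Add the total of the low half to every element of the high half. */
  const int32_t low_total = _mm256_extract_epi32(s, 3);
  const __m256i carry = _mm256_insertf128_si256(_mm256_setzero_si256(),
                                                _mm_set1_epi32(low_total), 1);
  return _mm256_add_epi32(s, carry);
}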
D | highbd_convolve_2d_avx2.c |
    42    __m256i s[8], coeffs_y[4], coeffs_x[4];   in av1_highbd_convolve_2d_sr_avx2()
    44    const __m256i round_const_x = _mm256_set1_epi32(   in av1_highbd_convolve_2d_sr_avx2()
    48    const __m256i round_const_y = _mm256_set1_epi32(   in av1_highbd_convolve_2d_sr_avx2()
    56    const __m256i round_const_bits = _mm256_set1_epi32((1 << bits) >> 1);   in av1_highbd_convolve_2d_sr_avx2()
    57    const __m256i clip_pixel =   in av1_highbd_convolve_2d_sr_avx2()
    59    const __m256i zero = _mm256_setzero_si256();   in av1_highbd_convolve_2d_sr_avx2()
    68        const __m256i row0 =   in av1_highbd_convolve_2d_sr_avx2()
    69            _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);   in av1_highbd_convolve_2d_sr_avx2()
    70        __m256i row1 = _mm256_set1_epi16(0);   in av1_highbd_convolve_2d_sr_avx2()
    73            _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);   in av1_highbd_convolve_2d_sr_avx2()
    [all …]
|
D | jnt_convolve_avx2.c |
    23  static INLINE __m256i unpack_weights_avx2(ConvolveParams *conv_params) {   in unpack_weights_avx2()
    26    const __m256i wt0 = _mm256_set1_epi16(w0);   in unpack_weights_avx2()
    27    const __m256i wt1 = _mm256_set1_epi16(w1);   in unpack_weights_avx2()
    28    const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1);   in unpack_weights_avx2()
    32  static INLINE __m256i load_line2_avx2(const void *a, const void *b) {   in load_line2_avx2()
    49    const __m256i wt = unpack_weights_avx2(conv_params);   in av1_dist_wtd_convolve_x_avx2()
    55    const __m256i offset_const = _mm256_set1_epi16(offset);   in av1_dist_wtd_convolve_x_avx2()
    58    const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);   in av1_dist_wtd_convolve_x_avx2()
    63    const __m256i round_const =   in av1_dist_wtd_convolve_x_avx2()
    70    __m256i filt[4], coeffs[4];   in av1_dist_wtd_convolve_x_avx2()
    [all …]
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | avg_intrin_avx2.c |
    19  static void highbd_hadamard_col8_avx2(__m256i *in, int iter) {   in highbd_hadamard_col8_avx2()
    20    __m256i a0 = in[0];   in highbd_hadamard_col8_avx2()
    21    __m256i a1 = in[1];   in highbd_hadamard_col8_avx2()
    22    __m256i a2 = in[2];   in highbd_hadamard_col8_avx2()
    23    __m256i a3 = in[3];   in highbd_hadamard_col8_avx2()
    24    __m256i a4 = in[4];   in highbd_hadamard_col8_avx2()
    25    __m256i a5 = in[5];   in highbd_hadamard_col8_avx2()
    26    __m256i a6 = in[6];   in highbd_hadamard_col8_avx2()
    27    __m256i a7 = in[7];   in highbd_hadamard_col8_avx2()
    29    __m256i b0 = _mm256_add_epi32(a0, a1);   in highbd_hadamard_col8_avx2()
    [all …]
|
D | variance_avx2.c |
    41  static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref,   in variance_kernel_avx2()
    42                                          __m256i *const sse,   in variance_kernel_avx2()
    43                                          __m256i *const sum) {   in variance_kernel_avx2()
    44    const __m256i adj_sub = _mm256_load_si256((__m256i const *)adjacent_sub_avx2);   in variance_kernel_avx2()
    47    const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref);   in variance_kernel_avx2()
    48    const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref);   in variance_kernel_avx2()
    51    const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub);   in variance_kernel_avx2()
    52    const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub);   in variance_kernel_avx2()
    53    const __m256i madd0 = _mm256_madd_epi16(diff0, diff0);   in variance_kernel_avx2()
    54    const __m256i madd1 = _mm256_madd_epi16(diff1, diff1);   in variance_kernel_avx2()
    [all …]
|
D | highbd_convolve_avx2.c |
    34      const __m256i p0 = _mm256_loadu_si256((const __m256i *)src);   in vpx_highbd_convolve_copy_avx2()
    35      const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));   in vpx_highbd_convolve_copy_avx2()
    36      const __m256i p2 = _mm256_loadu_si256((const __m256i *)(src + 32));   in vpx_highbd_convolve_copy_avx2()
    37      const __m256i p3 = _mm256_loadu_si256((const __m256i *)(src + 48));   in vpx_highbd_convolve_copy_avx2()
    39      _mm256_storeu_si256((__m256i *)dst, p0);   in vpx_highbd_convolve_copy_avx2()
    40      _mm256_storeu_si256((__m256i *)(dst + 16), p1);   in vpx_highbd_convolve_copy_avx2()
    41      _mm256_storeu_si256((__m256i *)(dst + 32), p2);   in vpx_highbd_convolve_copy_avx2()
    42      _mm256_storeu_si256((__m256i *)(dst + 48), p3);   in vpx_highbd_convolve_copy_avx2()
    48      const __m256i p0 = _mm256_loadu_si256((const __m256i *)src);   in vpx_highbd_convolve_copy_avx2()
    49      const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));   in vpx_highbd_convolve_copy_avx2()
    [all …]
|
D | fwd_dct32x32_impl_avx2.h |
    27  static INLINE __m256i k_madd_epi32_avx2(__m256i a, __m256i b) {   in k_madd_epi32_avx2()
    28    __m256i buf0, buf1;   in k_madd_epi32_avx2()
    36  static INLINE __m256i k_packs_epi64_avx2(__m256i a, __m256i b) {   in k_packs_epi64_avx2()
    37    __m256i buf0 = _mm256_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0));   in k_packs_epi64_avx2()
    38    __m256i buf1 = _mm256_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0));   in k_packs_epi64_avx2()
    54    const __m256i k__cospi_p16_p16 = _mm256_set1_epi16(cospi_16_64);   in FDCT32x32_2D_AVX2()
    55    const __m256i k__cospi_p16_m16 =   in FDCT32x32_2D_AVX2()
    57    const __m256i k__cospi_m08_p24 = pair256_set_epi16(-cospi_8_64, cospi_24_64);   in FDCT32x32_2D_AVX2()
    58    const __m256i k__cospi_m24_m08 = pair256_set_epi16(-cospi_24_64, -cospi_8_64);   in FDCT32x32_2D_AVX2()
    59    const __m256i k__cospi_p24_p08 = pair256_set_epi16(+cospi_24_64, cospi_8_64);   in FDCT32x32_2D_AVX2()
    [all …]
|