Searched refs:__m256i (Results 1 – 25 of 116) sorted by relevance

/external/clang/test/CodeGen/
avx2-builtins.c
11 __m256i test_mm256_abs_epi8(__m256i a) { in test_mm256_abs_epi8()
17 __m256i test_mm256_abs_epi16(__m256i a) { in test_mm256_abs_epi16()
23 __m256i test_mm256_abs_epi32(__m256i a) { in test_mm256_abs_epi32()
29 __m256i test_mm256_add_epi8(__m256i a, __m256i b) { in test_mm256_add_epi8()
35 __m256i test_mm256_add_epi16(__m256i a, __m256i b) { in test_mm256_add_epi16()
41 __m256i test_mm256_add_epi32(__m256i a, __m256i b) { in test_mm256_add_epi32()
47 __m256i test_mm256_add_epi64(__m256i a, __m256i b) { in test_mm256_add_epi64()
53 __m256i test_mm256_adds_epi8(__m256i a, __m256i b) { in test_mm256_adds_epi8()
59 __m256i test_mm256_adds_epi16(__m256i a, __m256i b) { in test_mm256_adds_epi16()
65 __m256i test_mm256_adds_epu8(__m256i a, __m256i b) { in test_mm256_adds_epu8()
[all …]
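
Note: these codegen tests exercise one AVX2 intrinsic per function. As a reminder of what the listed intrinsics compute, here is a minimal standalone sketch (the values and the main() harness are illustrative, not taken from the test file; build with an AVX2-enabled compiler, e.g. -mavx2):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  // 32 signed bytes per __m256i register.
  __m256i a = _mm256_set1_epi8(-100);
  __m256i b = _mm256_set1_epi8(-100);

  __m256i abs8  = _mm256_abs_epi8(a);     // |-100| = 100 in every byte
  __m256i add8  = _mm256_add_epi8(a, b);  // wrapping add: -200 wraps to 56
  __m256i adds8 = _mm256_adds_epi8(a, b); // saturating add: clamped to -128

  int8_t out[32];
  _mm256_storeu_si256((__m256i *)out, abs8);
  printf("abs  %d\n", out[0]);
  _mm256_storeu_si256((__m256i *)out, add8);
  printf("add  %d\n", out[0]);
  _mm256_storeu_si256((__m256i *)out, adds8);
  printf("adds %d\n", out[0]);
  return 0;
}
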
avx512vlbw-builtins.c
9 __mmask32 test_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) { in test_mm256_cmpeq_epi8_mask()
15 __mmask32 test_mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { in test_mm256_mask_cmpeq_epi8_mask()
35 __mmask16 test_mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) { in test_mm256_cmpeq_epi16_mask()
41 __mmask16 test_mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { in test_mm256_mask_cmpeq_epi16_mask()
61 __mmask32 test_mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) { in test_mm256_cmpgt_epi8_mask()
67 __mmask32 test_mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { in test_mm256_mask_cmpgt_epi8_mask()
87 __mmask16 test_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) { in test_mm256_cmpgt_epi16_mask()
93 __mmask16 test_mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { in test_mm256_mask_cmpgt_epi16_mask()
139 __mmask32 test_mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) { in test_mm256_cmpeq_epu8_mask()
145 __mmask32 test_mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { in test_mm256_mask_cmpeq_epu8_mask()
[all …]
/external/clang/lib/Headers/
avx2intrin.h
36 (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
37 (__v32qi)(__m256i)(Y), (int)(M))
39 static __inline__ __m256i __DEFAULT_FN_ATTRS
40 _mm256_abs_epi8(__m256i __a) in _mm256_abs_epi8()
42 return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); in _mm256_abs_epi8()
45 static __inline__ __m256i __DEFAULT_FN_ATTRS
46 _mm256_abs_epi16(__m256i __a) in _mm256_abs_epi16()
48 return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); in _mm256_abs_epi16()
51 static __inline__ __m256i __DEFAULT_FN_ATTRS
52 _mm256_abs_epi32(__m256i __a) in _mm256_abs_epi32()
[all …]
avx512vlbwintrin.h
66 _mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) { in _mm256_cmpeq_epi8_mask()
72 _mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { in _mm256_mask_cmpeq_epi8_mask()
78 _mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) { in _mm256_cmpeq_epu8_mask()
84 _mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { in _mm256_mask_cmpeq_epu8_mask()
114 _mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) { in _mm256_cmpeq_epi16_mask()
120 _mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { in _mm256_mask_cmpeq_epi16_mask()
126 _mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) { in _mm256_cmpeq_epu16_mask()
132 _mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { in _mm256_mask_cmpeq_epu16_mask()
162 _mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) { in _mm256_cmpge_epi8_mask()
168 _mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { in _mm256_mask_cmpge_epi8_mask()
[all …]
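
Note: unlike the plain AVX2 compares, these AVX-512VL/BW variants return a bit-per-lane __mmask32/__mmask16 rather than a vector of all-ones/all-zero lanes. A small hedged sketch of the two entry points listed above, with made-up inputs (requires -mavx512bw -mavx512vl):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                               16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
  __m256i b = _mm256_set1_epi8(4);

  // One result bit per byte lane: bit i is set when a[i] == b[i].
  __mmask32 eq = _mm256_cmpeq_epi8_mask(a, b);

  // Masked form: lanes whose bit is clear in the incoming mask stay 0.
  __mmask32 eq_even = _mm256_mask_cmpeq_epi8_mask(0x55555555u, a, b);

  printf("eq      = 0x%08x\n", (unsigned)eq);       // only bit 4 set
  printf("eq_even = 0x%08x\n", (unsigned)eq_even);  // bit 4 survives (it is even)
  return 0;
}
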
/external/libaom/libaom/av1/encoder/x86/
wedge_utils_avx2.c
33 const __m256i v_mask_max_w = _mm256_set1_epi16(MAX_MASK_VALUE); in av1_wedge_sse_from_residuals_avx2()
34 const __m256i v_zext_q = yy_set1_64_from_32i(0xffffffff); in av1_wedge_sse_from_residuals_avx2()
36 __m256i v_acc0_q = _mm256_setzero_si256(); in av1_wedge_sse_from_residuals_avx2()
45 const __m256i v_r0_w = _mm256_lddqu_si256((__m256i *)(r1 + n)); in av1_wedge_sse_from_residuals_avx2()
46 const __m256i v_d0_w = _mm256_lddqu_si256((__m256i *)(d + n)); in av1_wedge_sse_from_residuals_avx2()
49 const __m256i v_rd0l_w = _mm256_unpacklo_epi16(v_d0_w, v_r0_w); in av1_wedge_sse_from_residuals_avx2()
50 const __m256i v_rd0h_w = _mm256_unpackhi_epi16(v_d0_w, v_r0_w); in av1_wedge_sse_from_residuals_avx2()
51 const __m256i v_m0_w = _mm256_cvtepu8_epi16(v_m01_b); in av1_wedge_sse_from_residuals_avx2()
53 const __m256i v_m0l_w = _mm256_unpacklo_epi16(v_m0_w, v_mask_max_w); in av1_wedge_sse_from_residuals_avx2()
54 const __m256i v_m0h_w = _mm256_unpackhi_epi16(v_m0_w, v_mask_max_w); in av1_wedge_sse_from_residuals_avx2()
[all …]
av1_fwd_txfm_avx2.h
19 __m256i *in0, __m256i *in1, in btf_32_avx2_type0()
20 const __m256i _r, const int32_t cos_bit) { in btf_32_avx2_type0()
21 __m256i _in0 = *in0; in btf_32_avx2_type0()
22 __m256i _in1 = *in1; in btf_32_avx2_type0()
23 const __m256i ww0 = _mm256_set1_epi32(w0); in btf_32_avx2_type0()
24 const __m256i ww1 = _mm256_set1_epi32(w1); in btf_32_avx2_type0()
25 const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0); in btf_32_avx2_type0()
26 const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1); in btf_32_avx2_type0()
27 __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1); in btf_32_avx2_type0()
30 const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1); in btf_32_avx2_type0()
[all …]
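
Note: btf_32_avx2_type0() is one butterfly/rotation stage of the forward transform on 32-bit lanes: two weighted combinations of in0 and in1, each rounded by _r and shifted down by cos_bit. A rough standalone sketch of that pattern, assuming the convention out0 = w0*in0 + w1*in1 and out1 = w1*in0 - w0*in1 (the library's exact sign convention is cut off by the excerpt, so treat the signs as an assumption):

#include <immintrin.h>
#include <stdint.h>

// Hypothetical stand-in for btf_32_avx2_type0(): one butterfly over eight
// int32 lanes. _r is the rounding constant 1 << (cos_bit - 1).
static inline void butterfly_32_avx2(int32_t w0, int32_t w1,
                                     __m256i *in0, __m256i *in1,
                                     __m256i _r, int cos_bit) {
  const __m256i ww0 = _mm256_set1_epi32(w0);
  const __m256i ww1 = _mm256_set1_epi32(w1);
  // out0 = (w0*in0 + w1*in1 + r) >> cos_bit
  __m256i t0 = _mm256_add_epi32(_mm256_mullo_epi32(*in0, ww0),
                                _mm256_mullo_epi32(*in1, ww1));
  // out1 = (w1*in0 - w0*in1 + r) >> cos_bit
  __m256i t1 = _mm256_sub_epi32(_mm256_mullo_epi32(*in0, ww1),
                                _mm256_mullo_epi32(*in1, ww0));
  *in0 = _mm256_srai_epi32(_mm256_add_epi32(t0, _r), cos_bit);
  *in1 = _mm256_srai_epi32(_mm256_add_epi32(t1, _r), cos_bit);
}
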
av1_quantize_avx2.c
19 static INLINE void read_coeff(const tran_low_t *coeff, __m256i *c) { in read_coeff()
21 const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff); in read_coeff()
22 const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1); in read_coeff()
26 *c = _mm256_loadu_si256((const __m256i *)coeff); in read_coeff()
31 const __m256i zero = _mm256_setzero_si256(); in write_zero()
33 _mm256_storeu_si256((__m256i *)qcoeff, zero); in write_zero()
34 _mm256_storeu_si256((__m256i *)qcoeff + 1, zero); in write_zero()
36 _mm256_storeu_si256((__m256i *)qcoeff, zero); in write_zero()
40 static INLINE void init_one_qp(const __m128i *p, __m256i *qp) { in init_one_qp()
47 __m256i *thr, __m256i *qp) { in init_qp()
[all …]
/external/libaom/libaom/aom_dsp/x86/
convolve_avx2.h
39 __m256i data = _mm256_castsi128_si256( \
47 __m256i res = convolve_lowbd_x(data, coeffs_h, filt); \
50 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res); \
53 __m256i data_1 = _mm256_castsi128_si256( \
56 __m256i res = convolve_lowbd_x(data_1, coeffs_h, filt); \
60 _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
63 __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride)); \
64 __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride)); \
65 __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride)); \
66 __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride)); \
[all …]
txfm_common_avx2.h
23 static INLINE __m256i pair_set_w16_epi16(int16_t a, int16_t b) { in pair_set_w16_epi16()
28 static INLINE void btf_16_w16_avx2(const __m256i w0, const __m256i w1, in btf_16_w16_avx2()
29 __m256i *in0, __m256i *in1, const __m256i _r, in btf_16_w16_avx2()
31 __m256i t0 = _mm256_unpacklo_epi16(*in0, *in1); in btf_16_w16_avx2()
32 __m256i t1 = _mm256_unpackhi_epi16(*in0, *in1); in btf_16_w16_avx2()
33 __m256i u0 = _mm256_madd_epi16(t0, w0); in btf_16_w16_avx2()
34 __m256i u1 = _mm256_madd_epi16(t1, w0); in btf_16_w16_avx2()
35 __m256i v0 = _mm256_madd_epi16(t0, w1); in btf_16_w16_avx2()
36 __m256i v1 = _mm256_madd_epi16(t1, w1); in btf_16_w16_avx2()
38 __m256i a0 = _mm256_add_epi32(u0, _r); in btf_16_w16_avx2()
[all …]
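
Note: btf_16_w16_avx2() does the same butterfly on 16-bit lanes with a different trick: interleave the two inputs with unpacklo/unpackhi, then one _mm256_madd_epi16 against a register of repeated (w0, w1) pairs yields w0*in0 + w1*in1 as 32-bit results. A minimal sketch of that pairing idea (helper and function names are illustrative, not the header's):

#include <immintrin.h>
#include <stdint.h>

// Build a register of repeated (a, b) int16 pairs, in the spirit of the
// pair_set_w16_epi16() helper listed above.
static inline __m256i pair_set_epi16(int16_t a, int16_t b) {
  const uint32_t v = (uint16_t)a | ((uint32_t)(uint16_t)b << 16);
  return _mm256_set1_epi32((int32_t)v);
}

// One madd computes w0*x + w1*y per 32-bit lane for the interleaved pairs.
static inline void weighted_pair_sum(__m256i x, __m256i y,
                                     int16_t w0, int16_t w1,
                                     __m256i *lo, __m256i *hi) {
  const __m256i w = pair_set_epi16(w0, w1);
  __m256i t0 = _mm256_unpacklo_epi16(x, y);  // x0, y0, x1, y1, ...
  __m256i t1 = _mm256_unpackhi_epi16(x, y);
  *lo = _mm256_madd_epi16(t0, w);            // w0*xi + w1*yi, 32-bit lanes
  *hi = _mm256_madd_epi16(t1, w);
}
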
highbd_convolve_avx2.c
47 const __m256i p0 = _mm256_loadu_si256((const __m256i *)src); in aom_highbd_convolve_copy_avx2()
48 const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16)); in aom_highbd_convolve_copy_avx2()
49 const __m256i p2 = _mm256_loadu_si256((const __m256i *)(src + 32)); in aom_highbd_convolve_copy_avx2()
50 const __m256i p3 = _mm256_loadu_si256((const __m256i *)(src + 48)); in aom_highbd_convolve_copy_avx2()
52 _mm256_storeu_si256((__m256i *)dst, p0); in aom_highbd_convolve_copy_avx2()
53 _mm256_storeu_si256((__m256i *)(dst + 16), p1); in aom_highbd_convolve_copy_avx2()
54 _mm256_storeu_si256((__m256i *)(dst + 32), p2); in aom_highbd_convolve_copy_avx2()
55 _mm256_storeu_si256((__m256i *)(dst + 48), p3); in aom_highbd_convolve_copy_avx2()
61 const __m256i p0 = _mm256_loadu_si256((const __m256i *)src); in aom_highbd_convolve_copy_avx2()
62 const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16)); in aom_highbd_convolve_copy_avx2()
[all …]
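
Note: the copy path is plain data movement: four unaligned 256-bit loads and stores move 64 16-bit pixels per row. A trimmed-down sketch of the same loop (the function name and the fixed width of 64 are illustrative):

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

// Copy h rows of 64 uint16_t pixels, four 256-bit registers per row.
static void copy_w64_avx2(const uint16_t *src, ptrdiff_t src_stride,
                          uint16_t *dst, ptrdiff_t dst_stride, int h) {
  for (int r = 0; r < h; ++r) {
    const __m256i p0 = _mm256_loadu_si256((const __m256i *)(src + 0));
    const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));
    const __m256i p2 = _mm256_loadu_si256((const __m256i *)(src + 32));
    const __m256i p3 = _mm256_loadu_si256((const __m256i *)(src + 48));
    _mm256_storeu_si256((__m256i *)(dst + 0), p0);
    _mm256_storeu_si256((__m256i *)(dst + 16), p1);
    _mm256_storeu_si256((__m256i *)(dst + 32), p2);
    _mm256_storeu_si256((__m256i *)(dst + 48), p3);
    src += src_stride;
    dst += dst_stride;
  }
}
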
variance_avx2.c
18 static INLINE __m128i mm256_add_hi_lo_epi16(const __m256i val) { in mm256_add_hi_lo_epi16()
23 static INLINE __m128i mm256_add_hi_lo_epi32(const __m256i val) { in mm256_add_hi_lo_epi32()
28 static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref, in variance_kernel_avx2()
29 __m256i *const sse, in variance_kernel_avx2()
30 __m256i *const sum) { in variance_kernel_avx2()
31 const __m256i adj_sub = _mm256_set1_epi16(0xff01); // (1,-1) in variance_kernel_avx2()
34 const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref); in variance_kernel_avx2()
35 const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref); in variance_kernel_avx2()
38 const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub); in variance_kernel_avx2()
39 const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub); in variance_kernel_avx2()
[all …]
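
Note: the kernel relies on a (1, -1) multiply-add trick: interleave src and ref bytes, then _mm256_maddubs_epi16 against the constant 0xff01 (byte pair 1, -1) yields src - ref as 16-bit differences, and _mm256_madd_epi16 squares and pairwise-sums them. A hedged sketch of just that kernel, with the cross-row reduction left to the caller (names are illustrative):

#include <immintrin.h>

// One 32-byte row: accumulate per-lane partial sums of (src-ref) and
// (src-ref)^2; a caller would reduce *sum and *sse afterwards.
static inline void variance_row_avx2(__m256i src, __m256i ref,
                                     __m256i *sse, __m256i *sum) {
  const __m256i adj_sub = _mm256_set1_epi16((short)0xff01);  // byte pair (1, -1)

  // Interleave so each maddubs pair holds (src_i, ref_i).
  const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref);
  const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref);

  // maddubs: unsigned bytes * signed bytes -> src_i*1 + ref_i*(-1) = diff_i.
  const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub);
  const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub);

  // madd(diff, diff) = diff^2 summed in pairs -> 32-bit squared errors.
  const __m256i sq0 = _mm256_madd_epi16(diff0, diff0);
  const __m256i sq1 = _mm256_madd_epi16(diff1, diff1);

  *sum = _mm256_add_epi16(*sum, _mm256_add_epi16(diff0, diff1));
  *sse = _mm256_add_epi32(*sse, _mm256_add_epi32(sq0, sq1));
}
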
blend_a64_mask_avx2.c
30 const __m256i *m0, const __m256i *v_round_offset, const __m256i *v_maxval, in blend_a64_d16_mask_w16_avx2()
32 const __m256i max_minus_m0 = _mm256_sub_epi16(*v_maxval, *m0); in blend_a64_d16_mask_w16_avx2()
33 const __m256i s0_0 = yy_loadu_256(src0); in blend_a64_d16_mask_w16_avx2()
34 const __m256i s1_0 = yy_loadu_256(src1); in blend_a64_d16_mask_w16_avx2()
35 __m256i res0_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0_0, s1_0), in blend_a64_d16_mask_w16_avx2()
37 __m256i res0_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0_0, s1_0), in blend_a64_d16_mask_w16_avx2()
43 const __m256i res0 = _mm256_packs_epi32(res0_lo, res0_hi); in blend_a64_d16_mask_w16_avx2()
44 __m256i res = _mm256_packus_epi16(res0, res0); in blend_a64_d16_mask_w16_avx2()
51 const __m256i *m0, const __m256i *m1, const __m256i *v_round_offset, in blend_a64_d16_mask_w32_avx2()
52 const __m256i *v_maxval, int shift) { in blend_a64_d16_mask_w32_avx2()
[all …]
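
Note: the same interleave-plus-madd idea performs the alpha blend: pairing (src0, src1) against (m0, maxval - m0) lets one _mm256_madd_epi16 produce m0*src0 + (maxval - m0)*src1 per pixel. A simplified sketch of that core step (the real function's rounding and offset handling is omitted; the 0..64 mask range is an assumption taken from the surrounding code):

#include <immintrin.h>

// Weighted 16-bit blend of one 16-pixel row, m in [0, 64]:
// result (before rounding) = m*s0 + (64 - m)*s1 as 32-bit lanes.
static inline void blend_row_w16_avx2(__m256i s0, __m256i s1, __m256i m,
                                      __m256i *res_lo, __m256i *res_hi) {
  const __m256i maxval = _mm256_set1_epi16(64);
  const __m256i max_minus_m = _mm256_sub_epi16(maxval, m);

  // Pair (s0_i, s1_i) against (m_i, 64 - m_i) and let madd do the dot product.
  *res_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0, s1),
                              _mm256_unpacklo_epi16(m, max_minus_m));
  *res_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0, s1),
                              _mm256_unpackhi_epi16(m, max_minus_m));
}
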
avg_intrin_avx2.c
19 static void hadamard_col8x2_avx2(__m256i *in, int iter) { in hadamard_col8x2_avx2()
20 __m256i a0 = in[0]; in hadamard_col8x2_avx2()
21 __m256i a1 = in[1]; in hadamard_col8x2_avx2()
22 __m256i a2 = in[2]; in hadamard_col8x2_avx2()
23 __m256i a3 = in[3]; in hadamard_col8x2_avx2()
24 __m256i a4 = in[4]; in hadamard_col8x2_avx2()
25 __m256i a5 = in[5]; in hadamard_col8x2_avx2()
26 __m256i a6 = in[6]; in hadamard_col8x2_avx2()
27 __m256i a7 = in[7]; in hadamard_col8x2_avx2()
29 __m256i b0 = _mm256_add_epi16(a0, a1); in hadamard_col8x2_avx2()
[all …]
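
Note: hadamard_col8x2_avx2() is three rounds of paired add/subtract over eight registers, followed by a transpose that the excerpt cuts off. A compact sketch of one such round on 16-bit lanes, mirroring the structure shown above:

#include <immintrin.h>

// One add/sub round of the 8-point Hadamard over __m256i of int16 lanes.
// Two more rounds (applied to b0..b7) plus a transpose complete the kernel.
static inline void hadamard_round_epi16(const __m256i a[8], __m256i b[8]) {
  b[0] = _mm256_add_epi16(a[0], a[1]);
  b[1] = _mm256_sub_epi16(a[0], a[1]);
  b[2] = _mm256_add_epi16(a[2], a[3]);
  b[3] = _mm256_sub_epi16(a[2], a[3]);
  b[4] = _mm256_add_epi16(a[4], a[5]);
  b[5] = _mm256_sub_epi16(a[4], a[5]);
  b[6] = _mm256_add_epi16(a[6], a[7]);
  b[7] = _mm256_sub_epi16(a[6], a[7]);
}
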
/external/libaom/libaom/av1/common/x86/
reconinter_avx2.c
22 static INLINE __m256i calc_mask_avx2(const __m256i mask_base, const __m256i s0, in calc_mask_avx2()
23 const __m256i s1) { in calc_mask_avx2()
24 const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)); in calc_mask_avx2()
35 const __m256i y_mask_base = _mm256_set1_epi16(38 - mb); in av1_build_compound_diffwtd_mask_avx2()
46 const __m256i s0ABCD_w = _mm256_cvtepu8_epi16(s0ABCD); in av1_build_compound_diffwtd_mask_avx2()
55 const __m256i s1ABCD_w = _mm256_cvtepu8_epi16(s1ABCD); in av1_build_compound_diffwtd_mask_avx2()
56 const __m256i m16 = calc_mask_avx2(y_mask_base, s0ABCD_w, s1ABCD_w); in av1_build_compound_diffwtd_mask_avx2()
57 const __m256i m8 = _mm256_packus_epi16(m16, _mm256_setzero_si256()); in av1_build_compound_diffwtd_mask_avx2()
72 const __m256i s0AC_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0A, s0C)); in av1_build_compound_diffwtd_mask_avx2()
73 const __m256i s0BD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0B, s0D)); in av1_build_compound_diffwtd_mask_avx2()
[all …]
cfl_avx2.c
66 const __m256i twos = _mm256_set1_epi8(2); // Thirty two twos in cfl_luma_subsampling_420_lbd_avx2()
68 __m256i *row = (__m256i *)pred_buf_q3; in cfl_luma_subsampling_420_lbd_avx2()
69 const __m256i *row_end = row + (height >> 1) * CFL_BUF_LINE_I256; in cfl_luma_subsampling_420_lbd_avx2()
71 __m256i top = _mm256_loadu_si256((__m256i *)input); in cfl_luma_subsampling_420_lbd_avx2()
72 __m256i bot = _mm256_loadu_si256((__m256i *)(input + input_stride)); in cfl_luma_subsampling_420_lbd_avx2()
74 __m256i top_16x16 = _mm256_maddubs_epi16(top, twos); in cfl_luma_subsampling_420_lbd_avx2()
75 __m256i bot_16x16 = _mm256_maddubs_epi16(bot, twos); in cfl_luma_subsampling_420_lbd_avx2()
76 __m256i sum_16x16 = _mm256_add_epi16(top_16x16, bot_16x16); in cfl_luma_subsampling_420_lbd_avx2()
101 const __m256i fours = _mm256_set1_epi8(4); // Thirty two fours in cfl_luma_subsampling_422_lbd_avx2()
102 __m256i *row = (__m256i *)pred_buf_q3; in cfl_luma_subsampling_422_lbd_avx2()
[all …]
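
Note: each output sample here is the 2x2 luma sum scaled by 2 (the q3 fixed point the CfL predictor consumes): _mm256_maddubs_epi16 against a register of twos sums horizontal pairs with weight 2, and adding the top and bottom rows completes the 2x2 sum. A hedged sketch of that inner step for one pair of rows (function name is illustrative):

#include <immintrin.h>
#include <stdint.h>

// 4:2:0 downsample of one pair of 8-bit luma rows into 16 "q3" samples:
// out[i] = 2 * (top[2i] + top[2i+1] + bot[2i] + bot[2i+1])
//        = (average of the 2x2 block) << 3.
static inline __m256i cfl_420_rows_avx2(const uint8_t *top_row,
                                        const uint8_t *bot_row) {
  const __m256i twos = _mm256_set1_epi8(2);
  const __m256i top = _mm256_loadu_si256((const __m256i *)top_row);
  const __m256i bot = _mm256_loadu_si256((const __m256i *)bot_row);
  // maddubs with weight 2: 2*p[2i] + 2*p[2i+1] per 16-bit lane.
  const __m256i top_pairs = _mm256_maddubs_epi16(top, twos);
  const __m256i bot_pairs = _mm256_maddubs_epi16(bot, twos);
  return _mm256_add_epi16(top_pairs, bot_pairs);
}
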
highbd_jnt_convolve_avx2.c
44 const __m256i wt0 = _mm256_set1_epi32(w0); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
45 const __m256i wt1 = _mm256_set1_epi32(w1); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
46 const __m256i zero = _mm256_setzero_si256(); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
52 const __m256i offset_const = _mm256_set1_epi32(offset); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
53 const __m256i offset_const_16b = _mm256_set1_epi16(offset); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
56 const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
57 const __m256i clip_pixel_to_bd = in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
65 const __m256i src_16bit = in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
66 _mm256_loadu_si256((__m256i *)(&src[i * src_stride + j])); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
68 const __m256i res = _mm256_sll_epi16(src_16bit, left_shift); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
[all …]
highbd_wiener_convolve_avx2.c
49 const __m256i zero_256 = _mm256_setzero_si256(); in av1_highbd_wiener_convolve_add_src_avx2()
54 const __m256i clamp_low = zero_256; in av1_highbd_wiener_convolve_add_src_avx2()
58 const __m256i clamp_high_ep = in av1_highbd_wiener_convolve_add_src_avx2()
79 const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128); in av1_highbd_wiener_convolve_add_src_avx2()
81 const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128); in av1_highbd_wiener_convolve_add_src_avx2()
83 const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128); in av1_highbd_wiener_convolve_add_src_avx2()
85 const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128); in av1_highbd_wiener_convolve_add_src_avx2()
87 const __m256i round_const = _mm256_set1_epi32( in av1_highbd_wiener_convolve_add_src_avx2()
95 const __m256i src_0 = yy_loadu_256(src_ij + 0); in av1_highbd_wiener_convolve_add_src_avx2()
96 const __m256i src_1 = yy_loadu_256(src_ij + 1); in av1_highbd_wiener_convolve_add_src_avx2()
[all …]
wiener_convolve_avx2.c
48 const __m256i zero_256 = _mm256_setzero_si256(); in av1_wiener_convolve_add_src_avx2()
53 const __m256i clamp_low = zero_256; in av1_wiener_convolve_add_src_avx2()
54 const __m256i clamp_high = in av1_wiener_convolve_add_src_avx2()
77 const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128); in av1_wiener_convolve_add_src_avx2()
79 const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128); in av1_wiener_convolve_add_src_avx2()
81 const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128); in av1_wiener_convolve_add_src_avx2()
83 const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128); in av1_wiener_convolve_add_src_avx2()
85 const __m256i round_const = _mm256_set1_epi32( in av1_wiener_convolve_add_src_avx2()
103 const __m256i src_0 = _mm256_cvtepu8_epi16(data_0); in av1_wiener_convolve_add_src_avx2()
104 const __m256i src_1 = _mm256_cvtepu8_epi16(data_1); in av1_wiener_convolve_add_src_avx2()
[all …]
selfguided_avx2.c
23 static __m256i yy256_load_extend_8_32(const void *p) { in yy256_load_extend_8_32()
29 static __m256i yy256_load_extend_16_32(const void *p) { in yy256_load_extend_16_32()
51 static __m256i scan_32(__m256i x) { in scan_32()
52 const __m256i x01 = _mm256_slli_si256(x, 4); in scan_32()
53 const __m256i x02 = _mm256_add_epi32(x, x01); in scan_32()
54 const __m256i x03 = _mm256_slli_si256(x02, 8); in scan_32()
55 const __m256i x04 = _mm256_add_epi32(x02, x03); in scan_32()
58 const __m256i s02 = _mm256_insertf128_si256(_mm256_setzero_si256(), s01, 1); in scan_32()
69 static void *memset_zero_avx(int32_t *dest, const __m256i *zero, size_t count) { in memset_zero_avx()
72 _mm256_storeu_si256((__m256i *)(dest + i), *zero); in memset_zero_avx()
[all …]
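
Note: scan_32() computes an inclusive prefix sum of eight 32-bit values: two shift-and-add steps handle each 128-bit lane (remember _mm256_slli_si256 shifts per 128-bit lane), and the lines the excerpt elides carry the low lane's total into the high lane. A self-contained sketch of the whole idea; the carry step is a reconstruction, so treat it as an assumption:

#include <immintrin.h>

// Inclusive prefix sum of 8 int32 lanes: out[i] = x[0] + ... + x[i].
static inline __m256i prefix_sum_epi32_avx2(__m256i x) {
  // _mm256_slli_si256 shifts each 128-bit half independently, so these two
  // steps produce a prefix sum within each half.
  x = _mm256_add_epi32(x, _mm256_slli_si256(x, 4));
  x = _mm256_add_epi32(x, _mm256_slli_si256(x, 8));
  // Broadcast the low half's running total (lane 3) and add it to the high
  // half only, carrying the sum across the 128-bit boundary.
  const __m128i lo_total = _mm_shuffle_epi32(_mm256_castsi256_si128(x), 0xff);
  const __m256i carry =
      _mm256_insertf128_si256(_mm256_setzero_si256(), lo_total, 1);
  return _mm256_add_epi32(x, carry);
}
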
highbd_convolve_2d_avx2.c
42 __m256i s[8], coeffs_y[4], coeffs_x[4]; in av1_highbd_convolve_2d_sr_avx2()
44 const __m256i round_const_x = _mm256_set1_epi32( in av1_highbd_convolve_2d_sr_avx2()
48 const __m256i round_const_y = _mm256_set1_epi32( in av1_highbd_convolve_2d_sr_avx2()
56 const __m256i round_const_bits = _mm256_set1_epi32((1 << bits) >> 1); in av1_highbd_convolve_2d_sr_avx2()
57 const __m256i clip_pixel = in av1_highbd_convolve_2d_sr_avx2()
59 const __m256i zero = _mm256_setzero_si256(); in av1_highbd_convolve_2d_sr_avx2()
68 const __m256i row0 = in av1_highbd_convolve_2d_sr_avx2()
69 _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]); in av1_highbd_convolve_2d_sr_avx2()
70 __m256i row1 = _mm256_set1_epi16(0); in av1_highbd_convolve_2d_sr_avx2()
73 _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]); in av1_highbd_convolve_2d_sr_avx2()
[all …]
jnt_convolve_avx2.c
23 static INLINE __m256i unpack_weights_avx2(ConvolveParams *conv_params) { in unpack_weights_avx2()
26 const __m256i wt0 = _mm256_set1_epi16(w0); in unpack_weights_avx2()
27 const __m256i wt1 = _mm256_set1_epi16(w1); in unpack_weights_avx2()
28 const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1); in unpack_weights_avx2()
32 static INLINE __m256i load_line2_avx2(const void *a, const void *b) { in load_line2_avx2()
49 const __m256i wt = unpack_weights_avx2(conv_params); in av1_dist_wtd_convolve_x_avx2()
55 const __m256i offset_const = _mm256_set1_epi16(offset); in av1_dist_wtd_convolve_x_avx2()
58 const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1); in av1_dist_wtd_convolve_x_avx2()
63 const __m256i round_const = in av1_dist_wtd_convolve_x_avx2()
70 __m256i filt[4], coeffs[4]; in av1_dist_wtd_convolve_x_avx2()
[all …]
/external/libvpx/libvpx/vpx_dsp/x86/
avg_intrin_avx2.c
19 static void highbd_hadamard_col8_avx2(__m256i *in, int iter) { in highbd_hadamard_col8_avx2()
20 __m256i a0 = in[0]; in highbd_hadamard_col8_avx2()
21 __m256i a1 = in[1]; in highbd_hadamard_col8_avx2()
22 __m256i a2 = in[2]; in highbd_hadamard_col8_avx2()
23 __m256i a3 = in[3]; in highbd_hadamard_col8_avx2()
24 __m256i a4 = in[4]; in highbd_hadamard_col8_avx2()
25 __m256i a5 = in[5]; in highbd_hadamard_col8_avx2()
26 __m256i a6 = in[6]; in highbd_hadamard_col8_avx2()
27 __m256i a7 = in[7]; in highbd_hadamard_col8_avx2()
29 __m256i b0 = _mm256_add_epi32(a0, a1); in highbd_hadamard_col8_avx2()
[all …]
variance_avx2.c
41 static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref, in variance_kernel_avx2()
42 __m256i *const sse, in variance_kernel_avx2()
43 __m256i *const sum) { in variance_kernel_avx2()
44 const __m256i adj_sub = _mm256_load_si256((__m256i const *)adjacent_sub_avx2); in variance_kernel_avx2()
47 const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref); in variance_kernel_avx2()
48 const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref); in variance_kernel_avx2()
51 const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub); in variance_kernel_avx2()
52 const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub); in variance_kernel_avx2()
53 const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); in variance_kernel_avx2()
54 const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); in variance_kernel_avx2()
[all …]
highbd_convolve_avx2.c
34 const __m256i p0 = _mm256_loadu_si256((const __m256i *)src); in vpx_highbd_convolve_copy_avx2()
35 const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16)); in vpx_highbd_convolve_copy_avx2()
36 const __m256i p2 = _mm256_loadu_si256((const __m256i *)(src + 32)); in vpx_highbd_convolve_copy_avx2()
37 const __m256i p3 = _mm256_loadu_si256((const __m256i *)(src + 48)); in vpx_highbd_convolve_copy_avx2()
39 _mm256_storeu_si256((__m256i *)dst, p0); in vpx_highbd_convolve_copy_avx2()
40 _mm256_storeu_si256((__m256i *)(dst + 16), p1); in vpx_highbd_convolve_copy_avx2()
41 _mm256_storeu_si256((__m256i *)(dst + 32), p2); in vpx_highbd_convolve_copy_avx2()
42 _mm256_storeu_si256((__m256i *)(dst + 48), p3); in vpx_highbd_convolve_copy_avx2()
48 const __m256i p0 = _mm256_loadu_si256((const __m256i *)src); in vpx_highbd_convolve_copy_avx2()
49 const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16)); in vpx_highbd_convolve_copy_avx2()
[all …]
fwd_dct32x32_impl_avx2.h
27 static INLINE __m256i k_madd_epi32_avx2(__m256i a, __m256i b) { in k_madd_epi32_avx2()
28 __m256i buf0, buf1; in k_madd_epi32_avx2()
36 static INLINE __m256i k_packs_epi64_avx2(__m256i a, __m256i b) { in k_packs_epi64_avx2()
37 __m256i buf0 = _mm256_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64_avx2()
38 __m256i buf1 = _mm256_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64_avx2()
54 const __m256i k__cospi_p16_p16 = _mm256_set1_epi16(cospi_16_64); in FDCT32x32_2D_AVX2()
55 const __m256i k__cospi_p16_m16 = in FDCT32x32_2D_AVX2()
57 const __m256i k__cospi_m08_p24 = pair256_set_epi16(-cospi_8_64, cospi_24_64); in FDCT32x32_2D_AVX2()
58 const __m256i k__cospi_m24_m08 = pair256_set_epi16(-cospi_24_64, -cospi_8_64); in FDCT32x32_2D_AVX2()
59 const __m256i k__cospi_p24_p08 = pair256_set_epi16(+cospi_24_64, cospi_8_64); in FDCT32x32_2D_AVX2()
[all …]
