
Searched refs: _mm256_permute2x128_si256 (Results 1 – 25 of 25) sorted by relevance
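Every hit below differs only in the 8-bit control operand. Each nibble of that byte selects one 128-bit lane for the corresponding half of the result (0 = low lane of the first source, 1 = its high lane, 2/3 = the second source's lanes; setting bit 3 of a nibble zeroes that half). A minimal sketch of the selectors that dominate this listing, not taken from any of the indexed files:

    #include <immintrin.h>

    /* Illustrative helper only. Notation: {low half | high half}. */
    static inline void permute2x128_selectors(__m256i a, __m256i b,
                                              __m256i out[4]) {
      out[0] = _mm256_permute2x128_si256(a, b, 0x20); /* {a.lo | b.lo} */
      out[1] = _mm256_permute2x128_si256(a, b, 0x31); /* {a.hi | b.hi} */
      out[2] = _mm256_permute2x128_si256(a, b, 0x21); /* {a.hi | b.lo} */
      out[3] = _mm256_permute2x128_si256(a, a, 0x11); /* {a.hi | a.hi} */
    }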

/external/libaom/libaom/aom_dsp/x86/
common_avx2.h:129 out[0] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x20); // 0010 0000 in mm256_transpose_16x16()
130 out[8] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x31); // 0011 0001 in mm256_transpose_16x16()
131 out[1] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x20); in mm256_transpose_16x16()
132 out[9] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x31); in mm256_transpose_16x16()
133 out[2] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x20); in mm256_transpose_16x16()
134 out[10] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x31); in mm256_transpose_16x16()
135 out[3] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x20); in mm256_transpose_16x16()
136 out[11] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x31); in mm256_transpose_16x16()
138 out[4] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x20); in mm256_transpose_16x16()
139 out[12] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x31); in mm256_transpose_16x16()
[all …]
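Note: the 0x20/0x31 pair above is the cross-lane step every AVX2 transpose in this listing ends with: unpack instructions interleave within each 128-bit lane, then the two permutes gather the low lanes and the high lanes. A standalone sketch of the same idea on a 4x4 block of 64-bit elements (hypothetical helper, not from the file above):

    #include <immintrin.h>

    static inline void transpose_4x4_epi64(__m256i r[4]) {
      /* In-lane interleave: t0 = {r0[0], r1[0] | r0[2], r1[2]}, etc. */
      __m256i t0 = _mm256_unpacklo_epi64(r[0], r[1]);
      __m256i t1 = _mm256_unpackhi_epi64(r[0], r[1]);
      __m256i t2 = _mm256_unpacklo_epi64(r[2], r[3]);
      __m256i t3 = _mm256_unpackhi_epi64(r[2], r[3]);
      /* Cross-lane gather: low lanes form rows 0-1, high lanes rows 2-3. */
      r[0] = _mm256_permute2x128_si256(t0, t2, 0x20);
      r[1] = _mm256_permute2x128_si256(t1, t3, 0x20);
      r[2] = _mm256_permute2x128_si256(t0, t2, 0x31);
      r[3] = _mm256_permute2x128_si256(t1, t3, 0x31);
    }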
txfm_common_avx2.h:150 out[0 + 0] = _mm256_permute2x128_si256(c[0], c[1], 0x20); in transpose_16bit_16x16_avx2()
151 out[1 + 0] = _mm256_permute2x128_si256(c[8], c[9], 0x20); in transpose_16bit_16x16_avx2()
152 out[2 + 0] = _mm256_permute2x128_si256(c[4], c[5], 0x20); in transpose_16bit_16x16_avx2()
153 out[3 + 0] = _mm256_permute2x128_si256(c[12], c[13], 0x20); in transpose_16bit_16x16_avx2()
155 out[0 + 8] = _mm256_permute2x128_si256(c[0], c[1], 0x31); in transpose_16bit_16x16_avx2()
156 out[1 + 8] = _mm256_permute2x128_si256(c[8], c[9], 0x31); in transpose_16bit_16x16_avx2()
157 out[2 + 8] = _mm256_permute2x128_si256(c[4], c[5], 0x31); in transpose_16bit_16x16_avx2()
158 out[3 + 8] = _mm256_permute2x128_si256(c[12], c[13], 0x31); in transpose_16bit_16x16_avx2()
160 out[4 + 0] = _mm256_permute2x128_si256(c[0 + 2], c[1 + 2], 0x20); in transpose_16bit_16x16_avx2()
161 out[5 + 0] = _mm256_permute2x128_si256(c[8 + 2], c[9 + 2], 0x20); in transpose_16bit_16x16_avx2()
[all …]
highbd_convolve_avx2.c:148 __m256i s01 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
154 __m256i s12 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
160 __m256i s23 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
166 __m256i s34 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
172 __m256i s45 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
180 __m256i s56 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
196 const __m256i s67 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
205 const __m256i s78 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
306 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_convolve_x_sr_avx2()
307 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_convolve_x_sr_avx2()
[all …]
avg_intrin_avx2.c:111 _mm256_permute2x128_si256(src[0], src[1], 0x20)); in hadamard_8x8x2_avx2()
114 _mm256_permute2x128_si256(src[2], src[3], 0x20)); in hadamard_8x8x2_avx2()
117 _mm256_permute2x128_si256(src[4], src[5], 0x20)); in hadamard_8x8x2_avx2()
120 _mm256_permute2x128_si256(src[6], src[7], 0x20)); in hadamard_8x8x2_avx2()
123 _mm256_permute2x128_si256(src[0], src[1], 0x31)); in hadamard_8x8x2_avx2()
126 _mm256_permute2x128_si256(src[2], src[3], 0x31)); in hadamard_8x8x2_avx2()
129 _mm256_permute2x128_si256(src[4], src[5], 0x31)); in hadamard_8x8x2_avx2()
132 _mm256_permute2x128_si256(src[6], src[7], 0x31)); in hadamard_8x8x2_avx2()
aom_subpixel_8t_intrin_avx2.c:838 srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21); in aom_filter_block1d8_v4_avx2()
934 srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21); in aom_filter_block1d8_v8_avx2()
935 srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21); in aom_filter_block1d8_v8_avx2()
936 srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21); in aom_filter_block1d8_v8_avx2()
1076 srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21); in aom_filter_block1d16_v4_avx2()
1184 srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21); in aom_filter_block1d16_v8_avx2()
1185 srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21); in aom_filter_block1d16_v8_avx2()
1186 srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21); in aom_filter_block1d16_v8_avx2()
1365 srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21); in aom_filter_block1d4_v4_avx2()
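The 0x21 selector used throughout these vertical filters concatenates the first source's high lane with the second source's low lane. If srcReg23 holds rows {2 | 3} and srcReg4x holds row 4 in its low lane, the permute yields rows {3 | 4}, advancing the filter's row window by one without reloading. A sketch of the idiom with hypothetical names:

    #include <immintrin.h>

    /* rows_ab = {row a | row b}, rows_cd = {row c | row d}.
     * Returns {row b | row c}, i.e. the window slid forward one row. */
    static inline __m256i slide_row_window(__m256i rows_ab, __m256i rows_cd) {
      return _mm256_permute2x128_si256(rows_ab, rows_cd, 0x21);
    }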
highbd_quantize_intrin_avx2.c:28 qp[i] = _mm256_permute2x128_si256(qp[i], qp[i], 0x11); in update_qp()
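Selector 0x11 (both nibbles = 1) replicates the source's own high lane into both halves. The quantizers use it after the first group of coefficients: the low lane held the DC round/quant/dequant constants, and broadcasting the high (AC) lane presumably makes every later iteration use the AC constants only. Sketch under that assumption:

    #include <immintrin.h>

    /* Assumed layout: {DC params | AC params}. After the first block,
     * replace the DC half with a copy of the AC half. */
    static inline __m256i broadcast_ac_half(__m256i qp) {
      return _mm256_permute2x128_si256(qp, qp, 0x11); /* {qp.hi | qp.hi} */
    }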
intrapred_avx2.c:24 __m256i u0 = _mm256_permute2x128_si256(y0, y0, 1); in dc_sum_64()
34 __m256i u = _mm256_permute2x128_si256(y, y, 1); in dc_sum_32()
318 __m256i r0 = _mm256_permute2x128_si256(t[i], t[i], 0); in h_predictor_32x8line()
319 __m256i r1 = _mm256_permute2x128_si256(t[i], t[i], 0x11); in h_predictor_32x8line()
811 return _mm256_permute2x128_si256(x0, x1, 0x20); in paeth_32x1_pred()
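dc_sum_32/dc_sum_64 pass an immediate of 1 (0x01), which swaps the two lanes outright: low half = source high lane, high half = source low lane. Adding the swapped register back to the original folds a 256-bit horizontal reduction down to 128 bits. A self-contained sketch of that reduction pattern (hypothetical helper):

    #include <immintrin.h>

    /* Sum the eight 32-bit lanes of y. */
    static inline int hsum_epi32_avx2(__m256i y) {
      __m256i swap = _mm256_permute2x128_si256(y, y, 0x01); /* {y.hi | y.lo} */
      __m128i s = _mm256_castsi256_si128(_mm256_add_epi32(y, swap));
      s = _mm_add_epi32(s, _mm_srli_si128(s, 8));
      s = _mm_add_epi32(s, _mm_srli_si128(s, 4));
      return _mm_cvtsi128_si32(s);
    }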
/external/libaom/libaom/av1/common/x86/
convolve_avx2.c:63 const __m256i src_01a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
66 const __m256i src_12a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
69 const __m256i src_23a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
72 const __m256i src_34a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
84 const __m256i src_45a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
88 const __m256i src_56a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
160 const __m256i src_01a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
163 const __m256i src_12a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
166 const __m256i src_23a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
169 const __m256i src_34a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
[all …]
highbd_jnt_convolve_avx2.c:132 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
176 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
294 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_dist_wtd_convolve_2d_avx2()
295 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_dist_wtd_convolve_2d_avx2()
368 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_avx2()
413 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_avx2()
517 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_dist_wtd_convolve_x_avx2()
518 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_dist_wtd_convolve_x_avx2()
554 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_x_avx2()
594 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_x_avx2()
[all …]
jnt_convolve_avx2.c:33 return _mm256_permute2x128_si256( in load_line2_avx2()
249 _mm256_permute2x128_si256(src_a[kk], src_a[kk + 1], 0x20); in av1_dist_wtd_convolve_y_avx2()
263 const __m256i src_45a = _mm256_permute2x128_si256(src4, src5, 0x20); in av1_dist_wtd_convolve_y_avx2()
267 const __m256i src_56a = _mm256_permute2x128_si256(src5, src4, 0x20); in av1_dist_wtd_convolve_y_avx2()
427 _mm256_permute2x128_si256(src_a[kk], src_a[kk + 1], 0x20); in av1_dist_wtd_convolve_y_avx2()
442 const __m256i src_67a = _mm256_permute2x128_si256(src6, src7, 0x20); in av1_dist_wtd_convolve_y_avx2()
446 const __m256i src_78a = _mm256_permute2x128_si256(src7, src6, 0x20); in av1_dist_wtd_convolve_y_avx2()
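load_line2_avx2 above pairs two 128-bit rows in one register so a single 256-bit multiply-accumulate covers both. The listing elides its body; a plausible shape, stated as an assumption:

    #include <immintrin.h>

    /* Assumed: row a lands in the low lane, row b in the high lane.
     * 0x20 selects the low lane of each source, so the undefined upper
     * halves left by the casts are never read. */
    static inline __m256i load_line2(const void *a, const void *b) {
      return _mm256_permute2x128_si256(
          _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)a)),
          _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)b)), 0x20);
    }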
highbd_convolve_2d_avx2.c:75 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_convolve_2d_sr_avx2()
76 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_convolve_2d_sr_avx2()
/external/libvpx/libvpx/vpx_dsp/x86/
avg_intrin_avx2.c:75 in[0] = _mm256_permute2x128_si256(b0, b1, 0x20); in highbd_hadamard_col8_avx2()
76 in[1] = _mm256_permute2x128_si256(b0, b1, 0x31); in highbd_hadamard_col8_avx2()
77 in[2] = _mm256_permute2x128_si256(b2, b3, 0x20); in highbd_hadamard_col8_avx2()
78 in[3] = _mm256_permute2x128_si256(b2, b3, 0x31); in highbd_hadamard_col8_avx2()
79 in[4] = _mm256_permute2x128_si256(b4, b5, 0x20); in highbd_hadamard_col8_avx2()
80 in[5] = _mm256_permute2x128_si256(b4, b5, 0x31); in highbd_hadamard_col8_avx2()
81 in[6] = _mm256_permute2x128_si256(b6, b7, 0x20); in highbd_hadamard_col8_avx2()
82 in[7] = _mm256_permute2x128_si256(b6, b7, 0x31); in highbd_hadamard_col8_avx2()
313 _mm256_permute2x128_si256(src[0], src[1], 0x20)); in hadamard_8x8x2_avx2()
316 _mm256_permute2x128_si256(src[2], src[3], 0x20)); in hadamard_8x8x2_avx2()
[all …]
highbd_convolve_avx2.c:236 x[0] = _mm256_permute2x128_si256(pp[0], pp[4], 0x20); in pack_16_pixels()
237 x[1] = _mm256_permute2x128_si256(pp[1], pp[5], 0x20); in pack_16_pixels()
238 x[2] = _mm256_permute2x128_si256(pp[2], pp[6], 0x20); in pack_16_pixels()
239 x[3] = _mm256_permute2x128_si256(pp[3], pp[7], 0x20); in pack_16_pixels()
242 x[6] = _mm256_permute2x128_si256(pp[0], pp[4], 0x31); in pack_16_pixels()
243 x[7] = _mm256_permute2x128_si256(pp[1], pp[5], 0x31); in pack_16_pixels()
251 x[0] = _mm256_permute2x128_si256(pp[0], pp[2], 0x30); in pack_8x1_pixels()
252 x[1] = _mm256_permute2x128_si256(pp[1], pp[3], 0x30); in pack_8x1_pixels()
253 x[2] = _mm256_permute2x128_si256(pp[2], pp[0], 0x30); in pack_8x1_pixels()
254 x[3] = _mm256_permute2x128_si256(pp[3], pp[1], 0x30); in pack_8x1_pixels()
[all …]
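pack_8x1_pixels uses 0x30, which keeps the first source's low lane and takes the second source's high lane; issuing it twice with the operands swapped (pp[0]/pp[2] and pp[2]/pp[0] above) exchanges the outer halves in both directions. Sketch with hypothetical names:

    #include <immintrin.h>

    /* out[0] = {a.lo | b.hi}, out[1] = {b.lo | a.hi}. */
    static inline void cross_blend(__m256i a, __m256i b, __m256i out[2]) {
      out[0] = _mm256_permute2x128_si256(a, b, 0x30);
      out[1] = _mm256_permute2x128_si256(b, a, 0x30);
    }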
vpx_subpixel_8t_intrin_avx2.c:488 src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); in vpx_filter_block1d16_v4_avx2()
687 src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); in vpx_filter_block1d8_v4_avx2()
860 src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); in vpx_filter_block1d4_v4_avx2()
/external/libvpx/libvpx/vp9/encoder/x86/
vp9_quantize_avx2.c:105 dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31); in vp9_quantize_fp_avx2()
106 quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31); in vp9_quantize_fp_avx2()
107 round256 = _mm256_permute2x128_si256(round256, round256, 0x31); in vp9_quantize_fp_avx2()
/external/libaom/libaom/av1/encoder/x86/
av1_highbd_quantize_avx2.c:27 qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); in update_qp()
28 qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); in update_qp()
29 qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11); in update_qp()
av1_quantize_avx2.c:70 qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); in update_qp()
71 qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); in update_qp()
72 qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11); in update_qp()
81 __m256i x0 = _mm256_permute2x128_si256(y0, y1, 0x20); \
82 __m256i x1 = _mm256_permute2x128_si256(y0, y1, 0x31); \
av1_fwd_txfm2d_avx2.c:1453 output[0 * stride] = _mm256_permute2x128_si256(t0, t4, 0x20); in transpose_32_8x8_avx2()
1454 output[1 * stride] = _mm256_permute2x128_si256(t1, t5, 0x20); in transpose_32_8x8_avx2()
1455 output[2 * stride] = _mm256_permute2x128_si256(t2, t6, 0x20); in transpose_32_8x8_avx2()
1456 output[3 * stride] = _mm256_permute2x128_si256(t3, t7, 0x20); in transpose_32_8x8_avx2()
1457 output[4 * stride] = _mm256_permute2x128_si256(t0, t4, 0x31); in transpose_32_8x8_avx2()
1458 output[5 * stride] = _mm256_permute2x128_si256(t1, t5, 0x31); in transpose_32_8x8_avx2()
1459 output[6 * stride] = _mm256_permute2x128_si256(t2, t6, 0x31); in transpose_32_8x8_avx2()
1460 output[7 * stride] = _mm256_permute2x128_si256(t3, t7, 0x31); in transpose_32_8x8_avx2()
/external/eigen/Eigen/src/Core/arch/CUDA/
PacketMathHalf.h:604 __m256i a_p_0 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x20);
605 __m256i a_p_1 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x31);
606 __m256i a_p_2 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x20);
607 __m256i a_p_3 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x31);
608 __m256i a_p_4 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x20);
609 __m256i a_p_5 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x31);
610 __m256i a_p_6 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x20);
611 __m256i a_p_7 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x31);
612 __m256i a_p_8 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x20);
613 __m256i a_p_9 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x31);
[all …]
/external/libaom/libaom/aom_dsp/simd/
v256_intrinsics_x86.h:350 _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 1, 0, 1)), pattern), in v256_shuffle_8()
352 _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 0, 0, 0)), pattern), in v256_shuffle_8()
361 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 1, 0, 1)), p32), in v256_wideshuffle_8()
363 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 0, 0, 0)), p32), in v256_wideshuffle_8()
367 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 3, 0, 3)), pattern), in v256_wideshuffle_8()
369 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 2, 0, 2)), pattern), in v256_wideshuffle_8()
396 r, _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1)))); in v256_dotp_s16()
419 r, _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1)))); in v256_dotp_s32()
672 _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(2, 0, 0, 1)), a, n) \
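The v256 layer builds the control byte with _MM_SHUFFLE(z, y, x, w), which packs its arguments as (z << 6) | (y << 4) | (x << 2) | w. So _MM_SHUFFLE(0, 1, 0, 1) is 0x11 (broadcast the high lane) and _MM_SHUFFLE(2, 0, 0, 1) is 0x81: the set bit 7 zeroes the result's high half while the low half receives the source's high lane, the lane fold used in the dot-product reductions above. Sketch:

    #include <immintrin.h>

    /* {r.hi | 0}: _MM_SHUFFLE(2, 0, 0, 1) == 0x81. */
    static inline __m256i fold_high_lane(__m256i r) {
      return _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1));
    }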
/external/clang/test/CodeGen/
avx2-builtins.c:895 return _mm256_permute2x128_si256(a, b, 0x31); in test_mm256_permute2x128_si256()
/external/mesa3d/src/gallium/drivers/swr/rasterizer/core/
format_utils.h:243 simdscalari dst23 = _mm256_permute2x128_si256(src, src, 0x01); in Transpose()
/external/clang/lib/Headers/
avx2intrin.h:965 #define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ macro
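The header has to define this as a macro because the control operand must be an integer constant expression; a wrapper that forwards a runtime variable as the third argument will not compile. Any abstraction over it therefore bakes the immediate in at expansion time, e.g.:

    #include <immintrin.h>

    /* 0x01 swaps the two 128-bit lanes; the immediate is fixed here, as required. */
    #define MM256_SWAP_LANES(v) _mm256_permute2x128_si256((v), (v), 0x01)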
/external/skia/src/opts/
SkRasterPipeline_opts.h:2852 __m256i _02 = _mm256_permute2x128_si256(_01,_23, 0x20),
2853 _13 = _mm256_permute2x128_si256(_01,_23, 0x31);
/external/skqp/src/opts/
SkRasterPipeline_opts.h:2836 __m256i _02 = _mm256_permute2x128_si256(_01,_23, 0x20),
2837 _13 = _mm256_permute2x128_si256(_01,_23, 0x31);