Searched refs: _mm256_permute2x128_si256 (Results 1 – 25 of 31) sorted by relevance
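
A note on the intrinsic these results match: _mm256_permute2x128_si256(a, b, imm8) assembles each 128-bit half of its result from one of the four 128-bit lanes of a and b. The low nibble of imm8 selects the destination's low lane and the high nibble its high lane (0 = a.lo, 1 = a.hi, 2 = b.lo, 3 = b.hi; setting bit 3 of a nibble zeroes that lane instead). A minimal sketch of the selector values that recur throughout the hits; the dump256 helper is invented for the demo:

  #include <immintrin.h>
  #include <stdio.h>

  /* Hypothetical helper, only for this sketch: print a __m256i as 8 ints. */
  static void dump256(const char *tag, __m256i v) {
    int x[8];
    _mm256_storeu_si256((__m256i *)x, v);
    printf("%s: %d %d %d %d | %d %d %d %d\n", tag, x[0], x[1], x[2], x[3],
           x[4], x[5], x[6], x[7]);
  }

  int main(void) {
    const __m256i a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    const __m256i b = _mm256_setr_epi32(8, 9, 10, 11, 12, 13, 14, 15);
    dump256("0x20 [a.lo|b.lo]", _mm256_permute2x128_si256(a, b, 0x20)); /* 0 1 2 3 | 8 9 10 11 */
    dump256("0x31 [a.hi|b.hi]", _mm256_permute2x128_si256(a, b, 0x31)); /* 4 5 6 7 | 12 13 14 15 */
    dump256("0x21 [a.hi|b.lo]", _mm256_permute2x128_si256(a, b, 0x21)); /* 4 5 6 7 | 8 9 10 11 */
    dump256("0x11 [a.hi|a.hi]", _mm256_permute2x128_si256(a, a, 0x11)); /* 4 5 6 7 | 4 5 6 7 */
    return 0;
  }

In the results below, the 0x20/0x31 pair interleaves two registers' lanes (the block-transpose tail in common_avx2.h, pack_avx2.cc, and PacketMathHalf.h), 0x21 splices a high lane onto the next register's low lane (the row-sliding convolve and filter kernels), and 0x11 broadcasts a register's own high lane (the quantizer helpers).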

/external/libaom/libaom/aom_dsp/x86/
common_avx2.h
129 out[0] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x20); // 0010 0000 in mm256_transpose_16x16()
130 out[8] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x31); // 0011 0001 in mm256_transpose_16x16()
131 out[1] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x20); in mm256_transpose_16x16()
132 out[9] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x31); in mm256_transpose_16x16()
133 out[2] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x20); in mm256_transpose_16x16()
134 out[10] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x31); in mm256_transpose_16x16()
135 out[3] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x20); in mm256_transpose_16x16()
136 out[11] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x31); in mm256_transpose_16x16()
138 out[4] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x20); in mm256_transpose_16x16()
139 out[12] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x31); in mm256_transpose_16x16()
[all …]
highbd_convolve_avx2.c
148 __m256i s01 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
154 __m256i s12 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
160 __m256i s23 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
166 __m256i s34 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
172 __m256i s45 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
180 __m256i s56 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
196 const __m256i s67 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
205 const __m256i s78 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2()
306 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_convolve_x_sr_avx2()
307 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_convolve_x_sr_avx2()
[all …]
avg_intrin_avx2.c
111 _mm256_permute2x128_si256(src[0], src[1], 0x20)); in hadamard_8x8x2_avx2()
114 _mm256_permute2x128_si256(src[2], src[3], 0x20)); in hadamard_8x8x2_avx2()
117 _mm256_permute2x128_si256(src[4], src[5], 0x20)); in hadamard_8x8x2_avx2()
120 _mm256_permute2x128_si256(src[6], src[7], 0x20)); in hadamard_8x8x2_avx2()
123 _mm256_permute2x128_si256(src[0], src[1], 0x31)); in hadamard_8x8x2_avx2()
126 _mm256_permute2x128_si256(src[2], src[3], 0x31)); in hadamard_8x8x2_avx2()
129 _mm256_permute2x128_si256(src[4], src[5], 0x31)); in hadamard_8x8x2_avx2()
132 _mm256_permute2x128_si256(src[6], src[7], 0x31)); in hadamard_8x8x2_avx2()
316 in[0] = _mm256_permute2x128_si256(b0, b1, 0x20); in highbd_hadamard_col8_avx2()
317 in[1] = _mm256_permute2x128_si256(b0, b1, 0x31); in highbd_hadamard_col8_avx2()
[all …]
highbd_adaptive_quantize_avx2.c
334 zbin = _mm256_permute2x128_si256(zbin, zbin, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
344 round = _mm256_permute2x128_si256(round, round, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
345 quant = _mm256_permute2x128_si256(quant, quant, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
346 shift = _mm256_permute2x128_si256(shift, shift, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
347 dequant = _mm256_permute2x128_si256(dequant, dequant, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
350 round = _mm256_permute2x128_si256(round, round, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
351 quant = _mm256_permute2x128_si256(quant, quant, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
352 shift = _mm256_permute2x128_si256(shift, shift, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
363 dequant = _mm256_permute2x128_si256(dequant, dequant, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
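The repeated (v, v, 0x11) permutes here and in the update_qp helpers further down are the quantizers' DC-to-AC switch: the parameter vectors hold the DC value in element 0 and the AC value in the remaining elements, so after the first block of coefficients, broadcasting the high 128-bit lane leaves the AC parameters everywhere. A hedged sketch of the idiom; the 16-bit layout is illustrative, not the exact libaom types:

  #include <immintrin.h>
  #include <stdio.h>

  int main(void) {
    /* Element 0 carries the DC parameter, the rest the AC parameter. */
    __m256i qp = _mm256_setr_epi16(100, 101, 101, 101, 101, 101, 101, 101,
                                   101, 101, 101, 101, 101, 101, 101, 101);
    /* (qp, qp, 0x11) copies the high 128-bit lane into both lanes. */
    qp = _mm256_permute2x128_si256(qp, qp, 0x11);
    short out[16];
    _mm256_storeu_si256((__m256i *)out, qp);
    printf("%d %d\n", out[0], out[15]); /* 101 101 -- the DC entry is gone */
    return 0;
  }
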
variance_impl_avx2.c
430 src_temp = _mm256_permute2x128_si256(src_avg, src_next_reg, 0x21); in aom_sub_pixel_variance16xh_avx2()
445 src_next_reg = _mm256_permute2x128_si256( in aom_sub_pixel_variance16xh_avx2()
465 src_temp = _mm256_permute2x128_si256(src_avg, src_next_reg, 0x21); in aom_sub_pixel_variance16xh_avx2()
479 src_next_reg = _mm256_permute2x128_si256( in aom_sub_pixel_variance16xh_avx2()
522 src_next_reg = _mm256_permute2x128_si256(src_pack, src_reg, 0x21); in aom_sub_pixel_variance16xh_avx2()
536 src_next_reg = _mm256_permute2x128_si256( in aom_sub_pixel_variance16xh_avx2()
565 src_next_reg = _mm256_permute2x128_si256(src_pack, src_reg, 0x21); in aom_sub_pixel_variance16xh_avx2()
580 src_next_reg = _mm256_permute2x128_si256( in aom_sub_pixel_variance16xh_avx2()
aom_subpixel_8t_intrin_avx2.c
838 srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21); in aom_filter_block1d8_v4_avx2()
934 srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21); in aom_filter_block1d8_v8_avx2()
935 srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21); in aom_filter_block1d8_v8_avx2()
936 srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21); in aom_filter_block1d8_v8_avx2()
1076 srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21); in aom_filter_block1d16_v4_avx2()
1184 srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21); in aom_filter_block1d16_v8_avx2()
1185 srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21); in aom_filter_block1d16_v8_avx2()
1186 srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21); in aom_filter_block1d16_v8_avx2()
1365 srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21); in aom_filter_block1d4_v4_avx2()
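The 0x21 selectors in the two files above implement a vertical sliding window: each register packs two consecutive rows, and permuting adjacent registers with 0x21 yields the overlapping middle pair without another load. A minimal sketch under that assumption:

  #include <immintrin.h>
  #include <stdio.h>

  int main(void) {
    /* Each register holds two rows of four pixels, tagged by row index. */
    const __m256i r01 = _mm256_setr_epi32(0, 0, 0, 0, 1, 1, 1, 1);
    const __m256i r23 = _mm256_setr_epi32(2, 2, 2, 2, 3, 3, 3, 3);
    /* 0x21: low lane <- r01.hi (row 1), high lane <- r23.lo (row 2). */
    const __m256i r12 = _mm256_permute2x128_si256(r01, r23, 0x21);
    int x[8];
    _mm256_storeu_si256((__m256i *)x, r12);
    printf("%d %d\n", x[0], x[4]); /* 1 2 */
    return 0;
  }
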
highbd_quantize_intrin_avx2.c
28 qp[i] = _mm256_permute2x128_si256(qp[i], qp[i], 0x11); in update_qp()
/external/libaom/libaom/av1/common/x86/
convolve_avx2.c
63 const __m256i src_01a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
66 const __m256i src_12a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
69 const __m256i src_23a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
72 const __m256i src_34a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
84 const __m256i src_45a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
88 const __m256i src_56a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
160 const __m256i src_01a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
163 const __m256i src_12a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
166 const __m256i src_23a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
169 const __m256i src_34a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2()
[all …]
highbd_jnt_convolve_avx2.c
132 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
176 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
294 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_dist_wtd_convolve_2d_avx2()
295 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_dist_wtd_convolve_2d_avx2()
368 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_avx2()
413 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_avx2()
517 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_dist_wtd_convolve_x_avx2()
518 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_dist_wtd_convolve_x_avx2()
554 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_x_avx2()
594 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_x_avx2()
[all …]
wiener_convolve_avx2.c
212 s[0] = _mm256_permute2x128_si256(s[0], s[4], 0x20); in av1_wiener_convolve_add_src_avx2()
213 s[1] = _mm256_permute2x128_si256(s[1], s[5], 0x20); in av1_wiener_convolve_add_src_avx2()
214 s[2] = _mm256_permute2x128_si256(s[2], s[6], 0x20); in av1_wiener_convolve_add_src_avx2()
warp_plane_avx2.c
631 _mm256_permute2x128_si256(horz_out[row + 3], horz_out[row + 4], 0x21); in filter_src_pixels_vertical_avx2()
790 _mm256_permute2x128_si256(horz_out[0], horz_out[1], 0x21); in warp_vertical_filter_avx2()
793 _mm256_permute2x128_si256(horz_out[1], horz_out[2], 0x21); in warp_vertical_filter_avx2()
796 _mm256_permute2x128_si256(horz_out[2], horz_out[3], 0x21); in warp_vertical_filter_avx2()
840 _mm256_permute2x128_si256(horz_out[0], horz_out[1], 0x21); in warp_vertical_filter_gamma0_avx2()
843 _mm256_permute2x128_si256(horz_out[1], horz_out[2], 0x21); in warp_vertical_filter_gamma0_avx2()
846 _mm256_permute2x128_si256(horz_out[2], horz_out[3], 0x21); in warp_vertical_filter_gamma0_avx2()
889 _mm256_permute2x128_si256(horz_out[0], horz_out[1], 0x21); in warp_vertical_filter_delta0_avx2()
892 _mm256_permute2x128_si256(horz_out[1], horz_out[2], 0x21); in warp_vertical_filter_delta0_avx2()
895 _mm256_permute2x128_si256(horz_out[2], horz_out[3], 0x21); in warp_vertical_filter_delta0_avx2()
[all …]
jnt_convolve_avx2.c
33 return _mm256_permute2x128_si256( in load_line2_avx2()
249 _mm256_permute2x128_si256(src_a[kk], src_a[kk + 1], 0x20); in av1_dist_wtd_convolve_y_avx2()
263 const __m256i src_45a = _mm256_permute2x128_si256(src4, src5, 0x20); in av1_dist_wtd_convolve_y_avx2()
267 const __m256i src_56a = _mm256_permute2x128_si256(src5, src4, 0x20); in av1_dist_wtd_convolve_y_avx2()
427 _mm256_permute2x128_si256(src_a[kk], src_a[kk + 1], 0x20); in av1_dist_wtd_convolve_y_avx2()
442 const __m256i src_67a = _mm256_permute2x128_si256(src6, src7, 0x20); in av1_dist_wtd_convolve_y_avx2()
446 const __m256i src_78a = _mm256_permute2x128_si256(src7, src6, 0x20); in av1_dist_wtd_convolve_y_avx2()
highbd_convolve_2d_avx2.c
75 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_convolve_2d_sr_avx2()
76 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_convolve_2d_sr_avx2()
/external/tensorflow/tensorflow/lite/experimental/ruy/
pack_avx2.cc
210 r0 = _mm256_permute2x128_si256(t0, t4, 0x20);
211 r4 = _mm256_permute2x128_si256(t1, t5, 0x20);
212 r1 = _mm256_permute2x128_si256(t0, t4, 0x31);
213 r5 = _mm256_permute2x128_si256(t1, t5, 0x31);
214 r2 = _mm256_permute2x128_si256(t2, t6, 0x20);
215 r6 = _mm256_permute2x128_si256(t3, t7, 0x20);
216 r3 = _mm256_permute2x128_si256(t2, t6, 0x31);
217 r7 = _mm256_permute2x128_si256(t3, t7, 0x31);
342 r0 = _mm256_permute2x128_si256(t0, t4, 0x20);
343 r4 = _mm256_permute2x128_si256(t1, t5, 0x20);
[all …]
kernel_avx2.cc
511 const __m256i lhs_16_bit_low = _mm256_permute2x128_si256(
514 const __m256i lhs_16_bit_high = _mm256_permute2x128_si256(
1258 const __m256i lhs_16_bit_low = _mm256_permute2x128_si256(
1261 const __m256i lhs_16_bit_high = _mm256_permute2x128_si256(
/external/libvpx/libvpx/vpx_dsp/x86/
avg_intrin_avx2.c
75 in[0] = _mm256_permute2x128_si256(b0, b1, 0x20); in highbd_hadamard_col8_avx2()
76 in[1] = _mm256_permute2x128_si256(b0, b1, 0x31); in highbd_hadamard_col8_avx2()
77 in[2] = _mm256_permute2x128_si256(b2, b3, 0x20); in highbd_hadamard_col8_avx2()
78 in[3] = _mm256_permute2x128_si256(b2, b3, 0x31); in highbd_hadamard_col8_avx2()
79 in[4] = _mm256_permute2x128_si256(b4, b5, 0x20); in highbd_hadamard_col8_avx2()
80 in[5] = _mm256_permute2x128_si256(b4, b5, 0x31); in highbd_hadamard_col8_avx2()
81 in[6] = _mm256_permute2x128_si256(b6, b7, 0x20); in highbd_hadamard_col8_avx2()
82 in[7] = _mm256_permute2x128_si256(b6, b7, 0x31); in highbd_hadamard_col8_avx2()
313 _mm256_permute2x128_si256(src[0], src[1], 0x20)); in hadamard_8x8x2_avx2()
316 _mm256_permute2x128_si256(src[2], src[3], 0x20)); in hadamard_8x8x2_avx2()
[all …]
highbd_convolve_avx2.c
236 x[0] = _mm256_permute2x128_si256(pp[0], pp[4], 0x20); in pack_16_pixels()
237 x[1] = _mm256_permute2x128_si256(pp[1], pp[5], 0x20); in pack_16_pixels()
238 x[2] = _mm256_permute2x128_si256(pp[2], pp[6], 0x20); in pack_16_pixels()
239 x[3] = _mm256_permute2x128_si256(pp[3], pp[7], 0x20); in pack_16_pixels()
242 x[6] = _mm256_permute2x128_si256(pp[0], pp[4], 0x31); in pack_16_pixels()
243 x[7] = _mm256_permute2x128_si256(pp[1], pp[5], 0x31); in pack_16_pixels()
251 x[0] = _mm256_permute2x128_si256(pp[0], pp[2], 0x30); in pack_8x1_pixels()
252 x[1] = _mm256_permute2x128_si256(pp[1], pp[3], 0x30); in pack_8x1_pixels()
253 x[2] = _mm256_permute2x128_si256(pp[2], pp[0], 0x30); in pack_8x1_pixels()
254 x[3] = _mm256_permute2x128_si256(pp[3], pp[1], 0x30); in pack_8x1_pixels()
[all …]
vpx_subpixel_8t_intrin_avx2.c
484 src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); in vpx_filter_block1d16_v4_avx2()
679 src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); in vpx_filter_block1d8_v4_avx2()
848 src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); in vpx_filter_block1d4_v4_avx2()
/external/libaom/libaom/av1/encoder/x86/
av1_quantize_avx2.c
70 qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); in update_qp()
71 qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); in update_qp()
72 qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11); in update_qp()
81 __m256i x0 = _mm256_permute2x128_si256(y0, y1, 0x20); \
82 __m256i x1 = _mm256_permute2x128_si256(y0, y1, 0x31); \
213 dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31); in av1_quantize_lp_avx2()
214 quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31); in av1_quantize_lp_avx2()
215 round256 = _mm256_permute2x128_si256(round256, round256, 0x31); in av1_quantize_lp_avx2()
av1_highbd_quantize_avx2.c
27 qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); in update_qp()
28 qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); in update_qp()
29 qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11); in update_qp()
av1_fwd_txfm2d_avx2.c
1453 output[0 * stride] = _mm256_permute2x128_si256(t0, t4, 0x20); in transpose_32_8x8_avx2()
1454 output[1 * stride] = _mm256_permute2x128_si256(t1, t5, 0x20); in transpose_32_8x8_avx2()
1455 output[2 * stride] = _mm256_permute2x128_si256(t2, t6, 0x20); in transpose_32_8x8_avx2()
1456 output[3 * stride] = _mm256_permute2x128_si256(t3, t7, 0x20); in transpose_32_8x8_avx2()
1457 output[4 * stride] = _mm256_permute2x128_si256(t0, t4, 0x31); in transpose_32_8x8_avx2()
1458 output[5 * stride] = _mm256_permute2x128_si256(t1, t5, 0x31); in transpose_32_8x8_avx2()
1459 output[6 * stride] = _mm256_permute2x128_si256(t2, t6, 0x31); in transpose_32_8x8_avx2()
1460 output[7 * stride] = _mm256_permute2x128_si256(t3, t7, 0x31); in transpose_32_8x8_avx2()
/external/libvpx/libvpx/vp9/encoder/x86/
vp9_quantize_avx2.c
105 dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31); in vp9_quantize_fp_avx2()
106 quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31); in vp9_quantize_fp_avx2()
107 round256 = _mm256_permute2x128_si256(round256, round256, 0x31); in vp9_quantize_fp_avx2()
/external/libaom/libaom/aom_dsp/simd/
v256_intrinsics_x86.h
190 return _mm256_permute2x128_si256(a, b, 0x02); in v256_ziplo_128()
194 return _mm256_permute2x128_si256(a, b, 0x13); in v256_ziphi_128()
348 _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 1, 0, 1)), pattern), in v256_shuffle_8()
350 _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 0, 0, 0)), pattern), in v256_shuffle_8()
359 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 1, 0, 1)), p32), in v256_wideshuffle_8()
361 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 0, 0, 0)), p32), in v256_wideshuffle_8()
365 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 3, 0, 3)), pattern), in v256_wideshuffle_8()
367 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 2, 0, 2)), pattern), in v256_wideshuffle_8()
394 r, _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1)))); in v256_dotp_s16()
417 r, _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1)))); in v256_dotp_s32()
[all …]
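The _MM_SHUFFLE-encoded selectors in this header also exercise the intrinsic's zeroing feature: bit 3 of a selector nibble writes zeros instead of copying a lane. _MM_SHUFFLE(2, 0, 0, 1) is 0x81, i.e. low lane <- a.hi, high lane <- zeroed, which is how the v256_dotp reductions fold the two 128-bit halves together. A sketch:

  #include <immintrin.h>
  #include <stdio.h>

  int main(void) {
    const __m256i r = _mm256_setr_epi32(1, 2, 3, 4, 10, 20, 30, 40);
    /* _MM_SHUFFLE(2, 0, 0, 1) == 0x81: dst.lo = r.hi, dst.hi = zeroed. */
    const __m256i hi = _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1));
    const __m256i sum = _mm256_add_epi32(r, hi); /* halves folded together */
    int x[8];
    _mm256_storeu_si256((__m256i *)x, sum);
    printf("%d %d %d %d\n", x[0], x[1], x[2], x[3]); /* 11 22 33 44 */
    return 0;
  }
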
/external/eigen/Eigen/src/Core/arch/CUDA/
PacketMathHalf.h
604 __m256i a_p_0 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x20);
605 __m256i a_p_1 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x31);
606 __m256i a_p_2 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x20);
607 __m256i a_p_3 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x31);
608 __m256i a_p_4 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x20);
609 __m256i a_p_5 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x31);
610 __m256i a_p_6 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x20);
611 __m256i a_p_7 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x31);
612 __m256i a_p_8 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x20);
613 __m256i a_p_9 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x31);
[all …]
/external/skia/src/opts/
SkBitmapProcState_opts.h
108 __m256i abcd = _mm256_permute2x128_si256(lo, hi, 0x20), in S32_alpha_D32_filter_DX()
109 efgh = _mm256_permute2x128_si256(lo, hi, 0x31); in S32_alpha_D32_filter_DX()
