/external/libaom/libaom/aom_dsp/x86/ |
D | common_avx2.h | 129 out[0] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x20); // 0010 0000 in mm256_transpose_16x16() 130 out[8] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x31); // 0011 0001 in mm256_transpose_16x16() 131 out[1] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x20); in mm256_transpose_16x16() 132 out[9] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x31); in mm256_transpose_16x16() 133 out[2] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x20); in mm256_transpose_16x16() 134 out[10] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x31); in mm256_transpose_16x16() 135 out[3] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x20); in mm256_transpose_16x16() 136 out[11] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x31); in mm256_transpose_16x16() 138 out[4] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x20); in mm256_transpose_16x16() 139 out[12] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x31); in mm256_transpose_16x16() [all …]
|
D | highbd_convolve_avx2.c | 148 __m256i s01 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2() 154 __m256i s12 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2() 160 __m256i s23 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2() 166 __m256i s34 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2() 172 __m256i s45 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2() 180 __m256i s56 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2() 196 const __m256i s67 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2() 205 const __m256i s78 = _mm256_permute2x128_si256( in av1_highbd_convolve_y_sr_avx2() 306 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_convolve_x_sr_avx2() 307 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_convolve_x_sr_avx2() [all …]
|
D | avg_intrin_avx2.c | 111 _mm256_permute2x128_si256(src[0], src[1], 0x20)); in hadamard_8x8x2_avx2() 114 _mm256_permute2x128_si256(src[2], src[3], 0x20)); in hadamard_8x8x2_avx2() 117 _mm256_permute2x128_si256(src[4], src[5], 0x20)); in hadamard_8x8x2_avx2() 120 _mm256_permute2x128_si256(src[6], src[7], 0x20)); in hadamard_8x8x2_avx2() 123 _mm256_permute2x128_si256(src[0], src[1], 0x31)); in hadamard_8x8x2_avx2() 126 _mm256_permute2x128_si256(src[2], src[3], 0x31)); in hadamard_8x8x2_avx2() 129 _mm256_permute2x128_si256(src[4], src[5], 0x31)); in hadamard_8x8x2_avx2() 132 _mm256_permute2x128_si256(src[6], src[7], 0x31)); in hadamard_8x8x2_avx2() 316 in[0] = _mm256_permute2x128_si256(b0, b1, 0x20); in highbd_hadamard_col8_avx2() 317 in[1] = _mm256_permute2x128_si256(b0, b1, 0x31); in highbd_hadamard_col8_avx2() [all …]
|
D | highbd_adaptive_quantize_avx2.c | 334 zbin = _mm256_permute2x128_si256(zbin, zbin, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2() 344 round = _mm256_permute2x128_si256(round, round, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2() 345 quant = _mm256_permute2x128_si256(quant, quant, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2() 346 shift = _mm256_permute2x128_si256(shift, shift, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2() 347 dequant = _mm256_permute2x128_si256(dequant, dequant, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2() 350 round = _mm256_permute2x128_si256(round, round, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2() 351 quant = _mm256_permute2x128_si256(quant, quant, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2() 352 shift = _mm256_permute2x128_si256(shift, shift, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2() 363 dequant = _mm256_permute2x128_si256(dequant, dequant, 0x11); in aom_highbd_quantize_b_32x32_adaptive_avx2()
|
D | variance_impl_avx2.c | 430 src_temp = _mm256_permute2x128_si256(src_avg, src_next_reg, 0x21); in aom_sub_pixel_variance16xh_avx2() 445 src_next_reg = _mm256_permute2x128_si256( in aom_sub_pixel_variance16xh_avx2() 465 src_temp = _mm256_permute2x128_si256(src_avg, src_next_reg, 0x21); in aom_sub_pixel_variance16xh_avx2() 479 src_next_reg = _mm256_permute2x128_si256( in aom_sub_pixel_variance16xh_avx2() 522 src_next_reg = _mm256_permute2x128_si256(src_pack, src_reg, 0x21); in aom_sub_pixel_variance16xh_avx2() 536 src_next_reg = _mm256_permute2x128_si256( in aom_sub_pixel_variance16xh_avx2() 565 src_next_reg = _mm256_permute2x128_si256(src_pack, src_reg, 0x21); in aom_sub_pixel_variance16xh_avx2() 580 src_next_reg = _mm256_permute2x128_si256( in aom_sub_pixel_variance16xh_avx2()
|
D | aom_subpixel_8t_intrin_avx2.c | 838 srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21); in aom_filter_block1d8_v4_avx2() 934 srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21); in aom_filter_block1d8_v8_avx2() 935 srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21); in aom_filter_block1d8_v8_avx2() 936 srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21); in aom_filter_block1d8_v8_avx2() 1076 srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21); in aom_filter_block1d16_v4_avx2() 1184 srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21); in aom_filter_block1d16_v8_avx2() 1185 srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21); in aom_filter_block1d16_v8_avx2() 1186 srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21); in aom_filter_block1d16_v8_avx2() 1365 srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21); in aom_filter_block1d4_v4_avx2()
|
D | highbd_quantize_intrin_avx2.c | 28 qp[i] = _mm256_permute2x128_si256(qp[i], qp[i], 0x11); in update_qp()
|
/external/libaom/libaom/av1/common/x86/ |
D | convolve_avx2.c | 63 const __m256i src_01a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() 66 const __m256i src_12a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() 69 const __m256i src_23a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() 72 const __m256i src_34a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() 84 const __m256i src_45a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() 88 const __m256i src_56a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() 160 const __m256i src_01a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() 163 const __m256i src_12a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() 166 const __m256i src_23a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() 169 const __m256i src_34a = _mm256_permute2x128_si256( in av1_convolve_y_sr_avx2() [all …]
|
D | highbd_jnt_convolve_avx2.c | 132 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_copy_avx2() 176 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_copy_avx2() 294 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_dist_wtd_convolve_2d_avx2() 295 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_dist_wtd_convolve_2d_avx2() 368 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_avx2() 413 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_2d_avx2() 517 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_dist_wtd_convolve_x_avx2() 518 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_dist_wtd_convolve_x_avx2() 554 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_x_avx2() 594 _mm256_permute2x128_si256(data_0, data_1, 0x20); in av1_highbd_dist_wtd_convolve_x_avx2() [all …]
|
D | wiener_convolve_avx2.c | 212 s[0] = _mm256_permute2x128_si256(s[0], s[4], 0x20); in av1_wiener_convolve_add_src_avx2() 213 s[1] = _mm256_permute2x128_si256(s[1], s[5], 0x20); in av1_wiener_convolve_add_src_avx2() 214 s[2] = _mm256_permute2x128_si256(s[2], s[6], 0x20); in av1_wiener_convolve_add_src_avx2()
|
D | warp_plane_avx2.c | 631 _mm256_permute2x128_si256(horz_out[row + 3], horz_out[row + 4], 0x21); in filter_src_pixels_vertical_avx2() 790 _mm256_permute2x128_si256(horz_out[0], horz_out[1], 0x21); in warp_vertical_filter_avx2() 793 _mm256_permute2x128_si256(horz_out[1], horz_out[2], 0x21); in warp_vertical_filter_avx2() 796 _mm256_permute2x128_si256(horz_out[2], horz_out[3], 0x21); in warp_vertical_filter_avx2() 840 _mm256_permute2x128_si256(horz_out[0], horz_out[1], 0x21); in warp_vertical_filter_gamma0_avx2() 843 _mm256_permute2x128_si256(horz_out[1], horz_out[2], 0x21); in warp_vertical_filter_gamma0_avx2() 846 _mm256_permute2x128_si256(horz_out[2], horz_out[3], 0x21); in warp_vertical_filter_gamma0_avx2() 889 _mm256_permute2x128_si256(horz_out[0], horz_out[1], 0x21); in warp_vertical_filter_delta0_avx2() 892 _mm256_permute2x128_si256(horz_out[1], horz_out[2], 0x21); in warp_vertical_filter_delta0_avx2() 895 _mm256_permute2x128_si256(horz_out[2], horz_out[3], 0x21); in warp_vertical_filter_delta0_avx2() [all …]
|
D | jnt_convolve_avx2.c | 33 return _mm256_permute2x128_si256( in load_line2_avx2() 249 _mm256_permute2x128_si256(src_a[kk], src_a[kk + 1], 0x20); in av1_dist_wtd_convolve_y_avx2() 263 const __m256i src_45a = _mm256_permute2x128_si256(src4, src5, 0x20); in av1_dist_wtd_convolve_y_avx2() 267 const __m256i src_56a = _mm256_permute2x128_si256(src5, src4, 0x20); in av1_dist_wtd_convolve_y_avx2() 427 _mm256_permute2x128_si256(src_a[kk], src_a[kk + 1], 0x20); in av1_dist_wtd_convolve_y_avx2() 442 const __m256i src_67a = _mm256_permute2x128_si256(src6, src7, 0x20); in av1_dist_wtd_convolve_y_avx2() 446 const __m256i src_78a = _mm256_permute2x128_si256(src7, src6, 0x20); in av1_dist_wtd_convolve_y_avx2()
|
D | highbd_convolve_2d_avx2.c | 75 const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20); in av1_highbd_convolve_2d_sr_avx2() 76 const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31); in av1_highbd_convolve_2d_sr_avx2()
|
/external/tensorflow/tensorflow/lite/experimental/ruy/ |
D | pack_avx2.cc | 210 r0 = _mm256_permute2x128_si256(t0, t4, 0x20); 211 r4 = _mm256_permute2x128_si256(t1, t5, 0x20); 212 r1 = _mm256_permute2x128_si256(t0, t4, 0x31); 213 r5 = _mm256_permute2x128_si256(t1, t5, 0x31); 214 r2 = _mm256_permute2x128_si256(t2, t6, 0x20); 215 r6 = _mm256_permute2x128_si256(t3, t7, 0x20); 216 r3 = _mm256_permute2x128_si256(t2, t6, 0x31); 217 r7 = _mm256_permute2x128_si256(t3, t7, 0x31); 342 r0 = _mm256_permute2x128_si256(t0, t4, 0x20); 343 r4 = _mm256_permute2x128_si256(t1, t5, 0x20); [all …]
|
D | kernel_avx2.cc | 511 const __m256i lhs_16_bit_low = _mm256_permute2x128_si256( 514 const __m256i lhs_16_bit_high = _mm256_permute2x128_si256( 1258 const __m256i lhs_16_bit_low = _mm256_permute2x128_si256( 1261 const __m256i lhs_16_bit_high = _mm256_permute2x128_si256(
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | avg_intrin_avx2.c | 75 in[0] = _mm256_permute2x128_si256(b0, b1, 0x20); in highbd_hadamard_col8_avx2() 76 in[1] = _mm256_permute2x128_si256(b0, b1, 0x31); in highbd_hadamard_col8_avx2() 77 in[2] = _mm256_permute2x128_si256(b2, b3, 0x20); in highbd_hadamard_col8_avx2() 78 in[3] = _mm256_permute2x128_si256(b2, b3, 0x31); in highbd_hadamard_col8_avx2() 79 in[4] = _mm256_permute2x128_si256(b4, b5, 0x20); in highbd_hadamard_col8_avx2() 80 in[5] = _mm256_permute2x128_si256(b4, b5, 0x31); in highbd_hadamard_col8_avx2() 81 in[6] = _mm256_permute2x128_si256(b6, b7, 0x20); in highbd_hadamard_col8_avx2() 82 in[7] = _mm256_permute2x128_si256(b6, b7, 0x31); in highbd_hadamard_col8_avx2() 313 _mm256_permute2x128_si256(src[0], src[1], 0x20)); in hadamard_8x8x2_avx2() 316 _mm256_permute2x128_si256(src[2], src[3], 0x20)); in hadamard_8x8x2_avx2() [all …]
|
D | highbd_convolve_avx2.c | 236 x[0] = _mm256_permute2x128_si256(pp[0], pp[4], 0x20); in pack_16_pixels() 237 x[1] = _mm256_permute2x128_si256(pp[1], pp[5], 0x20); in pack_16_pixels() 238 x[2] = _mm256_permute2x128_si256(pp[2], pp[6], 0x20); in pack_16_pixels() 239 x[3] = _mm256_permute2x128_si256(pp[3], pp[7], 0x20); in pack_16_pixels() 242 x[6] = _mm256_permute2x128_si256(pp[0], pp[4], 0x31); in pack_16_pixels() 243 x[7] = _mm256_permute2x128_si256(pp[1], pp[5], 0x31); in pack_16_pixels() 251 x[0] = _mm256_permute2x128_si256(pp[0], pp[2], 0x30); in pack_8x1_pixels() 252 x[1] = _mm256_permute2x128_si256(pp[1], pp[3], 0x30); in pack_8x1_pixels() 253 x[2] = _mm256_permute2x128_si256(pp[2], pp[0], 0x30); in pack_8x1_pixels() 254 x[3] = _mm256_permute2x128_si256(pp[3], pp[1], 0x30); in pack_8x1_pixels() [all …]
|
D | vpx_subpixel_8t_intrin_avx2.c | 484 src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); in vpx_filter_block1d16_v4_avx2() 679 src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); in vpx_filter_block1d8_v4_avx2() 848 src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); in vpx_filter_block1d4_v4_avx2()
|
/external/libaom/libaom/av1/encoder/x86/ |
D | av1_quantize_avx2.c | 70 qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); in update_qp() 71 qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); in update_qp() 72 qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11); in update_qp() 81 __m256i x0 = _mm256_permute2x128_si256(y0, y1, 0x20); \ 82 __m256i x1 = _mm256_permute2x128_si256(y0, y1, 0x31); \ 213 dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31); in av1_quantize_lp_avx2() 214 quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31); in av1_quantize_lp_avx2() 215 round256 = _mm256_permute2x128_si256(round256, round256, 0x31); in av1_quantize_lp_avx2()
|
D | av1_highbd_quantize_avx2.c | 27 qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); in update_qp() 28 qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); in update_qp() 29 qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11); in update_qp()
|
D | av1_fwd_txfm2d_avx2.c | 1453 output[0 * stride] = _mm256_permute2x128_si256(t0, t4, 0x20); in transpose_32_8x8_avx2() 1454 output[1 * stride] = _mm256_permute2x128_si256(t1, t5, 0x20); in transpose_32_8x8_avx2() 1455 output[2 * stride] = _mm256_permute2x128_si256(t2, t6, 0x20); in transpose_32_8x8_avx2() 1456 output[3 * stride] = _mm256_permute2x128_si256(t3, t7, 0x20); in transpose_32_8x8_avx2() 1457 output[4 * stride] = _mm256_permute2x128_si256(t0, t4, 0x31); in transpose_32_8x8_avx2() 1458 output[5 * stride] = _mm256_permute2x128_si256(t1, t5, 0x31); in transpose_32_8x8_avx2() 1459 output[6 * stride] = _mm256_permute2x128_si256(t2, t6, 0x31); in transpose_32_8x8_avx2() 1460 output[7 * stride] = _mm256_permute2x128_si256(t3, t7, 0x31); in transpose_32_8x8_avx2()
|
/external/libvpx/libvpx/vp9/encoder/x86/ |
D | vp9_quantize_avx2.c | 105 dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31); in vp9_quantize_fp_avx2() 106 quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31); in vp9_quantize_fp_avx2() 107 round256 = _mm256_permute2x128_si256(round256, round256, 0x31); in vp9_quantize_fp_avx2()
|
/external/libaom/libaom/aom_dsp/simd/ |
D | v256_intrinsics_x86.h | 190 return _mm256_permute2x128_si256(a, b, 0x02); in v256_ziplo_128() 194 return _mm256_permute2x128_si256(a, b, 0x13); in v256_ziphi_128() 348 _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 1, 0, 1)), pattern), in v256_shuffle_8() 350 _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 0, 0, 0)), pattern), in v256_shuffle_8() 359 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 1, 0, 1)), p32), in v256_wideshuffle_8() 361 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 0, 0, 0)), p32), in v256_wideshuffle_8() 365 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 3, 0, 3)), pattern), in v256_wideshuffle_8() 367 _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 2, 0, 2)), pattern), in v256_wideshuffle_8() 394 r, _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1)))); in v256_dotp_s16() 417 r, _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1)))); in v256_dotp_s32() [all …]
|
/external/eigen/Eigen/src/Core/arch/CUDA/ |
D | PacketMathHalf.h | 604 __m256i a_p_0 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x20); 605 __m256i a_p_1 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x31); 606 __m256i a_p_2 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x20); 607 __m256i a_p_3 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x31); 608 __m256i a_p_4 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x20); 609 __m256i a_p_5 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x31); 610 __m256i a_p_6 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x20); 611 __m256i a_p_7 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x31); 612 __m256i a_p_8 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x20); 613 __m256i a_p_9 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x31); [all …]
|
/external/skia/src/opts/ |
D | SkBitmapProcState_opts.h | 108 __m256i abcd = _mm256_permute2x128_si256(lo, hi, 0x20), in S32_alpha_D32_filter_DX() 109 efgh = _mm256_permute2x128_si256(lo, hi, 0x31); in S32_alpha_D32_filter_DX()
|