/external/libaom/libaom/aom_dsp/x86/ |
D | common_avx2.h |
    21 __m256i tr0_0 = _mm256_unpacklo_epi16(in[0], in[1]); in mm256_transpose_16x16()
    23 __m256i tr0_2 = _mm256_unpacklo_epi16(in[2], in[3]); in mm256_transpose_16x16()
    25 __m256i tr0_4 = _mm256_unpacklo_epi16(in[4], in[5]); in mm256_transpose_16x16()
    27 __m256i tr0_6 = _mm256_unpacklo_epi16(in[6], in[7]); in mm256_transpose_16x16()
    30 __m256i tr0_8 = _mm256_unpacklo_epi16(in[8], in[9]); in mm256_transpose_16x16()
    32 __m256i tr0_a = _mm256_unpacklo_epi16(in[10], in[11]); in mm256_transpose_16x16()
    34 __m256i tr0_c = _mm256_unpacklo_epi16(in[12], in[13]); in mm256_transpose_16x16()
    36 __m256i tr0_e = _mm256_unpacklo_epi16(in[14], in[15]); in mm256_transpose_16x16()
|
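The tr0_* pairs above are the first interleave stage of mm256_transpose_16x16(): _mm256_unpacklo_epi16 zips the low four 16-bit words of each 128-bit lane of two rows. A minimal standalone sketch (not libaom code) showing just that step:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const __m256i row0 = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7,
                                         8, 9, 10, 11, 12, 13, 14, 15);
  const __m256i row1 = _mm256_setr_epi16(100, 101, 102, 103, 104, 105, 106, 107,
                                         108, 109, 110, 111, 112, 113, 114, 115);
  /* Per 128-bit lane: a0 b0 a1 b1 a2 b2 a3 b3. */
  const __m256i lo = _mm256_unpacklo_epi16(row0, row1);
  int16_t out[16];
  _mm256_storeu_si256((__m256i *)out, lo);
  for (int i = 0; i < 16; ++i) printf("%d ", out[i]);
  printf("\n"); /* 0 100 1 101 2 102 3 103 8 108 9 109 10 110 11 111 */
  return 0;
}

Repeating the unpacklo/unpackhi pairing at 32-, 64- and 128-bit widths completes the transpose; because the interleave never crosses a 128-bit lane, these kernels typically finish with a cross-lane permute.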
D | convolve_avx2.h |
    96 s[0] = _mm256_unpacklo_epi16(src_0, src_1); \
    97 s[1] = _mm256_unpacklo_epi16(src_2, src_3); \
    98 s[2] = _mm256_unpacklo_epi16(src_4, src_5); \
    110 s[3] = _mm256_unpacklo_epi16(s6, s7); \
    178 s[0] = _mm256_unpacklo_epi16(s0, s1); \
    179 s[1] = _mm256_unpacklo_epi16(s2, s3); \
    180 s[2] = _mm256_unpacklo_epi16(s4, s5); \
    192 s[3] = _mm256_unpacklo_epi16(s6, s7); \
    408 const __m256i data_lo = _mm256_unpacklo_epi16(*data_ref_0, *res_unsigned); in comp_avg()
|
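In the convolve macros the interleaved rows feed _mm256_madd_epi16, so one multiply-add applies two vertical filter taps per pixel. A sketch of that building block, assuming the two taps are packed low/high into each 32-bit lane (the helper and packing are illustrative, not the libaom macros):

#include <immintrin.h>
#include <stdint.h>

/* r0[i]*tap0 + r1[i]*tap1 as 32-bit sums for the low four pixels of each
 * 128-bit lane; the unpackhi variant covers the other four, and an 8-tap
 * filter accumulates four of these results. */
static inline __m256i vfilter_two_taps_lo(__m256i row0, __m256i row1,
                                          int16_t tap0, int16_t tap1) {
  const __m256i taps = _mm256_set1_epi32(
      (int32_t)(((uint32_t)(uint16_t)tap1 << 16) | (uint16_t)tap0));
  const __m256i pairs = _mm256_unpacklo_epi16(row0, row1); /* r0 r1 r0 r1 ... */
  return _mm256_madd_epi16(pairs, taps);
}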
D | highbd_convolve_avx2.c |
    185 s[0] = _mm256_unpacklo_epi16(s01, s12); in av1_highbd_convolve_y_sr_avx2()
    186 s[1] = _mm256_unpacklo_epi16(s23, s34); in av1_highbd_convolve_y_sr_avx2()
    187 s[2] = _mm256_unpacklo_epi16(s45, s56); in av1_highbd_convolve_y_sr_avx2()
    210 s[3] = _mm256_unpacklo_epi16(s67, s78); in av1_highbd_convolve_y_sr_avx2()
    337 __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1); in av1_highbd_convolve_x_sr_avx2()
    879 sig[0] = _mm256_unpacklo_epi16(s0, s1); in pack_8x9_init()
    881 sig[1] = _mm256_unpacklo_epi16(s2, s3); in pack_8x9_init()
    883 sig[2] = _mm256_unpacklo_epi16(s4, s5); in pack_8x9_init()
    898 sig[3] = _mm256_unpacklo_epi16(s2, s3); in pack_8x9_pixels()
    958 sig[0] = _mm256_unpacklo_epi16(u0, u2); in pack_16x9_init()
    [all …]
|
D | txfm_common_avx2.h |
    31 __m256i t0 = _mm256_unpacklo_epi16(*in0, *in1); in btf_16_w16_avx2()
    126 t[2 * i] = _mm256_unpacklo_epi16(in[2 * i], in[2 * i + 1]); in transpose2_8x8_avx2()
    200 const __m256i a0 = _mm256_unpacklo_epi16(in[0], in[1]); in transpose_16bit_16x8_avx2()
    201 const __m256i a1 = _mm256_unpacklo_epi16(in[2], in[3]); in transpose_16bit_16x8_avx2()
    202 const __m256i a2 = _mm256_unpacklo_epi16(in[4], in[5]); in transpose_16bit_16x8_avx2()
    203 const __m256i a3 = _mm256_unpacklo_epi16(in[6], in[7]); in transpose_16bit_16x8_avx2()
    306 const __m256i a_lo = _mm256_unpacklo_epi16(a, one); in store_rect_16bit_to_32bit_w8_avx2()
|
D | blend_a64_mask_avx2.c |
    35 __m256i res0_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0_0, s1_0), in blend_a64_d16_mask_w16_avx2()
    36     _mm256_unpacklo_epi16(*m0, max_minus_m0)); in blend_a64_d16_mask_w16_avx2()
    59 __m256i res0_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0_0, s1_0), in blend_a64_d16_mask_w32_avx2()
    60     _mm256_unpacklo_epi16(*m0, max_minus_m0)); in blend_a64_d16_mask_w32_avx2()
    63 __m256i res1_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0_1, s1_1), in blend_a64_d16_mask_w32_avx2()
    64     _mm256_unpacklo_epi16(*m1, max_minus_m1)); in blend_a64_d16_mask_w32_avx2()
    928 const __m256i mul0l = _mm256_unpacklo_epi16(mul0_lows, mul0_highs); in highbd_blend_a64_d16_mask_w4_avx2()
    936 const __m256i mul1l = _mm256_unpacklo_epi16(mul1_lows, mul1_highs); in highbd_blend_a64_d16_mask_w4_avx2()
    1046 const __m256i mul0al = _mm256_unpacklo_epi16(mul0a_lows, mul0a_highs); in highbd_blend_a64_d16_mask_w8_avx2()
    1054 const __m256i mul1al = _mm256_unpacklo_epi16(mul1a_lows, mul1a_highs); in highbd_blend_a64_d16_mask_w8_avx2()
    [all …]
|
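blend_a64_d16_mask interleaves the two source pixels and the pair (m, 64 - m) so a single _mm256_madd_epi16 evaluates s0*m + s1*(64 - m) per pixel, 64 being AOM_BLEND_A64_MAX_ALPHA. A reduced sketch of the low-half core, with the offset/rounding handling of the real kernels omitted and names made up for illustration:

#include <immintrin.h>

/* Assumes the inputs already fit in signed 16 bits and mask values lie in
 * [0, 64]; returns the 32-bit weighted sums for the low half of each lane. */
static inline __m256i blend_a64_lo_sketch(__m256i s0, __m256i s1, __m256i m) {
  const __m256i max_alpha = _mm256_set1_epi16(64);
  const __m256i max_minus_m = _mm256_sub_epi16(max_alpha, m);
  const __m256i px_pairs = _mm256_unpacklo_epi16(s0, s1); /* s0 s1 s0 s1 ... */
  const __m256i wt_pairs = _mm256_unpacklo_epi16(m, max_minus_m);
  return _mm256_madd_epi16(px_pairs, wt_pairs); /* s0*m + s1*(64 - m) */
}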
D | masked_sad_intrin_avx2.c |
    226 const __m256i data_l = _mm256_unpacklo_epi16(a, b); in highbd_masked_sad8xh_avx2()
    227 const __m256i mask_l = _mm256_unpacklo_epi16(m, m_inv); in highbd_masked_sad8xh_avx2()
    283 const __m256i data_l = _mm256_unpacklo_epi16(a, b); in highbd_masked_sad16xh_avx2()
    284 const __m256i mask_l = _mm256_unpacklo_epi16(m, m_inv); in highbd_masked_sad16xh_avx2()
|
D | bitdepth_conversion_avx2.h | 28 const __m256i a_1 = _mm256_unpacklo_epi16(a_lo, a_hi); in store_tran_low()
|
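store_tran_low() widens 16-bit coefficients to 32-bit tran_low_t storage; interleaving a value with a word holding its sign bits is the usual unpack-based sign extension. A sketch of the idea only (the header's actual code differs in how it forms the sign word and orders the stores):

#include <immintrin.h>

/* Sign-extend 16 packed int16 values to int32.  The unpacks stay within
 * 128-bit lanes, so *lo32 holds elements 0-3 and 8-11 and *hi32 holds
 * 4-7 and 12-15; callers must account for that ordering. */
static inline void sign_extend_epi16_epi32(__m256i a, __m256i *lo32,
                                           __m256i *hi32) {
  const __m256i sign = _mm256_srai_epi16(a, 15); /* 0x0000 or 0xFFFF */
  *lo32 = _mm256_unpacklo_epi16(a, sign);
  *hi32 = _mm256_unpackhi_epi16(a, sign);
}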
D | sum_squares_avx2.c |
    201 __m256i vsrc0 = _mm256_unpacklo_epi16(vsrc[k], vzero); in aom_var_2d_u16_avx2()
    220 __m256i vsrc0 = _mm256_unpacklo_epi16(vsrc, vzero); in aom_var_2d_u16_avx2()
|
D | avg_intrin_avx2.c |
    57 a0 = _mm256_unpacklo_epi16(b0, b1); in hadamard_col8x2_avx2()
    58 a1 = _mm256_unpacklo_epi16(b2, b3); in hadamard_col8x2_avx2()
    61 a4 = _mm256_unpacklo_epi16(b4, b5); in hadamard_col8x2_avx2()
    62 a5 = _mm256_unpacklo_epi16(b6, b7); in hadamard_col8x2_avx2()
|
D | adaptive_quantize_avx2.c |
    62 coeff[0] = _mm256_unpacklo_epi16(*qcoeff, zero); in update_mask0_avx2()
    90 __m256i coeff_vals_lo = _mm256_unpacklo_epi16(coeff_vals, coeff_sign); in store_coefficients_avx2()
|
D | intrapred_avx2.c |
    183 w0 = _mm256_unpacklo_epi16(x[0], x[1]);  // 00 10 01 11 02 12 03 13 in highbd_transpose4x16_avx2()
    184 w1 = _mm256_unpacklo_epi16(x[2], x[3]);  // 20 30 21 31 22 32 23 33 in highbd_transpose4x16_avx2()
    204 w0 = _mm256_unpacklo_epi16(x[0], x[1]);  // 00 10 01 11 02 12 03 13 in highbd_transpose8x16_16x8_avx2()
    205 w1 = _mm256_unpacklo_epi16(x[2], x[3]);  // 20 30 21 31 22 32 23 33 in highbd_transpose8x16_16x8_avx2()
    206 w2 = _mm256_unpacklo_epi16(x[4], x[5]);  // 40 50 41 51 42 52 43 53 in highbd_transpose8x16_16x8_avx2()
    207 w3 = _mm256_unpacklo_epi16(x[6], x[7]);  // 60 70 61 71 62 72 63 73 in highbd_transpose8x16_16x8_avx2()
    242 w0 = _mm256_unpacklo_epi16(x[0], x[1]); in highbd_transpose16x16_avx2()
    243 w1 = _mm256_unpacklo_epi16(x[2], x[3]); in highbd_transpose16x16_avx2()
    244 w2 = _mm256_unpacklo_epi16(x[4], x[5]); in highbd_transpose16x16_avx2()
    245 w3 = _mm256_unpacklo_epi16(x[6], x[7]); in highbd_transpose16x16_avx2()
    [all …]
|
/external/libaom/libaom/av1/common/x86/ |
D | highbd_jnt_convolve_avx2.c |
    74 const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_0, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    77 const __m256i res_32b_lo = _mm256_unpacklo_epi16(res, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    134 const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    136 const __m256i res_32b = _mm256_unpacklo_epi16(res, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    178 const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    181 const __m256i res_32b_lo = _mm256_unpacklo_epi16(res, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2()
    319 __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1); in av1_highbd_dist_wtd_convolve_2d_avx2()
    334 s[0] = _mm256_unpacklo_epi16(s0, s1); in av1_highbd_dist_wtd_convolve_2d_avx2()
    335 s[1] = _mm256_unpacklo_epi16(s2, s3); in av1_highbd_dist_wtd_convolve_2d_avx2()
    336 s[2] = _mm256_unpacklo_epi16(s4, s5); in av1_highbd_dist_wtd_convolve_2d_avx2()
    [all …]
|
D | warp_plane_avx2.c |
    178 const __m256i tmp_12 = _mm256_unpacklo_epi16(tmp0_256, tmp2_256); in prepare_horizontal_filter_coeff_avx2()
    179 const __m256i tmp_13 = _mm256_unpacklo_epi16(tmp1_256, tmp3_256); in prepare_horizontal_filter_coeff_avx2()
    180 const __m256i tmp_14 = _mm256_unpacklo_epi16(tmp4_256, tmp6_256); in prepare_horizontal_filter_coeff_avx2()
    181 const __m256i tmp_15 = _mm256_unpacklo_epi16(tmp5_256, tmp7_256); in prepare_horizontal_filter_coeff_avx2()
    443 *wt = _mm256_unpacklo_epi16(wt0, wt1); in unpack_weights_and_set_round_const_avx2()
    633 src[6] = _mm256_unpacklo_epi16(src_6, src_7); in filter_src_pixels_vertical_avx2()
    688 const __m256i p_16_lo = _mm256_unpacklo_epi16(p_16, temp_lo_16); in store_vertical_filter_output_avx2()
    731 const __m256i p_16_hi = _mm256_unpacklo_epi16(p4_16, temp_hi_16); in store_vertical_filter_output_avx2()
    798 src[0] = _mm256_unpacklo_epi16(src_0, src_1); in warp_vertical_filter_avx2()
    799 src[2] = _mm256_unpacklo_epi16(src_2, src_3); in warp_vertical_filter_avx2()
    [all …]
|
D | wiener_convolve_avx2.c |
    160 s[0] = _mm256_unpacklo_epi16(src_0, src_1); in av1_wiener_convolve_add_src_avx2()
    161 s[1] = _mm256_unpacklo_epi16(src_2, src_3); in av1_wiener_convolve_add_src_avx2()
    162 s[2] = _mm256_unpacklo_epi16(src_4, src_5); in av1_wiener_convolve_add_src_avx2()
    176 s[3] = _mm256_unpacklo_epi16(s6, s7); in av1_wiener_convolve_add_src_avx2()
|
D | highbd_convolve_2d_avx2.c |
    100 __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1); in av1_highbd_convolve_2d_sr_avx2()
    115 s[0] = _mm256_unpacklo_epi16(s0, s1); in av1_highbd_convolve_2d_sr_avx2()
    116 s[1] = _mm256_unpacklo_epi16(s2, s3); in av1_highbd_convolve_2d_sr_avx2()
    117 s[2] = _mm256_unpacklo_epi16(s4, s5); in av1_highbd_convolve_2d_sr_avx2()
    131 s[3] = _mm256_unpacklo_epi16(s6, s7); in av1_highbd_convolve_2d_sr_avx2()
|
D | highbd_wiener_convolve_avx2.c |
    191 const __m256i src_0 = _mm256_unpacklo_epi16(data_0, data_1); in av1_highbd_wiener_convolve_add_src_avx2()
    192 const __m256i src_2 = _mm256_unpacklo_epi16(data_2, data_3); in av1_highbd_wiener_convolve_add_src_avx2()
    193 const __m256i src_4 = _mm256_unpacklo_epi16(data_4, data_5); in av1_highbd_wiener_convolve_add_src_avx2()
    194 const __m256i src_6 = _mm256_unpacklo_epi16(data_6, data_7); in av1_highbd_wiener_convolve_add_src_avx2()
|
D | jnt_convolve_avx2.c |
    28 const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1); in unpack_weights_avx2()
    276 const __m256i res_lo_0_32b = _mm256_unpacklo_epi16(res_lo, zero); in av1_dist_wtd_convolve_y_avx2()
    334 const __m256i res_hi_0_32b = _mm256_unpacklo_epi16(res_hi, zero); in av1_dist_wtd_convolve_y_avx2()
    455 const __m256i res_lo_0_32b = _mm256_unpacklo_epi16(res_lo, zero); in av1_dist_wtd_convolve_y_avx2()
    513 const __m256i res_hi_0_32b = _mm256_unpacklo_epi16(res_hi, zero); in av1_dist_wtd_convolve_y_avx2()
    692 s[0] = _mm256_unpacklo_epi16(s0, s1); in av1_dist_wtd_convolve_2d_avx2()
    693 s[1] = _mm256_unpacklo_epi16(s2, s3); in av1_dist_wtd_convolve_2d_avx2()
    706 s[2] = _mm256_unpacklo_epi16(s4, s5); in av1_dist_wtd_convolve_2d_avx2()
|
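unpack_weights_avx2() interleaves the forward and backward compound weights so the same unpack-plus-madd pattern produces the distance-weighted average p0*wt0 + p1*wt1; in AV1 the two weights sum to 16 (DIST_PRECISION_BITS == 4). A sketch under that assumption, with rounding offsets omitted and the helper name invented:

#include <immintrin.h>

/* wt is _mm256_unpacklo_epi16(wt0, wt1): fwd, bck, fwd, bck, ... */
static inline __m256i dist_wtd_avg_lo_sketch(__m256i p0, __m256i p1,
                                             __m256i wt) {
  const __m256i pairs = _mm256_unpacklo_epi16(p0, p1); /* p0 p1 p0 p1 ... */
  const __m256i wsum = _mm256_madd_epi16(pairs, wt);   /* p0*fwd + p1*bck */
  return _mm256_srai_epi32(wsum, 4);                   /* divide by 16 */
}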
D | convolve_2d_avx2.c |
    124 s[0] = _mm256_unpacklo_epi16(src_0, src_1); in av1_convolve_2d_sr_avx2()
    125 s[1] = _mm256_unpacklo_epi16(src_2, src_3); in av1_convolve_2d_sr_avx2()
    137 s[2] = _mm256_unpacklo_epi16(s4, s5); in av1_convolve_2d_sr_avx2()
|
/external/libaom/libaom/av1/encoder/x86/ |
D | wedge_utils_avx2.c |
    49 const __m256i v_rd0l_w = _mm256_unpacklo_epi16(v_d0_w, v_r0_w); in av1_wedge_sse_from_residuals_avx2()
    53 const __m256i v_m0l_w = _mm256_unpacklo_epi16(v_m0_w, v_mask_max_w); in av1_wedge_sse_from_residuals_avx2()
    172 const __m256i v_ab0l_w = _mm256_unpacklo_epi16(v_a0_w, v_b0_w); in av1_wedge_compute_delta_squares_avx2()
    174 const __m256i v_ab1l_w = _mm256_unpacklo_epi16(v_a1_w, v_b1_w); in av1_wedge_compute_delta_squares_avx2()
    176 const __m256i v_ab2l_w = _mm256_unpacklo_epi16(v_a2_w, v_b2_w); in av1_wedge_compute_delta_squares_avx2()
    178 const __m256i v_ab3l_w = _mm256_unpacklo_epi16(v_a3_w, v_b3_w); in av1_wedge_compute_delta_squares_avx2()
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | fwd_dct32x32_impl_avx2.h |
    357 const __m256i s2_20_0 = _mm256_unpacklo_epi16(step1[27], step1[20]); in FDCT32x32_2D_AVX2()
    359 const __m256i s2_21_0 = _mm256_unpacklo_epi16(step1[26], step1[21]); in FDCT32x32_2D_AVX2()
    361 const __m256i s2_22_0 = _mm256_unpacklo_epi16(step1[25], step1[22]); in FDCT32x32_2D_AVX2()
    363 const __m256i s2_23_0 = _mm256_unpacklo_epi16(step1[24], step1[23]); in FDCT32x32_2D_AVX2()
    594 const __m256i s3_10_0 = _mm256_unpacklo_epi16(step2[13], step2[10]); in FDCT32x32_2D_AVX2()
    596 const __m256i s3_11_0 = _mm256_unpacklo_epi16(step2[12], step2[11]); in FDCT32x32_2D_AVX2()
    672 const __m256i s1_05_0 = _mm256_unpacklo_epi16(step3[6], step3[5]); in FDCT32x32_2D_AVX2()
    696 const __m256i s1_18_0 = _mm256_unpacklo_epi16(step3[18], step3[29]); in FDCT32x32_2D_AVX2()
    698 const __m256i s1_19_0 = _mm256_unpacklo_epi16(step3[19], step3[28]); in FDCT32x32_2D_AVX2()
    700 const __m256i s1_20_0 = _mm256_unpacklo_epi16(step3[20], step3[27]); in FDCT32x32_2D_AVX2()
    [all …]
|
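Each FDCT stage pairs its two butterfly inputs with unpacklo/unpackhi and multiplies by interleaved cosine constants, so one _mm256_madd_epi16 plus a 14-bit rounding shift (DCT_CONST_BITS) yields a rotated output. An illustrative sketch of one such rotation, not the libvpx macro itself:

#include <immintrin.h>
#include <stdint.h>

/* round((x*c0 + y*c1) / 2^14) per 32-bit lane, for the low half of each
 * 128-bit lane; the real kernel issues the unpackhi half alongside it. */
static inline __m256i butterfly_lo_sketch(__m256i x, __m256i y,
                                          int16_t c0, int16_t c1) {
  const __m256i k = _mm256_set1_epi32(
      (int32_t)(((uint32_t)(uint16_t)c1 << 16) | (uint16_t)c0));
  const __m256i rounding = _mm256_set1_epi32(1 << 13);
  const __m256i pair = _mm256_unpacklo_epi16(x, y); /* x0 y0 x1 y1 ... */
  const __m256i prod = _mm256_madd_epi16(pair, k);  /* x*c0 + y*c1 */
  return _mm256_srai_epi32(_mm256_add_epi32(prod, rounding), 14);
}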
D | highbd_convolve_avx2.c |
    530 sig[0] = _mm256_unpacklo_epi16(s0, s1); in pack_8x9_init()
    532 sig[1] = _mm256_unpacklo_epi16(s2, s3); in pack_8x9_init()
    534 sig[2] = _mm256_unpacklo_epi16(s4, s5); in pack_8x9_init()
    549 sig[3] = _mm256_unpacklo_epi16(s2, s3); in pack_8x9_pixels()
    609 sig[0] = _mm256_unpacklo_epi16(u0, u2); in pack_16x9_init()
    612 sig[8] = _mm256_unpacklo_epi16(u1, u3); in pack_16x9_init()
    621 sig[1] = _mm256_unpacklo_epi16(u0, u2); in pack_16x9_init()
    624 sig[9] = _mm256_unpacklo_epi16(u1, u3); in pack_16x9_init()
    633 sig[2] = _mm256_unpacklo_epi16(u0, u2); in pack_16x9_init()
    636 sig[10] = _mm256_unpacklo_epi16(u1, u3); in pack_16x9_init()
    [all …]
|
D | bitdepth_conversion_avx2.h | 36 const __m256i a_1 = _mm256_unpacklo_epi16(a_lo, a_hi); in store_tran_low()
|
D | avg_intrin_avx2.c |
    259 a0 = _mm256_unpacklo_epi16(b0, b1); in hadamard_col8x2_avx2()
    260 a1 = _mm256_unpacklo_epi16(b2, b3); in hadamard_col8x2_avx2()
    263 a4 = _mm256_unpacklo_epi16(b4, b5); in hadamard_col8x2_avx2()
    264 a5 = _mm256_unpacklo_epi16(b6, b7); in hadamard_col8x2_avx2()
|
/external/eigen/Eigen/src/Core/arch/CUDA/ |
D | PacketMathHalf.h |
    550 __m256i ab_07 = _mm256_unpacklo_epi16(a, b);
    551 __m256i cd_07 = _mm256_unpacklo_epi16(c, d);
    552 __m256i ef_07 = _mm256_unpacklo_epi16(e, f);
    553 __m256i gh_07 = _mm256_unpacklo_epi16(g, h);
    554 __m256i ij_07 = _mm256_unpacklo_epi16(i, j);
    555 __m256i kl_07 = _mm256_unpacklo_epi16(k, l);
    556 __m256i mn_07 = _mm256_unpacklo_epi16(m, n);
    557 __m256i op_07 = _mm256_unpacklo_epi16(o, p);
|
/external/gemmlowp/internal/ |
D | pack_avx.h |
    87 __m256i ymm5 = _mm256_unpacklo_epi16(ymm1, ymm2); in Pack()
    88 __m256i ymm6 = _mm256_unpacklo_epi16(ymm3, ymm4); in Pack()
|