/external/libaom/libaom/aom_dsp/x86/ |
D | common_avx2.h | 22 __m256i tr0_1 = _mm256_unpackhi_epi16(in[0], in[1]); in mm256_transpose_16x16() 24 __m256i tr0_3 = _mm256_unpackhi_epi16(in[2], in[3]); in mm256_transpose_16x16() 26 __m256i tr0_5 = _mm256_unpackhi_epi16(in[4], in[5]); in mm256_transpose_16x16() 28 __m256i tr0_7 = _mm256_unpackhi_epi16(in[6], in[7]); in mm256_transpose_16x16() 31 __m256i tr0_9 = _mm256_unpackhi_epi16(in[8], in[9]); in mm256_transpose_16x16() 33 __m256i tr0_b = _mm256_unpackhi_epi16(in[10], in[11]); in mm256_transpose_16x16() 35 __m256i tr0_d = _mm256_unpackhi_epi16(in[12], in[13]); in mm256_transpose_16x16() 37 __m256i tr0_f = _mm256_unpackhi_epi16(in[14], in[15]); in mm256_transpose_16x16()
|
D | convolve_avx2.h | 75 s[4] = _mm256_unpackhi_epi16(src_0, src_1); \ 76 s[5] = _mm256_unpackhi_epi16(src_2, src_3); \ 77 s[6] = _mm256_unpackhi_epi16(src_4, src_5); \ 86 s[7] = _mm256_unpackhi_epi16(s6, s7); \ 157 s[4] = _mm256_unpackhi_epi16(s0, s1); \ 158 s[5] = _mm256_unpackhi_epi16(s2, s3); \ 159 s[6] = _mm256_unpackhi_epi16(s4, s5); \ 168 s[7] = _mm256_unpackhi_epi16(s6, s7); \ 384 const __m256i data_hi = _mm256_unpackhi_epi16(*data_ref_0, *res_unsigned); in comp_avg()
|
D | txfm_common_avx2.h | 32 __m256i t1 = _mm256_unpackhi_epi16(*in0, *in1); in btf_16_w16_avx2() 138 a[i / 2 + 8] = _mm256_unpackhi_epi16(in[i], in[i + 1]); in transpose_16bit_16x16_avx2() 177 const __m256i a4 = _mm256_unpackhi_epi16(in[0], in[1]); in transpose_16bit_16x8_avx2() 178 const __m256i a5 = _mm256_unpackhi_epi16(in[2], in[3]); in transpose_16bit_16x8_avx2() 179 const __m256i a6 = _mm256_unpackhi_epi16(in[4], in[5]); in transpose_16bit_16x8_avx2() 180 const __m256i a7 = _mm256_unpackhi_epi16(in[6], in[7]); in transpose_16bit_16x8_avx2() 280 const __m256i a_hi = _mm256_unpackhi_epi16(a, one); in store_rect_16bit_to_32bit_w8_avx2()
|
D | highbd_convolve_avx2.c | 189 s[4] = _mm256_unpackhi_epi16(s01, s12); in av1_highbd_convolve_y_sr_avx2() 190 s[5] = _mm256_unpackhi_epi16(s23, s34); in av1_highbd_convolve_y_sr_avx2() 191 s[6] = _mm256_unpackhi_epi16(s45, s56); in av1_highbd_convolve_y_sr_avx2() 211 s[7] = _mm256_unpackhi_epi16(s67, s78); in av1_highbd_convolve_y_sr_avx2() 880 sig[4] = _mm256_unpackhi_epi16(s0, s1); in pack_8x9_init() 882 sig[5] = _mm256_unpackhi_epi16(s2, s3); in pack_8x9_init() 884 sig[6] = _mm256_unpackhi_epi16(s4, s5); in pack_8x9_init() 899 sig[7] = _mm256_unpackhi_epi16(s2, s3); in pack_8x9_pixels() 959 sig[4] = _mm256_unpackhi_epi16(u0, u2); in pack_16x9_init() 962 sig[12] = _mm256_unpackhi_epi16(u1, u3); in pack_16x9_init() [all …]
|
D | avg_intrin_avx2.c | 59 a2 = _mm256_unpackhi_epi16(b0, b1); in hadamard_col8x2_avx2() 60 a3 = _mm256_unpackhi_epi16(b2, b3); in hadamard_col8x2_avx2() 63 a6 = _mm256_unpackhi_epi16(b4, b5); in hadamard_col8x2_avx2() 64 a7 = _mm256_unpackhi_epi16(b6, b7); in hadamard_col8x2_avx2()
|
D | blend_a64_mask_avx2.c | 37 __m256i res0_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0_0, s1_0), in blend_a64_d16_mask_w16_avx2() 38 _mm256_unpackhi_epi16(*m0, max_minus_m0)); in blend_a64_d16_mask_w16_avx2() 61 __m256i res0_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0_0, s1_0), in blend_a64_d16_mask_w32_avx2() 62 _mm256_unpackhi_epi16(*m0, max_minus_m0)); in blend_a64_d16_mask_w32_avx2() 65 __m256i res1_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0_1, s1_1), in blend_a64_d16_mask_w32_avx2() 66 _mm256_unpackhi_epi16(*m1, max_minus_m1)); in blend_a64_d16_mask_w32_avx2() 926 const __m256i mul0h = _mm256_unpackhi_epi16(mul0_lows, mul0_highs); in highbd_blend_a64_d16_mask_w4_avx2() 934 const __m256i mul1h = _mm256_unpackhi_epi16(mul1_lows, mul1_highs); in highbd_blend_a64_d16_mask_w4_avx2() 1044 const __m256i mul0ah = _mm256_unpackhi_epi16(mul0a_lows, mul0a_highs); in highbd_blend_a64_d16_mask_w8_avx2() 1052 const __m256i mul1ah = _mm256_unpackhi_epi16(mul1a_lows, mul1a_highs); in highbd_blend_a64_d16_mask_w8_avx2() [all …]
|
D | masked_sad_intrin_avx2.c | 232 const __m256i data_r = _mm256_unpackhi_epi16(a, b); in highbd_masked_sad8xh_avx2() 233 const __m256i mask_r = _mm256_unpackhi_epi16(m, m_inv); in highbd_masked_sad8xh_avx2() 289 const __m256i data_r = _mm256_unpackhi_epi16(a, b); in highbd_masked_sad16xh_avx2() 290 const __m256i mask_r = _mm256_unpackhi_epi16(m, m_inv); in highbd_masked_sad16xh_avx2()
|
D | sad_highbd_avx2.c | 101 s1 = _mm256_unpackhi_epi16(sum0, zero); in aom_highbd_sad16x8_avx2() 103 r1 = _mm256_unpackhi_epi16(sum1, zero); in aom_highbd_sad16x8_avx2() 149 s1 = _mm256_unpackhi_epi16(sum0, zero); in aom_highbd_sad16x16_avx2() 203 r1 = _mm256_unpackhi_epi16(s0, zero); in sad32x4() 328 r[1] = _mm256_unpackhi_epi16(s[0], zero); in sad64x2() 330 r[3] = _mm256_unpackhi_epi16(s[4], zero); in sad64x2() 432 r[1] = _mm256_unpackhi_epi16(s[0], zero); in sad128x1() 434 r[3] = _mm256_unpackhi_epi16(s[4], zero); in sad128x1() 517 r1 = _mm256_unpackhi_epi16(s0, zero); in sad16x4()
|
D | bitdepth_conversion_avx2.h | 29 const __m256i a_2 = _mm256_unpackhi_epi16(a_lo, a_hi); in store_tran_low()
|
D | intrapred_avx2.c | 116 w2 = _mm256_unpackhi_epi16(x[0], x[1]); // 40 50 41 51 42 52 43 53 in highbd_transpose4x16_avx2() 117 w3 = _mm256_unpackhi_epi16(x[2], x[3]); // 60 70 61 71 62 72 63 73 in highbd_transpose4x16_avx2() 152 w0 = _mm256_unpackhi_epi16(x[0], x[1]); // 04 14 05 15 06 16 07 17 in highbd_transpose8x16_16x8_avx2() 153 w1 = _mm256_unpackhi_epi16(x[2], x[3]); // 24 34 25 35 26 36 27 37 in highbd_transpose8x16_16x8_avx2() 154 w2 = _mm256_unpackhi_epi16(x[4], x[5]); // 44 54 45 55 46 56 47 57 in highbd_transpose8x16_16x8_avx2() 155 w3 = _mm256_unpackhi_epi16(x[6], x[7]); // 64 74 65 75 66 76 67 77 in highbd_transpose8x16_16x8_avx2() 190 w0 = _mm256_unpackhi_epi16(x[0], x[1]); in highbd_transpose16x16_avx2() 191 w1 = _mm256_unpackhi_epi16(x[2], x[3]); in highbd_transpose16x16_avx2() 192 w2 = _mm256_unpackhi_epi16(x[4], x[5]); in highbd_transpose16x16_avx2() 193 w3 = _mm256_unpackhi_epi16(x[6], x[7]); in highbd_transpose16x16_avx2() [all …]
|
D | variance_avx2.c | 425 const __m256i s_hi = _mm256_unpackhi_epi16(s0, s1); in highbd_comp_mask_pred_line_avx2() 426 const __m256i a_hi = _mm256_unpackhi_epi16(a, a_inv); in highbd_comp_mask_pred_line_avx2()
|
/external/libaom/libaom/av1/encoder/x86/ |
D | wedge_utils_avx2.c | 50 const __m256i v_rd0h_w = _mm256_unpackhi_epi16(v_d0_w, v_r0_w); in av1_wedge_sse_from_residuals_avx2() 54 const __m256i v_m0h_w = _mm256_unpackhi_epi16(v_m0_w, v_mask_max_w); in av1_wedge_sse_from_residuals_avx2() 173 const __m256i v_ab0h_w = _mm256_unpackhi_epi16(v_a0_w, v_b0_w); in av1_wedge_compute_delta_squares_avx2() 175 const __m256i v_ab1h_w = _mm256_unpackhi_epi16(v_a1_w, v_b1_w); in av1_wedge_compute_delta_squares_avx2() 177 const __m256i v_ab2h_w = _mm256_unpackhi_epi16(v_a2_w, v_b2_w); in av1_wedge_compute_delta_squares_avx2() 179 const __m256i v_ab3h_w = _mm256_unpackhi_epi16(v_a3_w, v_b3_w); in av1_wedge_compute_delta_squares_avx2()
|
D | av1_quantize_avx2.c | 80 __m256i y1 = _mm256_unpackhi_epi16(q, sign_bits); \
|
/external/libaom/libaom/av1/common/x86/ |
D | highbd_jnt_convolve_avx2.c | 75 const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_0, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2() 85 const __m256i res_32b_hi = _mm256_unpackhi_epi16(res, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2() 179 const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2() 189 const __m256i res_32b_hi = _mm256_unpackhi_epi16(res, zero); in av1_highbd_dist_wtd_convolve_2d_copy_avx2() 338 s[4] = _mm256_unpackhi_epi16(s0, s1); in av1_highbd_dist_wtd_convolve_2d_avx2() 339 s[5] = _mm256_unpackhi_epi16(s2, s3); in av1_highbd_dist_wtd_convolve_2d_avx2() 340 s[6] = _mm256_unpackhi_epi16(s4, s5); in av1_highbd_dist_wtd_convolve_2d_avx2() 351 s[7] = _mm256_unpackhi_epi16(s6, s7); in av1_highbd_dist_wtd_convolve_2d_avx2() 416 const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero); in av1_highbd_dist_wtd_convolve_2d_avx2() 597 const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero); in av1_highbd_dist_wtd_convolve_x_avx2() [all …]
|
D | highbd_wiener_convolve_avx2.c | 205 const __m256i src_1 = _mm256_unpackhi_epi16(data_0, data_1); in av1_highbd_wiener_convolve_add_src_avx2() 206 const __m256i src_3 = _mm256_unpackhi_epi16(data_2, data_3); in av1_highbd_wiener_convolve_add_src_avx2() 207 const __m256i src_5 = _mm256_unpackhi_epi16(data_4, data_5); in av1_highbd_wiener_convolve_add_src_avx2() 208 const __m256i src_7 = _mm256_unpackhi_epi16(data_6, data_7); in av1_highbd_wiener_convolve_add_src_avx2()
|
D | wiener_convolve_avx2.c | 211 const __m256i src_1 = _mm256_unpackhi_epi16(data_0, data_1); in av1_wiener_convolve_add_src_avx2() 212 const __m256i src_3 = _mm256_unpackhi_epi16(data_2, data_3); in av1_wiener_convolve_add_src_avx2() 213 const __m256i src_5 = _mm256_unpackhi_epi16(data_4, data_5); in av1_wiener_convolve_add_src_avx2() 214 const __m256i src_7 = _mm256_unpackhi_epi16(data_6, data_7); in av1_wiener_convolve_add_src_avx2()
|
D | highbd_convolve_2d_avx2.c | 119 s[4] = _mm256_unpackhi_epi16(s0, s1); in av1_highbd_convolve_2d_sr_avx2() 120 s[5] = _mm256_unpackhi_epi16(s2, s3); in av1_highbd_convolve_2d_sr_avx2() 121 s[6] = _mm256_unpackhi_epi16(s4, s5); in av1_highbd_convolve_2d_sr_avx2() 132 s[7] = _mm256_unpackhi_epi16(s6, s7); in av1_highbd_convolve_2d_sr_avx2()
|
D | convolve_2d_avx2.c | 126 s[3] = _mm256_unpackhi_epi16(src_0, src_1); in av1_convolve_2d_sr_avx2() 127 s[4] = _mm256_unpackhi_epi16(src_2, src_3); in av1_convolve_2d_sr_avx2() 138 s[5] = _mm256_unpackhi_epi16(s4, s5); in av1_convolve_2d_sr_avx2()
|
D | jnt_convolve_avx2.c | 282 const __m256i res_lo_1_32b = _mm256_unpackhi_epi16(res_lo, zero); in av1_dist_wtd_convolve_y_avx2() 340 const __m256i res_hi_1_32b = _mm256_unpackhi_epi16(res_hi, zero); in av1_dist_wtd_convolve_y_avx2() 461 const __m256i res_lo_1_32b = _mm256_unpackhi_epi16(res_lo, zero); in av1_dist_wtd_convolve_y_avx2() 519 const __m256i res_hi_1_32b = _mm256_unpackhi_epi16(res_hi, zero); in av1_dist_wtd_convolve_y_avx2() 695 s[3] = _mm256_unpackhi_epi16(s0, s1); in av1_dist_wtd_convolve_2d_avx2() 696 s[4] = _mm256_unpackhi_epi16(s2, s3); in av1_dist_wtd_convolve_2d_avx2() 707 s[5] = _mm256_unpackhi_epi16(s4, s5); in av1_dist_wtd_convolve_2d_avx2()
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | fwd_dct32x32_impl_avx2.h | 358 const __m256i s2_20_1 = _mm256_unpackhi_epi16(step1[27], step1[20]); in FDCT32x32_2D_AVX2() 360 const __m256i s2_21_1 = _mm256_unpackhi_epi16(step1[26], step1[21]); in FDCT32x32_2D_AVX2() 362 const __m256i s2_22_1 = _mm256_unpackhi_epi16(step1[25], step1[22]); in FDCT32x32_2D_AVX2() 364 const __m256i s2_23_1 = _mm256_unpackhi_epi16(step1[24], step1[23]); in FDCT32x32_2D_AVX2() 595 const __m256i s3_10_1 = _mm256_unpackhi_epi16(step2[13], step2[10]); in FDCT32x32_2D_AVX2() 597 const __m256i s3_11_1 = _mm256_unpackhi_epi16(step2[12], step2[11]); in FDCT32x32_2D_AVX2() 673 const __m256i s1_05_1 = _mm256_unpackhi_epi16(step3[6], step3[5]); in FDCT32x32_2D_AVX2() 697 const __m256i s1_18_1 = _mm256_unpackhi_epi16(step3[18], step3[29]); in FDCT32x32_2D_AVX2() 699 const __m256i s1_19_1 = _mm256_unpackhi_epi16(step3[19], step3[28]); in FDCT32x32_2D_AVX2() 701 const __m256i s1_20_1 = _mm256_unpackhi_epi16(step3[20], step3[27]); in FDCT32x32_2D_AVX2() [all …]
|
D | highbd_convolve_avx2.c | 531 sig[4] = _mm256_unpackhi_epi16(s0, s1); in pack_8x9_init() 533 sig[5] = _mm256_unpackhi_epi16(s2, s3); in pack_8x9_init() 535 sig[6] = _mm256_unpackhi_epi16(s4, s5); in pack_8x9_init() 550 sig[7] = _mm256_unpackhi_epi16(s2, s3); in pack_8x9_pixels() 610 sig[4] = _mm256_unpackhi_epi16(u0, u2); in pack_16x9_init() 613 sig[12] = _mm256_unpackhi_epi16(u1, u3); in pack_16x9_init() 622 sig[5] = _mm256_unpackhi_epi16(u0, u2); in pack_16x9_init() 625 sig[13] = _mm256_unpackhi_epi16(u1, u3); in pack_16x9_init() 634 sig[6] = _mm256_unpackhi_epi16(u0, u2); in pack_16x9_init() 637 sig[14] = _mm256_unpackhi_epi16(u1, u3); in pack_16x9_init() [all …]
|
D | bitdepth_conversion_avx2.h | 37 const __m256i a_2 = _mm256_unpackhi_epi16(a_lo, a_hi); in store_tran_low()
|
D | avg_intrin_avx2.c | 261 a2 = _mm256_unpackhi_epi16(b0, b1); in hadamard_col8x2_avx2() 262 a3 = _mm256_unpackhi_epi16(b2, b3); in hadamard_col8x2_avx2() 265 a6 = _mm256_unpackhi_epi16(b4, b5); in hadamard_col8x2_avx2() 266 a7 = _mm256_unpackhi_epi16(b6, b7); in hadamard_col8x2_avx2()
|
/external/eigen/Eigen/src/Core/arch/CUDA/ |
D | PacketMathHalf.h | 559 __m256i ab_8f = _mm256_unpackhi_epi16(a, b); 560 __m256i cd_8f = _mm256_unpackhi_epi16(c, d); 561 __m256i ef_8f = _mm256_unpackhi_epi16(e, f); 562 __m256i gh_8f = _mm256_unpackhi_epi16(g, h); 563 __m256i ij_8f = _mm256_unpackhi_epi16(i, j); 564 __m256i kl_8f = _mm256_unpackhi_epi16(k, l); 565 __m256i mn_8f = _mm256_unpackhi_epi16(m, n); 566 __m256i op_8f = _mm256_unpackhi_epi16(o, p);
|
/external/libaom/libaom/aom_dsp/simd/ |
D | v256_intrinsics_x86.h | 160 return _mm256_unpackhi_epi16( in v256_ziphi_16() 335 return _mm256_unpackhi_epi16( in v256_unpackhi_u16_s32() 342 _mm256_unpackhi_epi16( in v256_unpackhi_s16_s32()
|