/external/libgav1/libgav1/src/dsp/arm/
D | intrapred_neon.cc
      124  return vaddq_u16(sum_0, sum_1);  in Add()
      133  return vaddq_u16(sum_0, sum_1);  in Add()
      217  return Sum(vaddq_u16(sum_0, sum_1));  in DcSum_NEON()
      221  return Sum(vaddq_u16(sum_0, sum_1));  in DcSum_NEON()
      235  return Sum(vaddq_u16(sum_0, sum_1));  in DcSum_NEON()
      238  return Sum(vaddq_u16(sum_0, sum_1));  in DcSum_NEON()
      241  return Sum(vaddq_u16(sum_0, sum_1));  in DcSum_NEON()
      254  return Sum(vaddq_u16(sum_0, sum_1));  in DcSum_NEON()
      257  return Sum(vaddq_u16(sum_0, sum_1));  in DcSum_NEON()
      260  return Sum(vaddq_u16(sum_0, sum_1));  in DcSum_NEON()
      [all …]
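The Add()/DcSum_NEON() hits above all follow one idiom: widen 8-bit pixels to 16-bit lanes, merge the partial sums with vaddq_u16, then reduce to the scalar DC value. A minimal sketch of that idiom, assuming 16 top and 16 left neighbours; dc_sum_16 and its reduction chain are illustrative, not libgav1's exact helpers:

```c
// Widen two 16-byte rows of pixels with pairwise adds, combine with
// vaddq_u16, then reduce to a scalar sum.
#include <arm_neon.h>
#include <stdint.h>

static uint32_t dc_sum_16(const uint8_t* top, const uint8_t* left) {
  const uint16x8_t sum_0 = vpaddlq_u8(vld1q_u8(top));   // 16 x u8 -> 8 x u16
  const uint16x8_t sum_1 = vpaddlq_u8(vld1q_u8(left));
  const uint16x8_t sum = vaddq_u16(sum_0, sum_1);       // the pattern in Add()
  // Horizontal reduction: u16 -> u32 -> u64 pairwise, then extract.
  const uint32x4_t a = vpaddlq_u16(sum);
  const uint64x2_t b = vpaddlq_u32(a);
  return (uint32_t)(vgetq_lane_u64(b, 0) + vgetq_lane_u64(b, 1));
}
```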
D | super_res_neon.cc
       59  res = vaddq_u16(res, weighted_src[3]);  in ComputeSuperRes_NEON()
       60  res = vaddq_u16(res, weighted_src[4]);  in ComputeSuperRes_NEON()
       61  res = vaddq_u16(res, weighted_src[6]);  in ComputeSuperRes_NEON()
D | loop_filter_neon.cc
      300  sum = vaddq_u16(vaddl_u8(p1q1, p1q1), sum);  in Filter6()
      306  sum = vaddq_u16(vaddl_u8(p0q0, p0q0), sum);  in Filter6()
      322  sum = vaddq_u16(vaddl_u8(q0p0, q1p1), sum);  in Filter6()
      536  sum = vaddq_u16(vaddl_u8(p2q2, p2q2), sum);  in Filter8()
      542  sum = vaddq_u16(vaddl_u8(p1q1, p0q0), sum);  in Filter8()
      558  sum = vaddq_u16(vaddl_u8(p1q1, q1p1), sum);  in Filter8()
      567  sum = vaddq_u16(vaddl_u8(p0q0, q2p2), sum);  in Filter8()
      754  sum = vaddq_u16(vaddl_u8(p5q5, p5q5), sum);  in Filter14()
      760  sum = vaddq_u16(vaddl_u8(p4q4, p4q4), sum);  in Filter14()
      766  sum = vaddq_u16(vaddl_u8(p3q3, p2q2), sum);  in Filter14()
      [all …]
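The Filter6/Filter8/Filter14 hits accumulate low-pass tap sums: vaddl_u8 adds two 8-bit pixel vectors while widening to 16 bits, and vaddq_u16 folds that into the running sum (a tap is doubled by adding it to itself). A hedged sketch of the idiom; the tap weights below are illustrative, not libgav1's exact Filter6 kernel:

```c
// Build p2 + 2*p1 + 2*p0 + q0 as a u16 tap sum from u8 pixel vectors.
#include <arm_neon.h>

static uint16x8_t filter_tap_sum(uint8x8_t p2q2, uint8x8_t p1q1,
                                 uint8x8_t p0q0, uint8x8_t q0p0) {
  uint16x8_t sum = vaddl_u8(p2q2, q0p0);       // p2 + q0, widened to u16
  sum = vaddq_u16(vaddl_u8(p1q1, p1q1), sum);  // += 2*p1  (pattern at 300)
  sum = vaddq_u16(vaddl_u8(p0q0, p0q0), sum);  // += 2*p0  (pattern at 306)
  return sum;  // later narrowed back to u8 with vrshrn_n_u16
}
```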
D | intra_edge_neon.cc
      154  sum_lo = vaddq_u16(sum_lo, vshlq_n_u16(sum_123_lo, 2));  in IntraEdgeFilter_NEON()
      161  sum_hi = vaddq_u16(sum_hi, vshlq_n_u16(sum_123_hi, 2));  in IntraEdgeFilter_NEON()
      185  sum_lo = vaddq_u16(sum_lo, vshlq_n_u16(sum_123_lo, 2));  in IntraEdgeFilter_NEON()
      192  sum_hi = vaddq_u16(sum_hi, vshlq_n_u16(sum_123_hi, 2));  in IntraEdgeFilter_NEON()
D | cdef_neon.cc
      217  *partial_lo = vaddq_u16(*partial_lo, vextq_u16(v_zero, v_pair_add[1], 7));  in AddPartial_D5_D7()
      220  *partial_hi = vaddq_u16(*partial_hi, vextq_u16(v_pair_add[1], v_zero, 7));  in AddPartial_D5_D7()
      224  *partial_lo = vaddq_u16(*partial_lo, vextq_u16(v_zero, v_pair_add[2], 6));  in AddPartial_D5_D7()
      227  *partial_hi = vaddq_u16(*partial_hi, vextq_u16(v_pair_add[2], v_zero, 6));  in AddPartial_D5_D7()
      231  *partial_lo = vaddq_u16(*partial_lo, vextq_u16(v_zero, v_pair_add[3], 5));  in AddPartial_D5_D7()
      234  *partial_hi = vaddq_u16(*partial_hi, vextq_u16(v_pair_add[3], v_zero, 5));  in AddPartial_D5_D7()
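In AddPartial_D5_D7, vextq_u16 against a zero vector shifts each row's pair sums by one extra lane per row, so diagonal directions accumulate into (partial_lo, partial_hi) at staggered offsets. A toy three-row version of that staggering, with names mirroring the hits; the real loop covers eight rows:

```c
// vextq_u16(v_zero, x, 7) shifts x up by one lane (zero-filled);
// vextq_u16(x, v_zero, 7) catches the lane that fell off the top.
#include <arm_neon.h>

static void add_partials(const uint16x8_t v_pair_add[3],
                         uint16x8_t* partial_lo, uint16x8_t* partial_hi) {
  const uint16x8_t v_zero = vdupq_n_u16(0);
  *partial_lo = v_pair_add[0];  // row 0, unshifted (toy initialization)
  *partial_hi = v_zero;
  // Row 1, shifted by one lane.
  *partial_lo = vaddq_u16(*partial_lo, vextq_u16(v_zero, v_pair_add[1], 7));
  *partial_hi = vaddq_u16(*partial_hi, vextq_u16(v_pair_add[1], v_zero, 7));
  // Row 2, shifted by two lanes.
  *partial_lo = vaddq_u16(*partial_lo, vextq_u16(v_zero, v_pair_add[2], 6));
  *partial_hi = vaddq_u16(*partial_hi, vextq_u16(v_pair_add[2], v_zero, 6));
}
```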
D | intrapred_smooth_neon.cc
      255  const uint16x8_t pred = vaddq_u16(weighted_top, weighted_bl);  in SmoothVertical4Or8xN_NEON()
      272  const uint16x8_t pred_low = vaddq_u16(weighted_top_low, weighted_bl);  in CalculateVerticalWeightsAndPred()
      273  const uint16x8_t pred_high = vaddq_u16(weighted_top_high, weighted_bl);  in CalculateVerticalWeightsAndPred()
      352  const uint16x8_t pred = vaddq_u16(weighted_left, weighted_tr);  in SmoothHorizontal4Or8xN_NEON()
      370  const uint16x8_t pred_low = vaddq_u16(weighted_left_low, weighted_tr_low);  in CalculateHorizontalWeightsAndPred()
      376  const uint16x8_t pred_high = vaddq_u16(weighted_left_high, weighted_tr_high);  in CalculateHorizontalWeightsAndPred()
D | convolve_neon.cc
      866  const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);  in ConvolveKernelHorizontal2Tap()
      905  const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);  in ConvolveKernelHorizontal2Tap()
      971  const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);  in ConvolveKernelHorizontalPositive4Tap()
     1121  const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);  in ConvolveKernelHorizontalSigned6Tap()
     1217  const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);  in ConvolveKernelHorizontalMixed6Tap()
     1304  const uint16x8_t subpel_index_offsets = vaddq_u16(index_steps, p_fraction);  in ConvolveKernelHorizontalSigned8Tap()
     2896  vst1_u8(dst, vrshrn_n_u16(vaddq_u16(row[0], below_0), 2));  in IntraBlockCopy2D()
     2903  vst1_u8(dst, vrshrn_n_u16(vaddq_u16(row[1], below_1), 2));  in IntraBlockCopy2D()
     2910  vst1_u8(dst, vrshrn_n_u16(vaddq_u16(row[2], below_2), 2));  in IntraBlockCopy2D()
     2916  vst1_u8(dst, vrshrn_n_u16(vaddq_u16(row[3], below_3), 2));  in IntraBlockCopy2D()
      [all …]
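The IntraBlockCopy2D hits show the 2D averaging step: each `row`/`below` vector already holds the sum of two horizontally adjacent pixels, so one vaddq_u16 plus a rounding narrow by 2 yields the rounded four-pixel bilinear average. A single-row sketch under that layout; block_copy_2d_row8 is an illustrative name, and the real code pipelines four rows:

```c
#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

static void block_copy_2d_row8(const uint8_t* src, ptrdiff_t stride,
                               uint8_t* dst) {
  // Horizontal pair sums for the current and next source rows.
  const uint16x8_t row = vaddl_u8(vld1_u8(src), vld1_u8(src + 1));
  const uint16x8_t below = vaddl_u8(vld1_u8(src + stride),
                                    vld1_u8(src + stride + 1));
  // (row + below + 2) >> 2, narrowed back to u8 -- the pattern at 2896.
  vst1_u8(dst, vrshrn_n_u16(vaddq_u16(row, below), 2));
}
```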
/external/libaom/libaom/av1/common/arm/
D | cfl_neon.c
       24  return vaddq_u16(vld1q_u16(buf), vld1q_u16(buf + offset));  in vldaddq_u16()
       73  sum.val[0] = vshlq_n_u16(vaddq_u16(top_0, bot_0), 1);  in cfl_luma_subsampling_420_lbd_neon()
       74  sum.val[1] = vshlq_n_u16(vaddq_u16(top_1, bot_1), 1);  in cfl_luma_subsampling_420_lbd_neon()
      158  const uint16x8_t sum = vaddq_u16(top, bot);  in cfl_luma_subsampling_420_hbd_neon()
      165  const uint16x8_t sum_1 = vaddq_u16(top_1, bot_1);  in cfl_luma_subsampling_420_hbd_neon()
      173  const uint16x8_t top_0 = vaddq_u16(top.val[0], top.val[1]);  in cfl_luma_subsampling_420_hbd_neon()
      175  const uint16x8_t bot_0 = vaddq_u16(bot.val[0], bot.val[1]);  in cfl_luma_subsampling_420_hbd_neon()
      177  const uint16x8_t top_1 = vaddq_u16(top.val[2], top.val[3]);  in cfl_luma_subsampling_420_hbd_neon()
      179  const uint16x8_t bot_1 = vaddq_u16(bot.val[2], bot.val[3]);  in cfl_luma_subsampling_420_hbd_neon()
      181  sum.val[0] = vshlq_n_u16(vaddq_u16(top_0, bot_0), 1);  in cfl_luma_subsampling_420_hbd_neon()
      [all …]
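The cfl_luma_subsampling_420_lbd_neon hits sum each 2x2 luma block (pairwise within a row, then top row plus bottom row) and double the result with vshlq_n_u16 so the CfL buffer stores the block sum at a fixed scale. A one-vector sketch; luma_subsample_420_8 is an illustrative name, not libaom's API:

```c
#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

static uint16x8_t luma_subsample_420_8(const uint8_t* luma, ptrdiff_t stride) {
  const uint16x8_t top = vpaddlq_u8(vld1q_u8(luma));           // 2x1 sums, top row
  const uint16x8_t bot = vpaddlq_u8(vld1q_u8(luma + stride));  // 2x1 sums, bottom row
  return vshlq_n_u16(vaddq_u16(top, bot), 1);                  // 2 * (2x2 block sum)
}
```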
D | selfguided_neon.c
      940  xb = vaddq_u16(xb, x);  in cross_sum_inp_u16()
      941  xt = vaddq_u16(xt, xr);  in cross_sum_inp_u16()
      942  xl = vaddq_u16(xl, xb);  in cross_sum_inp_u16()
      943  xl = vaddq_u16(xl, xt);  in cross_sum_inp_u16()
      947  xbl = vaddq_u16(xbl, xbr);  in cross_sum_inp_u16()
      948  xtl = vaddq_u16(xtl, xtr);  in cross_sum_inp_u16()
      949  xtl = vaddq_u16(xtl, xbl);  in cross_sum_inp_u16()
      990  xbr = vaddq_u16(xbr, xbl);  in cross_sum_fast_even_row_inp16()
      991  xtr = vaddq_u16(xtr, xtl);  in cross_sum_fast_even_row_inp16()
      992  xbr = vaddq_u16(xbr, xtr);  in cross_sum_fast_even_row_inp16()
      [all …]
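The cross_sum_inp_u16 hits fold nine neighbouring vectors into one result, pairing the adds so they can issue in parallel instead of forming one long dependency chain. A sketch of just that accumulation idiom, written as a plain nine-vector sum; the surrounding self-guided-filter weighting and shifts are not reproduced here:

```c
#include <arm_neon.h>

static uint16x8_t cross_sum9(uint16x8_t xtl, uint16x8_t xt, uint16x8_t xtr,
                             uint16x8_t xl,  uint16x8_t x,  uint16x8_t xr,
                             uint16x8_t xbl, uint16x8_t xb, uint16x8_t xbr) {
  xb = vaddq_u16(xb, x);      // bottom + centre      (pattern at 940)
  xt = vaddq_u16(xt, xr);     // top + right          (pattern at 941)
  xl = vaddq_u16(xl, xb);
  xl = vaddq_u16(xl, xt);     // edges + centre folded
  xbl = vaddq_u16(xbl, xbr);  // corner pairs         (pattern at 947)
  xtl = vaddq_u16(xtl, xtr);
  xtl = vaddq_u16(xtl, xbl);
  return vaddq_u16(xl, xtl);  // all nine combined
}
```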
/external/libhevc/encoder/arm/
D | ihevce_ssd_and_sad_calculator_neon.c
      107  abs_sum = vaddq_u16(abs_sum, abs);  in ihevce_ssd_and_sad_calculator_neon()
      147  abs_sum_l = vaddq_u16(abs_sum_l, abs_l);  in ihevce_ssd_and_sad_calculator_neon()
      148  abs_sum_h = vaddq_u16(abs_sum_h, abs_h);  in ihevce_ssd_and_sad_calculator_neon()
      212  abs_sum_l = vaddq_u16(abs_sum_l, abs_sum_h);  in ihevce_ssd_and_sad_calculator_neon()
      213  abs_sum = vaddq_u16(abs_sum, abs_sum_l);  in ihevce_ssd_and_sad_calculator_neon()
      297  abs_sum_0 = vaddq_u16(abs_sum_0, abs_sum_1);  in ihevce_ssd_and_sad_calculator_neon()
      298  abs_sum_2 = vaddq_u16(abs_sum_2, abs_sum_3);  in ihevce_ssd_and_sad_calculator_neon()
      299  abs_sum_0 = vaddq_u16(abs_sum_0, abs_sum_2);  in ihevce_ssd_and_sad_calculator_neon()
D | ihevce_coarse_layer_sad_neon.c
      496  mvy_wt = vaddq_u16(mvy_wt, v_ref_idx);  in hme_combine_4x4_sads_and_compute_cost_high_speed_neon()
      508  uint16x8_t sad_4x8 = vaddq_u16(curr, south);  in hme_combine_4x4_sads_and_compute_cost_high_speed_neon()
      509  uint16x8_t sad_8x4 = vaddq_u16(curr, east);  in hme_combine_4x4_sads_and_compute_cost_high_speed_neon()
      518  mv_wt = vaddq_u16(mv_wt, mvy_wt);  in hme_combine_4x4_sads_and_compute_cost_high_speed_neon()
      531  sad_4x8 = vaddq_u16(total_cost, sad_4x8);  in hme_combine_4x4_sads_and_compute_cost_high_speed_neon()
      532  sad_8x4 = vaddq_u16(total_cost, sad_8x4);  in hme_combine_4x4_sads_and_compute_cost_high_speed_neon()
      657  mvy_wt = vaddq_u16(mvy_wt, v_ref_idx);  in hme_combine_4x4_sads_and_compute_cost_high_quality_neon()
      669  uint16x8_t sad_4x8 = vaddq_u16(curr, south);  in hme_combine_4x4_sads_and_compute_cost_high_quality_neon()
      670  uint16x8_t sad_8x4 = vaddq_u16(curr, east);  in hme_combine_4x4_sads_and_compute_cost_high_quality_neon()
      679  mv_wt = vaddq_u16(mv_wt, mvy_wt);  in hme_combine_4x4_sads_and_compute_cost_high_quality_neon()
      [all …]
D | ihevce_had_compute_neon.c
      595  vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(q0)), vreinterpretq_u16_s16(vabsq_s16(q1)));  in ihevce_HAD_16x16_8bit_neon()
      597  vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(q2)), vreinterpretq_u16_s16(vabsq_s16(q3)));  in ihevce_HAD_16x16_8bit_neon()
      736  uint16x8_t r0 = vaddq_u16(  in ihevce_HAD_32x32_8bit_neon()
      738  uint16x8_t r1 = vaddq_u16(  in ihevce_HAD_32x32_8bit_neon()
      960  uint16x8_t r0 = vaddq_u16(vreinterpretq_u16_s16(p0), vreinterpretq_u16_s16(p1));  in ihevce_compute_16x16HAD_using_8x8_neon()
      961  uint16x8_t r1 = vaddq_u16(vreinterpretq_u16_s16(p2), vreinterpretq_u16_s16(p3));  in ihevce_compute_16x16HAD_using_8x8_neon()
     1170  uint16x8_t r0 = vaddq_u16(vreinterpretq_u16_s16(p0), vreinterpretq_u16_s16(p1));  in ihevce_compute_32x32HAD_using_16x16_neon()
     1171  uint16x8_t r1 = vaddq_u16(vreinterpretq_u16_s16(p2), vreinterpretq_u16_s16(p3));  in ihevce_compute_32x32HAD_using_16x16_neon()
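The ihevce_HAD_* hits compute SATD-style totals: Hadamard outputs are s16, their absolute values fit in u16, so vabsq_s16 results are reinterpreted as u16 and summed with vaddq_u16. A minimal sketch of that step, assuming coefficients small enough that the u16 sums cannot overflow:

```c
#include <arm_neon.h>

// Sum of |q0| + |q1| + |q2| + |q3|, lane-wise, as in the hits at 595/597.
static uint16x8_t abs_sum4(int16x8_t q0, int16x8_t q1,
                           int16x8_t q2, int16x8_t q3) {
  const uint16x8_t a01 = vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(q0)),
                                   vreinterpretq_u16_s16(vabsq_s16(q1)));
  const uint16x8_t a23 = vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(q2)),
                                   vreinterpretq_u16_s16(vabsq_s16(q3)));
  return vaddq_u16(a01, a23);  // caller widens before further accumulation
}
```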
/external/XNNPACK/src/q8-gavgpool/
D | mp7p7q-neon.c
       57  const uint16x8_t vsum2345 = vaddq_u16(vsum23, vsum45);  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
       59  const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345));  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
       94  const uint16x8_t vsum2345 = vaddq_u16(vsum23, vsum45);  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
       96  const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345));  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
      156  const uint16x8_t vsum2345 = vaddq_u16(vsum23, vsum45);  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
      158  const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345));  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
      228  const uint16x8_t vsum2345 = vaddq_u16(vsum23, vsum45);  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
      230  const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345));  in xnn_q8_gavgpool_ukernel_mp7p7q__neon()
D | up7-neon.c
       80  const uint16x8_t vsum2345 = vaddq_u16(vsum23, vsum45);  in xnn_q8_gavgpool_ukernel_up7__neon()
       82  const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345));  in xnn_q8_gavgpool_ukernel_up7__neon()
      150  const uint16x8_t vsum2345 = vaddq_u16(vsum23, vsum45);  in xnn_q8_gavgpool_ukernel_up7__neon()
      152  const int16x8_t vsum = vreinterpretq_s16_u16(vaddq_u16(vsum016, vsum2345));  in xnn_q8_gavgpool_ukernel_up7__neon()
/external/XNNPACK/src/q8-avgpool/
D | up9-neon.c
       97  const uint16x8_t vsum2345 = vaddq_u16(vsum23, vsum45);  in xnn_q8_avgpool_ukernel_up9__neon()
       98  const uint16x8_t vsum01678 = vaddq_u16(vsum018, vsum67);  in xnn_q8_avgpool_ukernel_up9__neon()
       99  const uint16x8_t vsum = vaddq_u16(vsum2345, vsum01678);  in xnn_q8_avgpool_ukernel_up9__neon()
      170  const uint16x8_t vsum2345 = vaddq_u16(vsum23, vsum45);  in xnn_q8_avgpool_ukernel_up9__neon()
      171  const uint16x8_t vsum01678 = vaddq_u16(vsum018, vsum67);  in xnn_q8_avgpool_ukernel_up9__neon()
      172  const uint16x8_t vsum = vaddq_u16(vsum2345, vsum01678);  in xnn_q8_avgpool_ukernel_up9__neon()
D | mp9p8q-neon.c
       73  const uint16x8_t vsum2345 = vaddq_u16(vsum23, vsum45);  in xnn_q8_avgpool_ukernel_mp9p8q__neon()
       74  const uint16x8_t vsum01678 = vaddq_u16(vsum018, vsum67);  in xnn_q8_avgpool_ukernel_mp9p8q__neon()
       75  const uint16x8_t vsum = vaddq_u16(vsum2345, vsum01678);  in xnn_q8_avgpool_ukernel_mp9p8q__neon()
      114  const uint16x8_t vsum0123 = vaddq_u16(vsum01, vsum23);  in xnn_q8_avgpool_ukernel_mp9p8q__neon()
      115  const uint16x8_t vsum4567 = vaddq_u16(vsum45, vsum67);  in xnn_q8_avgpool_ukernel_mp9p8q__neon()
      116  const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567);  in xnn_q8_avgpool_ukernel_mp9p8q__neon()
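Both XNNPACK pooling kernels build their window sum as a balanced tree: vaddl_u8 widens pairs of input rows, then vaddq_u16 merges the u16 partials (vsum0123, vsum4567, ...). An eight-input sketch of the tree; the real up9 kernel folds a ninth row into vsum018 first:

```c
#include <arm_neon.h>
#include <stdint.h>

static uint16x8_t avgpool_sum8(const uint8_t* i0, const uint8_t* i1,
                               const uint8_t* i2, const uint8_t* i3,
                               const uint8_t* i4, const uint8_t* i5,
                               const uint8_t* i6, const uint8_t* i7) {
  const uint16x8_t vsum01 = vaddl_u8(vld1_u8(i0), vld1_u8(i1));
  const uint16x8_t vsum23 = vaddl_u8(vld1_u8(i2), vld1_u8(i3));
  const uint16x8_t vsum45 = vaddl_u8(vld1_u8(i4), vld1_u8(i5));
  const uint16x8_t vsum67 = vaddl_u8(vld1_u8(i6), vld1_u8(i7));
  const uint16x8_t vsum0123 = vaddq_u16(vsum01, vsum23);  // pattern at 114
  const uint16x8_t vsum4567 = vaddq_u16(vsum45, vsum67);  // pattern at 115
  return vaddq_u16(vsum0123, vsum4567);                   // pattern at 116
}
```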
/external/libvpx/libvpx/vpx_dsp/arm/
D | highbd_loopfilter_neon.c
       59  t0 = vaddq_u16(t0, t0);  in filter_hev_mask4()
       61  t0 = vaddq_u16(t0, t1);  in filter_hev_mask4()
      126  *sum = vaddq_u16(*sum, add0);  in filter_update()
      127  *sum = vaddq_u16(*sum, add1);  in filter_update()
      156  sum = vaddq_u16(p3, p3);  // 2*p3  in calc_7_tap_filter()
      157  sum = vaddq_u16(sum, p3);  // 3*p3  in calc_7_tap_filter()
      158  sum = vaddq_u16(sum, p2);  // 3*p3+p2  in calc_7_tap_filter()
      159  sum = vaddq_u16(sum, p2);  // 3*p3+2*p2  in calc_7_tap_filter()
      160  sum = vaddq_u16(sum, p1);  // 3*p3+2*p2+p1  in calc_7_tap_filter()
      161  sum = vaddq_u16(sum, p0);  // 3*p3+2*p2+p1+p0  in calc_7_tap_filter()
      [all …]
D | highbd_intrapred_neon.c
      103  const uint16x8_t p0 = vaddq_u16(above_u16, left_u16);  in vpx_highbd_dc_predictor_8x8_neon()
      147  const uint16x8_t p0 = vaddq_u16(ref_u16.val[0], ref_u16.val[1]);  in dc_sum_16()
      168  const uint16x8_t pa = vaddq_u16(a.val[0], a.val[1]);  in vpx_highbd_dc_predictor_16x16_neon()
      169  const uint16x8_t pl = vaddq_u16(l.val[0], l.val[1]);  in vpx_highbd_dc_predictor_16x16_neon()
      170  const uint16x8_t pal0 = vaddq_u16(pa, pl);  in vpx_highbd_dc_predictor_16x16_neon()
      215  const uint16x8_t p0 = vaddq_u16(r.val[0], r.val[1]);  in dc_sum_32()
      216  const uint16x8_t p1 = vaddq_u16(r.val[2], r.val[3]);  in dc_sum_32()
      217  const uint16x8_t p2 = vaddq_u16(p0, p1);  in dc_sum_32()
      242  const uint16x8_t pa0 = vaddq_u16(a.val[0], a.val[1]);  in vpx_highbd_dc_predictor_32x32_neon()
      243  const uint16x8_t pa1 = vaddq_u16(a.val[2], a.val[3]);  in vpx_highbd_dc_predictor_32x32_neon()
      [all …]
/external/webp/src/dsp/
D | alpha_processing_neon.c
       34  const uint16x8_t r3 = vaddq_u16(r2, kOne);  \
       35  const uint16x8_t g3 = vaddq_u16(g2, kOne);  \
       36  const uint16x8_t b3 = vaddq_u16(b2, kOne);  \
D | dec_neon.c
     1290  const uint16x8_t s0 = vaddq_u16(L0, L1);  in DC4_NEON()
     1291  const uint16x8_t s1 = vaddq_u16(L2, L3);  in DC4_NEON()
     1292  const uint16x8_t s01 = vaddq_u16(s0, s1);  in DC4_NEON()
     1293  const uint16x8_t sum = vaddq_u16(s01, vcombine_u16(p1, p1));  in DC4_NEON()
     1452  const uint16x8_t s0 = vaddq_u16(L0, L1);  in DC8_NEON()
     1453  const uint16x8_t s1 = vaddq_u16(L2, L3);  in DC8_NEON()
     1454  const uint16x8_t s2 = vaddq_u16(L4, L5);  in DC8_NEON()
     1455  const uint16x8_t s3 = vaddq_u16(L6, L7);  in DC8_NEON()
     1456  const uint16x8_t s01 = vaddq_u16(s0, s1);  in DC8_NEON()
     1457  const uint16x8_t s23 = vaddq_u16(s2, s3);  in DC8_NEON()
      [all …]
D | upsampling_neon.c
       39  const uint16x8_t abcd = vaddq_u16(ad, bc);  \
       41  const uint16x8_t al = vaddq_u16(abcd, vshlq_n_u16(ad, 1));  \
       43  const uint16x8_t bl = vaddq_u16(abcd, vshlq_n_u16(bc, 1));  \
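The upsampling_neon.c hits are the core of WebP's fancy upsampler: from the pair sums ad = a+d and bc = b+c it derives a+b+c+d, then 3a+b+c+3d and a+3b+3c+d by adding a doubled pair sum. Rounding one of those down by 3 and averaging against a source pixel gives a (9,3,3,1)/16 weighting. A standalone sketch of one diagonal; upsample_diag is an illustrative name, not WebP's macro:

```c
#include <arm_neon.h>

static uint8x8_t upsample_diag(uint8x8_t a, uint8x8_t b,
                               uint8x8_t c, uint8x8_t d) {
  const uint16x8_t ad = vaddl_u8(a, d);
  const uint16x8_t bc = vaddl_u8(b, c);
  const uint16x8_t abcd = vaddq_u16(ad, bc);                  // a+b+c+d
  const uint16x8_t bl = vaddq_u16(abcd, vshlq_n_u16(bc, 1));  // a+3b+3c+d
  const uint8x8_t diag = vrshrn_n_u16(bl, 3);                 // (a+3b+3c+d+4)>>3
  return vrhadd_u8(a, diag);  // ~ (9a+3b+3c+d)/16, rounded
}
```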
/external/libaom/libaom/aom_dsp/arm/
D | sad_neon.c
      102  q12 = vaddq_u16(q12, q13);  in aom_sad16x8_neon()
      189  return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));  in aom_sad32x32_neon()
      208  return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));  in aom_sad16x16_neon()
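The sad_neon.c hits merge two u16 accumulators with one vaddq_u16 before the horizontal reduction. A sketch of the whole pattern, assuming vabal_u8 accumulation per row; horizontal_add_16x8 below is a stand-in with the same name as libaom's helper, not its exact implementation:

```c
#include <arm_neon.h>
#include <stdint.h>

static unsigned horizontal_add_16x8(const uint16x8_t v) {
  const uint32x4_t a = vpaddlq_u16(v);
  const uint64x2_t b = vpaddlq_u32(a);
  return (unsigned)(vgetq_lane_u64(b, 0) + vgetq_lane_u64(b, 1));
}

static unsigned sad16xh(const uint8_t* src, int src_stride,
                        const uint8_t* ref, int ref_stride, int h) {
  uint16x8_t acc_lo = vdupq_n_u16(0);
  uint16x8_t acc_hi = vdupq_n_u16(0);
  for (int i = 0; i < h; ++i) {
    const uint8x16_t s = vld1q_u8(src + i * src_stride);
    const uint8x16_t r = vld1q_u8(ref + i * ref_stride);
    // Accumulate absolute differences, widened to u16.
    acc_lo = vabal_u8(acc_lo, vget_low_u8(s), vget_low_u8(r));
    acc_hi = vabal_u8(acc_hi, vget_high_u8(s), vget_high_u8(r));
  }
  return horizontal_add_16x8(vaddq_u16(acc_lo, acc_hi));  // pattern at 189
}
```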
D | intrapred_neon.c
       44  const uint16x8_t sum = vaddq_u16(sum_left, sum_top);  in dc_4x4()
      114  const uint16x8_t sum = vaddq_u16(sum_left, sum_top);  in dc_8x8()
      187  const uint16x8_t sum = vaddq_u16(sum_left, sum_top);  in dc_16x16()
      249  const uint16x8_t p2 = vaddq_u16(p0, p1);  in dc_32x32()
      261  const uint16x8_t p2 = vaddq_u16(p0, p1);  in dc_32x32()
      269  const uint16x8_t sum = vaddq_u16(sum_left, sum_top);  in dc_32x32()
D | loopfilter_neon.c
      276  out_pq2 = vaddq_u16(out_pq2, p6q6_2);  in lpf_14_neon()
      278  out_pq3 = vaddq_u16(out_pq3, p6q6_temp);  in lpf_14_neon()
      280  out_pq4 = vaddq_u16(out_pq4, p6q6_temp);  in lpf_14_neon()
      281  p6q6_temp = vaddq_u16(p6q6_temp, p6q6_2);  in lpf_14_neon()
      282  out_pq5 = vaddq_u16(out_pq5, p6q6_temp);  in lpf_14_neon()
      287  out_pq3 = vaddq_u16(out_pq3, qp_sum);  in lpf_14_neon()
      291  out_pq2 = vaddq_u16(out_pq2, qp_sum);  in lpf_14_neon()
      295  out_pq1 = vaddq_u16(out_pq1, qp_sum);  in lpf_14_neon()
      299  out_pq0 = vaddq_u16(out_pq0, qp_sum);  in lpf_14_neon()
      525  out = vaddq_u16(out, out);  in lpf_6_neon()
/external/libhevc/common/arm/
D | ihevc_intra_pred_filters_neon_intr.c
      758  prod_t1 = vaddq_u16(prod_t1, const_nt_dup);  in ihevc_intra_pred_luma_planar_neonintr()
      759  prod_t1 = vaddq_u16(prod_t1, prod_t2);  in ihevc_intra_pred_luma_planar_neonintr()
      843  prod_t1 = vaddq_u16(prod_t1, const_nt_dup);  in ihevc_intra_pred_luma_planar_neonintr()
      844  prod_t1 = vaddq_u16(prod_t1, prod_t2);  in ihevc_intra_pred_luma_planar_neonintr()
      991  ref_load_q = vaddq_u16(ref_load_q, three_dc_val_t);  in ihevc_intra_pred_luma_dc_neonintr()
     1016  ref_load_q = vaddq_u16(ref_load_q, three_dc_val_t);  in ihevc_intra_pred_luma_dc_neonintr()
     2158  add_res = vaddq_u16(mul_res1, mul_res2);  in ihevc_intra_pred_luma_mode_3_to_9_neonintr()
     2243  add_res = vaddq_u16(mul_res1, mul_res2);  in ihevc_intra_pred_luma_mode_3_to_9_neonintr()
     2438  add_res = vaddq_u16(mul_res1, mul_res2);  in ihevc_intra_pred_luma_mode_11_to_17_neonintr()
     2508  add_res = vaddq_u16(mul_res1, mul_res2);  in ihevc_intra_pred_luma_mode_11_to_17_neonintr()
      [all …]