/external/libaom/libaom/av1/common/arm/ |
D | convolve_neon.h | 39 sum0 = vmlal_n_s16(sum0, vget_low_s16(ss1), filter_y[1]); in wiener_convolve8_vert_4x8() 40 sum0 = vmlal_n_s16(sum0, vget_low_s16(ss2), filter_y[2]); in wiener_convolve8_vert_4x8() 41 sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), filter_y[3]); in wiener_convolve8_vert_4x8() 44 sum1 = vmlal_n_s16(sum1, vget_high_s16(ss1), filter_y[1]); in wiener_convolve8_vert_4x8() 45 sum1 = vmlal_n_s16(sum1, vget_high_s16(ss2), filter_y[2]); in wiener_convolve8_vert_4x8() 46 sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), filter_y[3]); in wiener_convolve8_vert_4x8() 212 sum0 = vmlal_n_s16(sum0, s1, y_filter[1]); in convolve8_4x4_s32() 213 sum0 = vmlal_n_s16(sum0, s2, y_filter[2]); in convolve8_4x4_s32() 214 sum0 = vmlal_n_s16(sum0, s3, y_filter[3]); in convolve8_4x4_s32() 215 sum0 = vmlal_n_s16(sum0, s4, y_filter[4]); in convolve8_4x4_s32() [all …]
|
D | convolve_neon.c | 132 sum0 = vmlal_n_s16(sum0, s1, y_filter[1]); in convolve8_vert_4x4_s32() 133 sum0 = vmlal_n_s16(sum0, s2, y_filter[2]); in convolve8_vert_4x4_s32() 134 sum0 = vmlal_n_s16(sum0, s3, y_filter[3]); in convolve8_vert_4x4_s32() 135 sum0 = vmlal_n_s16(sum0, s4, y_filter[4]); in convolve8_vert_4x4_s32() 136 sum0 = vmlal_n_s16(sum0, s5, y_filter[5]); in convolve8_vert_4x4_s32() 137 sum0 = vmlal_n_s16(sum0, s6, y_filter[6]); in convolve8_vert_4x4_s32() 138 sum0 = vmlal_n_s16(sum0, s7, y_filter[7]); in convolve8_vert_4x4_s32() 161 sum0 = vmlal_n_s16(sum0, vget_low_s16(s1), y_filter[1]); in convolve8_vert_8x4_s32() 162 sum0 = vmlal_n_s16(sum0, vget_low_s16(s2), y_filter[2]); in convolve8_vert_8x4_s32() 163 sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), y_filter[3]); in convolve8_vert_8x4_s32() [all …]
|
/external/libvpx/libvpx/vpx_dsp/arm/ |
D | fwd_txfm_neon.c | 56 v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), cospi_8_64); in vpx_fdct8x8_neon() 57 v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), cospi_8_64); in vpx_fdct8x8_neon() 101 v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), cospi_28_64); in vpx_fdct8x8_neon() 102 v_t0_hi = vmlal_n_s16(v_t0_hi, vget_high_s16(v_x0), cospi_28_64); in vpx_fdct8x8_neon() 105 v_t1_lo = vmlal_n_s16(v_t1_lo, vget_low_s16(v_x2), cospi_20_64); in vpx_fdct8x8_neon() 106 v_t1_hi = vmlal_n_s16(v_t1_hi, vget_high_s16(v_x2), cospi_20_64); in vpx_fdct8x8_neon()
|
D | fdct16x16_neon.c | 158 const int32x4_t sum0 = vmlal_n_s16(a0, vget_low_s16(b), c); in butterfly_one_coeff() 159 const int32x4_t sum1 = vmlal_n_s16(a1, vget_high_s16(b), c); in butterfly_one_coeff() 179 const int32x4_t sum0 = vmlal_n_s16(a2, vget_low_s16(b), c0); in butterfly_two_coeff() 180 const int32x4_t sum1 = vmlal_n_s16(a3, vget_high_s16(b), c0); in butterfly_two_coeff()
|
D | fdct_neon.c | 66 const int32x4_t temp3 = vmlal_n_s16(s_3_cospi_8_64, s_2, cospi_24_64); in vpx_fdct4x4_neon()
|
D | fdct32x32_neon.c | 203 const int32x4_t sum0 = vmlal_n_s16(a0, vget_low_s16(b), constant); in butterfly_one_coeff() 204 const int32x4_t sum1 = vmlal_n_s16(a1, vget_high_s16(b), constant); in butterfly_one_coeff() 224 const int32x4_t sum0 = vmlal_n_s16(a2, vget_low_s16(b), constant0); in butterfly_two_coeff() 225 const int32x4_t sum1 = vmlal_n_s16(a3, vget_high_s16(b), constant0); in butterfly_two_coeff() 572 const int32x4_t sum0 = vmlal_n_s16(a0, vget_low_s16(b), constant); in butterfly_one_coeff_s16_s32() 573 const int32x4_t sum1 = vmlal_n_s16(a1, vget_high_s16(b), constant); in butterfly_one_coeff_s16_s32()
|
D | idct_neon.h | 155 t[0] = vmlal_n_s16(t[0], vget_low_s16(b), b_const); in multiply_accumulate_shift_and_narrow_s16() 156 t[1] = vmlal_n_s16(t[1], vget_high_s16(b), b_const); in multiply_accumulate_shift_and_narrow_s16()
|
/external/libaom/libaom/aom_dsp/arm/ |
D | fwd_txfm_neon.c | 63 const int32x4_t temp3 = vmlal_n_s16(s_3_cospi_8_64, s_2, cospi_24_64); in aom_fdct4x4_helper() 147 v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), (int16_t)cospi_8_64); in aom_fdct8x8_neon() 148 v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), (int16_t)cospi_8_64); in aom_fdct8x8_neon() 192 v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), (int16_t)cospi_28_64); in aom_fdct8x8_neon() 193 v_t0_hi = vmlal_n_s16(v_t0_hi, vget_high_s16(v_x0), (int16_t)cospi_28_64); in aom_fdct8x8_neon() 196 v_t1_lo = vmlal_n_s16(v_t1_lo, vget_low_s16(v_x2), (int16_t)cospi_20_64); in aom_fdct8x8_neon() 197 v_t1_hi = vmlal_n_s16(v_t1_hi, vget_high_s16(v_x2), (int16_t)cospi_20_64); in aom_fdct8x8_neon()
|
/external/libhevc/common/arm/ |
D | ihevc_resi_trans_neon.c | 136 temp3 = vmlal_n_s16(o_1_m_trans_10, o_0, (WORD32)g_ai2_ihevc_trans_4[1][1]); in ihevc_resi_trans_4x4_neon() 492 a2.val[1] = vmlal_n_s16(a2.val[1], vget_high_s16(eo1), 36); in ihevc_resi_trans_8x8_neon() 496 a2.val[0] = vmlal_n_s16(a2.val[0], vget_low_s16(eo1), 36); in ihevc_resi_trans_8x8_neon() 523 a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o1), 75); in ihevc_resi_trans_8x8_neon() 525 a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o1), 75); in ihevc_resi_trans_8x8_neon() 534 a7.val[0] = vmlal_n_s16(a7.val[0], vget_low_s16(o2), 75); in ihevc_resi_trans_8x8_neon() 536 a5.val[0] = vmlal_n_s16(a5.val[0], vget_low_s16(o2), 18); in ihevc_resi_trans_8x8_neon() 540 a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o2), 50); in ihevc_resi_trans_8x8_neon() 542 a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o2), 50); in ihevc_resi_trans_8x8_neon() 546 a5.val[1] = vmlal_n_s16(a5.val[1], vget_high_s16(o2), 18); in ihevc_resi_trans_8x8_neon() [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/ |
D | depthwise_conv.h | 516 acc = vget_low_s32(vmlal_n_s16(vcombine_s32(acc, acc), filter, input)); 608 acc = vmlal_n_s16(acc, filter, input); 1105 vmlal_n_s16(acc[2 * i + 0], vget_low_s16(filter[i]), input); 1107 vmlal_n_s16(acc[2 * i + 1], vget_high_s16(filter[i]), input); 1148 acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input); 1149 acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input); 1150 acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input); 1151 acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input); 1152 acc_4 = vmlal_n_s16(acc_4, vget_low_s16(filter_2), input); 1153 acc_5 = vmlal_n_s16(acc_5, vget_high_s16(filter_2), input); [all …]
|
/external/libgav1/libgav1/src/dsp/arm/ |
D | film_grain_neon.cc | 78 sum.val[0] = vmlal_n_s16(sum.val[0], vget_low_s16(grain), coeff); in AccumulateWeightedGrain() 79 sum.val[1] = vmlal_n_s16(sum.val[1], vget_high_s16(grain), coeff); in AccumulateWeightedGrain() 319 sum_u.val[0] = vmlal_n_s16(sum_u.val[0], vget_low_s16(luma), coeff_u); in ApplyAutoRegressiveFilterToChromaGrains_NEON() 320 sum_u.val[1] = vmlal_n_s16(sum_u.val[1], vget_high_s16(luma), coeff_u); in ApplyAutoRegressiveFilterToChromaGrains_NEON() 321 sum_v.val[0] = vmlal_n_s16(sum_v.val[0], vget_low_s16(luma), coeff_v); in ApplyAutoRegressiveFilterToChromaGrains_NEON() 322 sum_v.val[1] = vmlal_n_s16(sum_v.val[1], vget_high_s16(luma), coeff_v); in ApplyAutoRegressiveFilterToChromaGrains_NEON() 399 sum_u.val[0] = vmlal_n_s16(sum_u.val[0], vget_low_s16(luma), coeff_u); in ApplyAutoRegressiveFilterToChromaGrains_NEON() 400 sum_u.val[1] = vmlal_n_s16(sum_u.val[1], vget_high_s16(luma), coeff_u); in ApplyAutoRegressiveFilterToChromaGrains_NEON() 401 sum_v.val[0] = vmlal_n_s16(sum_v.val[0], vget_low_s16(luma), coeff_v); in ApplyAutoRegressiveFilterToChromaGrains_NEON() 402 sum_v.val[1] = vmlal_n_s16(sum_v.val[1], vget_high_s16(luma), coeff_v); in ApplyAutoRegressiveFilterToChromaGrains_NEON()
|
D | warp_neon.cc | 307 vmlal_n_s16(sum_low, vget_low_s16(filter[k]), intermediate); in Warp_NEON() 309 vmlal_n_s16(sum_high, vget_high_s16(filter[k]), intermediate); in Warp_NEON()
|
D | loop_restoration_neon.cc | 229 d.val[0] = vmlal_n_s16(sum.val[0], vget_low_s16(a), filter); in WienerVertical2() 230 d.val[1] = vmlal_n_s16(sum.val[1], vget_high_s16(a), filter); in WienerVertical2() 237 d.val[0] = vmlal_n_s16(d.val[0], vget_low_s16(a[1]), filter[3]); in WienerVertical() 238 d.val[1] = vmlal_n_s16(d.val[1], vget_high_s16(a[1]), filter[3]); in WienerVertical() 900 v[0] = vmlal_n_s16(v[0], vget_low_s16(filter[1]), w2); in SelfGuidedDoubleMultiplier() 901 v[1] = vmlal_n_s16(v[1], vget_high_s16(filter[1]), w2); in SelfGuidedDoubleMultiplier()
|
D | inverse_transform_neon.cc | 340 const int32x4_t y0 = vmlal_n_s16(acc_y, vget_low_s16(*b), cos128); in ButterflyRotation_4() 363 const int32x4_t y0 = vmlal_n_s16(acc_y, vget_low_s16(*b), cos128); in ButterflyRotation_8() 370 const int32x4_t y0_hi = vmlal_n_s16(acc_y_hi, vget_high_s16(*b), cos128); in ButterflyRotation_8() 1208 s[0] = vmlal_n_s16(s[0], vget_low_s16(x[2]), kAdst4Multiplier[3]); in Adst4_NEON() 2064 vmlal_n_s16(v_dual_round, vget_low_s16(v_src), kIdentity16Multiplier); in Identity16Row_NEON() 2065 const int32x4_t v_src_mult_hi = vmlal_n_s16( in Identity16Row_NEON()
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | depthwiseconv_uint8.h | 530 acc = vget_low_s32(vmlal_n_s16(vcombine_s32(acc, acc), filter, input)); 625 acc = vmlal_n_s16(acc, filter, input); 1143 vmlal_n_s16(acc[2 * i + 0], vget_low_s16(filter[i]), input); 1145 vmlal_n_s16(acc[2 * i + 1], vget_high_s16(filter[i]), input); 1190 acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input); 1191 acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input); 1192 acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input); 1193 acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input); 1194 acc_4 = vmlal_n_s16(acc_4, vget_low_s16(filter_2), input); 1195 acc_5 = vmlal_n_s16(acc_5, vget_high_s16(filter_2), input); [all …]
|
/external/webp/src/dsp/ |
D | yuv_neon.c | 101 const int32x4_t tmp1_lo = vmlal_n_s16(tmp0_lo, g_lo, C1); \ 102 const int32x4_t tmp1_hi = vmlal_n_s16(tmp0_hi, g_hi, C1); \ 103 const int32x4_t tmp2_lo = vmlal_n_s16(tmp1_lo, b_lo, C2); \ 104 const int32x4_t tmp2_hi = vmlal_n_s16(tmp1_hi, b_hi, C2); \
|
D | enc_neon.c | 322 const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352); in FTransform_NEON() 339 const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352); in FTransform_NEON()
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-2velem.c | 3977 return vmlal_n_s16(a, b, c); in test_vmlal_n_s16()
|
D | arm_neon_intrinsics.c | 9052 return vmlal_n_s16(a, b, c); in test_vmlal_n_s16()
|
/external/neon_2_sse/ |
D | NEON_2_SSE.h | 1932 _NEON2SSESTORAGE int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VMLAL.S16 q0, d0, d… 14088 _NEON2SSESTORAGE int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VMLAL.S16 q0, d0, d… 14089 _NEON2SSE_INLINE int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) // VMLAL.S16 q0, d0, d0… in vmlal_n_s16() function
|