Home
last modified time | relevance | path

Searched refs:vmlal_n_s16 (Results 1 – 20 of 20) sorted by relevance

/external/libaom/libaom/av1/common/arm/
Dconvolve_neon.h39 sum0 = vmlal_n_s16(sum0, vget_low_s16(ss1), filter_y[1]); in wiener_convolve8_vert_4x8()
40 sum0 = vmlal_n_s16(sum0, vget_low_s16(ss2), filter_y[2]); in wiener_convolve8_vert_4x8()
41 sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), filter_y[3]); in wiener_convolve8_vert_4x8()
44 sum1 = vmlal_n_s16(sum1, vget_high_s16(ss1), filter_y[1]); in wiener_convolve8_vert_4x8()
45 sum1 = vmlal_n_s16(sum1, vget_high_s16(ss2), filter_y[2]); in wiener_convolve8_vert_4x8()
46 sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), filter_y[3]); in wiener_convolve8_vert_4x8()
212 sum0 = vmlal_n_s16(sum0, s1, y_filter[1]); in convolve8_4x4_s32()
213 sum0 = vmlal_n_s16(sum0, s2, y_filter[2]); in convolve8_4x4_s32()
214 sum0 = vmlal_n_s16(sum0, s3, y_filter[3]); in convolve8_4x4_s32()
215 sum0 = vmlal_n_s16(sum0, s4, y_filter[4]); in convolve8_4x4_s32()
[all …]
Dconvolve_neon.c132 sum0 = vmlal_n_s16(sum0, s1, y_filter[1]); in convolve8_vert_4x4_s32()
133 sum0 = vmlal_n_s16(sum0, s2, y_filter[2]); in convolve8_vert_4x4_s32()
134 sum0 = vmlal_n_s16(sum0, s3, y_filter[3]); in convolve8_vert_4x4_s32()
135 sum0 = vmlal_n_s16(sum0, s4, y_filter[4]); in convolve8_vert_4x4_s32()
136 sum0 = vmlal_n_s16(sum0, s5, y_filter[5]); in convolve8_vert_4x4_s32()
137 sum0 = vmlal_n_s16(sum0, s6, y_filter[6]); in convolve8_vert_4x4_s32()
138 sum0 = vmlal_n_s16(sum0, s7, y_filter[7]); in convolve8_vert_4x4_s32()
161 sum0 = vmlal_n_s16(sum0, vget_low_s16(s1), y_filter[1]); in convolve8_vert_8x4_s32()
162 sum0 = vmlal_n_s16(sum0, vget_low_s16(s2), y_filter[2]); in convolve8_vert_8x4_s32()
163 sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), y_filter[3]); in convolve8_vert_8x4_s32()
[all …]
/external/libvpx/libvpx/vpx_dsp/arm/
Dfwd_txfm_neon.c56 v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), cospi_8_64); in vpx_fdct8x8_neon()
57 v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), cospi_8_64); in vpx_fdct8x8_neon()
101 v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), cospi_28_64); in vpx_fdct8x8_neon()
102 v_t0_hi = vmlal_n_s16(v_t0_hi, vget_high_s16(v_x0), cospi_28_64); in vpx_fdct8x8_neon()
105 v_t1_lo = vmlal_n_s16(v_t1_lo, vget_low_s16(v_x2), cospi_20_64); in vpx_fdct8x8_neon()
106 v_t1_hi = vmlal_n_s16(v_t1_hi, vget_high_s16(v_x2), cospi_20_64); in vpx_fdct8x8_neon()
Dfdct16x16_neon.c158 const int32x4_t sum0 = vmlal_n_s16(a0, vget_low_s16(b), c); in butterfly_one_coeff()
159 const int32x4_t sum1 = vmlal_n_s16(a1, vget_high_s16(b), c); in butterfly_one_coeff()
179 const int32x4_t sum0 = vmlal_n_s16(a2, vget_low_s16(b), c0); in butterfly_two_coeff()
180 const int32x4_t sum1 = vmlal_n_s16(a3, vget_high_s16(b), c0); in butterfly_two_coeff()
Dfdct_neon.c66 const int32x4_t temp3 = vmlal_n_s16(s_3_cospi_8_64, s_2, cospi_24_64); in vpx_fdct4x4_neon()
Dfdct32x32_neon.c203 const int32x4_t sum0 = vmlal_n_s16(a0, vget_low_s16(b), constant); in butterfly_one_coeff()
204 const int32x4_t sum1 = vmlal_n_s16(a1, vget_high_s16(b), constant); in butterfly_one_coeff()
224 const int32x4_t sum0 = vmlal_n_s16(a2, vget_low_s16(b), constant0); in butterfly_two_coeff()
225 const int32x4_t sum1 = vmlal_n_s16(a3, vget_high_s16(b), constant0); in butterfly_two_coeff()
572 const int32x4_t sum0 = vmlal_n_s16(a0, vget_low_s16(b), constant); in butterfly_one_coeff_s16_s32()
573 const int32x4_t sum1 = vmlal_n_s16(a1, vget_high_s16(b), constant); in butterfly_one_coeff_s16_s32()
Didct_neon.h155 t[0] = vmlal_n_s16(t[0], vget_low_s16(b), b_const); in multiply_accumulate_shift_and_narrow_s16()
156 t[1] = vmlal_n_s16(t[1], vget_high_s16(b), b_const); in multiply_accumulate_shift_and_narrow_s16()
/external/libaom/libaom/aom_dsp/arm/
Dfwd_txfm_neon.c63 const int32x4_t temp3 = vmlal_n_s16(s_3_cospi_8_64, s_2, cospi_24_64); in aom_fdct4x4_helper()
147 v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), (int16_t)cospi_8_64); in aom_fdct8x8_neon()
148 v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), (int16_t)cospi_8_64); in aom_fdct8x8_neon()
192 v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), (int16_t)cospi_28_64); in aom_fdct8x8_neon()
193 v_t0_hi = vmlal_n_s16(v_t0_hi, vget_high_s16(v_x0), (int16_t)cospi_28_64); in aom_fdct8x8_neon()
196 v_t1_lo = vmlal_n_s16(v_t1_lo, vget_low_s16(v_x2), (int16_t)cospi_20_64); in aom_fdct8x8_neon()
197 v_t1_hi = vmlal_n_s16(v_t1_hi, vget_high_s16(v_x2), (int16_t)cospi_20_64); in aom_fdct8x8_neon()
/external/libhevc/common/arm/
Dihevc_resi_trans_neon.c136 temp3 = vmlal_n_s16(o_1_m_trans_10, o_0, (WORD32)g_ai2_ihevc_trans_4[1][1]); in ihevc_resi_trans_4x4_neon()
492 a2.val[1] = vmlal_n_s16(a2.val[1], vget_high_s16(eo1), 36); in ihevc_resi_trans_8x8_neon()
496 a2.val[0] = vmlal_n_s16(a2.val[0], vget_low_s16(eo1), 36); in ihevc_resi_trans_8x8_neon()
523 a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o1), 75); in ihevc_resi_trans_8x8_neon()
525 a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o1), 75); in ihevc_resi_trans_8x8_neon()
534 a7.val[0] = vmlal_n_s16(a7.val[0], vget_low_s16(o2), 75); in ihevc_resi_trans_8x8_neon()
536 a5.val[0] = vmlal_n_s16(a5.val[0], vget_low_s16(o2), 18); in ihevc_resi_trans_8x8_neon()
540 a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o2), 50); in ihevc_resi_trans_8x8_neon()
542 a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o2), 50); in ihevc_resi_trans_8x8_neon()
546 a5.val[1] = vmlal_n_s16(a5.val[1], vget_high_s16(o2), 18); in ihevc_resi_trans_8x8_neon()
[all …]
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/
Ddepthwise_conv.h516 acc = vget_low_s32(vmlal_n_s16(vcombine_s32(acc, acc), filter, input));
608 acc = vmlal_n_s16(acc, filter, input);
1105 vmlal_n_s16(acc[2 * i + 0], vget_low_s16(filter[i]), input);
1107 vmlal_n_s16(acc[2 * i + 1], vget_high_s16(filter[i]), input);
1148 acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
1149 acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
1150 acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
1151 acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
1152 acc_4 = vmlal_n_s16(acc_4, vget_low_s16(filter_2), input);
1153 acc_5 = vmlal_n_s16(acc_5, vget_high_s16(filter_2), input);
[all …]
/external/libgav1/libgav1/src/dsp/arm/
Dfilm_grain_neon.cc78 sum.val[0] = vmlal_n_s16(sum.val[0], vget_low_s16(grain), coeff); in AccumulateWeightedGrain()
79 sum.val[1] = vmlal_n_s16(sum.val[1], vget_high_s16(grain), coeff); in AccumulateWeightedGrain()
319 sum_u.val[0] = vmlal_n_s16(sum_u.val[0], vget_low_s16(luma), coeff_u); in ApplyAutoRegressiveFilterToChromaGrains_NEON()
320 sum_u.val[1] = vmlal_n_s16(sum_u.val[1], vget_high_s16(luma), coeff_u); in ApplyAutoRegressiveFilterToChromaGrains_NEON()
321 sum_v.val[0] = vmlal_n_s16(sum_v.val[0], vget_low_s16(luma), coeff_v); in ApplyAutoRegressiveFilterToChromaGrains_NEON()
322 sum_v.val[1] = vmlal_n_s16(sum_v.val[1], vget_high_s16(luma), coeff_v); in ApplyAutoRegressiveFilterToChromaGrains_NEON()
399 sum_u.val[0] = vmlal_n_s16(sum_u.val[0], vget_low_s16(luma), coeff_u); in ApplyAutoRegressiveFilterToChromaGrains_NEON()
400 sum_u.val[1] = vmlal_n_s16(sum_u.val[1], vget_high_s16(luma), coeff_u); in ApplyAutoRegressiveFilterToChromaGrains_NEON()
401 sum_v.val[0] = vmlal_n_s16(sum_v.val[0], vget_low_s16(luma), coeff_v); in ApplyAutoRegressiveFilterToChromaGrains_NEON()
402 sum_v.val[1] = vmlal_n_s16(sum_v.val[1], vget_high_s16(luma), coeff_v); in ApplyAutoRegressiveFilterToChromaGrains_NEON()
Dwarp_neon.cc307 vmlal_n_s16(sum_low, vget_low_s16(filter[k]), intermediate); in Warp_NEON()
309 vmlal_n_s16(sum_high, vget_high_s16(filter[k]), intermediate); in Warp_NEON()
Dloop_restoration_neon.cc229 d.val[0] = vmlal_n_s16(sum.val[0], vget_low_s16(a), filter); in WienerVertical2()
230 d.val[1] = vmlal_n_s16(sum.val[1], vget_high_s16(a), filter); in WienerVertical2()
237 d.val[0] = vmlal_n_s16(d.val[0], vget_low_s16(a[1]), filter[3]); in WienerVertical()
238 d.val[1] = vmlal_n_s16(d.val[1], vget_high_s16(a[1]), filter[3]); in WienerVertical()
900 v[0] = vmlal_n_s16(v[0], vget_low_s16(filter[1]), w2); in SelfGuidedDoubleMultiplier()
901 v[1] = vmlal_n_s16(v[1], vget_high_s16(filter[1]), w2); in SelfGuidedDoubleMultiplier()
Dinverse_transform_neon.cc340 const int32x4_t y0 = vmlal_n_s16(acc_y, vget_low_s16(*b), cos128); in ButterflyRotation_4()
363 const int32x4_t y0 = vmlal_n_s16(acc_y, vget_low_s16(*b), cos128); in ButterflyRotation_8()
370 const int32x4_t y0_hi = vmlal_n_s16(acc_y_hi, vget_high_s16(*b), cos128); in ButterflyRotation_8()
1208 s[0] = vmlal_n_s16(s[0], vget_low_s16(x[2]), kAdst4Multiplier[3]); in Adst4_NEON()
2064 vmlal_n_s16(v_dual_round, vget_low_s16(v_src), kIdentity16Multiplier); in Identity16Row_NEON()
2065 const int32x4_t v_src_mult_hi = vmlal_n_s16( in Identity16Row_NEON()
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/
Ddepthwiseconv_uint8.h530 acc = vget_low_s32(vmlal_n_s16(vcombine_s32(acc, acc), filter, input));
625 acc = vmlal_n_s16(acc, filter, input);
1143 vmlal_n_s16(acc[2 * i + 0], vget_low_s16(filter[i]), input);
1145 vmlal_n_s16(acc[2 * i + 1], vget_high_s16(filter[i]), input);
1190 acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input);
1191 acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input);
1192 acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input);
1193 acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input);
1194 acc_4 = vmlal_n_s16(acc_4, vget_low_s16(filter_2), input);
1195 acc_5 = vmlal_n_s16(acc_5, vget_high_s16(filter_2), input);
[all …]
/external/webp/src/dsp/
Dyuv_neon.c101 const int32x4_t tmp1_lo = vmlal_n_s16(tmp0_lo, g_lo, C1); \
102 const int32x4_t tmp1_hi = vmlal_n_s16(tmp0_hi, g_hi, C1); \
103 const int32x4_t tmp2_lo = vmlal_n_s16(tmp1_lo, b_lo, C2); \
104 const int32x4_t tmp2_hi = vmlal_n_s16(tmp1_hi, b_hi, C2); \
Denc_neon.c322 const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352); in FTransform_NEON()
339 const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352); in FTransform_NEON()
/external/clang/test/CodeGen/
Daarch64-neon-2velem.c3977 return vmlal_n_s16(a, b, c); in test_vmlal_n_s16()
Darm_neon_intrinsics.c9052 return vmlal_n_s16(a, b, c); in test_vmlal_n_s16()
/external/neon_2_sse/
DNEON_2_SSE.h1932 _NEON2SSESTORAGE int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VMLAL.S16 q0, d0, d…
14088 _NEON2SSESTORAGE int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c); // VMLAL.S16 q0, d0, d…
14089 _NEON2SSE_INLINE int32x4_t vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) // VMLAL.S16 q0, d0, d0… in vmlal_n_s16() function