Home
last modified time | relevance | path

Searched refs:vmlal_s16 (Results 1 – 25 of 38) sorted by relevance

12

/external/XNNPACK/src/qs8-dwconv/gen/
Dup32x9-minmax-neon-mul16.c108 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi0x01234567), vget_low_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
109 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
110 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi0x89ABCDEF), vget_low_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
111 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi0x89ABCDEF), vget_high_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
112 vaccGHIJ = vmlal_s16(vaccGHIJ, vget_low_s16(vi0xGHIJKLMN), vget_low_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
113 vaccKLMN = vmlal_s16(vaccKLMN, vget_high_s16(vi0xGHIJKLMN), vget_high_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
114 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi0xOPQRSTUV), vget_low_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
115 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
126 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi1x01234567), vget_low_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
127 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
[all …]
Dup24x9-minmax-neon-mul16.c104 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi0x01234567), vget_low_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
105 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
106 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi0x89ABCDEF), vget_low_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
107 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi0x89ABCDEF), vget_high_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
108 vaccGHIJ = vmlal_s16(vaccGHIJ, vget_low_s16(vi0xGHIJKLMN), vget_low_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
109 vaccKLMN = vmlal_s16(vaccKLMN, vget_high_s16(vi0xGHIJKLMN), vget_high_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
118 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi1x01234567), vget_low_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
119 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
120 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi1x89ABCDEF), vget_low_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
121 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi1x89ABCDEF), vget_high_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
[all …]
Dup16x9-minmax-neon-mul16.c100 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi0x01234567), vget_low_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
101 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
102 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi0x89ABCDEF), vget_low_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
103 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi0x89ABCDEF), vget_high_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
110 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi1x01234567), vget_low_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
111 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
112 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi1x89ABCDEF), vget_low_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
113 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi1x89ABCDEF), vget_high_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
120 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi2x01234567), vget_low_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
[all …]
Dup8x9-minmax-neon-mul16.c96 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi0x01234567), vget_low_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
97 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
102 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi1x01234567), vget_low_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
103 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
108 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi2x01234567), vget_low_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
109 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
114 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi3x01234567), vget_low_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
115 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
120 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
[all …]
/external/libvpx/libvpx/vpx_dsp/arm/
Dsum_squares_neon.c29 s0 = vmlal_s16(s0, s[1], s[1]); in vpx_sum_squares_2d_i16_neon()
30 s0 = vmlal_s16(s0, s[2], s[2]); in vpx_sum_squares_2d_i16_neon()
31 s0 = vmlal_s16(s0, s[3], s[3]); in vpx_sum_squares_2d_i16_neon()
55 s0 = vmlal_s16(s0, vget_low_s16(s[0]), vget_low_s16(s[0])); in vpx_sum_squares_2d_i16_neon()
56 s0 = vmlal_s16(s0, vget_low_s16(s[1]), vget_low_s16(s[1])); in vpx_sum_squares_2d_i16_neon()
57 s0 = vmlal_s16(s0, vget_low_s16(s[2]), vget_low_s16(s[2])); in vpx_sum_squares_2d_i16_neon()
58 s0 = vmlal_s16(s0, vget_low_s16(s[3]), vget_low_s16(s[3])); in vpx_sum_squares_2d_i16_neon()
59 s0 = vmlal_s16(s0, vget_low_s16(s[4]), vget_low_s16(s[4])); in vpx_sum_squares_2d_i16_neon()
60 s0 = vmlal_s16(s0, vget_low_s16(s[5]), vget_low_s16(s[5])); in vpx_sum_squares_2d_i16_neon()
61 s0 = vmlal_s16(s0, vget_low_s16(s[6]), vget_low_s16(s[6])); in vpx_sum_squares_2d_i16_neon()
[all …]
Dvariance_neon.c55 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), in variance_neon_w4x4()
57 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_high_s16(diff_lo_s16), in variance_neon_w4x4()
60 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), in variance_neon_w4x4()
62 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16), in variance_neon_w4x4()
104 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), in variance_neon_w16()
106 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_high_s16(diff_lo_s16), in variance_neon_w16()
109 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), in variance_neon_w16()
111 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16), in variance_neon_w16()
147 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_0_s16), in variance_neon_w8x2()
149 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_1_s16), in variance_neon_w8x2()
[all …]
Davg_neon.c162 sse = vmlal_s16(sse, diff_lo, diff_lo); // dynamic range 26 bits. in vpx_vector_var_neon()
163 sse = vmlal_s16(sse, diff_hi, diff_hi); in vpx_vector_var_neon()
/external/libhevc/common/arm/
Dihevc_resi_trans_neon_32x32.c287 a[0].val[0] = vmlal_s16( in ihevc_resi_trans_32x32_neon()
291 a[0].val[1] = vmlal_s16( in ihevc_resi_trans_32x32_neon()
381 a[14].val[0] = vmlal_s16(a[14].val[0], in ihevc_resi_trans_32x32_neon()
383 a[10].val[0] = vmlal_s16(a[10].val[0], in ihevc_resi_trans_32x32_neon()
385 a[6].val[0] = vmlal_s16(a[6].val[0], in ihevc_resi_trans_32x32_neon()
387 a[2].val[0] = vmlal_s16(a[2].val[0], in ihevc_resi_trans_32x32_neon()
399 a[14].val[1] = vmlal_s16(a[14].val[1], in ihevc_resi_trans_32x32_neon()
401 a[10].val[1] = vmlal_s16(a[10].val[1], in ihevc_resi_trans_32x32_neon()
403 a[6].val[1] = vmlal_s16(a[6].val[1], in ihevc_resi_trans_32x32_neon()
405 a[2].val[1] = vmlal_s16(a[2].val[1], in ihevc_resi_trans_32x32_neon()
[all …]
/external/XNNPACK/src/qu8-dwconv/
Dup8x9-minmax-neon.c91 vaccX1_lo = vmlal_s16(vaccX1_lo, vget_low_s16(vxk1), vget_low_s16(vxi1)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
92 vaccX1_hi = vmlal_s16(vaccX1_hi, vget_high_s16(vxk1), vget_high_s16(vxi1)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
98 vaccX0_lo = vmlal_s16(vaccX0_lo, vget_low_s16(vxk2), vget_low_s16(vxi2)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
99 vaccX0_hi = vmlal_s16(vaccX0_hi, vget_high_s16(vxk2), vget_high_s16(vxi2)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
105 vaccX1_lo = vmlal_s16(vaccX1_lo, vget_low_s16(vxk3), vget_low_s16(vxi3)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
106 vaccX1_hi = vmlal_s16(vaccX1_hi, vget_high_s16(vxk3), vget_high_s16(vxi3)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
112 vaccX0_lo = vmlal_s16(vaccX0_lo, vget_low_s16(vxk4), vget_low_s16(vxi4)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
113 vaccX0_hi = vmlal_s16(vaccX0_hi, vget_high_s16(vxk4), vget_high_s16(vxi4)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
119 vaccX1_lo = vmlal_s16(vaccX1_lo, vget_low_s16(vxk5), vget_low_s16(vxi5)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
120 vaccX1_hi = vmlal_s16(vaccX1_hi, vget_high_s16(vxk5), vget_high_s16(vxi5)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
[all …]
/external/libvpx/libvpx/vp8/encoder/arm/neon/
Dshortfdct_neon.c61 q9s32 = vmlal_s16(q9s32, d7s16, d16s16); in vp8_short_fdct4x4_neon()
62 q10s32 = vmlal_s16(q10s32, d7s16, d17s16); in vp8_short_fdct4x4_neon()
63 q9s32 = vmlal_s16(q9s32, d6s16, d17s16); in vp8_short_fdct4x4_neon()
88 q11s32 = vmlal_s16(q11s32, d7s16, d16s16); in vp8_short_fdct4x4_neon()
89 q12s32 = vmlal_s16(q12s32, d7s16, d17s16); in vp8_short_fdct4x4_neon()
97 q11s32 = vmlal_s16(q11s32, d6s16, d17s16); in vp8_short_fdct4x4_neon()
169 q9s32 = vmlal_s16(q9s32, d28s16, d16s16); in vp8_short_fdct8x4_neon()
170 q10s32 = vmlal_s16(q10s32, d28s16, d17s16); in vp8_short_fdct8x4_neon()
171 q11s32 = vmlal_s16(q11s32, d29s16, d16s16); in vp8_short_fdct8x4_neon()
172 q12s32 = vmlal_s16(q12s32, d29s16, d17s16); in vp8_short_fdct8x4_neon()
[all …]
/external/tensorflow/tensorflow/lite/kernels/
Dcpu_backend_gemm_custom_gemv.h393 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0.val[0]),
395 acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1.val[0]),
397 acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2.val[0]),
399 acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3.val[0]),
401 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0.val[1]),
403 acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1.val[1]),
405 acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2.val[1]),
407 acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3.val[1]),
409 acc0 = vmlal_s16(acc0, vget_high_s16(filter_val_0.val[0]),
411 acc1 = vmlal_s16(acc1, vget_high_s16(filter_val_1.val[0]),
[all …]
/external/libaom/libaom/aom_dsp/arm/
Dvariance_neon.c37 vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff)); in variance_neon_w8()
39 vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff)); in variance_neon_w8()
189 q9s32 = vmlal_s16(q9s32, d22s16, d22s16); in aom_variance16x8_neon()
190 q10s32 = vmlal_s16(q10s32, d23s16, d23s16); in aom_variance16x8_neon()
195 q9s32 = vmlal_s16(q9s32, d24s16, d24s16); in aom_variance16x8_neon()
196 q10s32 = vmlal_s16(q10s32, d25s16, d25s16); in aom_variance16x8_neon()
201 q9s32 = vmlal_s16(q9s32, d26s16, d26s16); in aom_variance16x8_neon()
202 q10s32 = vmlal_s16(q10s32, d27s16, d27s16); in aom_variance16x8_neon()
207 q9s32 = vmlal_s16(q9s32, d28s16, d28s16); in aom_variance16x8_neon()
208 q10s32 = vmlal_s16(q10s32, d29s16, d29s16); in aom_variance16x8_neon()
[all …]
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/
Ddepthwise_conv.h77 acc[0].val[i] = vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]),
79 acc[1].val[i] = vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]),
124 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
126 vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input[0]));
127 acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
129 vmlal_s16(acc[3], vget_high_s16(filter), vget_high_s16(input[1]));
149 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
150 acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input));
186 acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(filter),
188 acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], vget_high_s16(filter),
[all …]
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/
Ddepthwiseconv_uint8.h68 acc[0].val[i] = vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]),
70 acc[1].val[i] = vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]),
116 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
118 vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input[0]));
119 acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
121 vmlal_s16(acc[3], vget_high_s16(filter), vget_high_s16(input[1]));
141 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
142 acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input));
179 acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(filter),
181 acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], vget_high_s16(filter),
[all …]
Dlegacy_optimized_ops.h851 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0_0), in LegacyFullyConnectedAsGEMVWorkerImpl()
853 acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1_0), in LegacyFullyConnectedAsGEMVWorkerImpl()
855 acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2_0), in LegacyFullyConnectedAsGEMVWorkerImpl()
857 acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3_0), in LegacyFullyConnectedAsGEMVWorkerImpl()
859 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0_1), in LegacyFullyConnectedAsGEMVWorkerImpl()
861 acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1_1), in LegacyFullyConnectedAsGEMVWorkerImpl()
863 acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2_1), in LegacyFullyConnectedAsGEMVWorkerImpl()
865 acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3_1), in LegacyFullyConnectedAsGEMVWorkerImpl()
867 acc0 = vmlal_s16(acc0, vget_high_s16(filter_val_0_0), in LegacyFullyConnectedAsGEMVWorkerImpl()
869 acc1 = vmlal_s16(acc1, vget_high_s16(filter_val_1_0), in LegacyFullyConnectedAsGEMVWorkerImpl()
[all …]
/external/libopus/celt/arm/
Dpitch_neon_intr.c49 xy_s32x4 = vmlal_s16(xy_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y_s16x8)); in celt_inner_prod_neon()
50 xy_s32x4 = vmlal_s16(xy_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y_s16x8)); in celt_inner_prod_neon()
56 xy_s32x4 = vmlal_s16(xy_s32x4, x_s16x4, y_s16x4); in celt_inner_prod_neon()
90 xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y01_s16x8)); in dual_inner_prod_neon()
91 xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y02_s16x8)); in dual_inner_prod_neon()
92 xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y01_s16x8)); in dual_inner_prod_neon()
93 xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y02_s16x8)); in dual_inner_prod_neon()
100 xy01_s32x4 = vmlal_s16(xy01_s32x4, x_s16x4, y01_s16x4); in dual_inner_prod_neon()
101 xy02_s32x4 = vmlal_s16(xy02_s32x4, x_s16x4, y02_s16x4); in dual_inner_prod_neon()
/external/libaom/libaom/av1/encoder/arm/neon/
Dav1_error_neon.c34 const int32x4_t err1 = vmlal_s16(err0, diff_hi, diff_hi); in av1_block_error_neon()
41 const int32x4_t sqcoeff1 = vmlal_s16(sqcoeff0, coeff_hi, coeff_hi); in av1_block_error_neon()
76 const int32x4_t err1 = vmlal_s16(err0, diff_hi, diff_hi); in av1_block_error_lp_neon()
/external/libaom/libaom/av1/common/arm/
Dwarp_plane_neon.c366 res_0 = vmlal_s16(res_0, src_0, fltr_0); in vertical_filter_neon()
377 res_1 = vmlal_s16(res_1, src_1, fltr_1); in vertical_filter_neon()
391 res_0 = vmlal_s16(res_0, src_0, fltr_0); in vertical_filter_neon()
402 res_1 = vmlal_s16(res_1, src_1, fltr_1); in vertical_filter_neon()
419 res_0 = vmlal_s16(res_0, src_0, fltr_0); in vertical_filter_neon()
430 res_1 = vmlal_s16(res_1, src_1, fltr_1); in vertical_filter_neon()
444 res_0 = vmlal_s16(res_0, src_0, fltr_0); in vertical_filter_neon()
455 res_1 = vmlal_s16(res_1, src_1, fltr_1); in vertical_filter_neon()
/external/webrtc/modules/audio_coding/codecs/isac/fix/source/
Dtransform_neon.c52 tmp0 = vmlal_s16(tmp0, vget_low_s16(tmpi), vget_low_s16(inre2)); in ComplexMulAndFindMaxNeon()
62 tmp2 = vmlal_s16(tmp2, vget_high_s16(tmpi), vget_high_s16(inre2)); in ComplexMulAndFindMaxNeon()
189 outi0 = vmlal_s16(outi0, tmpr, xi); in PostShiftAndSeparateNeon()
190 outr1 = vmlal_s16(outr1, tmpr, yi); in PostShiftAndSeparateNeon()
289 xr = vmlal_s16(xr, tmpi, inim_0); in TransformAndFindMaxNeon()
291 yr = vmlal_s16(yr, tmpi, inre_1); in TransformAndFindMaxNeon()
/external/libxaac/decoder/armv7/
Dixheaacd_fft32x32_ld.s714 VMLAL.S16 q12, d12, d0 @prod_1i=vmlal_s16(prod_1i, a_data1.val[2], cos_1)@
730 VMLAL.S16 q14, d16, d2 @prod_2i=vmlal_s16(prod_2i, a_data2.val[2], cos_2)@
735 VMLAL.S16 q11, d11, d0 @a_data1_r=vmlal_s16(a_data1_r, a_data1.val[1], cos_1)@
744 VMLAL.S16 q12, d11, d1 @a_data1_i=vmlal_s16(a_data1_i, a_data1.val[1], sin_1)@
745 VMLAL.S16 q12, d13, d0 @a_data1_i=vmlal_s16(a_data1_i, a_data1.val[3], cos_1)@
749 VMLAL.S16 q5, d20, d4 @prod_3i=vmlal_s16(prod_3i, a_data3.val[2], cos_3)@
752 VMLAL.S16 q13, d15, d2 @a_data2_r=vmlal_s16(a_data2_r, a_data2.val[1], cos_2)@
755 VMLAL.S16 q14, d15, d3 @a_data2_i=vmlal_s16(a_data2_i, a_data2.val[1], sin_2)@
756 VMLAL.S16 q14, d17, d2 @a_data2_i=vmlal_s16(a_data2_i, a_data2.val[3], cos_2)@
759 VMLAL.S16 q15, d19, d4 @a_data3_r=vmlal_s16(a_data3_r, a_data3.val[1], cos_3)@
[all …]
/external/libgav1/libgav1/src/dsp/arm/
Dmask_blend_neon.cc95 const int32x4_t weighted_combo_lo = vmlal_s16( in WriteMaskBlendLine4x2()
98 vmlal_s16(weighted_pred_0_hi, vget_high_s16(pred_mask_1), in WriteMaskBlendLine4x2()
225 vmlal_s16(weighted_pred_0_lo, vget_low_s16(pred_mask_1), in MaskBlend_NEON()
228 vmlal_s16(weighted_pred_0_hi, vget_high_s16(pred_mask_1), in MaskBlend_NEON()
Dwarp_neon.cc408 sum_low = vmlal_s16(sum_low, vget_low_s16(filter[k]), in Warp_NEON()
410 sum_high = vmlal_s16(sum_high, vget_high_s16(filter[k]), in Warp_NEON()
/external/webrtc/modules/video_processing/util/
Ddenoiser_filter_neon.cc54 vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff)); in VarianceNeonW8()
56 vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff)); in VarianceNeonW8()
/external/XNNPACK/src/qs8-dwconv/
Dunipass-neon-mul16.c.in64 …vacc${ABC[C:C+4]} = vmlal_s16(vacc${ABC[C:C+4]}, vget_low_s16(vi${K}x${ABC[C:C+8]}), vget_low_s16(…
65 …vacc${ABC[C+4:C+8]} = vmlal_s16(vacc${ABC[C+4:C+8]}, vget_high_s16(vi${K}x${ABC[C:C+8]}), vget_hig…
143 …vacc${ABC[0:4]} = vmlal_s16(vacc${ABC[0:4]}, vget_low_s16(vi${K}x${ABC[0:8]}), vget_low_s16(vk${K}…
144 …vacc${ABC[4:8]} = vmlal_s16(vacc${ABC[4:8]}, vget_high_s16(vi${K}x${ABC[0:8]}), vget_high_s16(vk${…
/external/libvpx/libvpx/vp9/encoder/arm/neon/
Dvp9_error_neon.c32 const int32x4_t err1 = vmlal_s16(err0, diff_hi, diff_hi); in vp9_block_error_fp_neon()

12