/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up32x9-minmax-neon-mul16.c | 108 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi0x01234567), vget_low_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() 109 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() 110 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi0x89ABCDEF), vget_low_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() 111 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi0x89ABCDEF), vget_high_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() 112 vaccGHIJ = vmlal_s16(vaccGHIJ, vget_low_s16(vi0xGHIJKLMN), vget_low_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() 113 vaccKLMN = vmlal_s16(vaccKLMN, vget_high_s16(vi0xGHIJKLMN), vget_high_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() 114 vaccOPQR = vmlal_s16(vaccOPQR, vget_low_s16(vi0xOPQRSTUV), vget_low_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() 115 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() 126 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi1x01234567), vget_low_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() 127 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() [all …]
|
D | up24x9-minmax-neon-mul16.c | 104 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi0x01234567), vget_low_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() 105 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() 106 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi0x89ABCDEF), vget_low_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() 107 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi0x89ABCDEF), vget_high_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() 108 vaccGHIJ = vmlal_s16(vaccGHIJ, vget_low_s16(vi0xGHIJKLMN), vget_low_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() 109 vaccKLMN = vmlal_s16(vaccKLMN, vget_high_s16(vi0xGHIJKLMN), vget_high_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() 118 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi1x01234567), vget_low_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() 119 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() 120 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi1x89ABCDEF), vget_low_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() 121 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi1x89ABCDEF), vget_high_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() [all …]
|
D | up16x9-minmax-neon-mul16.c | 100 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi0x01234567), vget_low_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() 101 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() 102 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi0x89ABCDEF), vget_low_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() 103 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi0x89ABCDEF), vget_high_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() 110 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi1x01234567), vget_low_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() 111 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() 112 vacc89AB = vmlal_s16(vacc89AB, vget_low_s16(vi1x89ABCDEF), vget_low_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() 113 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi1x89ABCDEF), vget_high_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() 120 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi2x01234567), vget_low_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() 121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() [all …]
|
D | up8x9-minmax-neon-mul16.c | 96 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi0x01234567), vget_low_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() 97 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() 102 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi1x01234567), vget_low_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() 103 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() 108 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi2x01234567), vget_low_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() 109 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() 114 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi3x01234567), vget_low_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() 115 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() 120 vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi4x01234567), vget_low_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() 121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() [all …]
|
/external/libvpx/libvpx/vpx_dsp/arm/ |
D | sum_squares_neon.c | 29 s0 = vmlal_s16(s0, s[1], s[1]); in vpx_sum_squares_2d_i16_neon() 30 s0 = vmlal_s16(s0, s[2], s[2]); in vpx_sum_squares_2d_i16_neon() 31 s0 = vmlal_s16(s0, s[3], s[3]); in vpx_sum_squares_2d_i16_neon() 55 s0 = vmlal_s16(s0, vget_low_s16(s[0]), vget_low_s16(s[0])); in vpx_sum_squares_2d_i16_neon() 56 s0 = vmlal_s16(s0, vget_low_s16(s[1]), vget_low_s16(s[1])); in vpx_sum_squares_2d_i16_neon() 57 s0 = vmlal_s16(s0, vget_low_s16(s[2]), vget_low_s16(s[2])); in vpx_sum_squares_2d_i16_neon() 58 s0 = vmlal_s16(s0, vget_low_s16(s[3]), vget_low_s16(s[3])); in vpx_sum_squares_2d_i16_neon() 59 s0 = vmlal_s16(s0, vget_low_s16(s[4]), vget_low_s16(s[4])); in vpx_sum_squares_2d_i16_neon() 60 s0 = vmlal_s16(s0, vget_low_s16(s[5]), vget_low_s16(s[5])); in vpx_sum_squares_2d_i16_neon() 61 s0 = vmlal_s16(s0, vget_low_s16(s[6]), vget_low_s16(s[6])); in vpx_sum_squares_2d_i16_neon() [all …]
|
D | variance_neon.c | 55 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), in variance_neon_w4x4() 57 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_high_s16(diff_lo_s16), in variance_neon_w4x4() 60 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), in variance_neon_w4x4() 62 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16), in variance_neon_w4x4() 104 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), in variance_neon_w16() 106 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_high_s16(diff_lo_s16), in variance_neon_w16() 109 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), in variance_neon_w16() 111 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16), in variance_neon_w16() 147 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_0_s16), in variance_neon_w8x2() 149 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_1_s16), in variance_neon_w8x2() [all …]
|
D | avg_neon.c | 162 sse = vmlal_s16(sse, diff_lo, diff_lo); // dynamic range 26 bits. in vpx_vector_var_neon() 163 sse = vmlal_s16(sse, diff_hi, diff_hi); in vpx_vector_var_neon()
|
/external/libhevc/common/arm/ |
D | ihevc_resi_trans_neon_32x32.c | 287 a[0].val[0] = vmlal_s16( in ihevc_resi_trans_32x32_neon() 291 a[0].val[1] = vmlal_s16( in ihevc_resi_trans_32x32_neon() 381 a[14].val[0] = vmlal_s16(a[14].val[0], in ihevc_resi_trans_32x32_neon() 383 a[10].val[0] = vmlal_s16(a[10].val[0], in ihevc_resi_trans_32x32_neon() 385 a[6].val[0] = vmlal_s16(a[6].val[0], in ihevc_resi_trans_32x32_neon() 387 a[2].val[0] = vmlal_s16(a[2].val[0], in ihevc_resi_trans_32x32_neon() 399 a[14].val[1] = vmlal_s16(a[14].val[1], in ihevc_resi_trans_32x32_neon() 401 a[10].val[1] = vmlal_s16(a[10].val[1], in ihevc_resi_trans_32x32_neon() 403 a[6].val[1] = vmlal_s16(a[6].val[1], in ihevc_resi_trans_32x32_neon() 405 a[2].val[1] = vmlal_s16(a[2].val[1], in ihevc_resi_trans_32x32_neon() [all …]
|
/external/XNNPACK/src/qu8-dwconv/ |
D | up8x9-minmax-neon.c | 91 vaccX1_lo = vmlal_s16(vaccX1_lo, vget_low_s16(vxk1), vget_low_s16(vxi1)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() 92 vaccX1_hi = vmlal_s16(vaccX1_hi, vget_high_s16(vxk1), vget_high_s16(vxi1)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() 98 vaccX0_lo = vmlal_s16(vaccX0_lo, vget_low_s16(vxk2), vget_low_s16(vxi2)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() 99 vaccX0_hi = vmlal_s16(vaccX0_hi, vget_high_s16(vxk2), vget_high_s16(vxi2)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() 105 vaccX1_lo = vmlal_s16(vaccX1_lo, vget_low_s16(vxk3), vget_low_s16(vxi3)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() 106 vaccX1_hi = vmlal_s16(vaccX1_hi, vget_high_s16(vxk3), vget_high_s16(vxi3)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() 112 vaccX0_lo = vmlal_s16(vaccX0_lo, vget_low_s16(vxk4), vget_low_s16(vxi4)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() 113 vaccX0_hi = vmlal_s16(vaccX0_hi, vget_high_s16(vxk4), vget_high_s16(vxi4)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() 119 vaccX1_lo = vmlal_s16(vaccX1_lo, vget_low_s16(vxk5), vget_low_s16(vxi5)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() 120 vaccX1_hi = vmlal_s16(vaccX1_hi, vget_high_s16(vxk5), vget_high_s16(vxi5)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon() [all …]
|
/external/libvpx/libvpx/vp8/encoder/arm/neon/ |
D | shortfdct_neon.c | 61 q9s32 = vmlal_s16(q9s32, d7s16, d16s16); in vp8_short_fdct4x4_neon() 62 q10s32 = vmlal_s16(q10s32, d7s16, d17s16); in vp8_short_fdct4x4_neon() 63 q9s32 = vmlal_s16(q9s32, d6s16, d17s16); in vp8_short_fdct4x4_neon() 88 q11s32 = vmlal_s16(q11s32, d7s16, d16s16); in vp8_short_fdct4x4_neon() 89 q12s32 = vmlal_s16(q12s32, d7s16, d17s16); in vp8_short_fdct4x4_neon() 97 q11s32 = vmlal_s16(q11s32, d6s16, d17s16); in vp8_short_fdct4x4_neon() 169 q9s32 = vmlal_s16(q9s32, d28s16, d16s16); in vp8_short_fdct8x4_neon() 170 q10s32 = vmlal_s16(q10s32, d28s16, d17s16); in vp8_short_fdct8x4_neon() 171 q11s32 = vmlal_s16(q11s32, d29s16, d16s16); in vp8_short_fdct8x4_neon() 172 q12s32 = vmlal_s16(q12s32, d29s16, d17s16); in vp8_short_fdct8x4_neon() [all …]
|
/external/tensorflow/tensorflow/lite/kernels/ |
D | cpu_backend_gemm_custom_gemv.h | 393 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0.val[0]), 395 acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1.val[0]), 397 acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2.val[0]), 399 acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3.val[0]), 401 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0.val[1]), 403 acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1.val[1]), 405 acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2.val[1]), 407 acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3.val[1]), 409 acc0 = vmlal_s16(acc0, vget_high_s16(filter_val_0.val[0]), 411 acc1 = vmlal_s16(acc1, vget_high_s16(filter_val_1.val[0]), [all …]
|
/external/libaom/libaom/aom_dsp/arm/ |
D | variance_neon.c | 37 vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff)); in variance_neon_w8() 39 vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff)); in variance_neon_w8() 189 q9s32 = vmlal_s16(q9s32, d22s16, d22s16); in aom_variance16x8_neon() 190 q10s32 = vmlal_s16(q10s32, d23s16, d23s16); in aom_variance16x8_neon() 195 q9s32 = vmlal_s16(q9s32, d24s16, d24s16); in aom_variance16x8_neon() 196 q10s32 = vmlal_s16(q10s32, d25s16, d25s16); in aom_variance16x8_neon() 201 q9s32 = vmlal_s16(q9s32, d26s16, d26s16); in aom_variance16x8_neon() 202 q10s32 = vmlal_s16(q10s32, d27s16, d27s16); in aom_variance16x8_neon() 207 q9s32 = vmlal_s16(q9s32, d28s16, d28s16); in aom_variance16x8_neon() 208 q10s32 = vmlal_s16(q10s32, d29s16, d29s16); in aom_variance16x8_neon() [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/ |
D | depthwise_conv.h | 77 acc[0].val[i] = vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), 79 acc[1].val[i] = vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), 124 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0])); 126 vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input[0])); 127 acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1])); 129 vmlal_s16(acc[3], vget_high_s16(filter), vget_high_s16(input[1])); 149 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input)); 150 acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input)); 186 acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), 188 acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | depthwiseconv_uint8.h | 68 acc[0].val[i] = vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), 70 acc[1].val[i] = vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), 116 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0])); 118 vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input[0])); 119 acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1])); 121 vmlal_s16(acc[3], vget_high_s16(filter), vget_high_s16(input[1])); 141 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input)); 142 acc[1] = vmlal_s16(acc[1], vget_high_s16(filter), vget_high_s16(input)); 179 acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), 181 acc[2 * i + 1] = vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), [all …]
|
D | legacy_optimized_ops.h | 851 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0_0), in LegacyFullyConnectedAsGEMVWorkerImpl() 853 acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1_0), in LegacyFullyConnectedAsGEMVWorkerImpl() 855 acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2_0), in LegacyFullyConnectedAsGEMVWorkerImpl() 857 acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3_0), in LegacyFullyConnectedAsGEMVWorkerImpl() 859 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0_1), in LegacyFullyConnectedAsGEMVWorkerImpl() 861 acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1_1), in LegacyFullyConnectedAsGEMVWorkerImpl() 863 acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2_1), in LegacyFullyConnectedAsGEMVWorkerImpl() 865 acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3_1), in LegacyFullyConnectedAsGEMVWorkerImpl() 867 acc0 = vmlal_s16(acc0, vget_high_s16(filter_val_0_0), in LegacyFullyConnectedAsGEMVWorkerImpl() 869 acc1 = vmlal_s16(acc1, vget_high_s16(filter_val_1_0), in LegacyFullyConnectedAsGEMVWorkerImpl() [all …]
|
/external/libopus/celt/arm/ |
D | pitch_neon_intr.c | 49 xy_s32x4 = vmlal_s16(xy_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y_s16x8)); in celt_inner_prod_neon() 50 xy_s32x4 = vmlal_s16(xy_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y_s16x8)); in celt_inner_prod_neon() 56 xy_s32x4 = vmlal_s16(xy_s32x4, x_s16x4, y_s16x4); in celt_inner_prod_neon() 90 xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y01_s16x8)); in dual_inner_prod_neon() 91 xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y02_s16x8)); in dual_inner_prod_neon() 92 xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y01_s16x8)); in dual_inner_prod_neon() 93 xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y02_s16x8)); in dual_inner_prod_neon() 100 xy01_s32x4 = vmlal_s16(xy01_s32x4, x_s16x4, y01_s16x4); in dual_inner_prod_neon() 101 xy02_s32x4 = vmlal_s16(xy02_s32x4, x_s16x4, y02_s16x4); in dual_inner_prod_neon()
|
/external/libaom/libaom/av1/encoder/arm/neon/ |
D | av1_error_neon.c | 34 const int32x4_t err1 = vmlal_s16(err0, diff_hi, diff_hi); in av1_block_error_neon() 41 const int32x4_t sqcoeff1 = vmlal_s16(sqcoeff0, coeff_hi, coeff_hi); in av1_block_error_neon() 76 const int32x4_t err1 = vmlal_s16(err0, diff_hi, diff_hi); in av1_block_error_lp_neon()
|
/external/libaom/libaom/av1/common/arm/ |
D | warp_plane_neon.c | 366 res_0 = vmlal_s16(res_0, src_0, fltr_0); in vertical_filter_neon() 377 res_1 = vmlal_s16(res_1, src_1, fltr_1); in vertical_filter_neon() 391 res_0 = vmlal_s16(res_0, src_0, fltr_0); in vertical_filter_neon() 402 res_1 = vmlal_s16(res_1, src_1, fltr_1); in vertical_filter_neon() 419 res_0 = vmlal_s16(res_0, src_0, fltr_0); in vertical_filter_neon() 430 res_1 = vmlal_s16(res_1, src_1, fltr_1); in vertical_filter_neon() 444 res_0 = vmlal_s16(res_0, src_0, fltr_0); in vertical_filter_neon() 455 res_1 = vmlal_s16(res_1, src_1, fltr_1); in vertical_filter_neon()
|
/external/webrtc/modules/audio_coding/codecs/isac/fix/source/ |
D | transform_neon.c | 52 tmp0 = vmlal_s16(tmp0, vget_low_s16(tmpi), vget_low_s16(inre2)); in ComplexMulAndFindMaxNeon() 62 tmp2 = vmlal_s16(tmp2, vget_high_s16(tmpi), vget_high_s16(inre2)); in ComplexMulAndFindMaxNeon() 189 outi0 = vmlal_s16(outi0, tmpr, xi); in PostShiftAndSeparateNeon() 190 outr1 = vmlal_s16(outr1, tmpr, yi); in PostShiftAndSeparateNeon() 289 xr = vmlal_s16(xr, tmpi, inim_0); in TransformAndFindMaxNeon() 291 yr = vmlal_s16(yr, tmpi, inre_1); in TransformAndFindMaxNeon()
|
/external/libxaac/decoder/armv7/ |
D | ixheaacd_fft32x32_ld.s | 714 VMLAL.S16 q12, d12, d0 @prod_1i=vmlal_s16(prod_1i, a_data1.val[2], cos_1)@ 730 VMLAL.S16 q14, d16, d2 @prod_2i=vmlal_s16(prod_2i, a_data2.val[2], cos_2)@ 735 VMLAL.S16 q11, d11, d0 @a_data1_r=vmlal_s16(a_data1_r, a_data1.val[1], cos_1)@ 744 VMLAL.S16 q12, d11, d1 @a_data1_i=vmlal_s16(a_data1_i, a_data1.val[1], sin_1)@ 745 VMLAL.S16 q12, d13, d0 @a_data1_i=vmlal_s16(a_data1_i, a_data1.val[3], cos_1)@ 749 VMLAL.S16 q5, d20, d4 @prod_3i=vmlal_s16(prod_3i, a_data3.val[2], cos_3)@ 752 VMLAL.S16 q13, d15, d2 @a_data2_r=vmlal_s16(a_data2_r, a_data2.val[1], cos_2)@ 755 VMLAL.S16 q14, d15, d3 @a_data2_i=vmlal_s16(a_data2_i, a_data2.val[1], sin_2)@ 756 VMLAL.S16 q14, d17, d2 @a_data2_i=vmlal_s16(a_data2_i, a_data2.val[3], cos_2)@ 759 VMLAL.S16 q15, d19, d4 @a_data3_r=vmlal_s16(a_data3_r, a_data3.val[1], cos_3)@ [all …]
|
/external/libgav1/libgav1/src/dsp/arm/ |
D | mask_blend_neon.cc | 95 const int32x4_t weighted_combo_lo = vmlal_s16( in WriteMaskBlendLine4x2() 98 vmlal_s16(weighted_pred_0_hi, vget_high_s16(pred_mask_1), in WriteMaskBlendLine4x2() 225 vmlal_s16(weighted_pred_0_lo, vget_low_s16(pred_mask_1), in MaskBlend_NEON() 228 vmlal_s16(weighted_pred_0_hi, vget_high_s16(pred_mask_1), in MaskBlend_NEON()
|
D | warp_neon.cc | 408 sum_low = vmlal_s16(sum_low, vget_low_s16(filter[k]), in Warp_NEON() 410 sum_high = vmlal_s16(sum_high, vget_high_s16(filter[k]), in Warp_NEON()
|
/external/webrtc/modules/video_processing/util/ |
D | denoiser_filter_neon.cc | 54 vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff)); in VarianceNeonW8() 56 vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff)); in VarianceNeonW8()
|
/external/XNNPACK/src/qs8-dwconv/ |
D | unipass-neon-mul16.c.in | 64 …vacc${ABC[C:C+4]} = vmlal_s16(vacc${ABC[C:C+4]}, vget_low_s16(vi${K}x${ABC[C:C+8]}), vget_low_s16(… 65 …vacc${ABC[C+4:C+8]} = vmlal_s16(vacc${ABC[C+4:C+8]}, vget_high_s16(vi${K}x${ABC[C:C+8]}), vget_hig… 143 …vacc${ABC[0:4]} = vmlal_s16(vacc${ABC[0:4]}, vget_low_s16(vi${K}x${ABC[0:8]}), vget_low_s16(vk${K}… 144 …vacc${ABC[4:8]} = vmlal_s16(vacc${ABC[4:8]}, vget_high_s16(vi${K}x${ABC[0:8]}), vget_high_s16(vk${…
|
/external/libvpx/libvpx/vp9/encoder/arm/neon/ |
D | vp9_error_neon.c | 32 const int32x4_t err1 = vmlal_s16(err0, diff_hi, diff_hi); in vp9_block_error_fp_neon()
|