/external/XNNPACK/src/q8-igemm/ |
D | 8x8-neon.c |
    147  vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    148  … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    149  vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    150  … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    151  vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    152  … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    153  vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    154  … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    155  vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa4), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    156  … vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa4), 0); in xnn_q8_igemm_ukernel_8x8__neon()
    [all …]
|
D | 4x8-neon.c |
    99   vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_q8_igemm_ukernel_4x8__neon()
    100  … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_q8_igemm_ukernel_4x8__neon()
    101  vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_q8_igemm_ukernel_4x8__neon()
    102  … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_q8_igemm_ukernel_4x8__neon()
    103  vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_q8_igemm_ukernel_4x8__neon()
    104  … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_q8_igemm_ukernel_4x8__neon()
    105  vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_q8_igemm_ukernel_4x8__neon()
    106  … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_q8_igemm_ukernel_4x8__neon()
    113  vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa0), 1); in xnn_q8_igemm_ukernel_4x8__neon()
    114  … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 1); in xnn_q8_igemm_ukernel_4x8__neon()
    [all …]
|
/external/XNNPACK/src/q8-gemm/ |
D | 8x8-neon.c |
    121  vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    122  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    123  vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    124  vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    125  vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    126  vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    127  vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    128  vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    129  vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa4), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    130  vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa4), 0); in xnn_q8_gemm_ukernel_8x8__neon()
    [all …]
|
D | 4x8-neon.c |
    81   vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_q8_gemm_ukernel_4x8__neon()
    82   vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_q8_gemm_ukernel_4x8__neon()
    83   vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_q8_gemm_ukernel_4x8__neon()
    84   vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_q8_gemm_ukernel_4x8__neon()
    85   vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_q8_gemm_ukernel_4x8__neon()
    86   vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_q8_gemm_ukernel_4x8__neon()
    87   vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_q8_gemm_ukernel_4x8__neon()
    88   vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_q8_gemm_ukernel_4x8__neon()
    93   vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_q8_gemm_ukernel_4x8__neon()
    94   vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_q8_gemm_ukernel_4x8__neon()
    [all …]
|
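The q8-gemm and q8-igemm micro-kernels listed above all use the same idiom: the sign-extended weight vector is split with vget_low_s16/vget_high_s16, and each half is multiplied by one lane of the low half of an activation vector, widening into int32x4_t accumulators via vmlal_lane_s16. Below is a minimal sketch of one such step with hypothetical names and a single activation row; the real kernels unroll 4 or 8 rows and all 8 lanes.

    #include <arm_neon.h>

    /* Minimal sketch (hypothetical names): one k-step of a quantized GEMM row.
     * vxa holds 8 sign-extended activations, vxb holds 8 sign-extended weights
     * for output channels 0-7; acc_lo/acc_hi accumulate channels 0-3 and 4-7. */
    static inline void q8_gemm_step(int16x8_t vxa, int16x8_t vxb,
                                    int32x4_t *acc_lo, int32x4_t *acc_hi) {
      /* int16 * int16 -> int32 multiply-accumulate against activation lane 0. */
      *acc_lo = vmlal_lane_s16(*acc_lo, vget_low_s16(vxb), vget_low_s16(vxa), 0);
      *acc_hi = vmlal_lane_s16(*acc_hi, vget_high_s16(vxb), vget_low_s16(vxa), 0);
    }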
/external/XNNPACK/src/q8-dwconv/ |
D | up8x9-neon.c |
    76   vacc0_lo = vmlal_s16(vacc0_lo, vget_low_s16(vxk00), vget_low_s16(vxi00)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    78   vacc1_lo = vmlal_s16(vacc1_lo, vget_low_s16(vxk00), vget_low_s16(vxi01)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    80   vacc2_lo = vmlal_s16(vacc2_lo, vget_low_s16(vxk00), vget_low_s16(vxi02)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    91   vacc0_lo = vmlal_s16(vacc0_lo, vget_low_s16(vxk10), vget_low_s16(vxi10)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    93   vacc1_lo = vmlal_s16(vacc1_lo, vget_low_s16(vxk10), vget_low_s16(vxi11)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    95   vacc2_lo = vmlal_s16(vacc2_lo, vget_low_s16(vxk10), vget_low_s16(vxi12)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    106  vacc0_lo = vmlal_s16(vacc0_lo, vget_low_s16(vxk20), vget_low_s16(vxi20)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    108  vacc1_lo = vmlal_s16(vacc1_lo, vget_low_s16(vxk20), vget_low_s16(vxi21)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    110  vacc2_lo = vmlal_s16(vacc2_lo, vget_low_s16(vxk20), vget_low_s16(vxi22)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    117  vacc0_lo = vmlal_s16(vacc0_lo, vget_low_s16(vxk01), vget_low_s16(vxi01)); in xnn_q8_dwconv_ukernel_up8x9__neon()
    [all …]
|
/external/libhevc/common/arm/ |
D | ihevc_resi_trans_neon_32x32.c |
    137  vget_high_s16(diff_16[2][0]), vget_low_s16(diff_16[2][0])); in ihevc_resi_trans_32x32_neon()
    141  vget_high_s16(diff_16[3][0]), vget_low_s16(diff_16[3][0])); in ihevc_resi_trans_32x32_neon()
    162  vget_high_s16(diff_16[2][1]), vget_low_s16(diff_16[2][1])); in ihevc_resi_trans_32x32_neon()
    166  vget_high_s16(diff_16[3][1]), vget_low_s16(diff_16[3][1])); in ihevc_resi_trans_32x32_neon()
    239  e0_1 = vcombine_s16(vget_high_s16(e0_1), vget_low_s16(e0_1)); in ihevc_resi_trans_32x32_neon()
    244  e1_1 = vcombine_s16(vget_high_s16(e1_1), vget_low_s16(e1_1)); in ihevc_resi_trans_32x32_neon()
    253  vcombine_s16(vget_low_s16(ee0), vget_low_s16(ee1)); in ihevc_resi_trans_32x32_neon()
    265  vtrn_s32(vreinterpret_s32_s16(vget_low_s16(eee)), in ihevc_resi_trans_32x32_neon()
    274  vtrn_s16(vget_low_s16(eeee), vget_high_s16(eeee)); in ihevc_resi_trans_32x32_neon()
    286  vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_01_8), eeee_00); in ihevc_resi_trans_32x32_neon()
    [all …]
|
/external/libvpx/libvpx/vpx_dsp/arm/ |
D | sum_squares_neon.c |
    55   s0 = vmlal_s16(s0, vget_low_s16(s[0]), vget_low_s16(s[0])); in vpx_sum_squares_2d_i16_neon()
    56   s0 = vmlal_s16(s0, vget_low_s16(s[1]), vget_low_s16(s[1])); in vpx_sum_squares_2d_i16_neon()
    57   s0 = vmlal_s16(s0, vget_low_s16(s[2]), vget_low_s16(s[2])); in vpx_sum_squares_2d_i16_neon()
    58   s0 = vmlal_s16(s0, vget_low_s16(s[3]), vget_low_s16(s[3])); in vpx_sum_squares_2d_i16_neon()
    59   s0 = vmlal_s16(s0, vget_low_s16(s[4]), vget_low_s16(s[4])); in vpx_sum_squares_2d_i16_neon()
    60   s0 = vmlal_s16(s0, vget_low_s16(s[5]), vget_low_s16(s[5])); in vpx_sum_squares_2d_i16_neon()
    61   s0 = vmlal_s16(s0, vget_low_s16(s[6]), vget_low_s16(s[6])); in vpx_sum_squares_2d_i16_neon()
    62   s0 = vmlal_s16(s0, vget_low_s16(s[7]), vget_low_s16(s[7])); in vpx_sum_squares_2d_i16_neon()
|
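sum_squares_neon.c squares int16 lanes into 32-bit partial sums with vmlal_s16 on the low (and, elsewhere in the file, high) halves. Below is a minimal self-contained sketch of the same kind of reduction; the helper name is hypothetical and it assumes the element count is a multiple of 8.

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical helper: sum of squares of n int16 values, n a multiple of 8. */
    static int64_t sum_squares_s16(const int16_t *src, int n) {
      int64x2_t acc = vdupq_n_s64(0);
      for (int i = 0; i < n; i += 8) {
        const int16x8_t s = vld1q_s16(src + i);
        int32x4_t sq = vmull_s16(vget_low_s16(s), vget_low_s16(s));   /* lanes 0-3 squared */
        sq = vmlal_s16(sq, vget_high_s16(s), vget_high_s16(s));       /* + lanes 4-7 squared */
        acc = vpadalq_s32(acc, sq);                                   /* widen into 64-bit sums */
      }
      return vgetq_lane_s64(acc, 0) + vgetq_lane_s64(acc, 1);
    }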
D | fwd_txfm_neon.c |
    48   int32x4_t v_t0_lo = vaddl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); in vpx_fdct8x8_neon()
    50   int32x4_t v_t1_lo = vsubl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); in vpx_fdct8x8_neon()
    52   int32x4_t v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), cospi_24_64); in vpx_fdct8x8_neon()
    54   int32x4_t v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), cospi_24_64); in vpx_fdct8x8_neon()
    56   v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), cospi_8_64); in vpx_fdct8x8_neon()
    58   v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), cospi_8_64); in vpx_fdct8x8_neon()
    81   v_t0_lo = vmull_n_s16(vget_low_s16(v_x0), cospi_16_64); in vpx_fdct8x8_neon()
    83   v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), cospi_16_64); in vpx_fdct8x8_neon()
    99   v_t0_lo = vmull_n_s16(vget_low_s16(v_x3), cospi_4_64); in vpx_fdct8x8_neon()
    101  v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), cospi_28_64); in vpx_fdct8x8_neon()
    [all …]
|
D | idct_neon.h |
    128  t[0] = vaddl_s16(vget_low_s16(a), vget_low_s16(b)); in add_multiply_shift_and_narrow_s16()
    140  t[0] = vsubl_s16(vget_low_s16(a), vget_low_s16(b)); in sub_multiply_shift_and_narrow_s16()
    153  t[0] = vmull_n_s16(vget_low_s16(a), a_const); in multiply_accumulate_shift_and_narrow_s16()
    155  t[0] = vmlal_n_s16(t[0], vget_low_s16(b), b_const); in multiply_accumulate_shift_and_narrow_s16()
    297  b[0] = vget_low_s16(a[0]); in idct4x4_16_kernel_bd8()
    299  b[2] = vget_low_s16(a[1]); in idct4x4_16_kernel_bd8()
    401  t32[2] = vmull_lane_s16(vget_low_s16(step2[6]), cospis0, 2); in idct8x8_12_pass2_bd8()
    403  t32[0] = vmlsl_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); in idct8x8_12_pass2_bd8()
    405  t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); in idct8x8_12_pass2_bd8()
    430  input1l = vget_low_s16(io[1]); in idct8x8_64_1d_bd8_kernel()
    [all …]
|
D | variance_neon.c |
    55   sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), in variance_neon_w4x4()
    56   vget_low_s16(diff_lo_s16)); in variance_neon_w4x4()
    60   sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), in variance_neon_w4x4()
    61   vget_low_s16(diff_hi_s16)); in variance_neon_w4x4()
    104  sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), in variance_neon_w16()
    105  vget_low_s16(diff_lo_s16)); in variance_neon_w16()
    109  sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), in variance_neon_w16()
    110  vget_low_s16(diff_hi_s16)); in variance_neon_w16()
    147  sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_0_s16), in variance_neon_w8x2()
    148  vget_low_s16(diff_0_s16)); in variance_neon_w8x2()
    [all …]
|
D | vpx_scaled_convolve8_neon.c |
    41   const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3); in scaledconvolve_horiz_w4()
    54   t[0] = vget_low_s16(ss[0]); in scaledconvolve_horiz_w4()
    55   t[1] = vget_low_s16(ss[1]); in scaledconvolve_horiz_w4()
    56   t[2] = vget_low_s16(ss[2]); in scaledconvolve_horiz_w4()
    57   t[3] = vget_low_s16(ss[3]); in scaledconvolve_horiz_w4()
    170  const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3); in scaledconvolve_vert_w4()
    177  t[0] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[0]))); in scaledconvolve_vert_w4()
    178  t[1] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[1]))); in scaledconvolve_vert_w4()
    179  t[2] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[2]))); in scaledconvolve_vert_w4()
    180  t[3] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[3]))); in scaledconvolve_vert_w4()
    [all …]
|
/external/libvpx/libvpx/vp8/common/arm/neon/ |
D | iwalsh_neon.c |
    26   d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); in vp8_short_inv_walsh4x4_neon()
    27   d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); in vp8_short_inv_walsh4x4_neon()
    28   d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); in vp8_short_inv_walsh4x4_neon()
    29   d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); in vp8_short_inv_walsh4x4_neon()
    37   v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)), in vp8_short_inv_walsh4x4_neon()
    38   vreinterpret_s32_s16(vget_low_s16(q1s16))); in vp8_short_inv_walsh4x4_neon()
    66   vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0); in vp8_short_inv_walsh4x4_neon()
    70   vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0); in vp8_short_inv_walsh4x4_neon()
    75   vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1); in vp8_short_inv_walsh4x4_neon()
    79   vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1); in vp8_short_inv_walsh4x4_neon()
    [all …]
|
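iwalsh_neon.c keeps the four rows of the 4x4 block in the low/high halves of two int16x8_t registers: vget_low_s16/vget_high_s16 select rows for the butterfly adds and subtracts, and vtrn_s32/vtrn_s16 transpose the block between the two passes. Below is a minimal sketch of just that transpose step, with hypothetical names and the rows held as four int16x4_t values.

    #include <arm_neon.h>

    /* Minimal sketch (hypothetical names): transpose a 4x4 block of int16 held
     * as four int16x4_t rows, using the vtrn_s32 + vtrn_s16 idiom seen between
     * the row and column passes of the inverse Walsh-Hadamard transform. */
    static inline void transpose_4x4_s16(int16x4_t r[4]) {
      const int32x2x2_t t02 = vtrn_s32(vreinterpret_s32_s16(r[0]),
                                       vreinterpret_s32_s16(r[2]));
      const int32x2x2_t t13 = vtrn_s32(vreinterpret_s32_s16(r[1]),
                                       vreinterpret_s32_s16(r[3]));
      const int16x4x2_t u0 = vtrn_s16(vreinterpret_s16_s32(t02.val[0]),
                                      vreinterpret_s16_s32(t13.val[0]));
      const int16x4x2_t u1 = vtrn_s16(vreinterpret_s16_s32(t02.val[1]),
                                      vreinterpret_s16_s32(t13.val[1]));
      r[0] = u0.val[0];  /* column 0 of the original block */
      r[1] = u0.val[1];  /* column 1 */
      r[2] = u1.val[0];  /* column 2 */
      r[3] = u1.val[1];  /* column 3 */
    }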
D | shortidct4x4llm_neon.c |
    46   d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 in vp8_short_idct4x4llm_neon()
    47   d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 in vp8_short_idct4x4llm_neon()
    53   d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 in vp8_short_idct4x4llm_neon()
    54   d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 in vp8_short_idct4x4llm_neon()
    75   d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 in vp8_short_idct4x4llm_neon()
    76   d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 in vp8_short_idct4x4llm_neon()
    82   d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 in vp8_short_idct4x4llm_neon()
    83   d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 in vp8_short_idct4x4llm_neon()
|
D | dequant_idct_neon.c |
    61   d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2)); in vp8_dequant_idct_add_neon()
    62   d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2)); in vp8_dequant_idct_add_neon()
    73   d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); in vp8_dequant_idct_add_neon()
    74   d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); in vp8_dequant_idct_add_neon()
    101  d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); in vp8_dequant_idct_add_neon()
    102  d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); in vp8_dequant_idct_add_neon()
|
/external/libvpx/libvpx/vp9/common/arm/neon/ |
D | vp9_iht_neon.h |
    30   x[0] = vget_low_s16(io[0]); in iadst4()
    31   x[1] = vget_low_s16(io[1]); in iadst4()
    63   const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0); in iadst_half_butterfly_neon()
    65   const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0); in iadst_half_butterfly_neon()
    81   const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 1); in iadst_half_butterfly_neg_neon()
    83   const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 1); in iadst_half_butterfly_neg_neon()
    99   const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 0); in iadst_half_butterfly_pos_neon()
    101  const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 0); in iadst_half_butterfly_pos_neon()
    118  s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0); in iadst_butterfly_lane_0_1_neon()
    120  s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1); in iadst_butterfly_lane_0_1_neon()
    [all …]
|
D | vp9_iht4x4_add_neon.c |
    38   a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); in vp9_iht4x4_16_add_neon()
    41   a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); in vp9_iht4x4_16_add_neon()
    46   a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); in vp9_iht4x4_16_add_neon()
    55   a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); in vp9_iht4x4_16_add_neon()
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/ |
D | depthwise_conv.h |
    69   acc[0].val[i] = vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]),
    70   vget_low_s16(input_dup2.val[i]));
    116  acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
    119  acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
    141  acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
    178  acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(filter),
    179  vget_low_s16(input_dup2.val[i]));
    203  const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
    208  acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), input_dup2.val[0]);
    246  const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
    [all …]
|
/external/tensorflow/tensorflow/lite/kernels/ |
D | cpu_backend_gemm_custom_gemv.h |
    388  acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0.val[0]),
    389  vget_low_s16(input_val.val[0]));
    390  acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1.val[0]),
    391  vget_low_s16(input_val.val[0]));
    392  acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2.val[0]),
    393  vget_low_s16(input_val.val[0]));
    394  acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3.val[0]),
    395  vget_low_s16(input_val.val[0]));
    396  acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0.val[1]),
    397  vget_low_s16(input_val.val[1]));
    [all …]
|
/external/libaom/libaom/aom_dsp/arm/ |
D | fwd_txfm_neon.c |
    42   const int16x4_t s_0 = vget_low_s16(s_01); in aom_fdct4x4_helper()
    45   const int16x4_t s_3 = vget_low_s16(s_32); in aom_fdct4x4_helper()
    139  int32x4_t v_t0_lo = vaddl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); in aom_fdct8x8_neon()
    141  int32x4_t v_t1_lo = vsubl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); in aom_fdct8x8_neon()
    143  int32x4_t v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_24_64); in aom_fdct8x8_neon()
    145  int32x4_t v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_24_64); in aom_fdct8x8_neon()
    147  v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), (int16_t)cospi_8_64); in aom_fdct8x8_neon()
    149  v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), (int16_t)cospi_8_64); in aom_fdct8x8_neon()
    172  v_t0_lo = vmull_n_s16(vget_low_s16(v_x0), (int16_t)cospi_16_64); in aom_fdct8x8_neon()
    174  v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_16_64); in aom_fdct8x8_neon()
    [all …]
|
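Both the libvpx and libaom fdct8x8 kernels widen before multiplying: vaddl_s16/vsubl_s16 build 32-bit butterflies from the low halves, then vmull_n_s16/vmlal_n_s16/vmlsl_n_s16 fold in the cosine constants. Below is a minimal sketch of one such stage for the low halves only; the names are hypothetical, c_a and c_b stand in for the cospi constants, and the real kernels repeat the same work on the high halves before rounding and narrowing.

    #include <arm_neon.h>

    /* One widening rotate/butterfly stage over the low halves of two int16x8_t
     * inputs: t2 = x2*c_a + x3*c_b and t3 = x3*c_a - x2*c_b, kept in 32 bits. */
    static inline void fdct_stage_lo(int16x8_t x2, int16x8_t x3,
                                     int16_t c_a, int16_t c_b,
                                     int32x4_t *t2_lo, int32x4_t *t3_lo) {
      *t2_lo = vmull_n_s16(vget_low_s16(x2), c_a);          /* x2 * c_a           */
      *t3_lo = vmull_n_s16(vget_low_s16(x3), c_a);          /* x3 * c_a           */
      *t2_lo = vmlal_n_s16(*t2_lo, vget_low_s16(x3), c_b);  /* ... + x3 * c_b     */
      *t3_lo = vmlsl_n_s16(*t3_lo, vget_low_s16(x2), c_b);  /* ... - x2 * c_b     */
    }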
/external/XNNPACK/src/q8-vadd/ |
D | neon.c |
    49   int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    50   int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    51   int32x4_t vacc2_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa2)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    52   int32x4_t vacc3_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa3)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    58   vacc0_lo = vmlaq_s32(vacc0_lo, vmovl_s16(vget_low_s16(vxb0)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
    59   vacc1_lo = vmlaq_s32(vacc1_lo, vmovl_s16(vget_low_s16(vxb1)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
    60   vacc2_lo = vmlaq_s32(vacc2_lo, vmovl_s16(vget_low_s16(vxb2)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
    61   vacc3_lo = vmlaq_s32(vacc3_lo, vmovl_s16(vget_low_s16(vxb3)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
    115  int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    116  int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
    [all …]
|
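xnn_q8_vadd_ukernel__neon widens each int16 half all the way to 32 bits with vmovl_s16 so that a per-operand requantization multiplier can be applied with plain 32-bit multiply and multiply-accumulate. Below is a minimal sketch of that step for one low half; the names are hypothetical, and the real kernel also processes the high halves and then rounds, shifts, and narrows back to uint8.

    #include <arm_neon.h>

    /* One low-half step of a quantized add: widen to 32 bits, scale each operand. */
    static inline int32x4_t q8_vadd_lo(int16x8_t vxa, int16x8_t vxb,
                                       int32x4_t va_multiplier,
                                       int32x4_t vb_multiplier) {
      int32x4_t vacc_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa)), va_multiplier);
      vacc_lo = vmlaq_s32(vacc_lo, vmovl_s16(vget_low_s16(vxb)), vb_multiplier);
      return vacc_lo;  /* still needs rounding, shifting, and narrowing downstream */
    }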
/external/libaom/libaom/av1/encoder/arm/neon/ |
D | quantize_neon.c |
    60   vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); in av1_quantize_fp_neon()
    86   vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); in av1_quantize_fp_neon()
    105  const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210), in av1_quantize_fp_neon()
    125  vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); in calculate_dqcoeff_lp_and_store()
    158  vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); in av1_quantize_lp_neon()
    183  vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); in av1_quantize_lp_neon()
    201  const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210), in av1_quantize_lp_neon()
|
/external/libaom/libaom/av1/common/arm/ |
D | warp_plane_neon.c |
    217  *res = vpadd_s16(vget_low_s16(pix_0), vget_high_s16(pix_0)); in convolve()
    273  b0 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f0)), in horizontal_filter_neon()
    274  vreinterpret_s32_s16(vget_low_s16(f2))); in horizontal_filter_neon()
    275  b1 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f4)), in horizontal_filter_neon()
    276  vreinterpret_s32_s16(vget_low_s16(f6))); in horizontal_filter_neon()
    279  b0 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f1)), in horizontal_filter_neon()
    280  vreinterpret_s32_s16(vget_low_s16(f3))); in horizontal_filter_neon()
    281  b1 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f5)), in horizontal_filter_neon()
    282  vreinterpret_s32_s16(vget_low_s16(f7))); in horizontal_filter_neon()
    359  src_0 = vget_low_s16(vreinterpretq_s16_s32(c0.val[0])); in vertical_filter_neon()
    [all …]
|
/external/webrtc/webrtc/common_audio/signal_processing/ |
D | downsample_fast_neon.c |
    55   int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]); in WebRtcSpl_DownsampleFastNeon()
    56   int16x4_t in16x4_1 = vget_low_s16(in16x8x2.val[1]); in WebRtcSpl_DownsampleFastNeon()
    76   int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]); in WebRtcSpl_DownsampleFastNeon()
    95   int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]); in WebRtcSpl_DownsampleFastNeon()
    122  int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]); in WebRtcSpl_DownsampleFastNeon()
    123  int16x4_t in16x4_2 = vget_low_s16(in16x8x4.val[1]); in WebRtcSpl_DownsampleFastNeon()
    124  int16x4_t in16x4_4 = vget_low_s16(in16x8x4.val[2]); in WebRtcSpl_DownsampleFastNeon()
    125  int16x4_t in16x4_6 = vget_low_s16(in16x8x4.val[3]); in WebRtcSpl_DownsampleFastNeon()
    151  int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]); in WebRtcSpl_DownsampleFastNeon()
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | depthwiseconv_uint8.h |
    68   acc[0].val[i] = vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]),
    69   vget_low_s16(input_dup2.val[i]));
    116  acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
    119  acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
    141  acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
    179  acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(filter),
    180  vget_low_s16(input_dup2.val[i]));
    210  acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), input_dup2.val[0]);
    253  acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
    255  acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
    [all …]
|
/external/webp/src/dsp/ |
D | enc_neon.c |
    106  const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]), in TransformPass_NEON()
    107  vget_low_s16(rows->val[1])); // in0 + in8 in TransformPass_NEON()
    108  const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]), in TransformPass_NEON()
    109  vget_low_s16(rows->val[1])); // in0 - in8 in TransformPass_NEON()
    112  const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0)); in TransformPass_NEON()
    113  const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1)); in TransformPass_NEON()
    118  const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp)); in TransformPass_NEON()
    304  const int16x4_t D0 = vget_low_s16(D0D1); in FTransform_NEON()
    306  const int16x4_t D2 = vget_low_s16(D2D3); in FTransform_NEON()
    316  const int16x4_t tmp0 = vadd_s16(vget_low_s16(a0a1_2), in FTransform_NEON()
    [all …]
|