
Searched refs:vget_low_s16 (Results 1 – 25 of 113) sorted by relevance


/external/XNNPACK/src/q8-igemm/
8x8-neon.c
147 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_q8_igemm_ukernel_8x8__neon()
148 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_q8_igemm_ukernel_8x8__neon()
149 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_q8_igemm_ukernel_8x8__neon()
150 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_q8_igemm_ukernel_8x8__neon()
151 vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_q8_igemm_ukernel_8x8__neon()
152 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_q8_igemm_ukernel_8x8__neon()
153 vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_q8_igemm_ukernel_8x8__neon()
154 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_q8_igemm_ukernel_8x8__neon()
155 vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa4), 0); in xnn_q8_igemm_ukernel_8x8__neon()
156 … vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa4), 0); in xnn_q8_igemm_ukernel_8x8__neon()
[all …]
4x8-neon.c
99 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_q8_igemm_ukernel_4x8__neon()
100 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_q8_igemm_ukernel_4x8__neon()
101 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_q8_igemm_ukernel_4x8__neon()
102 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_q8_igemm_ukernel_4x8__neon()
103 vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_q8_igemm_ukernel_4x8__neon()
104 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_q8_igemm_ukernel_4x8__neon()
105 vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_q8_igemm_ukernel_4x8__neon()
106 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_q8_igemm_ukernel_4x8__neon()
113 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567), vget_low_s16(vxa0), 1); in xnn_q8_igemm_ukernel_4x8__neon()
114 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 1); in xnn_q8_igemm_ukernel_4x8__neon()
[all …]
/external/XNNPACK/src/q8-gemm/
8x8-neon.c
121 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_q8_gemm_ukernel_8x8__neon()
122 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_q8_gemm_ukernel_8x8__neon()
123 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_q8_gemm_ukernel_8x8__neon()
124 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_q8_gemm_ukernel_8x8__neon()
125 vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_q8_gemm_ukernel_8x8__neon()
126 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_q8_gemm_ukernel_8x8__neon()
127 vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_q8_gemm_ukernel_8x8__neon()
128 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_q8_gemm_ukernel_8x8__neon()
129 vacc4x0123 = vmlal_lane_s16(vacc4x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa4), 0); in xnn_q8_gemm_ukernel_8x8__neon()
130 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa4), 0); in xnn_q8_gemm_ukernel_8x8__neon()
[all …]
4x8-neon.c
81 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_q8_gemm_ukernel_4x8__neon()
82 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_q8_gemm_ukernel_4x8__neon()
83 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_q8_gemm_ukernel_4x8__neon()
84 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_q8_gemm_ukernel_4x8__neon()
85 vacc2x0123 = vmlal_lane_s16(vacc2x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_q8_gemm_ukernel_4x8__neon()
86 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_q8_gemm_ukernel_4x8__neon()
87 vacc3x0123 = vmlal_lane_s16(vacc3x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_q8_gemm_ukernel_4x8__neon()
88 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_q8_gemm_ukernel_4x8__neon()
93 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_q8_gemm_ukernel_4x8__neon()
94 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_q8_gemm_ukernel_4x8__neon()
[all …]
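
All of the XNNPACK Q8 GEMM/IGEMM hits above are instances of one inner-loop idiom: the 8-bit operands are widened to int16x8_t (with the zero point already subtracted), vget_low_s16/vget_high_s16 split the weights into int16x4_t halves for the widening multiply-accumulate, and vmlal_lane_s16 broadcasts one activation lane. A minimal sketch of a single k-step, with hypothetical names (vxa0, vxb, vacc_lo, vacc_hi) rather than the kernels' actual state:

    #include <arm_neon.h>

    /* One k-step for one row of a quantized 8-wide GEMM, in the style of
     * the kernels above: vxb holds 8 widened weights, vxa0 holds widened
     * activations, and lane 0 of vxa0's low half is broadcast into both
     * 32-bit accumulators. */
    static inline void q8_gemm_step(int16x8_t vxa0, int16x8_t vxb,
                                    int32x4_t *vacc_lo, int32x4_t *vacc_hi) {
      *vacc_lo = vmlal_lane_s16(*vacc_lo, vget_low_s16(vxb),
                                vget_low_s16(vxa0), 0);
      *vacc_hi = vmlal_lane_s16(*vacc_hi, vget_high_s16(vxb),
                                vget_low_s16(vxa0), 0);
    }
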
/external/XNNPACK/src/q8-dwconv/
up8x9-neon.c
76 vacc0_lo = vmlal_s16(vacc0_lo, vget_low_s16(vxk00), vget_low_s16(vxi00)); in xnn_q8_dwconv_ukernel_up8x9__neon()
78 vacc1_lo = vmlal_s16(vacc1_lo, vget_low_s16(vxk00), vget_low_s16(vxi01)); in xnn_q8_dwconv_ukernel_up8x9__neon()
80 vacc2_lo = vmlal_s16(vacc2_lo, vget_low_s16(vxk00), vget_low_s16(vxi02)); in xnn_q8_dwconv_ukernel_up8x9__neon()
91 vacc0_lo = vmlal_s16(vacc0_lo, vget_low_s16(vxk10), vget_low_s16(vxi10)); in xnn_q8_dwconv_ukernel_up8x9__neon()
93 vacc1_lo = vmlal_s16(vacc1_lo, vget_low_s16(vxk10), vget_low_s16(vxi11)); in xnn_q8_dwconv_ukernel_up8x9__neon()
95 vacc2_lo = vmlal_s16(vacc2_lo, vget_low_s16(vxk10), vget_low_s16(vxi12)); in xnn_q8_dwconv_ukernel_up8x9__neon()
106 vacc0_lo = vmlal_s16(vacc0_lo, vget_low_s16(vxk20), vget_low_s16(vxi20)); in xnn_q8_dwconv_ukernel_up8x9__neon()
108 vacc1_lo = vmlal_s16(vacc1_lo, vget_low_s16(vxk20), vget_low_s16(vxi21)); in xnn_q8_dwconv_ukernel_up8x9__neon()
110 vacc2_lo = vmlal_s16(vacc2_lo, vget_low_s16(vxk20), vget_low_s16(vxi22)); in xnn_q8_dwconv_ukernel_up8x9__neon()
117 vacc0_lo = vmlal_s16(vacc0_lo, vget_low_s16(vxk01), vget_low_s16(vxi01)); in xnn_q8_dwconv_ukernel_up8x9__neon()
[all …]
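
The depthwise-convolution hits use the plain (non-lane) form: channels of the kernel and the input line up elementwise, so each tap is a straight vmlal_s16 of two low halves. A sketch with illustrative names:

    #include <arm_neon.h>

    /* One tap of a quantized depthwise convolution: vxk is the widened
     * kernel row, vxi the widened input row; the low four channels
     * accumulate into vacc_lo. */
    static inline int32x4_t q8_dwconv_tap(int32x4_t vacc_lo,
                                          int16x8_t vxk, int16x8_t vxi) {
      return vmlal_s16(vacc_lo, vget_low_s16(vxk), vget_low_s16(vxi));
    }
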
/external/libhevc/common/arm/
ihevc_resi_trans_neon_32x32.c
137 vget_high_s16(diff_16[2][0]), vget_low_s16(diff_16[2][0])); in ihevc_resi_trans_32x32_neon()
141 vget_high_s16(diff_16[3][0]), vget_low_s16(diff_16[3][0])); in ihevc_resi_trans_32x32_neon()
162 vget_high_s16(diff_16[2][1]), vget_low_s16(diff_16[2][1])); in ihevc_resi_trans_32x32_neon()
166 vget_high_s16(diff_16[3][1]), vget_low_s16(diff_16[3][1])); in ihevc_resi_trans_32x32_neon()
239 e0_1 = vcombine_s16(vget_high_s16(e0_1), vget_low_s16(e0_1)); in ihevc_resi_trans_32x32_neon()
244 e1_1 = vcombine_s16(vget_high_s16(e1_1), vget_low_s16(e1_1)); in ihevc_resi_trans_32x32_neon()
253 vcombine_s16(vget_low_s16(ee0), vget_low_s16(ee1)); in ihevc_resi_trans_32x32_neon()
265 vtrn_s32(vreinterpret_s32_s16(vget_low_s16(eee)), in ihevc_resi_trans_32x32_neon()
274 vtrn_s16(vget_low_s16(eeee), vget_high_s16(eeee)); in ihevc_resi_trans_32x32_neon()
286 vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_01_8), eeee_00); in ihevc_resi_trans_32x32_neon()
[all …]
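
Several of the libhevc lines pass vget_high_s16 and vget_low_s16 into vcombine_s16, which simply swaps the two 64-bit halves of a vector so that butterfly partners line up. As a standalone helper (hypothetical name):

    #include <arm_neon.h>

    /* Swap the low and high halves of an int16x8_t, the idiom behind
     * vcombine_s16(vget_high_s16(v), vget_low_s16(v)) above. */
    static inline int16x8_t swap_halves_s16(int16x8_t v) {
      return vcombine_s16(vget_high_s16(v), vget_low_s16(v));
    }
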
/external/libvpx/libvpx/vpx_dsp/arm/
sum_squares_neon.c
55 s0 = vmlal_s16(s0, vget_low_s16(s[0]), vget_low_s16(s[0])); in vpx_sum_squares_2d_i16_neon()
56 s0 = vmlal_s16(s0, vget_low_s16(s[1]), vget_low_s16(s[1])); in vpx_sum_squares_2d_i16_neon()
57 s0 = vmlal_s16(s0, vget_low_s16(s[2]), vget_low_s16(s[2])); in vpx_sum_squares_2d_i16_neon()
58 s0 = vmlal_s16(s0, vget_low_s16(s[3]), vget_low_s16(s[3])); in vpx_sum_squares_2d_i16_neon()
59 s0 = vmlal_s16(s0, vget_low_s16(s[4]), vget_low_s16(s[4])); in vpx_sum_squares_2d_i16_neon()
60 s0 = vmlal_s16(s0, vget_low_s16(s[5]), vget_low_s16(s[5])); in vpx_sum_squares_2d_i16_neon()
61 s0 = vmlal_s16(s0, vget_low_s16(s[6]), vget_low_s16(s[6])); in vpx_sum_squares_2d_i16_neon()
62 s0 = vmlal_s16(s0, vget_low_s16(s[7]), vget_low_s16(s[7])); in vpx_sum_squares_2d_i16_neon()
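
sum_squares_neon.c squares int16 data by multiplying each half-vector with itself in a widening multiply-accumulate, so eight 16-bit squares land in four 32-bit lanes without overflow. A sketch of one row:

    #include <arm_neon.h>

    /* Accumulate the squares of eight int16 values into int32 lanes,
     * as in vpx_sum_squares_2d_i16_neon above. */
    static inline int32x4_t sum_squares_row(int32x4_t acc, int16x8_t s) {
      acc = vmlal_s16(acc, vget_low_s16(s), vget_low_s16(s));
      acc = vmlal_s16(acc, vget_high_s16(s), vget_high_s16(s));
      return acc;
    }
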
fwd_txfm_neon.c
48 int32x4_t v_t0_lo = vaddl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); in vpx_fdct8x8_neon()
50 int32x4_t v_t1_lo = vsubl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); in vpx_fdct8x8_neon()
52 int32x4_t v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), cospi_24_64); in vpx_fdct8x8_neon()
54 int32x4_t v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), cospi_24_64); in vpx_fdct8x8_neon()
56 v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), cospi_8_64); in vpx_fdct8x8_neon()
58 v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), cospi_8_64); in vpx_fdct8x8_neon()
81 v_t0_lo = vmull_n_s16(vget_low_s16(v_x0), cospi_16_64); in vpx_fdct8x8_neon()
83 v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), cospi_16_64); in vpx_fdct8x8_neon()
99 v_t0_lo = vmull_n_s16(vget_low_s16(v_x3), cospi_4_64); in vpx_fdct8x8_neon()
101 v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), cospi_28_64); in vpx_fdct8x8_neon()
[all …]
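
The forward-transform hits mix widening adds and subtracts (vaddl_s16/vsubl_s16) with multiplies against scalar cosine constants (vmull_n_s16/vmlal_n_s16), always on the low half shown here, with a matching high-half block elided. A low-half sketch; c24 and c8 stand in for cospi_24_64 and cospi_8_64:

    #include <arm_neon.h>

    /* t = x2*c24 + x3*c8 on the low halves, mirroring the
     * vpx_fdct8x8_neon lines above. */
    static inline int32x4_t fdct_mul_lo(int16x8_t v_x2, int16x8_t v_x3,
                                        int16_t c24, int16_t c8) {
      int32x4_t t = vmull_n_s16(vget_low_s16(v_x2), c24);
      return vmlal_n_s16(t, vget_low_s16(v_x3), c8);
    }
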
idct_neon.h
128 t[0] = vaddl_s16(vget_low_s16(a), vget_low_s16(b)); in add_multiply_shift_and_narrow_s16()
140 t[0] = vsubl_s16(vget_low_s16(a), vget_low_s16(b)); in sub_multiply_shift_and_narrow_s16()
153 t[0] = vmull_n_s16(vget_low_s16(a), a_const); in multiply_accumulate_shift_and_narrow_s16()
155 t[0] = vmlal_n_s16(t[0], vget_low_s16(b), b_const); in multiply_accumulate_shift_and_narrow_s16()
297 b[0] = vget_low_s16(a[0]); in idct4x4_16_kernel_bd8()
299 b[2] = vget_low_s16(a[1]); in idct4x4_16_kernel_bd8()
401 t32[2] = vmull_lane_s16(vget_low_s16(step2[6]), cospis0, 2); in idct8x8_12_pass2_bd8()
403 t32[0] = vmlsl_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); in idct8x8_12_pass2_bd8()
405 t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); in idct8x8_12_pass2_bd8()
430 input1l = vget_low_s16(io[1]); in idct8x8_64_1d_bd8_kernel()
[all …]
variance_neon.c
55 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), in variance_neon_w4x4()
56 vget_low_s16(diff_lo_s16)); in variance_neon_w4x4()
60 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), in variance_neon_w4x4()
61 vget_low_s16(diff_hi_s16)); in variance_neon_w4x4()
104 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), in variance_neon_w16()
105 vget_low_s16(diff_lo_s16)); in variance_neon_w16()
109 sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), in variance_neon_w16()
110 vget_low_s16(diff_hi_s16)); in variance_neon_w16()
147 sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_0_s16), in variance_neon_w8x2()
148 vget_low_s16(diff_0_s16)); in variance_neon_w8x2()
[all …]
vpx_scaled_convolve8_neon.c
41 const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3); in scaledconvolve_horiz_w4()
54 t[0] = vget_low_s16(ss[0]); in scaledconvolve_horiz_w4()
55 t[1] = vget_low_s16(ss[1]); in scaledconvolve_horiz_w4()
56 t[2] = vget_low_s16(ss[2]); in scaledconvolve_horiz_w4()
57 t[3] = vget_low_s16(ss[3]); in scaledconvolve_horiz_w4()
170 const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3); in scaledconvolve_vert_w4()
177 t[0] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[0]))); in scaledconvolve_vert_w4()
178 t[1] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[1]))); in scaledconvolve_vert_w4()
179 t[2] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[2]))); in scaledconvolve_vert_w4()
180 t[3] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[3]))); in scaledconvolve_vert_w4()
[all …]
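
The scaled-convolve helpers show two recurring preparation steps: widening unsigned 8-bit pixels to int16 and keeping only the low four lanes, and splatting a single filter tap out of the coefficient vector. Sketches of both (hypothetical helper names):

    #include <arm_neon.h>

    /* Widen 8 unsigned pixels to int16 and keep the low four lanes. */
    static inline int16x4_t widen_u8_lo(uint8x8_t s) {
      return vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s)));
    }

    /* Broadcast filter tap 3 across an int16x4_t, as in
     * scaledconvolve_horiz_w4 above. */
    static inline int16x4_t filter_tap3(int16x8_t filters) {
      return vdup_lane_s16(vget_low_s16(filters), 3);
    }
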
/external/libvpx/libvpx/vp8/common/arm/neon/
iwalsh_neon.c
26 d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); in vp8_short_inv_walsh4x4_neon()
27 d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); in vp8_short_inv_walsh4x4_neon()
28 d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); in vp8_short_inv_walsh4x4_neon()
29 d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); in vp8_short_inv_walsh4x4_neon()
37 v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)), in vp8_short_inv_walsh4x4_neon()
38 vreinterpret_s32_s16(vget_low_s16(q1s16))); in vp8_short_inv_walsh4x4_neon()
66 vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0); in vp8_short_inv_walsh4x4_neon()
70 vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0); in vp8_short_inv_walsh4x4_neon()
75 vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1); in vp8_short_inv_walsh4x4_neon()
79 vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1); in vp8_short_inv_walsh4x4_neon()
[all …]
shortidct4x4llm_neon.c
46 d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 in vp8_short_idct4x4llm_neon()
47 d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 in vp8_short_idct4x4llm_neon()
53 d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 in vp8_short_idct4x4llm_neon()
54 d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 in vp8_short_idct4x4llm_neon()
75 d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 in vp8_short_idct4x4llm_neon()
76 d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 in vp8_short_idct4x4llm_neon()
82 d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 in vp8_short_idct4x4llm_neon()
83 d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 in vp8_short_idct4x4llm_neon()
dequant_idct_neon.c
61 d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2)); in vp8_dequant_idct_add_neon()
62 d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2)); in vp8_dequant_idct_add_neon()
73 d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); in vp8_dequant_idct_add_neon()
74 d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); in vp8_dequant_idct_add_neon()
101 d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); in vp8_dequant_idct_add_neon()
102 d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); in vp8_dequant_idct_add_neon()
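
The VP8 inverse transforms keep two 4-point rows in one int16x8_t and form the butterfly terms a1 = lo + hi and b1 = lo - hi with saturating arithmetic. A sketch of that step:

    #include <arm_neon.h>

    /* Saturating sum/difference of a vector's two halves, as in
     * vp8_short_idct4x4llm_neon above (a1/b1 in its comments). */
    static inline void idct_butterfly(int16x8_t q1,
                                      int16x4_t *a1, int16x4_t *b1) {
      *a1 = vqadd_s16(vget_low_s16(q1), vget_high_s16(q1));
      *b1 = vqsub_s16(vget_low_s16(q1), vget_high_s16(q1));
    }
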
/external/libvpx/libvpx/vp9/common/arm/neon/
vp9_iht_neon.h
30 x[0] = vget_low_s16(io[0]); in iadst4()
31 x[1] = vget_low_s16(io[1]); in iadst4()
63 const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0); in iadst_half_butterfly_neon()
65 const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0); in iadst_half_butterfly_neon()
81 const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 1); in iadst_half_butterfly_neg_neon()
83 const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 1); in iadst_half_butterfly_neg_neon()
99 const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 0); in iadst_half_butterfly_pos_neon()
101 const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 0); in iadst_half_butterfly_pos_neon()
118 s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0); in iadst_butterfly_lane_0_1_neon()
120 s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1); in iadst_butterfly_lane_0_1_neon()
[all …]
vp9_iht4x4_add_neon.c
38 a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); in vp9_iht4x4_16_add_neon()
41 a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); in vp9_iht4x4_16_add_neon()
46 a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); in vp9_iht4x4_16_add_neon()
55 a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); in vp9_iht4x4_16_add_neon()
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/
depthwise_conv.h
69 acc[0].val[i] = vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]),
70 vget_low_s16(input_dup2.val[i]));
116 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
119 acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
141 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
178 acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(filter),
179 vget_low_s16(input_dup2.val[i]));
203 const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
208 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), input_dup2.val[0]);
246 const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
[all …]
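
The per-channel int8 kernels first sign-extend the 8-bit input with vmovl_s8, then take the low half for the widening multiply-accumulate against the (already widened) filter. One accumulation, sketched:

    #include <arm_neon.h>

    /* Sign-extend int8 input and accumulate filter * input into int32
     * lanes, in the style of the depthwise kernels above. */
    static inline int32x4_t dwconv_acc_s8(int32x4_t acc, int16x8_t filter,
                                          int8x8_t input_s8) {
      const int16x4_t input_s16 = vget_low_s16(vmovl_s8(input_s8));
      return vmlal_s16(acc, vget_low_s16(filter), input_s16);
    }
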
/external/tensorflow/tensorflow/lite/kernels/
cpu_backend_gemm_custom_gemv.h
388 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0.val[0]),
389 vget_low_s16(input_val.val[0]));
390 acc1 = vmlal_s16(acc1, vget_low_s16(filter_val_1.val[0]),
391 vget_low_s16(input_val.val[0]));
392 acc2 = vmlal_s16(acc2, vget_low_s16(filter_val_2.val[0]),
393 vget_low_s16(input_val.val[0]));
394 acc3 = vmlal_s16(acc3, vget_low_s16(filter_val_3.val[0]),
395 vget_low_s16(input_val.val[0]));
396 acc0 = vmlal_s16(acc0, vget_low_s16(filter_val_0.val[1]),
397 vget_low_s16(input_val.val[1]));
[all …]
/external/libaom/libaom/aom_dsp/arm/
fwd_txfm_neon.c
42 const int16x4_t s_0 = vget_low_s16(s_01); in aom_fdct4x4_helper()
45 const int16x4_t s_3 = vget_low_s16(s_32); in aom_fdct4x4_helper()
139 int32x4_t v_t0_lo = vaddl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); in aom_fdct8x8_neon()
141 int32x4_t v_t1_lo = vsubl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1)); in aom_fdct8x8_neon()
143 int32x4_t v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_24_64); in aom_fdct8x8_neon()
145 int32x4_t v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_24_64); in aom_fdct8x8_neon()
147 v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), (int16_t)cospi_8_64); in aom_fdct8x8_neon()
149 v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), (int16_t)cospi_8_64); in aom_fdct8x8_neon()
172 v_t0_lo = vmull_n_s16(vget_low_s16(v_x0), (int16_t)cospi_16_64); in aom_fdct8x8_neon()
174 v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_16_64); in aom_fdct8x8_neon()
[all …]
/external/XNNPACK/src/q8-vadd/
neon.c
49 int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
50 int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
51 int32x4_t vacc2_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa2)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
52 int32x4_t vacc3_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa3)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
58 vacc0_lo = vmlaq_s32(vacc0_lo, vmovl_s16(vget_low_s16(vxb0)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
59 vacc1_lo = vmlaq_s32(vacc1_lo, vmovl_s16(vget_low_s16(vxb1)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
60 vacc2_lo = vmlaq_s32(vacc2_lo, vmovl_s16(vget_low_s16(vxb2)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
61 vacc3_lo = vmlaq_s32(vacc3_lo, vmovl_s16(vget_low_s16(vxb3)), vb_multiplier); in xnn_q8_vadd_ukernel__neon()
115 int32x4_t vacc0_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa0)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
116 int32x4_t vacc1_lo = vmulq_s32(vmovl_s16(vget_low_s16(vxa1)), va_multiplier); in xnn_q8_vadd_ukernel__neon()
[all …]
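
xnn_q8_vadd_ukernel__neon needs full 32-bit products, so each low half is widened once more with vmovl_s16 before a 32x32 multiply by the operand's fixed-point multiplier; the second operand folds in with vmlaq_s32. A sketch of the low-half path:

    #include <arm_neon.h>

    /* Rescale and sum the low halves of two widened operands in 32-bit
     * fixed point, as in the q8-vadd kernel above. */
    static inline int32x4_t q8_vadd_lo(int16x8_t vxa, int16x8_t vxb,
                                       int32x4_t va_multiplier,
                                       int32x4_t vb_multiplier) {
      int32x4_t vacc = vmulq_s32(vmovl_s16(vget_low_s16(vxa)), va_multiplier);
      return vmlaq_s32(vacc, vmovl_s16(vget_low_s16(vxb)), vb_multiplier);
    }
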
/external/libaom/libaom/av1/encoder/arm/neon/
quantize_neon.c
60 vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); in av1_quantize_fp_neon()
86 vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); in av1_quantize_fp_neon()
105 const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210), in av1_quantize_fp_neon()
125 vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); in calculate_dqcoeff_lp_and_store()
158 vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); in av1_quantize_lp_neon()
183 vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); in av1_quantize_lp_neon()
201 const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210), in av1_quantize_lp_neon()
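
The quantizer hits follow the same half-at-a-time scheme: a plain vmull_s16 of the coefficient and quantizer low halves produces the 32-bit products, and the end-of-block search reduces with vmax_s16 across halves. The multiply step, sketched:

    #include <arm_neon.h>

    /* 32-bit products of the low halves of coefficient and quantizer
     * vectors, as in av1_quantize_fp_neon above. */
    static inline int32x4_t quant_mul_lo(int16x8_t v_tmp, int16x8_t v_quant) {
      return vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
    }
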
/external/libaom/libaom/av1/common/arm/
warp_plane_neon.c
217 *res = vpadd_s16(vget_low_s16(pix_0), vget_high_s16(pix_0)); in convolve()
273 b0 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f0)), in horizontal_filter_neon()
274 vreinterpret_s32_s16(vget_low_s16(f2))); in horizontal_filter_neon()
275 b1 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f4)), in horizontal_filter_neon()
276 vreinterpret_s32_s16(vget_low_s16(f6))); in horizontal_filter_neon()
279 b0 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f1)), in horizontal_filter_neon()
280 vreinterpret_s32_s16(vget_low_s16(f3))); in horizontal_filter_neon()
281 b1 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f5)), in horizontal_filter_neon()
282 vreinterpret_s32_s16(vget_low_s16(f7))); in horizontal_filter_neon()
359 src_0 = vget_low_s16(vreinterpretq_s16_s32(c0.val[0])); in vertical_filter_neon()
[all …]
/external/webrtc/webrtc/common_audio/signal_processing/
downsample_fast_neon.c
55 int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]); in WebRtcSpl_DownsampleFastNeon()
56 int16x4_t in16x4_1 = vget_low_s16(in16x8x2.val[1]); in WebRtcSpl_DownsampleFastNeon()
76 int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]); in WebRtcSpl_DownsampleFastNeon()
95 int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]); in WebRtcSpl_DownsampleFastNeon()
122 int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]); in WebRtcSpl_DownsampleFastNeon()
123 int16x4_t in16x4_2 = vget_low_s16(in16x8x4.val[1]); in WebRtcSpl_DownsampleFastNeon()
124 int16x4_t in16x4_4 = vget_low_s16(in16x8x4.val[2]); in WebRtcSpl_DownsampleFastNeon()
125 int16x4_t in16x4_6 = vget_low_s16(in16x8x4.val[3]); in WebRtcSpl_DownsampleFastNeon()
151 int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]); in WebRtcSpl_DownsampleFastNeon()
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/
depthwiseconv_uint8.h
68 acc[0].val[i] = vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]),
69 vget_low_s16(input_dup2.val[i]));
116 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input[0]));
119 acc[2] = vmlal_s16(acc[2], vget_low_s16(filter), vget_low_s16(input[1]));
141 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), vget_low_s16(input));
179 acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(filter),
180 vget_low_s16(input_dup2.val[i]));
210 acc[0] = vmlal_s16(acc[0], vget_low_s16(filter), input_dup2.val[0]);
253 acc[0] = vmlal_lane_s16(acc[0], vget_low_s16(filter[0]), input, 0);
255 acc[2] = vmlal_lane_s16(acc[2], vget_low_s16(filter[1]), input, 1);
[all …]
/external/webp/src/dsp/
enc_neon.c
106 const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]), in TransformPass_NEON()
107 vget_low_s16(rows->val[1])); // in0 + in8 in TransformPass_NEON()
108 const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]), in TransformPass_NEON()
109 vget_low_s16(rows->val[1])); // in0 - in8 in TransformPass_NEON()
112 const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0)); in TransformPass_NEON()
113 const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1)); in TransformPass_NEON()
118 const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp)); in TransformPass_NEON()
304 const int16x4_t D0 = vget_low_s16(D0D1); in FTransform_NEON()
306 const int16x4_t D2 = vget_low_s16(D2D3); in FTransform_NEON()
316 const int16x4_t tmp0 = vadd_s16(vget_low_s16(a0a1_2), in FTransform_NEON()
[all …]
