Home
last modified time | relevance | path

Searched refs:vacc5x89AB (Results 1 – 25 of 26) sorted by relevance

12

/external/XNNPACK/src/qs8-igemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane-prfm.c88 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm() local
162 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
192 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
222 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
252 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
286 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
316 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
346 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
376 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
422 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-rndnu-neon-mlal-lane.c88 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
162 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
192 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
222 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
252 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
284 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
314 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
344 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
374 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
420 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D6x16c4-minmax-rndnu-neondot.c89 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local
165 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
189 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
233 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
265 vacc5x89AB = vshlq_s32(vacc5x89AB, vright_pre_shift); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
290 vacc5x89AB = vqdmulhq_s32(vacc5x89AB, vmultiplier); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
315 vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_post_shift); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
331 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
351 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
D8x16c4-minmax-rndnu-neondot.c97 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local
191 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
223 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
277 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
317 vacc5x89AB = vshlq_s32(vacc5x89AB, vright_pre_shift); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
350 vacc5x89AB = vqdmulhq_s32(vacc5x89AB, vmultiplier); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
383 vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_post_shift); in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
407 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
433 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
/external/XNNPACK/src/qu8-gemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane.c96 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
142 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
172 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
202 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
232 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
264 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
294 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
324 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
354 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
400 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D6x16c4-minmax-rndnu-neondot.c274 int32x4_t vacc5x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc5x89AB, vnacc5x0123)); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local
303 vacc5x89AB = vshlq_s32(vacc5x89AB, vright_pre_shift); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
328 vacc5x89AB = vqdmulhq_s32(vacc5x89AB, vmultiplier); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
353 vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_post_shift); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
369 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
389 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
/external/XNNPACK/src/qu8-igemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane.c89 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
163 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
193 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
223 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
253 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
285 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
315 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
345 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
375 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
421 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D6x16c4-minmax-rndnu-neondot.c296 int32x4_t vacc5x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc5x89AB, vnacc5x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot() local
325 vacc5x89AB = vshlq_s32(vacc5x89AB, vright_pre_shift); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
350 vacc5x89AB = vqdmulhq_s32(vacc5x89AB, vmultiplier); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
375 vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_post_shift); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
391 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
411 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot()
D8x16c4-minmax-rndnu-neondot.c354 int32x4_t vacc5x89AB = vreinterpretq_s32_u32(vsubq_u32(vpacc5x89AB, vnacc5x0123)); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot() local
395 vacc5x89AB = vshlq_s32(vacc5x89AB, vright_pre_shift); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
428 vacc5x89AB = vqdmulhq_s32(vacc5x89AB, vmultiplier); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
461 vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_post_shift); in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
485 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
511 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot()
/external/XNNPACK/src/qs8-gemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane-prfm.c95 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm() local
141 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
171 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
201 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
231 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
265 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
295 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
325 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
355 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
401 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-rndnu-neon-mlal-lane.c95 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
141 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
171 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
201 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
231 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
263 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
293 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
323 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
353 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
399 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D6x16c4-minmax-rndnu-neondot.c99 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot() local
147 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
171 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
215 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
245 vacc5x89AB = vqshlq_s32(vacc5x89AB, vright_pre_shift); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
270 vacc5x89AB = vqdmulhq_s32(vacc5x89AB, vmultiplier); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
295 vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_post_shift); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
311 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
331 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot()
D8x16c4-minmax-rndnu-neondot.c111 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot() local
169 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
201 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
255 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
293 vacc5x89AB = vqshlq_s32(vacc5x89AB, vright_pre_shift); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
326 vacc5x89AB = vqdmulhq_s32(vacc5x89AB, vmultiplier); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
359 vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_post_shift); in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
383 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
409 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot()
/external/XNNPACK/src/qc8-igemm/gen/
D6x16-minmax-fp32-neon-mlal-lane-prfm.c88 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm() local
162 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
192 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
222 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
252 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
286 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
316 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
346 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
376 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
422 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neon-mlal-lane.c88 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane() local
162 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
192 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
222 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
252 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
284 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
314 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
344 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
374 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
420 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane.c89 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane() local
163 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
193 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
223 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
253 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
285 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
315 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
345 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
375 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
421 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane-prfm.c89 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm() local
163 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
193 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
223 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
253 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
287 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
317 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
347 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
377 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
423 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
[all …]
D6x16c4-minmax-fp32-neondot.c90 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot() local
166 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
190 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
234 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
262 float32x4_t vfpacc5x89AB = vcvtq_f32_s32(vacc5x89AB); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
316 vacc5x89AB = vcvtnq_s32_f32(vfpacc5x89AB); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
332 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
352 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot()
D8x16c4-minmax-fp32-neondot.c98 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot() local
192 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
224 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
278 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
314 float32x4_t vfpacc5x89AB = vcvtq_f32_s32(vacc5x89AB); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
384 vacc5x89AB = vcvtnq_s32_f32(vfpacc5x89AB); in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
408 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
434 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot()
/external/XNNPACK/src/qc8-gemm/gen/
D6x16-minmax-fp32-neon-mlal-lane-prfm.c95 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm() local
141 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
171 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
201 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
231 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
265 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
295 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
325 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
355 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
401 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neon-mlal-lane.c95 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane() local
141 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
171 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
201 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
231 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
263 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
293 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
323 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
353 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
399 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane-prfm.c96 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm() local
142 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
172 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
202 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
232 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
266 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
296 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
326 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
356 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
402 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane.c96 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane() local
142 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
172 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa5), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
202 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc2), vget_low_s16(vxa5), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
232 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc3), vget_low_s16(vxa5), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
264 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc4), vget_high_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
294 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc5), vget_high_s16(vxa5), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
324 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc6), vget_high_s16(vxa5), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
354 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc7), vget_high_s16(vxa5), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
400 vacc5x89AB = vmlal_lane_s16(vacc5x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa5), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
[all …]
D6x16c4-minmax-fp32-neondot.c100 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot() local
148 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
172 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
216 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
242 float32x4_t vfpacc5x89AB = vcvtq_f32_s32(vacc5x89AB); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
296 vacc5x89AB = vcvtnq_s32_f32(vfpacc5x89AB); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
312 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
332 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qc8_gemm_minmax_fp32_ukernel_6x16c4__neondot()
D8x16c4-minmax-fp32-neondot.c112 int32x4_t vacc5x89AB = vacc0x89AB; in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot() local
170 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
202 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb4567x89AB, va5x01234567, 1); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
256 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
290 float32x4_t vfpacc5x89AB = vcvtq_f32_s32(vacc5x89AB); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
360 vacc5x89AB = vcvtnq_s32_f32(vfpacc5x89AB); in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
384 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x89AB), vacc5xCDEF), v… in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()
410 …const int16x8_t vacc5x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc5x89AB), vqmovn_s32(vacc5x… in xnn_qc8_gemm_minmax_fp32_ukernel_8x16c4__neondot()

12