Home
last modified time | relevance | path

Searched refs:vacc3xCDEF (Results 1 – 25 of 112) sorted by relevance

12345

/external/XNNPACK/src/qu8-igemm/gen/
D4x16-minmax-rndnu-neon-mlal-lane.c74 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
128vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
150vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
172vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
194vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
218vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
240vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
262vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
284vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
318vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c74 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
128vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
150vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
172vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
194vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
218vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
240vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
262vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
284vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
318vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane.c75 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
129vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
151vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
173vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
195vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
219vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
241vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
263vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
285vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
319vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
D4x16-minmax-rndnu-neon-mull-addw-dup.c73 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup() local
129 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
157 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
185 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
213 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
241 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc4)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
269 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc5)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
297 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc6)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
325 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc7)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
361 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
[all …]
D4x16-minmax-rndnu-neon-mlal-lane-prfm.c73 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm() local
127vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
149vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
171vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
193vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
219vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
241vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
263vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
285vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
319vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
[all …]
D4x16-minmax-rndnu-neon-mlal-lane.c73 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
127vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
149vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
171vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
193vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
217vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
239vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
261vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
283vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
317vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c73 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
127vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
149vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
171vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
193vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
217vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
239vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
261vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
283vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
317vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane.c74 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
128vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
150vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
172vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
194vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
218vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
240vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
262vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
284vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
318vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D4x16-minmax-rndnu-neon-mlal-lane.c76 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
110 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
132 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
154 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
176 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
200 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
222 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
244 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
266 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
300 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16-minmax-rndnu-neon-mlal-lane-prfm.c76 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm() local
110 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
132 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
154 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
176 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
202 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
224 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
246 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
268 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
302 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm()
[all …]
D4x16-minmax-rndnu-neon-mull-addw-dup.c76 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup() local
112 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
140 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
168 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
196 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
224 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc4)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
252 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc5)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
280 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc6)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
308 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc7)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
344 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mull_addw_dup()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c76 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
110 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
132 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
154 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
176 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
200 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
222 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
244 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
266 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
300 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16c4-minmax-rndnu-neondot.c80 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot() local
118 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
134 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb4567xCDEF, va3x01234567, 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
168 vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
190 vacc3xCDEF = vqshlq_s32(vacc3xCDEF, vright_pre_shift); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
207 vacc3xCDEF = vqdmulhq_s32(vacc3xCDEF, vmultiplier); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
224 vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_post_shift); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
235 …const int16x8_t vacc3x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x89AB), vacc3xCDEF), v… in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
249 …x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc3x89AB), vqmovn_s32(vacc3xCDEF)), voutput_zero_… in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot()
D4x16-minmax-fp32-neonv8-mlal-lane.c77 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
111 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
133 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
155 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
177 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
201 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
223 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
245 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
267 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
301 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
[all …]
/external/XNNPACK/src/qu8-gemm/gen/
D4x16-minmax-rndnu-neon-mlal-lane.c77 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
111 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
133 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
155 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
177 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
201 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
223 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
245 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
267 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
301 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c77 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
111 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
133 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
155 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
177 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
201 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
223 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
245 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
267 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
301 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane.c78 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
112 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
134 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
156 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
178 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
202 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
224 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
246 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
268 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
302 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
[all …]
/external/XNNPACK/src/qc8-igemm/gen/
D4x16-minmax-fp32-neon-mlal-lane.c73 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
127vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
149vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
171vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
193vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
217vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
239vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
261vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
283vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
317vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane-prfm.c73 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm() local
127vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
149vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
171vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
193vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
219vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
241vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
263vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
285vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
319vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane.c74 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
128vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
150vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
172vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
194vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
218vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
240vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
262vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
284vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
318vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane-prfm.c74 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm() local
128vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
150vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
172vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
194vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
220vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
242vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
264vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
286vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
320vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
[all …]
/external/XNNPACK/src/qc8-gemm/gen/
D4x16-minmax-fp32-neon-mlal-lane.c76 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
110 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
132 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
154 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
176 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
200 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
222 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
244 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
266 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
300 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane-prfm.c76 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm() local
110 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
132 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
154 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
176 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
202 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
224 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
246 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
268 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
302 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm()
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane-prfm.c77 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm() local
111 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
133 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
155 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
177 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
203 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
225 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
247 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
269 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
303 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm()
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane.c77 int32x4_t vacc3xCDEF = vacc0xCDEF; in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
111 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
133 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
155 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa3), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
177 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa3), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
201 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc4), vget_high_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
223 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc5), vget_high_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
245 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc6), vget_high_s16(vxa3), 2); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
267 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc7), vget_high_s16(vxa3), 3); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
301 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
[all …]

12345