/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c4-minmax-neondot.c | 237 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local 251 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local 259 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 264 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 271 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 289 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 296 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
|
D | 2x16c2-minmax-neon-mull-padal-dup.c | 263 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() local 271 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() local 277 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() 280 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() 284 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() 294 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() 298 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
|
D | 2x16c8-minmax-neon-mull-padal.c | 295 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 303 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 309 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 312 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 316 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 326 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 330 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 327 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 335 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 341 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 344 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 348 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 358 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 362 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 2x16-minmax-neon-mlal-lane.c | 336 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane() local 344 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane() local 350 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane() 353 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane() 357 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane() 367 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane() 371 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
|
D | 2x16-minmax-neon-mull-addw-dup.c | 360 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup() local 368 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup() local 374 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup() 377 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup() 381 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup() 391 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup() 395 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
|
D | 6x16c4-minmax-neondot.c | 313 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local 333 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local 343 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 350 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 359 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 383 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 393 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
|
D | 3x16c2-minmax-neon-mull-padal-dup.c | 345 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 356 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 363 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 367 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 372 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 385 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 391 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup()
|
D | 3x16c8-minmax-neon-mull-padal.c | 404 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 415 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 422 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 426 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 431 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 444 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 450 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 434 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 442 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 448 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 451 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 455 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 465 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 469 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c2-minmax-neon-mlal-padal-dup.c | 401 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() local 409 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() local 415 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() 418 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() 422 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() 432 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() 436 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
|
D | 8x16c4-minmax-neondot.c | 389 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 415 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 427 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 436 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 447 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 477 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 490 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c4-minmax-neondot.c | 253 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local 267 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local 276 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 281 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 287 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 299 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 307 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
|
D | 2x16c2-minmax-neon-mull-padal-dup.c | 279 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() local 287 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() local 292 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() 295 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() 299 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() 309 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() 313 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
|
D | 2x16c8-minmax-neon-mull-padal.c | 311 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 319 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 324 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 327 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 331 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 341 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 345 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 6x16c4-minmax-neondot.c | 333 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local 353 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local 366 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 373 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 381 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 395 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 407 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
|
D | 2x16-minmax-neon-mlal-lane.c | 351 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane() local 359 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane() local 364 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane() 367 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane() 371 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane() 381 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane() 385 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
|
D | 2x16-minmax-neon-mull-addw-dup.c | 376 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup() local 384 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup() local 389 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup() 392 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup() 396 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup() 406 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup() 410 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 343 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 351 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 356 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 359 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 363 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 373 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 377 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c2-minmax-neon-mull-padal-dup.c | 363 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 374 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() local 381 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 385 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 390 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 401 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup() 408 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup()
|
D | 8x16c4-minmax-neondot.c | 413 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 439 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 456 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 465 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 475 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 491 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 507 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
|
D | 3x16c8-minmax-neon-mull-padal.c | 422 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 433 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 440 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 444 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 449 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 460 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 467 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 450 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 458 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 463 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 466 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 470 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 480 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 484 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 3x16-minmax-neon-mull-addw-dup.c | 494 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup() local 505 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup() local 512 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup() 516 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup() 521 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup() 532 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup() 539 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
|
D | 3x16-minmax-neon-mlal-lane.c | 441 int8x16_t vout1x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc1x01234567), vacc1x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane() local 452 …int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCD… in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane() local 459 vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane() 463 vout1x0123456789ABCDEF = vminq_s8(vout1x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane() 468 vst1q_s8(c1 + 0, vout1x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane() 479 …1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane() 486 …01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0123456789ABCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
|