/external/XNNPACK/src/qs8-gemm/gen/
D | 3x8c8-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull():
    79   int32x4_t vacc2x7 = vacc0x7;  (local)
    144  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    161  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    208  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal():
    79   int32x4_t vacc2x7 = vacc0x7;  (local)
    179  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    245  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    262  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    309  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x8c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal():
    79   int32x4_t vacc2x7 = vacc0x7;  (local)
    168  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    185  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    231  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x8c8-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull():
    85   int32x4_t vacc2x7 = vacc0x7;  (local)
    174  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    192  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    245  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal():
    85   int32x4_t vacc2x7 = vacc0x7;  (local)
    218  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    301  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    319  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    372  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x8c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal():
    85   int32x4_t vacc2x7 = vacc0x7;  (local)
    206  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    224  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    276  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x16c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal():
    95   int32x4_t vacc2x7 = vacc0x7;  (local)
    211  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    357  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    438  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    523  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x16c8-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull():
    95   int32x4_t vacc2x7 = vacc0x7;  (local)
    168  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    249  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    334  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x16c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal():
    95   int32x4_t vacc2x7 = vacc0x7;  (local)
    200  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    297  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    381  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x16c8-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull():
    101  int32x4_t vacc2x7 = vacc0x7;  (local)
    206  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    304  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    401  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x16c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal():
    101  int32x4_t vacc2x7 = vacc0x7;  (local)
    258  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    445  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    543  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    640  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x16c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal():
    101  int32x4_t vacc2x7 = vacc0x7;  (local)
    246  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    368  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    464  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
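In every kernel above, the references to vacc2x7 trace the same reduction: the accumulator is initialized by copying vacc0x7, widened int16 products are pairwise-accumulated into its int32 lanes with vpadalq_s16, neighbouring accumulators are folded with vpaddq_s32 (the vsum2x67 lines), and a single accumulator is folded across its low and high halves with vadd_s32 (the vpsum2x7 lines). A minimal standalone sketch of that pattern, reducing one 8-element int8 dot product; the function and variable names here are illustrative, not XNNPACK code:

#include <arm_neon.h>
#include <stdint.h>

/* Hypothetical demo of the vacc2x7 reduction pattern. */
static int32_t dot8_s8(const int8_t a[8], const int8_t b[8]) {
  int32x4_t vacc = vmovq_n_s32(0);                           /* stands in for vacc2x7 */
  const int16x8_t vprod = vmull_s8(vld1_s8(a), vld1_s8(b));  /* int8 * int8 -> int16 */
  vacc = vpadalq_s16(vacc, vprod);                           /* pairwise add into int32 lanes */
  /* fold 4 lanes -> 2 -> 1, mirroring the vpsum2x7 lines above */
  const int32x2_t vpsum = vadd_s32(vget_low_s32(vacc), vget_high_s32(vacc));
  return vget_lane_s32(vpadd_s32(vpsum, vpsum), 0);
}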
/external/XNNPACK/src/qs8-igemm/gen/
D | 3x8c8-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull():
    78   int32x4_t vacc2x7 = vacc0x7;  (local)
    159  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    179  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    226  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal():
    78   int32x4_t vacc2x7 = vacc0x7;  (local)
    194  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    260  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    280  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    327  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x8c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal():
    78   int32x4_t vacc2x7 = vacc0x7;  (local)
    183  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    203  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    249  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x8c8-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull():
    82   int32x4_t vacc2x7 = vacc0x7;  (local)
    191  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    212  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    265  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal():
    82   int32x4_t vacc2x7 = vacc0x7;  (local)
    235  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    318  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    339  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    392  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x8c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal():
    82   int32x4_t vacc2x7 = vacc0x7;  (local)
    223  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    244  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    296  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x16c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal():
    94   int32x4_t vacc2x7 = vacc0x7;  (local)
    226  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    372  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    456  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    541  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x16c8-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull():
    94   int32x4_t vacc2x7 = vacc0x7;  (local)
    183  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    267  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    352  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 3x16c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal():
    94   int32x4_t vacc2x7 = vacc0x7;  (local)
    215  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    315  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    399  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x16c8-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull():
    98   int32x4_t vacc2x7 = vacc0x7;  (local)
    223  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    324  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    421  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x16c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal():
    98   int32x4_t vacc2x7 = vacc0x7;  (local)
    275  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    462  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    563  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    660  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
D | 4x16c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal():
    98   int32x4_t vacc2x7 = vacc0x7;  (local)
    263  vacc2x7 = vpadalq_s16(vacc2x7, vprod2x7);
    388  const int32x4_t vsum2x67 = vpaddq_s32(vacc2x6, vacc2x7);
    484  const int32x2_t vpsum2x7 = vadd_s32(vget_low_s32(vacc2x7), vget_high_s32(vacc2x7));
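The igemm variants differ from the gemm ones only in how the A matrix is addressed (through an indirection buffer of row pointers rather than a strided pointer); the vacc2x7 reduction itself is identical. The vsum2x67 lines in both tables rely on vpaddq_s32, which pairwise-adds adjacent lanes across two registers, so one instruction folds two per-channel accumulators at once. A hedged sketch with illustrative names; note vpaddq_s32 is an AArch64-only NEON intrinsic:

#include <arm_neon.h>

/* Illustrative only: fold two per-channel accumulators (e.g. vacc2x6 and
 * vacc2x7) into one vector of per-channel sums, as the vsum2x67 lines do.
 * Result lanes: { acc6[0]+acc6[1], acc6[2]+acc6[3],
 *                 acc7[0]+acc7[1], acc7[2]+acc7[3] }. */
static int32x4_t fold_pair(int32x4_t vacc6, int32x4_t vacc7) {
  return vpaddq_s32(vacc6, vacc7);  /* AArch64 pairwise add across both inputs */
}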