/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-neon-mull-padal.c | 76 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 152 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 203 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 259 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 76 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 190 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 291 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 342 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 398 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 76 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 178 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 235 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 291 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 80 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 196 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 260 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 328 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 80 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 245 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 385 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 449 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 517 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 80 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 232 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 308 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 376 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 84 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 240 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 317 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 397 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 84 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 300 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 479 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 556 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 636 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 84 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 286 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 381 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 461 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-neon-mull-padal.c | 75 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 139 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 187 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 243 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 75 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 177 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 278 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 326 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 382 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 75 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 165 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 219 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 275 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 81 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 181 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 242 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 310 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 81 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 230 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 370 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 431 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 499 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 81 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 217 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 290 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 358 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 87 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 223 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 297 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 377 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 87 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 283 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 462 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 536 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 616 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 87 int32x4_t vacc1x9 = vacc0x9; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 269 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 361 const int32x4_t vsum1x89 = vpaddq_s32(vacc1x8, vacc1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 441 const int32x2_t vpsum1x9 = vadd_s32(vget_low_s32(vacc1x9), vget_high_s32(vacc1x9)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|