/external/XNNPACK/src/qs8-igemm/gen/
D | 2x16c8-minmax-rndnu-neon-mull.c | xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
     82  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    182  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    206  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    269  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 2x16c8-minmax-rndnu-neon-mlal.c | xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
     82  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    232  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    321  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    345  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    408  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 2x16c16-minmax-rndnu-neon-mlal.c | xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
     82  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    214  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    238  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    300  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 3x16c8-minmax-rndnu-neon-mlal.c | xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
     86  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    305  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    427  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    452  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    527  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 3x16c8-minmax-rndnu-neon-mull.c | xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
     86  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    238  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    263  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    338  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 3x16c16-minmax-rndnu-neon-mlal.c | xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
     86  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    286  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    311  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    385  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 4x16c8-minmax-rndnu-neon-mull.c | xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
     90  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    294  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    320  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    407  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 4x16c8-minmax-rndnu-neon-mlal.c | xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
     90  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    378  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    533  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    559  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    646  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 4x16c16-minmax-rndnu-neon-mlal.c | xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
     90  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    358  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    384  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    470  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

/external/XNNPACK/src/qs8-gemm/gen/
D | 2x16c8-minmax-rndnu-neon-mull.c | xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
     81  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    169  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    190  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    253  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 2x16c8-minmax-rndnu-neon-mlal.c | xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
     81  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    219  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    308  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    329  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    392  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 2x16c16-minmax-rndnu-neon-mlal.c | xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
     81  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    201  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    222  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    284  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 3x16c8-minmax-rndnu-neon-mlal.c | xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
     87  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    290  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    412  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    434  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    509  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 3x16c8-minmax-rndnu-neon-mull.c | xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
     87  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    223  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    245  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    320  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 3x16c16-minmax-rndnu-neon-mlal.c | xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
     87  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    271  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    293  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    367  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 4x16c8-minmax-rndnu-neon-mull.c | xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
     93  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    277  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    300  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    387  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 4x16c8-minmax-rndnu-neon-mlal.c | xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
     93  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    361  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    516  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    539  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    626  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

D | 4x16c16-minmax-rndnu-neon-mlal.c | xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
     93  int32x4_t vacc1x15 = vacc0x15;  (local definition)
    341  vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15);
    364  const int32x4_t vsum1xEF = vpaddq_s32(vacc1x14, vacc1x15);
    450  const int32x2_t vpsum1xF = vadd_s32(vget_low_s32(vacc1x15), vget_high_s32(vacc1x15));

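Every hit above follows the same reduction pattern: the row-1 accumulator is seeded from the row-0 accumulator (both rows start from the same per-channel bias), int16 products are folded into the int32 lanes with vpadalq_s16, and the sixteen per-channel accumulators are finally reduced pairwise, via vpaddq_s32 and vadd_s32 of the low and high halves. The following is a minimal sketch of that accumulate-and-reduce sequence for a single channel, assuming k is a nonzero multiple of 8; qs8_dot is a hypothetical helper for illustration, not XNNPACK code.

#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical helper (not an XNNPACK kernel): dot product of two
 * int8 vectors using the same NEON reduction steps seen above.
 * Assumes k is a nonzero multiple of 8. */
static int32_t qs8_dot(const int8_t* a, const int8_t* b, size_t k) {
  int32x4_t vacc = vdupq_n_s32(0);  /* the kernels seed this from the bias */
  for (size_t i = 0; i < k; i += 8) {
    const int8x8_t va = vld1_s8(a + i);
    const int8x8_t vb = vld1_s8(b + i);
    /* Widening multiply: 8 int8 lanes -> 8 int16 products (the MULL step). */
    const int16x8_t vprod = vmull_s8(va, vb);
    /* Pairwise add-accumulate into int32 lanes, as in
     * vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); */
    vacc = vpadalq_s16(vacc, vprod);
  }
  /* Horizontal reduction, as in the vpsum1xF lines: add the high half to
   * the low half, then pairwise-add the remaining two lanes. */
  const int32x2_t vsum = vadd_s32(vget_low_s32(vacc), vget_high_s32(vacc));
  return vget_lane_s32(vpadd_s32(vsum, vsum), 0);
}

The -mull kernels widen one product per step as in this sketch, while the -mlal variants (note their two separate vpadalq_s16 hit regions, from an unrolled main loop plus a remainder pass) combine two products in int16 before the same widening accumulation.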