/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r():
      188  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      192  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      195  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      368  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      371  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      499  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      500  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r():
      182  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      186  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      189  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      359  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      362  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      490  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      491  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup():
      182  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      186  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      189  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      359  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      362  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      490  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      491  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal():
      174  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2x0);  (local)
      178  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2x1);
      181  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      345  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2x0);  (local)
      348  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r():
      225  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      230  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      234  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      453  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      457  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      608  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      609  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup():
      217  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      222  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      226  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      441  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      445  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      596  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      597  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r():
      217  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      222  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      226  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      441  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      445  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      596  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      597  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup():
      157  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      160  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      288  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      289  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r():
      157  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      160  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      288  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      289  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r():
      160  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      163  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      291  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      292  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal():
      207  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2x0);  (local)
      212  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2x1);
      216  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      424  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2x0);  (local)
      428  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
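Note: every mlal kernel indexed above records the same accumulation step for these vprod2xABc0 temporaries: two widening int8 multiplies fused via vmull_s8/vmlal_s8, then folded into 32-bit accumulator lanes with vpadalq_s16. The fragment below is a minimal stand-alone sketch of that step, not code from the generated kernels; the helper name accumulate_mlal_step and the operand names are made up for illustration, and an ARM/AArch64 toolchain with NEON is assumed.

    #include <arm_neon.h>
    #include <stdio.h>

    /* Sketch only: mirrors the vmull_s8 -> vmlal_s8 -> vpadalq_s16 pattern
       recorded in the mlal kernels above.  Not the XNNPACK implementation. */
    static int32x4_t accumulate_mlal_step(int32x4_t vacc,
                                          int8x8_t vb0, int8x8_t va0,
                                          int8x8_t vb1, int8x8_t va1) {
      int16x8_t vprod = vmull_s8(vb0, va0);   /* 8-bit x 8-bit -> 16-bit products */
      vprod = vmlal_s8(vprod, vb1, va1);      /* fuse a second set of products    */
      return vpadalq_s16(vacc, vprod);        /* pairwise-add into 32-bit lanes   */
    }

    int main(void) {
      const int8_t a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1};
      const int8_t b[16] = {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2};
      int32x4_t vacc = vdupq_n_s32(0);
      vacc = accumulate_mlal_step(vacc, vld1_s8(b), vld1_s8(a),
                                  vld1_s8(b + 8), vld1_s8(a + 8));
      printf("%d %d %d %d\n",
             (int) vgetq_lane_s32(vacc, 0), (int) vgetq_lane_s32(vacc, 1),
             (int) vgetq_lane_s32(vacc, 2), (int) vgetq_lane_s32(vacc, 3));
      return 0;
    }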
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r():
      167  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      171  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      174  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      344  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      347  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      475  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      476  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup():
      167  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      171  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      174  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      344  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      347  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      475  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      476  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r():
      173  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      177  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      180  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      353  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      356  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      484  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      485  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal():
      159  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2x0);  (local)
      163  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2x1);
      166  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      330  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2x0);  (local)
      333  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup():
      200  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      205  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      209  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      424  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      428  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      579  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      580  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r():
      208  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      213  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      217  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      436  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      440  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      591  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      592  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r():
      200  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2c0x0);  (local)
      205  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2c0x1);
      209  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      424  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      428  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      579  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      580  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r():
      145  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      148  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      276  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      277  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r():
      142  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      145  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      273  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      274  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 3x16c4-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup():
      142  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      145  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      273  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      274  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal():
      190  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2x0);  (local)
      195  vprod2xABc0 = vmlal_s8(vprod2xABc0, vbABc0x1, va2x1);
      199  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      407  int16x8_t vprod2xABc0 = vmull_s8(vbABc0x0, va2x0);  (local)
      411  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r():
      168  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      172  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      323  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      324  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup():
      168  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      172  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      323  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      324  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r():
      172  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      176  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
      327  const int16x8_t vprod2xABc0 = vmull_s8(vbABc0, va2c0);  (local)
      328  vacc2xAB = vpadalq_s16(vacc2xAB, vprod2xABc0);
|
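Note: the mull kernels, and the remainder blocks of the mlal kernels, record the same fold without the second fused multiply: a single vmull_s8 followed by vpadalq_s16. A hedged sketch of that variant (names again illustrative, not taken from the generated sources):

    #include <arm_neon.h>

    /* Sketch only: the single-multiply variant seen in the mull kernels and in
       the remainder paths indexed above.  Requires an ARM NEON target. */
    static inline int32x4_t accumulate_mull_step(int32x4_t vacc,
                                                 int8x8_t vb, int8x8_t va) {
      const int16x8_t vprod = vmull_s8(vb, va);  /* widening 8-bit multiply        */
      return vpadalq_s16(vacc, vprod);           /* pairwise-add into 32-bit lanes */
    }

Judging from the 3x16c4/4x16c4 kernel names, each such step would consume a 4-byte slice of the reduction dimension for one accumulator register, unrolled across 3 or 4 rows and 16 output columns; that wider unrolling is omitted from the sketch.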