/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c4s2-minmax-rndnu-neon-mlal.c | 187 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 190 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 192 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 286 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 288 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld1r.c | 198 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 201 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 203 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 304 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 306 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
|
D | 2x16c4-minmax-rndnu-neon-mlal-dup.c | 194 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 197 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 199 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 298 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 300 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld2r.c | 194 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local 197 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() 199 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() 298 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local 300 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
|
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 244 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 248 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 251 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 380 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 383 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 253 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 257 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 260 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 395 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 398 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 253 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 257 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 260 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 395 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 398 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 259 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 263 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 266 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 404 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 407 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 301 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 306 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 310 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 474 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 478 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 312 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 317 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 321 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 492 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 496 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 320 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 325 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 329 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 504 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 508 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 312 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 317 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 321 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 492 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 496 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 2x16c4s2-minmax-rndnu-neon-mull.c | 144 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local 146 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c4s2-minmax-rndnu-neon-mlal.c | 200 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 203 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 205 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 299 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 301 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
|
D | 2x16c4-minmax-rndnu-neon-mlal-dup.c | 207 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 210 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 212 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 311 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 313 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld1r.c | 211 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 214 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 216 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 317 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 319 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld2r.c | 207 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local 210 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() 212 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() 311 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local 313 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
|
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 259 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 263 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 266 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 395 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 398 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 274 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 278 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 281 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 419 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 422 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 268 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 272 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 275 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 410 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 413 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 268 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 272 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 275 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 410 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 413 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 318 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 323 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 327 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 491 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 495 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 337 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 342 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 346 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 521 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 525 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 329 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 334 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 338 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 509 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 513 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 329 int16x8_t vprod1xABc1 = vmull_s8(vbABc1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 334 vprod1xABc1 = vmlal_s8(vprod1xABc1, vbABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 338 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 509 const int16x8_t vprod1xABc1 = vmull_s8(vbABc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 513 vacc1xAB = vpadalq_s16(vacc1xAB, vprod1xABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|