/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c4s2-minmax-rndnu-neon-mlal.c | 180 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 183 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 185 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 282 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 284 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld1r.c | 191 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 194 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 196 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 300 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 302 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
|
D | 2x16c4-minmax-rndnu-neon-mlal-dup.c | 187 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 190 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 192 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 294 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 296 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld2r.c | 187 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local 190 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() 192 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() 294 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local 296 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
|
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 234 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 238 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 241 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 374 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 377 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 243 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 247 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 250 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 389 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 392 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 243 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 247 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 250 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 389 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 392 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 249 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 253 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 256 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 398 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 401 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 288 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 293 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 297 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 466 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 470 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 299 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 304 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 308 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 484 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 488 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 307 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 312 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 316 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 496 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 500 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 299 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 304 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 308 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 484 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 488 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 2x16c4s2-minmax-rndnu-neon-mull.c | 140 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull() local 142 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c4s2-minmax-rndnu-neon-mlal.c | 193 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 196 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 198 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 295 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 297 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
|
D | 2x16c4-minmax-rndnu-neon-mlal-dup.c | 200 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 203 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 205 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 307 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 309 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld1r.c | 204 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 207 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 209 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 313 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 315 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld2r.c | 200 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local 203 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() 205 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() 307 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local 309 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
|
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 249 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 253 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 256 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 389 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 392 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 264 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 268 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 271 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 413 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 416 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 258 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 262 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 265 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 404 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 407 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 258 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 262 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 265 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 404 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 407 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 305 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 310 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 314 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 483 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 487 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 324 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 329 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 333 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 513 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 517 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 316 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 321 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 325 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 501 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 505 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 316 int16x8_t vprod1x89c1 = vmull_s8(vb89c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 321 vprod1x89c1 = vmlal_s8(vprod1x89c1, vb89c1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 325 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 501 const int16x8_t vprod1x89c1 = vmull_s8(vb89c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 505 vacc1x89 = vpadalq_s16(vacc1x89, vprod1x89c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|