/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c2-minmax-rndnu-neon-mlal-dup.c | 170 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 174 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 177 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 354 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 357 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 491 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 492 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld4r.c | 170 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 174 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 177 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 354 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 357 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 491 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 492 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld2r.c | 176 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 180 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 183 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 363 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 366 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 500 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 501 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld1r.c | 188 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 192 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 195 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 381 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 384 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 518 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 519 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
|
D | 3x16c2s4-minmax-rndnu-neon-mlal.c | 163 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 167 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 170 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 339 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 342 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | 204 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 209 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 213 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 437 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 441 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 606 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 607 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | 212 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 217 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 221 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 449 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 453 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 618 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 619 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | 204 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 209 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 213 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 437 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 441 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 606 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 607 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c | 228 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 233 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 237 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 473 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 477 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 642 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 643 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mull-dup.c | 139 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 142 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 276 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 277 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld2r.c | 142 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 145 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 279 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 280 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld4r.c | 139 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 142 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 276 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 277 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
|
D | 4x16c2s4-minmax-rndnu-neon-mlal.c | 195 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local 200 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 204 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 419 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local 423 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | 148 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 151 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 285 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 286 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c2-minmax-rndnu-neon-mlal-ld4r.c | 186 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 190 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 193 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 370 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 373 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 507 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 508 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-dup.c | 186 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 190 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 193 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 370 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 373 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 507 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 508 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld1r.c | 204 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 208 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 211 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 397 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 400 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 534 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 535 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld2r.c | 192 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 196 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 199 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 379 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 382 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 516 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 517 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
|
D | 3x16c2s4-minmax-rndnu-neon-mlal.c | 178 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 182 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 185 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 354 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 357 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | 230 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 235 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 239 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 467 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 471 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 636 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 637 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c | 246 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 251 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 255 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 491 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 495 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 660 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 661 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | 222 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 227 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 231 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 455 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 459 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 624 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 625 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | 222 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 227 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 231 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 455 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 459 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 624 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 625 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|
D | 4x16c2s4-minmax-rndnu-neon-mlal.c | 212 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local 217 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 221 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 436 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local 440 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | 163 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 166 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 300 const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 301 vacc2x89AB = vpadalq_s16(vacc2x89AB, vprod2x89ABc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
|