/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c2-minmax-rndnu-neon-mlal-ld1r.c | 128 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local 131 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 133 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 283 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local 285 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 374 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local 375 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mlal-ld4r.c | 116 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local 119 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 121 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 265 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local 267 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 356 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local 357 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
|
D | 2x16c2-minmax-rndnu-neon-mlal-dup.c | 116 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local 119 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 121 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 265 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local 267 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 356 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local 357 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
|
D | 2x16c2-minmax-rndnu-neon-mlal-ld2r.c | 120 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local 123 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 125 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 271 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local 273 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 362 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local 363 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
|
D | 2x16c2s4-minmax-rndnu-neon-mlal.c | 111 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local 114 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() 116 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() 253 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local 255 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld4r.c | 138 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 142 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 145 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 341 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 344 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 464 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 465 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-dup.c | 138 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 142 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 145 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 341 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 344 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 464 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 465 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld1r.c | 156 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 160 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 163 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 368 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 371 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 491 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 492 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld2r.c | 144 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 148 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 151 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 350 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 353 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 473 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 474 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
|
D | 3x16c2s4-minmax-rndnu-neon-mlal.c | 131 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 135 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 138 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 326 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 329 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | 113 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 115 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 204 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 205 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-dup.c | 107 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 109 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 198 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 199 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld4r.c | 107 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 109 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 198 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 199 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c2-minmax-rndnu-neon-mlal-ld2r.c | 106 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local 109 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 111 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 257 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local 259 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() 348 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r() local 349 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r()
|
D | 2x16c2-minmax-rndnu-neon-mlal-ld1r.c | 114 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local 117 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 119 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 269 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local 271 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 360 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local 361 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mlal-dup.c | 102 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local 105 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 107 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 251 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local 253 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() 342 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup() local 343 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup()
|
D | 2x16c2-minmax-rndnu-neon-mlal-ld4r.c | 102 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local 105 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 107 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 251 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local 253 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() 342 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r() local 343 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r()
|
D | 2x16c2s4-minmax-rndnu-neon-mlal.c | 98 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local 101 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() 103 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() 240 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local 242 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
|
D | 3x16c2-minmax-rndnu-neon-mlal-dup.c | 122 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 126 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 129 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 325 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 328 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 448 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 449 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld4r.c | 122 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 126 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 129 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 325 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 328 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 448 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 449 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld2r.c | 128 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 132 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 135 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 334 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 337 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 457 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 458 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld1r.c | 140 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 144 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 147 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 352 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 355 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 475 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 476 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
|
D | 3x16c2s4-minmax-rndnu-neon-mlal.c | 116 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 120 vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 123 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 311 int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 314 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
|
D | 2x16c2-minmax-rndnu-neon-mull-dup.c | 94 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 96 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 185 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 186 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | 100 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 102 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 191 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 192 vacc1x89AB = vpadalq_s16(vacc1x89AB, vprod1x89ABc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
|