/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 162 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 167 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 171 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 268 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 272 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 173 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 178 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 182 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 286 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 290 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 173 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 178 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 182 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 286 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 290 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 181 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 186 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 190 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 298 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 302 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 238 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 243 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 247 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 436 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 440 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 249 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 254 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 258 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 454 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 458 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 257 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 262 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 266 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 466 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 470 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 249 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 254 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 258 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 454 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 458 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x8c4s2-minmax-rndnu-neon-mull.c | 134 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local 138 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld2r.c | 142 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 146 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mull-dup.c | 142 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 146 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld1r.c | 146 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 150 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 179 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 184 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 188 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 285 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 289 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 198 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 203 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 207 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 315 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 319 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 190 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 195 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 199 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 303 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 307 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 190 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 195 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 199 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 303 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 307 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 255 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 260 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 264 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 453 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 457 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 274 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 279 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 283 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 483 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 487 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 266 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 271 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 275 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 471 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 475 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 266 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 271 vprod3x01c1 = vmlal_s8(vprod3x01c1, vb01c1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 275 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 471 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 475 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x8c4s2-minmax-rndnu-neon-mull.c | 151 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local 155 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld2r.c | 159 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 163 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mull-dup.c | 159 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 163 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld1r.c | 163 const int16x8_t vprod3x01c1 = vmull_s8(vb01c1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 167 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
|
D | 4x16c4s2-minmax-rndnu-neon-mull.c | 207 int16x8_t vprod3x01c1 = vmull_s8(vb01c1x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull() local 211 vacc3x01 = vpadalq_s16(vacc3x01, vprod3x01c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull()
|