/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c2-minmax-rndnu-neon-mlal-ld4r.c | 185 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 190 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 194 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 307 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 311 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 414 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 415 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-dup.c | 185 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 190 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 194 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 307 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 311 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 414 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 415 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld1r.c | 209 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 214 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 218 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 343 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 347 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 450 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 451 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld2r.c | 193 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 198 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 202 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 319 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 323 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 426 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 427 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r()
|
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 175 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 180 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 184 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 288 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 292 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld1r.c | 158 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() local 162 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() 265 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() local 266 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld4r.c | 146 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() local 150 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() 253 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() local 254 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r()
|
D | 4x8c2-minmax-rndnu-neon-mull-dup.c | 146 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() local 150 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() 253 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() local 254 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld2r.c | 150 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() local 154 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() 257 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() local 258 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | 253 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 258 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 262 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 467 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 471 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 656 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 657 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | 261 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 266 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 270 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 479 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 483 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 668 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 669 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | 253 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 258 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 262 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 467 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 471 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 656 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 657 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c2-minmax-rndnu-neon-mlal-dup.c | 203 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 208 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 212 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 325 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 329 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 432 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 433 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld1r.c | 227 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 232 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 236 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 361 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 365 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 468 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 469 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld2r.c | 211 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 216 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 220 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 337 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 341 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 444 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 445 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld4r.c | 203 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 208 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 212 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 325 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 329 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 432 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 433 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r()
|
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 192 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 197 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 201 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 305 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 309 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld4r.c | 163 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() local 167 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() 270 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r() local 271 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld2r.c | 167 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() local 171 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() 274 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r() local 275 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r()
|
D | 4x8c2-minmax-rndnu-neon-mull-ld1r.c | 175 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() local 179 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() 282 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r() local 283 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mull-dup.c | 163 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() local 167 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() 270 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup() local 271 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | 279 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 284 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 288 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 497 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 501 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 686 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 687 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c | 295 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 300 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 304 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 521 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 525 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 710 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 711 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | 271 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 276 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 280 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 485 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 489 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 674 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 675 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | 271 int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, va3c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 276 vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, va3c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 280 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 485 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 489 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 674 const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 675 vacc3x4567 = vpadalq_s16(vacc3x4567, vprod3x4567c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|