/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 138 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 143 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 147 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 265 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 269 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 362 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 363 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 138 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 143 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 147 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 265 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 269 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 362 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 363 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 146 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 151 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 155 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 277 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 281 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 374 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 375 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 128 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 133 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 137 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 248 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 252 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld2r.c | 121 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 125 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() 218 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 219 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mull-dup.c | 121 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 125 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() 218 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 219 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld1r.c | 125 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 129 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() 222 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 223 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 162 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 167 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 171 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 401 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 405 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 590 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 591 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 170 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 175 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 179 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 413 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 417 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 602 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 603 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 162 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 167 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 171 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 401 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 405 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 590 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 591 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 152 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 157 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 161 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 384 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 388 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | 145 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local 149 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() 334 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local 335 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
|
D | 4x16c4-minmax-rndnu-neon-mull-dup.c | 145 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local 149 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() 334 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local 335 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
|
D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | 149 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local 153 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() 338 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local 339 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 163 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 168 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 172 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 294 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 298 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 391 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 392 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 155 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 160 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 164 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 282 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 286 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 379 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 380 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 155 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 160 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 164 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 282 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 286 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 379 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 380 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 145 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 150 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 154 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 265 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 269 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld2r.c | 138 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 142 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() 235 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local 236 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mull-dup.c | 138 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 142 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() 235 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local 236 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
|
D | 4x8c4-minmax-rndnu-neon-mull-ld1r.c | 142 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 146 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() 239 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local 240 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 187 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 192 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 196 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 430 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 434 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 619 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 620 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 179 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 184 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 188 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 418 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 422 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 607 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 608 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 179 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 184 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 188 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 418 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 422 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 607 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 608 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 169 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 174 vprod3x45c0 = vmlal_s8(vprod3x45c0, vb45c0x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 178 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 401 int16x8_t vprod3x45c0 = vmull_s8(vb45c0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 405 vacc3x45 = vpadalq_s16(vacc3x45, vprod3x45c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|