/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c2-minmax-rndnu-neon-mlal-ld2r.c | 105 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 109 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 112 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 227 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 230 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 304 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 305 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-ld1r.c | 117 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 121 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 124 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 245 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 248 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 322 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 323 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-dup.c | 99 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 103 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 106 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 218 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 221 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 295 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 296 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup()
|
D | 3x8c2-minmax-rndnu-neon-mlal-ld4r.c | 99 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 103 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 106 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 218 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 221 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 295 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 296 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r()
|
D | 3x8c2s4-minmax-rndnu-neon-mlal.c | 93 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 97 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 100 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 204 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 207 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld4r.c | 114 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 119 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 123 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 264 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 268 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 359 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 360 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-dup.c | 114 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 119 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 123 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 264 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 268 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 359 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 360 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld1r.c | 138 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 143 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 147 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 300 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 304 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 395 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 396 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld2r.c | 122 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 127 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 131 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 276 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 280 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 371 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 372 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r()
|
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 106 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 111 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 115 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 247 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 251 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 3x8c2-minmax-rndnu-neon-mull-ld1r.c | 100 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local 103 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() 177 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local 178 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
|
D | 3x8c2-minmax-rndnu-neon-mull-ld4r.c | 91 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local 94 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() 168 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local 169 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
|
D | 3x8c2-minmax-rndnu-neon-mull-dup.c | 91 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 94 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() 168 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 169 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c2-minmax-rndnu-neon-mlal-ld1r.c | 133 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 137 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 140 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 261 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 264 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 338 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 339 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-ld2r.c | 121 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 125 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 128 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 243 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 246 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 320 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 321 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-ld4r.c | 115 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 119 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 122 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 234 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 237 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 311 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 312 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-dup.c | 115 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 119 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 122 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 234 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 237 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 311 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 312 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup()
|
D | 3x8c2s4-minmax-rndnu-neon-mlal.c | 108 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 112 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 115 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 219 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 222 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal()
|
D | 4x8c2-minmax-rndnu-neon-mlal-dup.c | 132 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 137 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 141 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 282 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 286 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 377 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 378 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld1r.c | 156 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 161 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 165 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 318 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 322 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 413 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 414 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld2r.c | 140 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 145 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 149 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 294 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 298 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 389 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 390 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld4r.c | 132 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 137 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 141 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 282 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 286 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 377 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 378 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r()
|
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 123 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 128 vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 132 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 264 int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 268 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 3x8c2-minmax-rndnu-neon-mull-dup.c | 106 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 109 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() 183 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 184 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
|
D | 3x8c2-minmax-rndnu-neon-mull-ld2r.c | 109 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local 112 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() 186 const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local 187 vacc2x4567 = vpadalq_s16(vacc2x4567, vprod2x4567c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
|