/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c4-minmax-rndnu-neon-mlal-ld2r.c | 127 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local 131 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 134 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 224 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local 227 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 295 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local 296 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
|
D | 3x8c4-minmax-rndnu-neon-mlal-ld1r.c | 133 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local 137 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 140 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 233 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local 236 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 304 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local 305 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
|
D | 3x8c4-minmax-rndnu-neon-mlal-dup.c | 127 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local 131 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 134 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 224 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local 227 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 295 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local 296 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
|
D | 3x8c4s2-minmax-rndnu-neon-mlal.c | 119 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() local 123 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 126 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 210 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() local 213 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 150 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 155 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 159 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 272 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 276 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 355 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 356 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 150 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 155 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 159 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 272 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 276 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 355 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 356 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 158 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 163 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 167 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 284 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 288 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 367 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 368 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 140 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 145 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 149 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 255 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 259 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 3x8c4-minmax-rndnu-neon-mull-ld1r.c | 113 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r() local 116 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r() 184 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r() local 185 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r()
|
D | 3x8c4-minmax-rndnu-neon-mull-dup.c | 110 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup() local 113 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup() 181 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup() local 182 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup()
|
D | 3x8c4-minmax-rndnu-neon-mull-ld2r.c | 110 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r() local 113 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r() 181 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r() local 182 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c4-minmax-rndnu-neon-mlal-dup.c | 142 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local 146 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 149 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 239 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local 242 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() 310 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local 311 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
|
D | 3x8c4-minmax-rndnu-neon-mlal-ld1r.c | 148 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local 152 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 155 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 248 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local 251 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() 319 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local 320 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
|
D | 3x8c4-minmax-rndnu-neon-mlal-ld2r.c | 142 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local 146 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 149 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 239 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local 242 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() 310 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local 311 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
|
D | 3x8c4s2-minmax-rndnu-neon-mlal.c | 134 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() local 138 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 141 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 225 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() local 228 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld1r.c | 175 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 180 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 184 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 301 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 305 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() 384 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local 385 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
|
D | 4x8c4-minmax-rndnu-neon-mlal-dup.c | 167 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 172 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 176 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 289 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 293 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() 372 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local 373 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
|
D | 4x8c4-minmax-rndnu-neon-mlal-ld2r.c | 167 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 172 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 176 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 289 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 293 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() 372 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local 373 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
|
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 157 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 162 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 166 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 272 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 276 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 3x8c4-minmax-rndnu-neon-mull-ld1r.c | 128 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r() local 131 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r() 199 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r() local 200 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r()
|
D | 3x8c4-minmax-rndnu-neon-mull-dup.c | 125 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup() local 128 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup() 196 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup() local 197 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup()
|
D | 3x8c4-minmax-rndnu-neon-mull-ld2r.c | 125 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r() local 128 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r() 196 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r() local 197 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 168 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 172 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 175 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 356 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 359 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 495 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 496 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 162 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 166 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 169 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 347 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 350 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 486 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 487 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 162 int16x8_t vprod2x67c0 = vmull_s8(vb67c0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 166 vprod2x67c0 = vmlal_s8(vprod2x67c0, vb67c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 169 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 347 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 350 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 486 const int16x8_t vprod2x67c0 = vmull_s8(vb67c0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 487 vacc2x67 = vpadalq_s16(vacc2x67, vprod2x67c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|