/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c2-minmax-rndnu-neon-mlal-ld2r.c | 122 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 126 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 129 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 237 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 240 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 322 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 323 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-ld1r.c | 134 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 138 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 141 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 255 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 258 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 340 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 341 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-dup.c | 116 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 120 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 123 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 228 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 231 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 313 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 314 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup()
|
D | 3x8c2-minmax-rndnu-neon-mlal-ld4r.c | 116 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 120 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 123 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 228 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 231 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 313 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 314 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r()
|
D | 3x8c2s4-minmax-rndnu-neon-mlal.c | 109 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 113 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 116 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 213 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 216 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld4r.c | 136 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 141 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 145 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 277 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 281 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 382 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 383 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-dup.c | 136 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 141 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 145 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 277 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 281 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 382 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 383 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld1r.c | 160 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 165 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 169 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 313 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 317 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 418 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 419 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld2r.c | 144 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 149 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 153 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 289 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 293 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 394 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 395 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r()
|
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 127 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 132 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 136 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 259 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 263 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 3x8c2-minmax-rndnu-neon-mull-ld1r.c | 110 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local 113 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() 195 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local 196 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r()
|
D | 3x8c2-minmax-rndnu-neon-mull-ld4r.c | 101 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local 104 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() 186 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local 187 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r()
|
D | 3x8c2-minmax-rndnu-neon-mull-dup.c | 101 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 104 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() 186 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 187 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c2-minmax-rndnu-neon-mlal-ld1r.c | 150 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 154 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 157 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 271 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 274 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() 356 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r() local 357 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-ld2r.c | 138 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 142 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 145 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 253 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 256 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() 338 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r() local 339 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-ld4r.c | 132 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 136 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 139 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 244 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 247 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() 329 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r() local 330 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r()
|
D | 3x8c2-minmax-rndnu-neon-mlal-dup.c | 132 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 136 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 139 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 244 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 247 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() 329 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup() local 330 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup()
|
D | 3x8c2s4-minmax-rndnu-neon-mlal.c | 124 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 128 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 131 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 228 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 231 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal()
|
D | 4x8c2-minmax-rndnu-neon-mlal-dup.c | 154 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 159 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 163 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 295 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 299 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() 400 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup() local 401 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld1r.c | 178 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 183 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 187 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 331 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 335 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() 436 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r() local 437 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld2r.c | 162 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 167 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 171 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 307 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 311 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() 412 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r() local 413 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld4r.c | 154 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 159 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 163 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 295 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 299 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 400 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 401 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r()
|
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 144 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 149 vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 153 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 276 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 280 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 3x8c2-minmax-rndnu-neon-mull-dup.c | 116 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 119 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() 201 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 202 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup()
|
D | 3x8c2-minmax-rndnu-neon-mull-ld2r.c | 119 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local 122 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() 204 const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local 205 vacc2x0123 = vpadalq_s16(vacc2x0123, vprod2x0123c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r()
|