/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 228 int16x8_t vprod1x15 = vmull_s8(vb15x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 230 vprod1x15 = vmlal_s8(vprod1x15, vb15x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 232 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 319 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 321 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 210 int16x8_t vprod1x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 212 vprod1x15 = vmlal_s8(vprod1x15, vget_high_s8(vb15), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 214 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 299 int16x8_t vprod1x15 = vmull_s8(vb15x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 302 vprod1x15 = vmlal_s8(vprod1x15, vb15x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 305 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 424 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 427 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 280 int16x8_t vprod1x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 283 vprod1x15 = vmlal_s8(vprod1x15, vget_high_s8(vb15), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 286 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 370 int16x8_t vprod1x15 = vmull_s8(vb15x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 374 vprod1x15 = vmlal_s8(vprod1x15, vb15x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 378 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 529 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 533 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 180 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 182 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 350 int16x8_t vprod1x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 354 vprod1x15 = vmlal_s8(vprod1x15, vget_high_s8(vb15), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 358 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 235 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 238 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 290 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 294 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 215 int16x8_t vprod1x15 = vmull_s8(vb15x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 217 vprod1x15 = vmlal_s8(vprod1x15, vb15x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 219 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 306 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 308 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 197 int16x8_t vprod1x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 199 vprod1x15 = vmlal_s8(vprod1x15, vget_high_s8(vb15), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 201 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 284 int16x8_t vprod1x15 = vmull_s8(vb15x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 287 vprod1x15 = vmlal_s8(vprod1x15, vb15x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 290 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 409 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 412 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 265 int16x8_t vprod1x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 268 vprod1x15 = vmlal_s8(vprod1x15, vget_high_s8(vb15), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 271 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 353 int16x8_t vprod1x15 = vmull_s8(vb15x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 357 vprod1x15 = vmlal_s8(vprod1x15, vb15x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 361 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 512 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 516 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 167 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 169 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 333 int16x8_t vprod1x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 337 vprod1x15 = vmlal_s8(vprod1x15, vget_high_s8(vb15), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 341 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 220 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 223 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 273 const int16x8_t vprod1x15 = vmull_s8(vb15, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 277 vacc1x15 = vpadalq_s16(vacc1x15, vprod1x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|