/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c8-minmax-neon-mlal-padal.c | 255 int16x8_t vprod2x12 = vmull_s8(vb12x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 258 vprod2x12 = vmlal_s8(vprod2x12, vb12x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 261 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 389 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 392 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 239 int16x8_t vprod2x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 242 vprod2x12 = vmlal_s8(vprod2x12, vget_high_s8(vb12), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 245 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 315 int16x8_t vprod2x12 = vmull_s8(vb12x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 319 vprod2x12 = vmlal_s8(vprod2x12, vb12x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 323 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 486 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 490 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 298 int16x8_t vprod2x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 302 vprod2x12 = vmlal_s8(vprod2x12, vget_high_s8(vb12), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 306 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 200 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 203 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 247 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 251 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c8-minmax-neon-mlal-padal.c | 270 int16x8_t vprod2x12 = vmull_s8(vb12x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 273 vprod2x12 = vmlal_s8(vprod2x12, vb12x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 276 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 404 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 407 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 254 int16x8_t vprod2x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 257 vprod2x12 = vmlal_s8(vprod2x12, vget_high_s8(vb12), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 260 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 332 int16x8_t vprod2x12 = vmull_s8(vb12x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 336 vprod2x12 = vmlal_s8(vprod2x12, vb12x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 340 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 503 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 507 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 315 int16x8_t vprod2x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 319 vprod2x12 = vmlal_s8(vprod2x12, vget_high_s8(vb12), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 323 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 215 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 218 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 264 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 268 vacc2x12 = vpadalq_s16(vacc2x12, vprod2x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|