/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c8-minmax-neon-mlal-padal.c | 215 int16x8_t vprod2x8 = vmull_s8(vb8x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 218 vprod2x8 = vmlal_s8(vprod2x8, vb8x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 221 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 361 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 364 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 203 int16x8_t vprod2x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 206 vprod2x8 = vmlal_s8(vprod2x8, vget_high_s8(vb8), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 209 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 263 int16x8_t vprod2x8 = vmull_s8(vb8x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 267 vprod2x8 = vmlal_s8(vprod2x8, vb8x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 271 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 450 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 454 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 250 int16x8_t vprod2x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 254 vprod2x8 = vmlal_s8(vprod2x8, vget_high_s8(vb8), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 258 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 172 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 175 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 211 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 215 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c8-minmax-neon-mlal-padal.c | 230 int16x8_t vprod2x8 = vmull_s8(vb8x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 233 vprod2x8 = vmlal_s8(vprod2x8, vb8x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 236 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 376 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 379 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 218 int16x8_t vprod2x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 221 vprod2x8 = vmlal_s8(vprod2x8, vget_high_s8(vb8), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 224 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 280 int16x8_t vprod2x8 = vmull_s8(vb8x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 284 vprod2x8 = vmlal_s8(vprod2x8, vb8x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 288 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 467 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 471 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 267 int16x8_t vprod2x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 271 vprod2x8 = vmlal_s8(vprod2x8, vget_high_s8(vb8), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 275 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 187 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 190 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 228 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 232 vacc2x8 = vpadalq_s16(vacc2x8, vprod2x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|