/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c8-minmax-neon-mlal-padal.c | 225 int16x8_t vprod2x9 = vmull_s8(vb9x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 228 vprod2x9 = vmlal_s8(vprod2x9, vb9x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 231 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 368 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 371 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 212 int16x8_t vprod2x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 215 vprod2x9 = vmlal_s8(vprod2x9, vget_high_s8(vb9), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 218 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 276 int16x8_t vprod2x9 = vmull_s8(vb9x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 280 vprod2x9 = vmlal_s8(vprod2x9, vb9x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 284 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 459 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 463 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 262 int16x8_t vprod2x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 266 vprod2x9 = vmlal_s8(vprod2x9, vget_high_s8(vb9), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 270 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 179 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 182 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 220 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 224 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c8-minmax-neon-mlal-padal.c | 240 int16x8_t vprod2x9 = vmull_s8(vb9x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 243 vprod2x9 = vmlal_s8(vprod2x9, vb9x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 246 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 383 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 386 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 227 int16x8_t vprod2x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 230 vprod2x9 = vmlal_s8(vprod2x9, vget_high_s8(vb9), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 233 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 293 int16x8_t vprod2x9 = vmull_s8(vb9x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 297 vprod2x9 = vmlal_s8(vprod2x9, vb9x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 301 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 476 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 480 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 279 int16x8_t vprod2x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 283 vprod2x9 = vmlal_s8(vprod2x9, vget_high_s8(vb9), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 287 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 194 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 197 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 237 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 241 vacc2x9 = vpadalq_s16(vacc2x9, vprod2x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|