/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 193 int16x8_t vprod1x10 = vmull_s8(vb10x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 195 vprod1x10 = vmlal_s8(vprod1x10, vb10x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 197 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 294 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 296 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 180 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 182 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 184 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 249 int16x8_t vprod1x10 = vmull_s8(vb10x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 252 vprod1x10 = vmlal_s8(vprod1x10, vb10x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 255 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 389 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 392 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 235 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 238 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 241 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 305 int16x8_t vprod1x10 = vmull_s8(vb10x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 309 vprod1x10 = vmlal_s8(vprod1x10, vb10x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 313 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 484 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 488 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 155 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 157 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 290 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 294 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 298 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 200 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 203 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 245 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 249 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 180 int16x8_t vprod1x10 = vmull_s8(vb10x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 182 vprod1x10 = vmlal_s8(vprod1x10, vb10x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 184 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 281 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 283 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 167 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 169 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 171 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 234 int16x8_t vprod1x10 = vmull_s8(vb10x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 237 vprod1x10 = vmlal_s8(vprod1x10, vb10x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 240 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 374 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 377 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 220 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 223 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 226 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 288 int16x8_t vprod1x10 = vmull_s8(vb10x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 292 vprod1x10 = vmlal_s8(vprod1x10, vb10x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 296 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 467 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 471 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 142 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 144 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 273 int16x8_t vprod1x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 277 vprod1x10 = vmlal_s8(vprod1x10, vget_high_s8(vb10), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 281 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 185 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 188 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 228 const int16x8_t vprod1x10 = vmull_s8(vb10, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 232 vacc1x10 = vpadalq_s16(vacc1x10, vprod1x10); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|