/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 221 int16x8_t vprod1x14 = vmull_s8(vb14x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 223 vprod1x14 = vmlal_s8(vprod1x14, vb14x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 225 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 314 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 316 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 204 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 206 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 208 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 289 int16x8_t vprod1x14 = vmull_s8(vb14x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 292 vprod1x14 = vmlal_s8(vprod1x14, vb14x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 295 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 417 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 420 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 271 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 274 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 277 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 357 int16x8_t vprod1x14 = vmull_s8(vb14x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 361 vprod1x14 = vmlal_s8(vprod1x14, vb14x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 365 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 520 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 524 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 175 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 177 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 338 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 342 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 346 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 228 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 231 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 281 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 285 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 208 int16x8_t vprod1x14 = vmull_s8(vb14x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 210 vprod1x14 = vmlal_s8(vprod1x14, vb14x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 212 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 301 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 303 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 191 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 193 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 195 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 274 int16x8_t vprod1x14 = vmull_s8(vb14x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 277 vprod1x14 = vmlal_s8(vprod1x14, vb14x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 280 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 402 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 405 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 256 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 259 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 262 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 340 int16x8_t vprod1x14 = vmull_s8(vb14x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 344 vprod1x14 = vmlal_s8(vprod1x14, vb14x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 348 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 503 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 507 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 162 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 164 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 321 int16x8_t vprod1x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 325 vprod1x14 = vmlal_s8(vprod1x14, vget_high_s8(vb14), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 329 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 213 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 216 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 264 const int16x8_t vprod1x14 = vmull_s8(vb14, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 268 vacc1x14 = vpadalq_s16(vacc1x14, vprod1x14); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|