/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 207 int16x8_t vprod1x12 = vmull_s8(vb12x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 209 vprod1x12 = vmlal_s8(vprod1x12, vb12x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 211 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 304 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 306 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 192 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 194 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 196 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 269 int16x8_t vprod1x12 = vmull_s8(vb12x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 272 vprod1x12 = vmlal_s8(vprod1x12, vb12x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 275 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 403 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 406 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 253 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 256 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 259 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 331 int16x8_t vprod1x12 = vmull_s8(vb12x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 335 vprod1x12 = vmlal_s8(vprod1x12, vb12x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 339 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 502 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 506 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 165 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 167 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 314 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 318 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 322 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 214 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 217 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 263 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 267 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 194 int16x8_t vprod1x12 = vmull_s8(vb12x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 196 vprod1x12 = vmlal_s8(vprod1x12, vb12x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 198 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 291 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 293 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 179 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 181 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 183 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 254 int16x8_t vprod1x12 = vmull_s8(vb12x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 257 vprod1x12 = vmlal_s8(vprod1x12, vb12x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 260 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 388 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 391 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 238 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 241 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 244 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 314 int16x8_t vprod1x12 = vmull_s8(vb12x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 318 vprod1x12 = vmlal_s8(vprod1x12, vb12x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 322 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 485 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 489 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 152 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 154 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 297 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 301 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 305 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 199 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 202 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 246 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 250 vacc1x12 = vpadalq_s16(vacc1x12, vprod1x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|