/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 200 int16x8_t vprod1x11 = vmull_s8(vb11x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 202 vprod1x11 = vmlal_s8(vprod1x11, vb11x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 204 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 299 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 301 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 186 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 188 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 190 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 259 int16x8_t vprod1x11 = vmull_s8(vb11x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 262 vprod1x11 = vmlal_s8(vprod1x11, vb11x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 265 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 396 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 399 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 244 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 247 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 250 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 318 int16x8_t vprod1x11 = vmull_s8(vb11x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 322 vprod1x11 = vmlal_s8(vprod1x11, vb11x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 326 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 493 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 497 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 160 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 162 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 302 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 306 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 310 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 207 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 210 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 254 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 258 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 187 int16x8_t vprod1x11 = vmull_s8(vb11x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 189 vprod1x11 = vmlal_s8(vprod1x11, vb11x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 191 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 286 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 288 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 173 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 175 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 177 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 244 int16x8_t vprod1x11 = vmull_s8(vb11x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 247 vprod1x11 = vmlal_s8(vprod1x11, vb11x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 250 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 381 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 384 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 229 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 232 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 235 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 301 int16x8_t vprod1x11 = vmull_s8(vb11x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 305 vprod1x11 = vmlal_s8(vprod1x11, vb11x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 309 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 476 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 480 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 147 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 149 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 285 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 289 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 293 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 192 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 195 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 237 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 241 vacc1x11 = vpadalq_s16(vacc1x11, vprod1x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|