/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 186 int16x8_t vprod1x9 = vmull_s8(vb9x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 188 vprod1x9 = vmlal_s8(vprod1x9, vb9x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 190 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 289 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 291 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 174 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 176 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 178 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 239 int16x8_t vprod1x9 = vmull_s8(vb9x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 242 vprod1x9 = vmlal_s8(vprod1x9, vb9x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 245 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 382 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 385 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 226 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 229 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 232 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 292 int16x8_t vprod1x9 = vmull_s8(vb9x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 296 vprod1x9 = vmlal_s8(vprod1x9, vb9x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 300 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 475 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 479 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 150 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 152 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 278 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 282 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 286 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 193 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 196 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 236 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 240 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x16c8-minmax-neon-mlal-padal.c | 173 int16x8_t vprod1x9 = vmull_s8(vb9x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 175 vprod1x9 = vmlal_s8(vprod1x9, vb9x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 177 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 276 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 278 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 161 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 163 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 165 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 224 int16x8_t vprod1x9 = vmull_s8(vb9x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 227 vprod1x9 = vmlal_s8(vprod1x9, vb9x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 230 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 367 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 370 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 211 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 214 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 217 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 275 int16x8_t vprod1x9 = vmull_s8(vb9x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 279 vprod1x9 = vmlal_s8(vprod1x9, vb9x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 283 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 458 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 462 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 137 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 139 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 261 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 265 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 269 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 178 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 181 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 219 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 223 vacc1x9 = vpadalq_s16(vacc1x9, vprod1x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|