/external/XNNPACK/src/qs8-igemm/gen/
D | 1x16c8-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal():
    136  int16x8_t vprod0x10 = vmull_s8(vb10x0, va0x0);   (local)
    137  vprod0x10 = vmlal_s8(vprod0x10, vb10x1, va0x1);
    138  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);
    198  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    199  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 1x16c16-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal():
    124  int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0));   (local)
    125  vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0));
    126  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 2x16c8-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal():
    192  int16x8_t vprod0x10 = vmull_s8(vb10x0, va0x0);   (local)
    194  vprod0x10 = vmlal_s8(vprod0x10, vb10x1, va0x1);
    196  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);
    293  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    295  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 2x16c16-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal():
    179  int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0));   (local)
    181  vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0));
    183  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 3x16c8-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal():
    248  int16x8_t vprod0x10 = vmull_s8(vb10x0, va0x0);   (local)
    251  vprod0x10 = vmlal_s8(vprod0x10, vb10x1, va0x1);
    254  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);
    388  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    391  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 1x16c8-minmax-neon-mull-padal.c | xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal():
    109  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    110  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 3x16c16-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal():
    234  int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0));   (local)
    237  vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0));
    240  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 4x16c8-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal():
    304  int16x8_t vprod0x10 = vmull_s8(vb10x0, va0x0);   (local)
    308  vprod0x10 = vmlal_s8(vprod0x10, vb10x1, va0x1);
    312  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);
    483  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    487  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 2x16c8-minmax-neon-mull-padal.c | xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal():
    154  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    156  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 4x16c16-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal():
    289  int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0));   (local)
    293  vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0));
    297  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 3x16c8-minmax-neon-mull-padal.c | xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal():
    199  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    202  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 4x16c8-minmax-neon-mull-padal.c | xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal():
    244  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    248  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

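Every qs8-igemm row above uses the same three-intrinsic accumulation idiom: vmull_s8 widens two int8 8-lane slices into int16 products, vmlal_s8 folds in the second pair of slices, and vpadalq_s16 pairwise-adds the int16 products into the int32 accumulator (here vacc0x10). The sketch below is a minimal, self-contained illustration of that c8 "mlal" step, not XNNPACK source; the helper name accumulate_c8_mlal and the toy inputs are assumptions made for this example.

// Sketch only: mirrors the c8 "mlal" accumulation step listed above.
// Build for an ARM/AArch64 target with NEON enabled.
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

// Hypothetical helper (name is illustrative): va0x0/va0x1 are two 8-byte
// slices of row 0 of A, vb10x0/vb10x1 the matching slices of column 10 of B.
static int32x4_t accumulate_c8_mlal(int32x4_t vacc0x10,
                                    int8x8_t va0x0, int8x8_t va0x1,
                                    int8x8_t vb10x0, int8x8_t vb10x1) {
  int16x8_t vprod0x10 = vmull_s8(vb10x0, va0x0);    // 8 widened int16 products
  vprod0x10 = vmlal_s8(vprod0x10, vb10x1, va0x1);   // multiply-accumulate second slice
  return vpadalq_s16(vacc0x10, vprod0x10);          // pairwise add into int32 lanes
}

int main(void) {
  const int8_t a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1};
  const int8_t b[16] = {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2};
  int32x4_t vacc = vdupq_n_s32(0);
  vacc = accumulate_c8_mlal(vacc, vld1_s8(a), vld1_s8(a + 8),
                            vld1_s8(b), vld1_s8(b + 8));
  int32_t lanes[4];
  vst1q_s32(lanes, vacc);
  // The four lanes sum to dot(a, b) over this 16-deep slice: 7 + 11 + 15 + 19 = 52.
  printf("%d %d %d %d\n", lanes[0], lanes[1], lanes[2], lanes[3]);
  return 0;
}

The …neon-mull-padal.c rows are the same step without the vmlal_s8 line: each iteration widens and pad-accumulates a single 8-byte slice of the depth dimension.
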
/external/XNNPACK/src/qs8-gemm/gen/
D | 1x16c8-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal():
    125  int16x8_t vprod0x10 = vmull_s8(vb10x0, va0x0);   (local)
    126  vprod0x10 = vmlal_s8(vprod0x10, vb10x1, va0x1);
    127  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);
    187  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    188  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 1x16c16-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal():
    113  int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0));   (local)
    114  vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0));
    115  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 2x16c8-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal():
    179  int16x8_t vprod0x10 = vmull_s8(vb10x0, va0x0);   (local)
    181  vprod0x10 = vmlal_s8(vprod0x10, vb10x1, va0x1);
    183  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);
    280  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    282  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 2x16c16-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal():
    166  int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0));   (local)
    168  vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0));
    170  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 3x16c8-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal():
    233  int16x8_t vprod0x10 = vmull_s8(vb10x0, va0x0);   (local)
    236  vprod0x10 = vmlal_s8(vprod0x10, vb10x1, va0x1);
    239  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);
    373  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    376  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 1x16c8-minmax-neon-mull-padal.c | xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal():
    98   const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    99   vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 3x16c16-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal():
    219  int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0));   (local)
    222  vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0));
    225  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 4x16c8-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal():
    287  int16x8_t vprod0x10 = vmull_s8(vb10x0, va0x0);   (local)
    291  vprod0x10 = vmlal_s8(vprod0x10, vb10x1, va0x1);
    295  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);
    466  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    470  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 2x16c8-minmax-neon-mull-padal.c | xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal():
    141  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    143  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 4x16c16-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal():
    272  int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0));   (local)
    276  vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0));
    280  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 3x16c8-minmax-neon-mull-padal.c | xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal():
    184  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    187  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

D | 4x16c8-minmax-neon-mull-padal.c | xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal():
    227  const int16x8_t vprod0x10 = vmull_s8(vb10, va0);   (local)
    231  vacc0x10 = vpadalq_s16(vacc0x10, vprod0x10);

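The qs8-gemm rows mirror the igemm ones; the c16 variants keep 16 bytes of A and B per register and split them with vget_low_s8/vget_high_s8 before the same mull/mlal/padal sequence. Below is a minimal sketch of that c16 step, again with an assumed helper name (accumulate_c16_mlal) and toy data rather than the real kernel code.

// Sketch only: mirrors the c16 variant listed above (vget_low_s8/vget_high_s8 split).
// Build for an ARM/AArch64 target with NEON enabled.
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

// Hypothetical helper (name is illustrative): va0 holds 16 bytes of row 0 of A,
// vb10 the matching 16 bytes of column 10 of B.
static int32x4_t accumulate_c16_mlal(int32x4_t vacc0x10, int8x16_t va0, int8x16_t vb10) {
  int16x8_t vprod0x10 = vmull_s8(vget_low_s8(vb10), vget_low_s8(va0));     // low 8 lanes
  vprod0x10 = vmlal_s8(vprod0x10, vget_high_s8(vb10), vget_high_s8(va0));  // high 8 lanes
  return vpadalq_s16(vacc0x10, vprod0x10);                                 // pairwise add into int32
}

int main(void) {
  const int8_t a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1};
  const int8_t b[16] = {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2};
  int32x4_t vacc = vdupq_n_s32(0);
  vacc = accumulate_c16_mlal(vacc, vld1q_s8(a), vld1q_s8(b));
  int32_t lanes[4];
  vst1q_s32(lanes, vacc);
  // Same result as the c8 sketch: the four lanes sum to dot(a, b) = 52.
  printf("%d %d %d %d\n", lanes[0], lanes[1], lanes[2], lanes[3]);
  return 0;
}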