/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c8-minmax-neon-mlal-padal.c | 156 int16x8_t vprod0x15 = vmull_s8(vb15x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 157 vprod0x15 = vmlal_s8(vprod0x15, vb15x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 158 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 213 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 214 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 139 int16x8_t vprod0x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 140 vprod0x15 = vmlal_s8(vprod0x15, vget_high_s8(vb15), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 141 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 227 int16x8_t vprod0x15 = vmull_s8(vb15x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 229 vprod0x15 = vmlal_s8(vprod0x15, vb15x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 231 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 318 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 320 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 209 int16x8_t vprod0x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 211 vprod0x15 = vmlal_s8(vprod0x15, vget_high_s8(vb15), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 213 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 298 int16x8_t vprod0x15 = vmull_s8(vb15x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 301 vprod0x15 = vmlal_s8(vprod0x15, vb15x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 304 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 423 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 426 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 124 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local 125 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 279 int16x8_t vprod0x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 282 vprod0x15 = vmlal_s8(vprod0x15, vget_high_s8(vb15), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 285 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 369 int16x8_t vprod0x15 = vmull_s8(vb15x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 373 vprod0x15 = vmlal_s8(vprod0x15, vb15x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 377 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 528 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 532 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 179 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 181 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 349 int16x8_t vprod0x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 353 vprod0x15 = vmlal_s8(vprod0x15, vget_high_s8(vb15), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 357 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 234 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 237 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 289 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 293 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c8-minmax-neon-mlal-padal.c | 145 int16x8_t vprod0x15 = vmull_s8(vb15x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 146 vprod0x15 = vmlal_s8(vprod0x15, vb15x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 147 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 202 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 203 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 128 int16x8_t vprod0x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 129 vprod0x15 = vmlal_s8(vprod0x15, vget_high_s8(vb15), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 130 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 214 int16x8_t vprod0x15 = vmull_s8(vb15x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 216 vprod0x15 = vmlal_s8(vprod0x15, vb15x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 218 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 305 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 307 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 196 int16x8_t vprod0x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 198 vprod0x15 = vmlal_s8(vprod0x15, vget_high_s8(vb15), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 200 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 283 int16x8_t vprod0x15 = vmull_s8(vb15x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 286 vprod0x15 = vmlal_s8(vprod0x15, vb15x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 289 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 408 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 411 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 113 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local 114 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 264 int16x8_t vprod0x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 267 vprod0x15 = vmlal_s8(vprod0x15, vget_high_s8(vb15), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 270 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 352 int16x8_t vprod0x15 = vmull_s8(vb15x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 356 vprod0x15 = vmlal_s8(vprod0x15, vb15x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 360 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 511 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 515 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 166 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 168 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 332 int16x8_t vprod0x15 = vmull_s8(vget_low_s8(vb15), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 336 vprod0x15 = vmlal_s8(vprod0x15, vget_high_s8(vb15), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 340 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 219 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 222 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 272 const int16x8_t vprod0x15 = vmull_s8(vb15, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 276 vacc0x15 = vpadalq_s16(vacc0x15, vprod0x15); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|