/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c8-minmax-neon-mlal-padal.c | 140 int16x8_t vprod0x11 = vmull_s8(vb11x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 141 vprod0x11 = vmlal_s8(vprod0x11, vb11x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 142 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 201 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 202 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 127 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 128 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 129 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 199 int16x8_t vprod0x11 = vmull_s8(vb11x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 201 vprod0x11 = vmlal_s8(vprod0x11, vb11x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 203 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 298 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 300 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 185 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 187 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 189 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 258 int16x8_t vprod0x11 = vmull_s8(vb11x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 261 vprod0x11 = vmlal_s8(vprod0x11, vb11x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 264 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 395 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 398 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 112 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local 113 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 243 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 246 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 249 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 317 int16x8_t vprod0x11 = vmull_s8(vb11x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 321 vprod0x11 = vmlal_s8(vprod0x11, vb11x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 325 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 492 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 496 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 159 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 161 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 301 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 305 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 309 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 206 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 209 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 253 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 257 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c8-minmax-neon-mlal-padal.c | 129 int16x8_t vprod0x11 = vmull_s8(vb11x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 130 vprod0x11 = vmlal_s8(vprod0x11, vb11x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 131 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 190 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 191 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 116 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 117 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 118 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 186 int16x8_t vprod0x11 = vmull_s8(vb11x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 188 vprod0x11 = vmlal_s8(vprod0x11, vb11x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 190 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 285 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 287 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 172 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 174 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 176 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 243 int16x8_t vprod0x11 = vmull_s8(vb11x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 246 vprod0x11 = vmlal_s8(vprod0x11, vb11x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 249 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 380 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 383 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 101 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local 102 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 228 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 231 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 234 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 300 int16x8_t vprod0x11 = vmull_s8(vb11x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 304 vprod0x11 = vmlal_s8(vprod0x11, vb11x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 308 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 475 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 479 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 146 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 148 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 284 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 288 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 292 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 191 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 194 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 236 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 240 vacc0x11 = vpadalq_s16(vacc0x11, vprod0x11); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|