/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c8-minmax-neon-mlal-padal.c | 144 int16x8_t vprod0x12 = vmull_s8(vb12x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 145 vprod0x12 = vmlal_s8(vprod0x12, vb12x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 146 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 204 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 205 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 130 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 131 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 132 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 206 int16x8_t vprod0x12 = vmull_s8(vb12x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 208 vprod0x12 = vmlal_s8(vprod0x12, vb12x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 210 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 303 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 305 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 191 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 193 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 195 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 268 int16x8_t vprod0x12 = vmull_s8(vb12x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 271 vprod0x12 = vmlal_s8(vprod0x12, vb12x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 274 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 402 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 405 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 115 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local 116 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 252 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 255 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 258 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 330 int16x8_t vprod0x12 = vmull_s8(vb12x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 334 vprod0x12 = vmlal_s8(vprod0x12, vb12x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 338 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 501 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 505 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 164 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 166 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 313 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 317 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 321 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 213 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 216 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 262 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 266 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c8-minmax-neon-mlal-padal.c | 133 int16x8_t vprod0x12 = vmull_s8(vb12x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 134 vprod0x12 = vmlal_s8(vprod0x12, vb12x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 135 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 193 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 194 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 119 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 120 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 121 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 193 int16x8_t vprod0x12 = vmull_s8(vb12x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 195 vprod0x12 = vmlal_s8(vprod0x12, vb12x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 197 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 290 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 292 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 178 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 180 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 182 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 253 int16x8_t vprod0x12 = vmull_s8(vb12x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 256 vprod0x12 = vmlal_s8(vprod0x12, vb12x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 259 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 387 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 390 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 104 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local 105 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 237 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 240 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 243 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 313 int16x8_t vprod0x12 = vmull_s8(vb12x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 317 vprod0x12 = vmlal_s8(vprod0x12, vb12x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 321 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 484 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 488 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 151 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 153 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 296 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 300 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 304 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 198 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 201 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 245 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 249 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|