/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c8-minmax-rndnu-neon-mlal.c | 141 int16x8_t vprod0x14 = vmull_s8(vb14x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 142 vprod0x14 = vmlal_s8(vprod0x14, vb14x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 143 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 199 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 200 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
|
D | 1x16c16-minmax-rndnu-neon-mlal.c | 125 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local 126 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() 127 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
|
D | 2x16c8-minmax-rndnu-neon-mlal.c | 207 int16x8_t vprod0x14 = vmull_s8(vb14x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 209 vprod0x14 = vmlal_s8(vprod0x14, vb14x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 211 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 300 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 302 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
|
D | 2x16c16-minmax-rndnu-neon-mlal.c | 190 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 192 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 194 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 273 int16x8_t vprod0x14 = vmull_s8(vb14x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 276 vprod0x14 = vmlal_s8(vprod0x14, vb14x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 279 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 401 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 404 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 1x16c8-minmax-rndnu-neon-mull.c | 110 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local 111 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 255 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 258 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 261 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 4x16c8-minmax-rndnu-neon-mlal.c | 339 int16x8_t vprod0x14 = vmull_s8(vb14x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 343 vprod0x14 = vmlal_s8(vprod0x14, vb14x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 347 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 502 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 506 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 320 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 324 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 328 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 2x16c8-minmax-rndnu-neon-mull.c | 161 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 163 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 212 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 215 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c | 263 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 267 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c8-minmax-rndnu-neon-mlal.c | 152 int16x8_t vprod0x14 = vmull_s8(vb14x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 153 vprod0x14 = vmlal_s8(vprod0x14, vb14x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 154 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 210 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 211 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
|
D | 1x16c16-minmax-rndnu-neon-mlal.c | 136 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local 137 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() 138 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
|
D | 2x16c8-minmax-rndnu-neon-mlal.c | 220 int16x8_t vprod0x14 = vmull_s8(vb14x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 222 vprod0x14 = vmlal_s8(vprod0x14, vb14x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 224 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 313 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 315 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
|
D | 2x16c16-minmax-rndnu-neon-mlal.c | 203 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 205 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 207 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 288 int16x8_t vprod0x14 = vmull_s8(vb14x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 291 vprod0x14 = vmlal_s8(vprod0x14, vb14x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 294 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 416 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 419 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 1x16c8-minmax-rndnu-neon-mull.c | 121 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local 122 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 270 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 273 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 276 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 4x16c8-minmax-rndnu-neon-mlal.c | 356 int16x8_t vprod0x14 = vmull_s8(vb14x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 360 vprod0x14 = vmlal_s8(vprod0x14, vb14x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 364 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 519 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 523 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 337 int16x8_t vprod0x14 = vmull_s8(vget_low_s8(vb14), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 341 vprod0x14 = vmlal_s8(vprod0x14, vget_high_s8(vb14), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 345 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 2x16c8-minmax-rndnu-neon-mull.c | 174 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 176 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 227 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 230 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c | 280 const int16x8_t vprod0x14 = vmull_s8(vb14, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 284 vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|