/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c8-minmax-rndnu-neon-mlal.c | 137 int16x8_t vprod0x13 = vmull_s8(vb13x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 138 vprod0x13 = vmlal_s8(vprod0x13, vb13x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 139 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 196 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 197 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
|
D | 1x16c16-minmax-rndnu-neon-mlal.c | 122 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local 123 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() 124 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
|
D | 2x16c8-minmax-rndnu-neon-mlal.c | 200 int16x8_t vprod0x13 = vmull_s8(vb13x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 202 vprod0x13 = vmlal_s8(vprod0x13, vb13x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 204 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 295 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 297 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
|
D | 2x16c16-minmax-rndnu-neon-mlal.c | 184 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 186 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 188 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 263 int16x8_t vprod0x13 = vmull_s8(vb13x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 266 vprod0x13 = vmlal_s8(vprod0x13, vb13x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 269 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 394 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 397 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 1x16c8-minmax-rndnu-neon-mull.c | 107 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local 108 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 246 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 249 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 252 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 4x16c8-minmax-rndnu-neon-mlal.c | 326 int16x8_t vprod0x13 = vmull_s8(vb13x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 330 vprod0x13 = vmlal_s8(vprod0x13, vb13x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 334 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 493 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 497 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 308 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 312 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 316 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 2x16c8-minmax-rndnu-neon-mull.c | 156 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 158 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 205 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 208 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c | 254 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 258 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c8-minmax-rndnu-neon-mlal.c | 148 int16x8_t vprod0x13 = vmull_s8(vb13x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 149 vprod0x13 = vmlal_s8(vprod0x13, vb13x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 150 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 207 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 208 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
|
D | 1x16c16-minmax-rndnu-neon-mlal.c | 133 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local 134 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() 135 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
|
D | 2x16c8-minmax-rndnu-neon-mlal.c | 213 int16x8_t vprod0x13 = vmull_s8(vb13x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 215 vprod0x13 = vmlal_s8(vprod0x13, vb13x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 217 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() 308 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local 310 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
|
D | 2x16c16-minmax-rndnu-neon-mlal.c | 197 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local 199 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() 201 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 278 int16x8_t vprod0x13 = vmull_s8(vb13x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 281 vprod0x13 = vmlal_s8(vprod0x13, vb13x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 284 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 409 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 412 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 1x16c8-minmax-rndnu-neon-mull.c | 118 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local 119 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 261 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 264 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 267 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 4x16c8-minmax-rndnu-neon-mlal.c | 343 int16x8_t vprod0x13 = vmull_s8(vb13x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 347 vprod0x13 = vmlal_s8(vprod0x13, vb13x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 351 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 510 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 514 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 325 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 329 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 333 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 2x16c8-minmax-rndnu-neon-mull.c | 169 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local 171 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 220 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 223 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c | 271 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 275 vacc0x13 = vpadalq_s16(vacc0x13, vprod0x13); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|