/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c8-minmax-neon-mlal-padal.c | 132 int16x8_t vprod0x9 = vmull_s8(vb9x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 133 vprod0x9 = vmlal_s8(vprod0x9, vb9x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 134 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 195 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 196 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 121 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 122 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 123 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 185 int16x8_t vprod0x9 = vmull_s8(vb9x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 187 vprod0x9 = vmlal_s8(vprod0x9, vb9x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 189 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 288 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 290 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 173 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 175 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 177 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 238 int16x8_t vprod0x9 = vmull_s8(vb9x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 241 vprod0x9 = vmlal_s8(vprod0x9, vb9x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 244 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 381 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 384 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 106 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local 107 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 225 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 228 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 231 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 291 int16x8_t vprod0x9 = vmull_s8(vb9x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 295 vprod0x9 = vmlal_s8(vprod0x9, vb9x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 299 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 474 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 478 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 149 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 151 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 277 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 281 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 285 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 192 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 195 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 235 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 239 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c8-minmax-neon-mlal-padal.c | 121 int16x8_t vprod0x9 = vmull_s8(vb9x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 122 vprod0x9 = vmlal_s8(vprod0x9, vb9x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 123 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 184 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 185 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 110 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 111 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 112 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 172 int16x8_t vprod0x9 = vmull_s8(vb9x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 174 vprod0x9 = vmlal_s8(vprod0x9, vb9x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 176 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 275 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 277 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 160 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 162 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 164 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 223 int16x8_t vprod0x9 = vmull_s8(vb9x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 226 vprod0x9 = vmlal_s8(vprod0x9, vb9x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 229 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 366 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 369 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 95 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local 96 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 210 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 213 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 216 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 274 int16x8_t vprod0x9 = vmull_s8(vb9x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 278 vprod0x9 = vmlal_s8(vprod0x9, vb9x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 282 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 457 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 461 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 136 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 138 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 260 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 264 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 268 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 177 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 180 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 218 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 222 vacc0x9 = vpadalq_s16(vacc0x9, vprod0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|