/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x8c8-minmax-neon-mlal-padal.c |
    128  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    130  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    132  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    181  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    183  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c |
    162  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    165  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    168  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    234  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    237  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c |
    119  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
    121  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
    123  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c |
    152  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
    155  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
    158  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c |
    196  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    200  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    204  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    287  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    291  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c |
    152  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    154  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    156  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    261  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    263  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c |
    143  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
    145  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
    147  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c |
    185  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
    189  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
    193  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mull-padal.c |
    106  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
    108  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c |
    194  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    197  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    200  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    346  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    349  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c |
    184  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
    187  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
    190  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c |
    236  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    240  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    244  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    431  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    435  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
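All of the mlal entries above share one idiom around vprod1x6: vmull_s8 widens the first pair of 8-byte slices into int16 products, vmlal_s8 folds the second pair into the same int16 lanes, and vpadalq_s16 pairwise-adds those lanes into the int32 accumulator vacc1x6. The sketch below is a minimal, self-contained illustration of that idiom, not code taken from the kernels; the names accumulate_c8_mlal, a, b, and vacc are made up for the example.

#include <arm_neon.h>
#include <stdint.h>

/* Illustrative sketch only (not the XNNPACK kernel): fold 16 int8 products of
 * one A-row slice and one packed B-column slice into an int32x4_t of partial
 * sums, the way the *c8 mlal kernels update vacc1x6 via vprod1x6. Only two
 * int8*int8 products are summed per int16 lane before vpadalq_s16 widens them
 * into 32-bit lanes. */
static inline int32x4_t accumulate_c8_mlal(
    int32x4_t vacc, const int8_t a[16], const int8_t b[16])
{
  const int8x8_t va_x0 = vld1_s8(a);      /* first 8 bytes of A */
  const int8x8_t va_x1 = vld1_s8(a + 8);  /* next 8 bytes of A  */
  const int8x8_t vb_x0 = vld1_s8(b);      /* first 8 bytes of B */
  const int8x8_t vb_x1 = vld1_s8(b + 8);  /* next 8 bytes of B  */

  int16x8_t vprod = vmull_s8(vb_x0, va_x0);  /* 8 widened products            */
  vprod = vmlal_s8(vprod, vb_x1, va_x1);     /* 8 more, still in int16 lanes  */
  return vpadalq_s16(vacc, vprod);           /* pairwise add into int32 lanes */
}

Note that every reference above ends with a vpadalq_s16 into vacc1x6 rather than a cross-lane reduction, so at this point in the kernels the partial sums stay spread across the four int32 lanes.
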
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x8c8-minmax-neon-mlal-padal.c |
    141  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    143  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    145  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    194  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    196  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c |
    132  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
    134  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
    136  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c |
    177  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    180  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    183  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    249  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    252  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c |
    167  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
    170  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
    173  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c |
    165  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    167  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    169  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    274  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    276  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c |
    213  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    217  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    221  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    304  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    308  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c |
    202  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
    206  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
    210  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c |
    156  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
    158  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
    160  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mull-padal.c |
    119  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
    121  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c |
    209  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    212  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    215  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    361  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    364  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c |
    199  int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
    202  vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1));  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
    205  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c |
    253  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    257  vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    261  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    448  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    452  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mull-padal.c |
    148  const int16x8_t vprod1x6 = vmull_s8(vb6, va1);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
    151  vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
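The c16 and mull entries in both tables use the same accumulation step but feed it differently: the c16 kernels split a full 16-byte register with vget_low_s8/vget_high_s8 before the vmull_s8/vmlal_s8 pair, while the mull kernels (and the second group of references in each mlal entry, which use a lone vmull_s8) widen a single 8-byte slice and accumulate it directly. Both shapes are sketched below with made-up names; this is an illustration of the idiom, not code from the kernels.

#include <arm_neon.h>

/* c16 shape (sketch only, names illustrative): A and B arrive as full 16-byte
 * registers, so the two halves come from vget_low_s8/vget_high_s8 before the
 * same widen-multiply, multiply-accumulate, and pairwise-accumulate sequence. */
static inline int32x4_t accumulate_c16(int32x4_t vacc, int8x16_t va, int8x16_t vb)
{
  int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
  vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
  return vpadalq_s16(vacc, vprod);
}

/* mull-only shape: one 8-byte slice per step, matching the lone-vmull_s8
 * references above. */
static inline int32x4_t accumulate_c8_mull(int32x4_t vacc, int8x8_t va, int8x8_t vb)
{
  const int16x8_t vprod = vmull_s8(vb, va);
  return vpadalq_s16(vacc, vprod);
}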