/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c8-minmax-neon-mlal-padal.c |
      153 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
      156 vprod2x5 = vmlal_s8(vprod2x5, vb5x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
      159 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
      228 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
      231 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c |
      144 int16x8_t vprod2x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
      147 vprod2x5 = vmlal_s8(vprod2x5, vget_high_s8(vb5), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
      150 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c |
      184 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
      188 vprod2x5 = vmlal_s8(vprod2x5, vb5x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
      192 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
      279 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
      283 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c |
      174 int16x8_t vprod2x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
      178 vprod2x5 = vmlal_s8(vprod2x5, vget_high_s8(vb5), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
      182 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c |
      185 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
      188 vprod2x5 = vmlal_s8(vprod2x5, vb5x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
      191 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
      340 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
      343 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c |
      176 int16x8_t vprod2x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
      179 vprod2x5 = vmlal_s8(vprod2x5, vget_high_s8(vb5), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
      182 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c |
      224 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
      228 vprod2x5 = vmlal_s8(vprod2x5, vb5x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
      232 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
      423 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
      427 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mull-padal.c |
      127 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
      130 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c |
      152 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
      156 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c |
      214 int16x8_t vprod2x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
      218 vprod2x5 = vmlal_s8(vprod2x5, vget_high_s8(vb5), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
      222 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c |
      151 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
      154 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c |
      184 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
      188 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
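Every hit above follows the same three-intrinsic reduction: a widening 8-bit multiply (vmull_s8), optionally a second widening multiply-accumulate (vmlal_s8) covering the other half of the channel block, and a pairwise add of the int16 products into the int32 accumulator (vpadalq_s16). The sketch below shows that pattern in isolation; it assumes a NEON target, and the helper name qs8_acc_c16 is illustrative only, not an XNNPACK symbol.

#include <arm_neon.h>

/* Illustrative helper (not an XNNPACK symbol): the vmull_s8 -> vmlal_s8 ->
 * vpadalq_s16 sequence from the c16/mlal hits above, applied to one 16-byte
 * channel block.  Two int8 products (each at most 128*127 = 16256 in
 * magnitude) sum to at most 32512, so the int16 intermediate cannot overflow
 * before the pairwise accumulation into int32. */
static inline int32x4_t qs8_acc_c16(int32x4_t vacc, int8x16_t va, int8x16_t vb) {
  int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));  /* low 8 lanes */
  vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));   /* high 8 lanes */
  return vpadalq_s16(vacc, vprod);  /* fold adjacent int16 pairs into 4 int32 lanes */
}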
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c8-minmax-neon-mlal-padal.c |
      168 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
      171 vprod2x5 = vmlal_s8(vprod2x5, vb5x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
      174 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
      243 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
      246 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c |
      159 int16x8_t vprod2x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
      162 vprod2x5 = vmlal_s8(vprod2x5, vget_high_s8(vb5), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
      165 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c |
      201 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
      205 vprod2x5 = vmlal_s8(vprod2x5, vb5x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
      209 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
      296 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
      300 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c |
      191 int16x8_t vprod2x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
      195 vprod2x5 = vmlal_s8(vprod2x5, vget_high_s8(vb5), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
      199 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c |
      200 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
      203 vprod2x5 = vmlal_s8(vprod2x5, vb5x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
      206 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
      355 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
      358 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c |
      191 int16x8_t vprod2x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
      194 vprod2x5 = vmlal_s8(vprod2x5, vget_high_s8(vb5), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
      197 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c |
      241 int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
      245 vprod2x5 = vmlal_s8(vprod2x5, vb5x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
      249 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
      440 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
      444 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mull-padal.c |
      142 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
      145 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
|
D | 4x8c8-minmax-neon-mull-padal.c |
      169 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
      173 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c |
      231 int16x8_t vprod2x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
      235 vprod2x5 = vmlal_s8(vprod2x5, vget_high_s8(vb5), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
      239 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c |
      166 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
      169 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c |
      201 const int16x8_t vprod2x5 = vmull_s8(vb5, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
      205 vacc2x5 = vpadalq_s16(vacc2x5, vprod2x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
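The mull-padal kernels, and the remainder loops of the mlal-padal kernels (the const vprod2x5 hits above), use the shorter single-block form of the same reduction: one vmull_s8 per 8-byte block, accumulated straight into int32 with vpadalq_s16. A minimal sketch, again with an illustrative helper name rather than an XNNPACK symbol:

#include <arm_neon.h>

/* Illustrative helper (not an XNNPACK symbol): one 8-byte block, no vmlal_s8 step. */
static inline int32x4_t qs8_acc_c8(int32x4_t vacc, int8x8_t va, int8x8_t vb) {
  const int16x8_t vprod = vmull_s8(vb, va);  /* 8 widening int16 products */
  return vpadalq_s16(vacc, vprod);           /* pairwise add into 4 int32 lanes */
}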