/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x8c8-minmax-neon-mlal-padal.c | 135 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 137 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 139 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 186 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 188 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 172 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 175 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 178 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 241 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 244 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 125 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 127 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 129 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 161 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 164 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 167 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 209 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 213 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 217 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 296 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 300 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 159 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 161 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 163 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 266 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 268 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 149 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 151 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 153 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 197 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 201 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 205 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mull-padal.c | 111 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local 113 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 204 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 207 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 210 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 353 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 356 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 193 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 196 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 199 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 249 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 253 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 257 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 440 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 444 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x8c8-minmax-neon-mlal-padal.c | 148 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 150 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 152 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 199 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 201 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 138 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 140 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 142 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 187 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 190 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 193 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 256 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 259 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 176 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 179 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 182 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 172 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 174 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 176 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 279 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 281 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 226 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 230 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 234 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 313 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 317 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 214 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 218 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 222 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 162 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 164 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 166 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mull-padal.c | 124 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local 126 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 219 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 222 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 225 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 368 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 371 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 208 int16x8_t vprod1x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 211 vprod1x7 = vmlal_s8(vprod1x7, vget_high_s8(vb7), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 214 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 266 int16x8_t vprod1x7 = vmull_s8(vb7x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 270 vprod1x7 = vmlal_s8(vprod1x7, vb7x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 274 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 457 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 461 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mull-padal.c | 155 const int16x8_t vprod1x7 = vmull_s8(vb7, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local 158 vacc1x7 = vpadalq_s16(vacc1x7, vprod1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
|