/external/XNNPACK/src/qs8-gemm/gen/
D | 3x8c8-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
      163  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  local
      166  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);
      169  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);
      235  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      238  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x8c16-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
      153  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  local
      156  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));
      159  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x8c8-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
      197  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  local
      201  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);
      205  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);
      288  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      292  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x8c16-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
      186  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  local
      190  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));
      194  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x16c8-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
      195  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  local
      198  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);
      201  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);
      347  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      350  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x16c16-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
      185  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  local
      188  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));
      191  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x16c8-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
      237  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  local
      241  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);
      245  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);
      432  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      436  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x8c8-minmax-neon-mull-padal.c | in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
      134  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      137  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x8c8-minmax-neon-mull-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
      161  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      165  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x16c16-minmax-neon-mlal-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
      226  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  local
      230  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));
      234  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x16c8-minmax-neon-mull-padal.c | in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
      158  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      161  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x16c8-minmax-neon-mull-padal.c | in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
      193  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      197  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c8-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
      178  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  local
      181  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);
      184  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);
      250  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      253  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x8c16-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
      168  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  local
      171  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));
      174  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x8c8-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
      214  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  local
      218  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);
      222  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);
      305  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      309  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x8c16-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
      203  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  local
      207  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));
      211  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x16c8-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
      210  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  local
      213  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);
      216  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);
      362  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      365  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x16c16-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
      200  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  local
      203  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));
      206  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x16c8-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
      254  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  local
      258  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);
      262  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);
      449  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      453  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x8c8-minmax-neon-mull-padal.c | in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
      149  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      152  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x8c8-minmax-neon-mull-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
      178  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      182  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x16c16-minmax-neon-mlal-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
      243  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  local
      247  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));
      251  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 3x16c8-minmax-neon-mull-padal.c | in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
      173  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      176  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

D | 4x16c8-minmax-neon-mull-padal.c | in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
      210  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  local
      214  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);

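All of the references above use the same NEON widening multiply-accumulate pattern: vmull_s8 produces int16 products, vmlal_s8 (in the mlal and c16 variants) folds in a second set of products, and vpadalq_s16 pairwise-adds the int16 products into an int32 accumulator such as vacc2x6. The sketch below is not XNNPACK code; the names qs8_dot_c16, a, b, and kc are hypothetical, and it only illustrates the pattern for a single int8 dot product the way the c16 kernels structure it.

    #include <arm_neon.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical helper: dot product of two int8 vectors of length kc
     * (assumed to be a multiple of 16), mirroring the c16 kernels above:
     * vmull_s8 on the low halves, vmlal_s8 on the high halves, then
     * vpadalq_s16 to fold the int16 products into four int32 partial sums. */
    static int32_t qs8_dot_c16(const int8_t* a, const int8_t* b, size_t kc) {
      int32x4_t vacc = vdupq_n_s32(0);  /* plays the role of vacc2x6 */
      for (size_t k = 0; k < kc; k += 16) {
        const int8x16_t va = vld1q_s8(a + k);
        const int8x16_t vb = vld1q_s8(b + k);
        int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));  /* like vprod2x6 */
        vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
        vacc = vpadalq_s16(vacc, vprod);  /* pairwise widen-add into int32 lanes */
      }
      /* Reduce the four partial sums to a scalar; the kernels keep one such
       * accumulator per (row, column) output element and reduce at the end. */
      const int32x2_t vsum = vadd_s32(vget_low_s32(vacc), vget_high_s32(vacc));
      return vget_lane_s32(vpadd_s32(vsum, vsum), 0);
    }

The mull-only variants (mull_padal) listed above skip the vmlal_s8 step and go straight from vmull_s8 to vpadalq_s16, processing 8 bytes of each operand per accumulation instead of 16.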