/external/XNNPACK/src/qs8-gemm/gen/
D | 3x8c8-minmax-rndnu-neon-mlal.c
    163  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
    166  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
    169  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
    235  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
    238  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
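All of the hits in this listing are instances of one accumulation idiom. The sketch below is a hypothetical standalone helper, not XNNPACK's actual kernel code: it shows the c8 MLAL step these files repeat for every accumulator such as vacc2x6, namely a widening vmull_s8 on the first 8-byte half, a vmlal_s8 fold of the second half into the same int16x8_t product, then vpadalq_s16 to pairwise-accumulate into four int32 lanes.

#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

// Hypothetical helper (not XNNPACK code): accumulate an int8 dot product
// 16 bytes at a time, the way the *c8 MLAL micro-kernels above do for each
// (row, column) accumulator. Remainder handling (k % 16) is omitted.
static int32x4_t dot_s8_c8_mlal(const int8_t* a, const int8_t* b, size_t k) {
  int32x4_t vacc = vdupq_n_s32(0);
  for (; k >= 16; k -= 16) {
    const int8x8_t va0 = vld1_s8(a);      // first 8-byte half of A
    const int8x8_t va1 = vld1_s8(a + 8);  // second half
    const int8x8_t vb0 = vld1_s8(b);
    const int8x8_t vb1 = vld1_s8(b + 8);
    a += 16; b += 16;
    // Widening int8*int8 -> int16 multiply of the first halves
    // (the vmull_s8 at e.g. line 163 above) ...
    int16x8_t vprod = vmull_s8(vb0, va0);
    // ... then multiply-accumulate of the second halves into the same
    // 16-bit products (the vmlal_s8 at line 166). Each int8 product has
    // magnitude at most 16384, so the 16-bit sum overflows only when both
    // lane pairs multiply -128 by -128; callers must rule that case out
    // via operand ranges.
    vprod = vmlal_s8(vprod, vb1, va1);
    // Pairwise widen-and-add the eight int16 products into the four
    // int32 accumulator lanes (the vpadalq_s16 at line 169).
    vacc = vpadalq_s16(vacc, vprod);
  }
  return vacc;  // caller reduces lanes, e.g. vaddvq_s32(vacc) on AArch64
}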
|
D | 4x8c8-minmax-rndnu-neon-mlal.c
    197  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
    201  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    205  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    288  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
    292  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
|
D | 3x8c16-minmax-rndnu-neon-mlal.c
    153  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() local
    156  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
    159  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
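The c16 variants compute the same step but differ in how operands arrive: one 16-byte load per operand, split in registers with vget_low_s8/vget_high_s8 rather than two 8-byte loads. A minimal sketch, again with a hypothetical helper name:

#include <arm_neon.h>

// Hypothetical helper (not XNNPACK code): one c16 accumulation step.
static int32x4_t dot_step_s8_c16(int32x4_t vacc, int8x16_t va, int8x16_t vb) {
  // Multiply the low halves, fold in the high halves, then pairwise-add
  // the int16 products into the int32 accumulator lanes.
  int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
  vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
  return vpadalq_s16(vacc, vprod);
}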
|
D | 4x8c16-minmax-rndnu-neon-mlal.c
    186  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
    190  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
    194  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c
    195  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
    198  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
    201  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
    347  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
    350  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c
    185  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
    188  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
    191  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 3x8c8-minmax-rndnu-neon-mull.c
    134  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() local
    137  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
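The *-mull kernels, and the 8-byte remainder path inside the MLAL kernels (e.g. lines 235/238 of 3x8c8-minmax-rndnu-neon-mlal.c above), skip the vmlal_s8 fold and perform a single widening multiply per step. Sketch, once more as a hypothetical helper:

#include <arm_neon.h>

// Hypothetical helper (not XNNPACK code): the plain MULL step.
static int32x4_t dot_step_s8_c8_mull(int32x4_t vacc, int8x8_t va, int8x8_t vb) {
  const int16x8_t vprod = vmull_s8(vb, va);  // eight widening int8 multiplies
  return vpadalq_s16(vacc, vprod);           // pairwise add into int32 lanes
}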
|
D | 4x16c8-minmax-rndnu-neon-mlal.c
    237  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
    241  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
    245  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
    432  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
    436  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c
    226  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
    230  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
    234  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 4x8c8-minmax-rndnu-neon-mull.c
    161  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
    165  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mull.c
    158  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
    161  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c
    193  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
    197  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|
/external/XNNPACK/src/qs8-igemm/gen/
D | 3x8c8-minmax-rndnu-neon-mlal.c
    178  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
    181  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
    184  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
    250  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
    253  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
|
D | 4x8c8-minmax-rndnu-neon-mlal.c
    214  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
    218  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    222  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    305  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
    309  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
|
D | 3x8c16-minmax-rndnu-neon-mlal.c
    168  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() local
    171  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
    174  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
|
D | 4x8c16-minmax-rndnu-neon-mlal.c
    203  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
    207  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
    211  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c
    210  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
    213  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
    216  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
    362  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
    365  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c
    200  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
    203  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
    206  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 4x16c8-minmax-rndnu-neon-mlal.c
    254  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
    258  vprod2x6 = vmlal_s8(vprod2x6, vb6x1, va2x1);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
    262  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
    449  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
    453  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
|
D | 3x8c8-minmax-rndnu-neon-mull.c
    149  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() local
    152  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c
    243  int16x8_t vprod2x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
    247  vprod2x6 = vmlal_s8(vprod2x6, vget_high_s8(vb6), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
    251  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 4x8c8-minmax-rndnu-neon-mull.c
    178  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
    182  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mull.c
    173  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
    176  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c
    210  const int16x8_t vprod2x6 = vmull_s8(vb6, va2);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
    214  vacc2x6 = vpadalq_s16(vacc2x6, vprod2x6);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|