/external/XNNPACK/src/qs8-gemm/gen/

D | 3x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
      133  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);  (local)
      136  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
      139  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
      214  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      217  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
      158  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);  (local)
      162  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
      166  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
      261  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      265  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x8c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
      126  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));  (local)
      129  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
      132  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x8c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
      150  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));  (local)
      154  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
      158  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x16c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
      165  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);  (local)
      168  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
      171  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
      326  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      329  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x16c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
      158  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));  (local)
      161  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
      164  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x8c8-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
      113  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      116  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x16c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
      198  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);  (local)
      202  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
      206  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
      405  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      409  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x16c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
      190  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));  (local)
      194  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
      198  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x8c8-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
      134  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      138  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x16c8-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
      137  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      140  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x16c8-minmax-rndnu-neon-mull.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
      166  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      170  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
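For reference, every c8 entry above repeats one NEON widening multiply-accumulate idiom: vmull_s8 widens two int8x8_t operands into an int16x8_t product, vmlal_s8 folds a second 8-byte slice into that product (the mlal kernels' main loop), and vpadalq_s16 pairwise-adds the int16 lanes into vacc2x3, one of the per-row, per-output-channel int32x4_t accumulators. Below is a minimal standalone sketch of both shapes; the helper names are hypothetical and only the intrinsic sequence mirrors the generated kernels.

#include <arm_neon.h>

// Sketch of the mlal main-loop step: two 8-byte slices of A row 2 (va2x0,
// va2x1) against two slices of B output channel 3 (vb3x0, vb3x1).
// Hypothetical helper, not XNNPACK code.
static inline int32x4_t qs8_dot_step_mlal(
    int32x4_t vacc2x3,
    int8x8_t va2x0, int8x8_t va2x1,
    int8x8_t vb3x0, int8x8_t vb3x1)
{
  // Widen-multiply the first slice to sixteen-bit products...
  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);
  // ...fold in the second slice with a widening multiply-accumulate...
  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
  // ...then pairwise-add the eight int16 products into the int32 accumulator.
  return vpadalq_s16(vacc2x3, vprod2x3);
}

// Sketch of the mull variant (also the mlal kernels' remainder path):
// a single widening multiply, then the same pairwise accumulate.
static inline int32x4_t qs8_dot_step_mull(
    int32x4_t vacc2x3, int8x8_t va2, int8x8_t vb3)
{
  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
  return vpadalq_s16(vacc2x3, vprod2x3);
}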

/external/XNNPACK/src/qs8-igemm/gen/

D | 3x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
      148  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);  (local)
      151  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
      154  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
      229  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      232  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
      175  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);  (local)
      179  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
      183  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
      278  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      282  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x8c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
      141  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));  (local)
      144  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
      147  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x8c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
      167  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));  (local)
      171  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
      175  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x16c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
      180  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);  (local)
      183  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
      186  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
      341  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      344  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x16c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
      173  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));  (local)
      176  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
      179  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x16c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
      215  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);  (local)
      219  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
      223  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
      422  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      426  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x8c8-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
      128  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      131  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x16c16-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
      207  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));  (local)
      211  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
      215  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x8c8-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
      151  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      155  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 3x16c8-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
      152  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      155  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);

D | 4x16c8-minmax-rndnu-neon-mull.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
      183  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);  (local)
      187  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
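The c16 entries in both directories differ from the c8 shape only in how the 8-byte halves are obtained: A and B arrive as full int8x16_t vectors and are split in registers with vget_low_s8/vget_high_s8 instead of being loaded as two int8x8_t slices. A sketch under the same caveats (hypothetical helper name; only the intrinsic sequence is taken from the listings above):

#include <arm_neon.h>

// Sketch of the c16 step: split each 16-byte vector into halves, widen-
// multiply the low halves, accumulate the high halves on top, then
// pairwise-add the int16 products into the int32 accumulator.
static inline int32x4_t qs8_dot_step_c16(
    int32x4_t vacc2x3, int8x16_t va2, int8x16_t vb3)
{
  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));
  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
  return vpadalq_s16(vacc2x3, vprod2x3);
}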