/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x8c8-minmax-neon-mlal-padal.c | 121 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 123 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 125 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 176 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 178 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 152 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 155 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 158 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 227 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 230 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 113 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 115 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 117 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 143 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 146 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 149 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 183 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 187 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 191 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 278 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 282 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 145 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 147 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 149 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 256 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 258 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 137 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 139 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 141 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 173 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 177 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 181 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mull-padal.c | 101 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local 103 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 184 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 187 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 190 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 339 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 342 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 175 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 178 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 181 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 223 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 227 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 231 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 422 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 426 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x8c8-minmax-neon-mlal-padal.c | 134 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 136 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 138 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 189 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 191 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 126 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 128 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 130 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 167 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 170 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 173 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 242 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 245 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 158 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 161 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 164 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 158 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 160 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 162 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 269 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 271 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 200 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 204 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 208 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 295 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 299 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 190 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 194 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 198 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 150 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 152 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 154 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mull-padal.c | 114 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local 116 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 199 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 202 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 205 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 354 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 357 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 190 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 193 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 196 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 240 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 244 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 248 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 439 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 443 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mull-padal.c | 141 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local 144 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
|