/external/XNNPACK/src/qc8-gemm/gen/

D | 2x8c4-minmax-fp32-neonv8-mlal-dup.c | in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup():
     83  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     86  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     88  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    164  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    166  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    221  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    222  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-ld2r.c | in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r():
     82  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     85  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     87  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    163  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    165  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    220  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    221  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-ld1r.c | in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r():
     86  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     89  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     91  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    169  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    171  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    226  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    227  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neonv8-mlal-ld2r.c | in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r():
     83  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     86  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     88  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    164  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    166  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    221  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    222  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neonv8-mlal-ld1r.c | in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r():
     87  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     90  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     92  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    170  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    172  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    227  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    228  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-dup.c | in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup():
     82  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     85  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     87  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    163  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    165  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    220  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    221  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

/external/XNNPACK/src/qs8-igemm/gen/

D | 2x8c4-minmax-rndnu-neon-mlal-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r():
     99  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
    102  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    104  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    182  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    184  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    239  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    240  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-rndnu-neon-mlal-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r():
     95  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     98  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    100  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    176  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    178  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    233  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    234  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup():
     95  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     98  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    100  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    176  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    178  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    233  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    234  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-ld2r.c | in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r():
     95  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     98  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    100  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    176  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    178  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    233  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    234  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neonv8-mlal-dup.c | in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup():
     96  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     99  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    101  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    177  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    179  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    234  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    235  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-ld1r.c | in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r():
     99  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
    102  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    104  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    182  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    184  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    239  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    240  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-dup.c | in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup():
     95  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     98  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    100  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    176  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    178  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    233  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    234  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

/external/XNNPACK/src/qc8-igemm/gen/

D | 2x8c4-minmax-fp32-neon-mlal-ld1r.c | in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r():
     99  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
    102  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    104  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    182  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    184  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    239  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    240  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neonv8-mlal-dup.c | in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup():
     96  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     99  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    101  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    177  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    179  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    234  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    235  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neonv8-mlal-ld2r.c | in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r():
     96  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     99  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    101  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    177  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    179  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    234  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    235  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neonv8-mlal-ld1r.c | in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r():
    100  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
    103  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    105  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    183  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    185  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    240  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    241  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-dup.c | in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup():
     95  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     98  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    100  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    176  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    178  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    233  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    234  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-ld2r.c | in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r():
     95  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     98  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
    100  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    176  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    178  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    233  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    234  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

/external/XNNPACK/src/qs8-gemm/gen/

D | 2x8c4-minmax-fp32-neonv8-mlal-dup.c | in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup():
     83  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     86  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     88  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    164  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    166  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    221  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    222  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-rndnu-neon-mlal-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup():
     82  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     85  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     87  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    163  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    165  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    220  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    221  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-dup.c | in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup():
     82  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     85  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     87  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    163  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    165  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    220  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    221  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neonv8-mlal-ld2r.c | in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r():
     83  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     86  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     88  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    164  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    166  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    221  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    222  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neon-mlal-ld2r.c | in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r():
     82  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     85  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     87  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    163  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    165  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    220  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    221  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);

D | 2x8c4-minmax-fp32-neonv8-mlal-ld1r.c | in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r():
     87  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     [local]
     90  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);
     92  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    170  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    172  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
    227  const int16x8_t vprod1x01c0 = vmull_s8(vb01c0, va1c0);   [local]
    228  vacc1x01 = vpadalq_s16(vacc1x01, vprod1x01c0);
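
Note: every occurrence indexed above is an instance of the same NEON widening multiply-accumulate idiom used by these 2x8c4 MLAL microkernels: vmull_s8 forms 16-bit products of two int8 vectors, vmlal_s8 folds a second 4-channel slice into those 16-bit products, and vpadalq_s16 pairwise-adds the products into 32-bit accumulators. The const vmull_s8 / vpadalq_s16 pairs at the higher line numbers are the single-slice form of the same computation. The code below is a minimal, self-contained sketch of that idiom only; the helper name accumulate_c4, the main() driver, and the sample data are illustrative assumptions, not XNNPACK code.

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper (not an XNNPACK function): accumulates two 4-channel
 * int8 slices using the vmull_s8 / vmlal_s8 / vpadalq_s16 sequence seen in
 * the vprod1x01c0 lines above. */
static int32x4_t accumulate_c4(int32x4_t vacc1x01,
                               int8x8_t va1c0x0, int8x8_t va1c0x1,
                               int8x8_t vb01c0x0, int8x8_t vb01c0x1) {
  int16x8_t vprod1x01c0 = vmull_s8(vb01c0x0, va1c0x0);     /* widening 8-bit * 8-bit -> 16-bit */
  vprod1x01c0 = vmlal_s8(vprod1x01c0, vb01c0x1, va1c0x1);  /* fold in the second slice */
  return vpadalq_s16(vacc1x01, vprod1x01c0);               /* pairwise add into 32-bit accumulators */
}

int main(void) {
  const int8_t a[8]  = {1, 2, 3, 4, 5, 6, 7, 8};           /* sample activation slices (illustrative layout) */
  const int8_t b0[8] = {1, 1, 1, 1, 2, 2, 2, 2};           /* sample weight slice 0 */
  const int8_t b1[8] = {1, 0, -1, 0, 3, 0, -3, 0};         /* sample weight slice 1 */
  int32x4_t vacc = vdupq_n_s32(0);
  vacc = accumulate_c4(vacc, vld1_s8(a), vld1_s8(a), vld1_s8(b0), vld1_s8(b1));
  int32_t out[4];
  vst1q_s32(out, vacc);
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}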