/external/XNNPACK/src/qs8-gemm/gen/

D | 1x8c2-minmax-fp32-neonv8-mlal-ld4r.c | all hits in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r():
     79  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     81  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     82  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    131  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    132  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    168  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    169  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | all hits in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r():
     85  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     87  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     88  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    140  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    141  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    177  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    178  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neon-mlal-ld4r.c | all hits in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r():
     78  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     80  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     81  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    130  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    131  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    167  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    168  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-rndnu-neon-mlal-dup.c | all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup():
     78  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     80  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     81  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    130  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    131  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    167  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    168  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-rndnu-neon-mlal-ld2r.c | all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r():
     80  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     82  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     83  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    133  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    134  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    170  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    171  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neon-mlal-dup.c | all hits in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup():
     78  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     80  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     81  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    130  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    131  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    167  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    168  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-dup.c | all hits in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup():
     79  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     81  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     82  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    131  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    132  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    168  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    169  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

/external/XNNPACK/src/qc8-igemm/gen/

D | 1x8c2-minmax-fp32-neonv8-mlal-ld4r.c | all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r():
     91  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     93  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     94  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    143  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    144  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    180  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    181  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r():
     97  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     99  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
    100  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    152  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    153  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    189  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    190  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-dup.c | all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup():
     91  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     93  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     94  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    143  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    144  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    180  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    181  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neon-mlal-ld1r.c | all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r():
     96  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     98  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     99  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    151  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    152  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    188  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    189  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-ld2r.c | all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r():
     93  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     95  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     96  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    146  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    147  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    183  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    184  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neon-mlal-ld4r.c | all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r():
     90  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     92  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     93  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    142  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    143  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    179  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    180  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

/external/XNNPACK/src/qs8-igemm/gen/

D | 1x8c2-minmax-fp32-neon-mlal-ld2r.c | all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r():
     92  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     94  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     95  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    145  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    146  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    182  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    183  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r():
     97  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     99  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
    100  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    152  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    153  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    189  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    190  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-dup.c | all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup():
     91  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     93  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     94  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    143  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    144  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    180  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    181  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-ld4r.c | all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r():
     91  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     93  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     94  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    143  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    144  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    180  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    181  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neon-mlal-dup.c | all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup():
     90  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     92  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     93  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    142  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    143  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    179  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    180  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-ld2r.c | all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r():
     93  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     95  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     96  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    146  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    147  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    183  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    184  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neon-mlal-ld4r.c | all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r():
     90  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     92  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     93  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    142  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    143  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    179  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    180  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-rndnu-neon-mlal-ld2r.c | all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r():
     92  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     94  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     95  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    145  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    146  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    182  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    183  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

/external/XNNPACK/src/qc8-gemm/gen/

D | 1x8c2-minmax-fp32-neon-mlal-ld4r.c | all hits in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r():
     78  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     80  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     81  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    130  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    131  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    167  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    168  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | all hits in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r():
     85  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     87  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     88  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    140  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    141  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    177  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    178  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neon-mlal-ld1r.c | all hits in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r():
     84  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     86  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     87  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    139  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    140  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    176  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    177  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);

D | 1x8c2-minmax-fp32-neon-mlal-ld2r.c | all hits in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r():
     80  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0c1x0);  (local)
     82  vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, va0c1x1);
     83  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    133  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    134  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
    170  const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, va0c1);  (local)
    171  vacc0x4567 = vpadalq_s16(vacc0x4567, vprod0x4567c1);
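
Every hit above is the same NEON widening multiply-accumulate step used by these 1x8c2 microkernels: vmull_s8 widens an int8 half-vector product to int16, the *_mlal variants fold a second int8 product into the same int16 vector with vmlal_s8, and vpadalq_s16 pairwise-adds the int16 products into the int32 accumulator vacc0x4567. The sketch below is a minimal illustration of that step only, not code taken from the generated kernels; the names accumulate_slice, va_x0/va_x1, vb_x0/vb_x1, and vacc are placeholders.

#include <arm_neon.h>

/* Minimal sketch of the shared inner step (illustrative names, not XNNPACK's). */
static inline int32x4_t accumulate_slice(int32x4_t vacc,
                                         int8x8_t va_x0, int8x8_t vb_x0,
                                         int8x8_t va_x1, int8x8_t vb_x1) {
  /* Widening multiply: eight int8*int8 products kept as an int16x8_t. */
  int16x8_t vprod = vmull_s8(vb_x0, va_x0);
  /* Second slice multiplied and added while still 16-bit (the MLAL form). */
  vprod = vmlal_s8(vprod, vb_x1, va_x1);
  /* Pairwise add-accumulate: adjacent int16 lanes are summed into int32 lanes. */
  return vpadalq_s16(vacc, vprod);
}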