Home
last modified time | relevance | path

Searched refs:vprod1x0123c2 (Results 1 – 25 of 160) sorted by relevance

1234567

/external/XNNPACK/src/qc8-gemm/gen/
D2x8c2-minmax-fp32-neonv8-mlal-ld4r.c115 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
118 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
120 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
190 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
192 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
255 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
256 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
D2x8c2-minmax-fp32-neonv8-mlal-dup.c115 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
118 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
120 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
190 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
192 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
255 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
256 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
D2x8c2-minmax-fp32-neon-mlal-ld1r.c126 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local
129 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
131 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
207 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local
209 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
272 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local
273 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
D2x8c2-minmax-fp32-neonv8-mlal-ld1r.c127 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local
130 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
132 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
208 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local
210 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
273 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local
274 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
D2x8c2-minmax-fp32-neon-mlal-ld4r.c114 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
117 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
119 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
189 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
191 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
254 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
255 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
D2x8c2-minmax-fp32-neon-mlal-dup.c114 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local
117 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup()
119 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup()
189 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local
191 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup()
254 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local
255 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup()
D2x8c2-minmax-fp32-neon-mlal-ld2r.c118 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local
121 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
123 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
195 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local
197 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
260 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local
261 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
/external/XNNPACK/src/qs8-gemm/gen/
D2x8c2-minmax-fp32-neon-mlal-ld1r.c126 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local
129 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
131 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
207 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local
209 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
272 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local
273 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
D2x8c2-minmax-rndnu-neon-mlal-ld4r.c114 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local
117 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
119 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
189 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local
191 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
254 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local
255 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
D2x8c2-minmax-rndnu-neon-mlal-ld1r.c126 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() local
129 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r()
131 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r()
207 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() local
209 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r()
272 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() local
273 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r()
D2x8c2-minmax-fp32-neonv8-mlal-ld1r.c127 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local
130 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
132 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
208 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local
210 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
273 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local
274 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
D2x8c2-minmax-fp32-neonv8-mlal-ld4r.c115 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
118 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
120 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
190 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
192 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
255 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
256 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
D2x8c2-minmax-fp32-neon-mlal-ld4r.c114 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
117 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
119 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
189 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
191 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
254 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
255 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
/external/XNNPACK/src/qc8-igemm/gen/
D2x8c2-minmax-fp32-neonv8-mlal-ld4r.c129 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
132 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
134 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
204 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
206 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
269 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
270 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
D2x8c2-minmax-fp32-neonv8-mlal-dup.c129 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
132 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
134 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
204 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
206 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
269 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
270 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
D2x8c2-minmax-fp32-neon-mlal-ld2r.c132 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local
135 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
137 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
209 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local
211 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
274 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local
275 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
D2x8c2-minmax-fp32-neonv8-mlal-ld2r.c133 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local
136 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
138 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
210 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local
212 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
275 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local
276 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
D2x8c2-minmax-fp32-neon-mlal-ld4r.c128 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
131 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
133 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
203 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
205 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
268 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
269 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
/external/XNNPACK/src/qs8-igemm/gen/
D2x8c2-minmax-rndnu-neon-mlal-ld4r.c128 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local
131 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
133 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
203 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local
205 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
268 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local
269 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
D2x8c2-minmax-fp32-neon-mlal-ld4r.c128 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
131 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
133 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
203 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
205 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
268 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
269 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
D2x8c2-minmax-fp32-neonv8-mlal-ld2r.c133 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local
136 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
138 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
210 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local
212 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
275 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local
276 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
D2x8c2-minmax-fp32-neonv8-mlal-dup.c129 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
132 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
134 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
204 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
206 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
269 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
270 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
D2x8c2-minmax-rndnu-neon-mlal-ld2r.c132 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local
135 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r()
137 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r()
209 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local
211 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r()
274 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local
275 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r()
D2x8c2-minmax-fp32-neonv8-mlal-ld4r.c129 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
132 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
134 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
204 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
206 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
269 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
270 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
D2x8c2-minmax-rndnu-neon-mlal-dup.c128 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local
131 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup()
133 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup()
203 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local
205 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup()
268 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local
269 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup()

1234567