/external/XNNPACK/src/qc8-gemm/gen/ |
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 115 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 118 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 120 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 190 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 192 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 255 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 256 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 115 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 118 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 120 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 190 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 192 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 255 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 256 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld1r.c | 126 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local 129 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() 131 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() 207 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local 209 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() 272 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local 273 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 127 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local 130 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() 132 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() 208 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local 210 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() 273 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local 274 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 114 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 117 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 119 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 189 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 191 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 254 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 255 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neon-mlal-dup.c | 114 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 117 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() 119 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() 189 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 191 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() 254 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 255 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld2r.c | 118 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 121 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 123 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 195 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 197 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 260 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 261 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x8c2-minmax-fp32-neon-mlal-ld1r.c | 126 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local 129 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() 131 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() 207 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local 209 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() 272 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r() local 273 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r()
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld4r.c | 114 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 117 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() 119 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() 189 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 191 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() 254 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 255 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld1r.c | 126 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() local 129 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() 131 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() 207 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() local 209 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() 272 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r() local 273 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 127 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local 130 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() 132 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() 208 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local 210 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() 273 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r() local 274 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 115 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 118 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 120 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 190 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 192 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 255 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 256 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 114 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 117 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 119 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 189 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 191 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 254 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 255 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 129 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 132 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 134 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 204 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 206 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 269 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 270 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 129 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 132 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 134 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 204 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 206 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 269 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 270 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld2r.c | 132 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 135 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 137 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 209 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 211 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 274 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 275 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 133 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 136 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 138 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 210 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 212 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 275 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 276 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 128 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 131 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 133 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 203 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 205 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 268 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 269 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x8c2-minmax-rndnu-neon-mlal-ld4r.c | 128 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 131 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() 133 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() 203 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 205 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() 268 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 269 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 128 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 131 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 133 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 203 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 205 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() 268 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 269 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 133 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 136 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 138 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 210 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 212 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 275 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 276 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 129 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 132 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 134 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 204 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 206 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() 269 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 270 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup()
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld2r.c | 132 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local 135 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() 137 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() 209 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local 211 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() 274 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local 275 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 129 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 132 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 134 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 204 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 206 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() 269 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 270 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r()
|
D | 2x8c2-minmax-rndnu-neon-mlal-dup.c | 128 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1c2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local 131 vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, va1c2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() 133 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() 203 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local 205 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() 268 const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local 269 vacc1x0123 = vpadalq_s16(vacc1x0123, vprod1x0123c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup()
|