/external/XNNPACK/src/qc8-gemm/gen/ |
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 103 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 183 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 242 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 103 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 183 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 242 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 102 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 182 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 241 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
|
D | 2x8c2-minmax-fp32-neon-mlal-dup.c | 102 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 182 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 241 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x8c2-minmax-rndnu-neon-mlal-ld4r.c | 102 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 182 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 241 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 103 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 183 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 242 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 102 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 182 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 241 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
|
D | 2x8c2-minmax-fp32-neon-mlal-dup.c | 102 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 182 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 241 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 103 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 183 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 242 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld2r.c | 106 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local 188 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local 247 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local
|
D | 2x8c2-minmax-rndnu-neon-mlal-dup.c | 102 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local 182 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local 241 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local
|
D | 2x8c2s4-minmax-rndnu-neon-mlal.c | 97 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local 170 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local
|
D | 2x8c2-minmax-rndnu-neon-mull-dup.c | 89 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local 148 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld4r.c | 89 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local 148 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 117 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 197 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 256 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 117 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 197 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 256 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 116 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 196 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 255 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
|
D | 2x8c2-minmax-fp32-neon-mlal-dup.c | 116 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 196 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 255 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x8c2-minmax-rndnu-neon-mlal-ld4r.c | 116 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 196 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local 255 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r() local
|
D | 2x8c2-minmax-fp32-neon-mlal-ld4r.c | 116 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 196 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local 255 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r() local
|
D | 2x8c2-minmax-fp32-neonv8-mlal-dup.c | 117 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 197 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local 256 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup() local
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld4r.c | 117 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 197 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local 256 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r() local
|
D | 2x8c2-minmax-rndnu-neon-mlal-dup.c | 116 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local 196 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local 255 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup() local
|
D | 2x8c2-minmax-fp32-neon-mlal-dup.c | 116 int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1c1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 196 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local 255 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld4r.c | 102 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local 161 const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local
|