/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 214 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 215 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 216 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 217 vprod2x0123c3 = vmlal_s8(vprod2x0123c3, vb0123c3x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 218 vprod3x0123c3 = vmlal_s8(vprod3x0123c3, vb0123c3x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 3x8c2s4-minmax-rndnu-neon-mlal.c | 177 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 178 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 179 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 180 vprod2x0123c3 = vmlal_s8(vprod2x0123c3, vb0123c3x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neonv8-mlal.c | 141 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 142 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 143 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
|
D | 2x8c2s4-minmax-rndnu-neon-mlal.c | 140 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local 141 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() 142 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neon-mlal.c | 140 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 141 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 142 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
|
D | 1x8c2s4-minmax-fp32-neonv8-mlal.c | 104 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 105 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-rndnu-neon-mlal.c | 103 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() local 104 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
|
D | 1x8c2s4-minmax-fp32-neon-mlal.c | 103 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 104 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 197 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 198 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 199 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 200 vprod2x0123c3 = vmlal_s8(vprod2x0123c3, vb0123c3x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 201 vprod3x0123c3 = vmlal_s8(vprod3x0123c3, vb0123c3x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 3x8c2s4-minmax-rndnu-neon-mlal.c | 162 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 163 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 164 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 165 vprod2x0123c3 = vmlal_s8(vprod2x0123c3, vb0123c3x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-rndnu-neon-mlal.c | 127 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local 128 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() 129 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neonv8-mlal.c | 128 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 129 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 130 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
|
D | 2x8c2s4-minmax-fp32-neon-mlal.c | 127 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 128 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 129 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
|
D | 1x8c2s4-minmax-fp32-neon-mlal.c | 92 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 93 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
|
D | 1x8c2s4-minmax-fp32-neonv8-mlal.c | 93 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 94 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-rndnu-neon-mlal.c | 92 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() local 93 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
|
D | 4x8c2-minmax-rndnu-neon-mlal-ld4r.c | 208 … const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() local 209 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 210 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 211 vprod2x0123c3 = vmlal_s8(vprod2x0123c3, vb0123c3x1, va2c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r() 212 vprod3x0123c3 = vmlal_s8(vprod3x0123c3, vb0123c3x1, va3c3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 2x8c2s4-minmax-fp32-neon-mlal.c | 127 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 128 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 129 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neonv8-mlal.c | 128 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 129 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 130 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-fp32-neonv8-mlal.c | 93 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 94 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-fp32-neon-mlal.c | 92 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 93 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 2x8c2s4-minmax-fp32-neon-mlal.c | 140 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 141 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 142 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neonv8-mlal.c | 141 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 142 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 143 vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, va1x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-fp32-neonv8-mlal.c | 104 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 105 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-fp32-neon-mlal.c | 103 const int8x8_t vb0123c3x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 104 vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
|