/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 150 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 151 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 152 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 153 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
|
D | 1x16c4s2-minmax-rndnu-neon-mlal.c | 92 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal() local 93 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 179 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 180 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 181 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 182 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 183 vprod3x89c0 = vmlal_s8(vprod3x89c0, vb89c0x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 2x16c4s2-minmax-rndnu-neon-mlal.c | 121 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 122 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 123 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 189 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 190 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 191 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 192 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 193 vprod3x89c0 = vmlal_s8(vprod3x89c0, vb89c0x1, va3c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 197 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 198 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 199 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 200 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 201 vprod3x89c0 = vmlal_s8(vprod3x89c0, vb89c0x1, va3c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 189 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 190 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 191 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 192 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 193 vprod3x89c0 = vmlal_s8(vprod3x89c0, vb89c0x1, va3c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 158 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 159 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 160 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 161 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 158 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 159 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 160 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 161 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 164 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 165 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 166 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 167 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld1r.c | 131 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 132 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 133 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
|
D | 2x16c4-minmax-rndnu-neon-mlal-dup.c | 127 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 128 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 129 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld2r.c | 127 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() local 128 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r() 129 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c4s2-minmax-rndnu-neon-mlal.c | 165 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() local 166 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 167 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal() 168 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal()
|
D | 4x16c4s2-minmax-rndnu-neon-mlal.c | 196 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() local 197 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 198 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 199 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal() 200 vprod3x89c0 = vmlal_s8(vprod3x89c0, vb89c0x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal()
|
D | 2x16c4s2-minmax-rndnu-neon-mlal.c | 134 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() local 135 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal() 136 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal()
|
D | 1x16c4s2-minmax-rndnu-neon-mlal.c | 103 const int8x8_t vb89c0x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal() local 104 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 214 … const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 215 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 216 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 217 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 218 vprod3x89c0 = vmlal_s8(vprod3x89c0, vb89c0x1, va3c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-dup.c | 206 … const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local 207 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 208 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 209 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() 210 vprod3x89c0 = vmlal_s8(vprod3x89c0, vb89c0x1, va3c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld2r.c | 206 … const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local 207 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 208 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 209 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() 210 vprod3x89c0 = vmlal_s8(vprod3x89c0, vb89c0x1, va3c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld1r.c | 179 … const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local 180 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 181 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() 182 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-ld2r.c | 173 … const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local 174 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 175 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() 176 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mlal-dup.c | 173 … const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local 174 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 175 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() 176 vprod2x89c0 = vmlal_s8(vprod2x89c0, vb89c0x1, va2c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
|
D | 2x16c4-minmax-rndnu-neon-mlal-dup.c | 140 … const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() local 141 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup() 142 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup()
|
D | 2x16c4-minmax-rndnu-neon-mlal-ld1r.c | 144 … const int8x8_t vb89c0x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() local 145 vprod0x89c0 = vmlal_s8(vprod0x89c0, vb89c0x1, va0c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r() 146 vprod1x89c0 = vmlal_s8(vprod1x89c0, vb89c0x1, va1c0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r()
|