/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c2s4-minmax-rndnu-neon-mlal.c | 164 const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 165 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 166 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 167 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
|
D | 2x16c2s4-minmax-rndnu-neon-mlal.c | 131 const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local 132 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() 133 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
|
D | 4x16c2s4-minmax-rndnu-neon-mlal.c | 197 const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local 198 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 199 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 200 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 201 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
|
D | 1x16c2s4-minmax-rndnu-neon-mlal.c | 98 const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local 99 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | 206 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 207 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 208 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 209 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 210 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | 214 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 215 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 216 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 217 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 218 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | 206 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 207 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 208 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 209 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 210 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c | 230 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 231 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 232 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 233 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 234 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-dup.c | 171 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 172 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 173 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 174 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld4r.c | 171 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 172 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 173 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 174 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld2r.c | 177 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 178 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 179 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 180 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld1r.c | 189 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 190 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 191 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 192 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c2s4-minmax-rndnu-neon-mlal.c | 179 const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local 180 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 181 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() 182 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
|
D | 2x16c2s4-minmax-rndnu-neon-mlal.c | 144 const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local 145 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() 146 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
|
D | 4x16c2s4-minmax-rndnu-neon-mlal.c | 214 const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local 215 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 216 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 217 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() 218 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
|
D | 1x16c2s4-minmax-rndnu-neon-mlal.c | 109 const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local 110 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c | 232 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local 233 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 234 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 235 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() 236 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c | 248 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local 249 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 250 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 251 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() 252 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c | 224 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local 225 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 226 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 227 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() 228 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c | 224 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local 225 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 226 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 227 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() 228 vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, va3c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld4r.c | 187 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() local 188 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 189 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r() 190 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-dup.c | 187 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local 188 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 189 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() 190 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld1r.c | 205 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() local 206 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 207 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r() 208 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-ld2r.c | 193 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() local 194 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 195 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r() 196 vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, va2c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r()
|
D | 2x16c2-minmax-rndnu-neon-mlal-ld1r.c | 162 … const int8x8_t vb89ABc1x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() local 163 vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, va0c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r() 164 vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, va1c1x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r()
|