/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c2s4-minmax-rndnu-neon-mlal.c |
     84  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
    161  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    162  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    163  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    287  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
    337  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    338  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    339  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
|
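Every hit above follows the same inner-loop idiom: load the next eight signed 8-bit weights for output channels 8..B (column pair c1), advance the packed-weights pointer, and form widening 16-bit products against each row's activation vector. A minimal compilable sketch of that idiom, with illustrative names (the surrounding kernel state is assumed, not copied from the files):

    #include <arm_neon.h>

    /* Sketch only: one "load weights, multiply two rows" step as it appears
       in the hits. va0x0/va1x0 stand for the per-row activation vectors the
       real kernels keep in registers. */
    static inline const void* qs8_c2s4_step(const void* w,
                                            int8x8_t va0x0, int8x8_t va1x0,
                                            int16x8_t* vprod0, int16x8_t* vprod1) {
      const int8x8_t vb89ABc1x0 = vld1_s8((const int8_t*) w);
      w = (const int8_t*) w + 8;              /* step past the 8 weights read */
      *vprod0 = vmull_s8(vb89ABc1x0, va0x0);  /* widening s8 x s8 -> s16 */
      *vprod1 = vmull_s8(vb89ABc1x0, va1x0);
      return w;
    }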
D | 2x16c2s4-minmax-rndnu-neon-mlal.c |
     72  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
    129  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
    130  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
    220  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
    257  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
    258  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
|
D | 4x16c2s4-minmax-rndnu-neon-mlal.c |
     96  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
    193  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    194  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    195  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    196  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    354  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
    417  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    418  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    419  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    420  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
|
D | 1x16c2s4-minmax-rndnu-neon-mlal.c |
     60  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
     97  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
    153  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
    177  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
|
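The -mull files that follow differ from the -mlal files above only in how these products are consumed. Judging from the kernel names (an assumption, not verified against the generated sources), the mlal variants unroll K by two and fuse the second step into the first product with vmlal_s8 before folding into 32-bit accumulators, roughly:

    #include <arm_neon.h>

    /* Assumed accumulation shape for the -mlal variants; names illustrative. */
    static inline int32x4_t qs8_mlal_accumulate(int32x4_t vacc,
                                                int8x8_t vb_x0, int8x8_t va_x0,
                                                int8x8_t vb_x1, int8x8_t va_x1) {
      int16x8_t vprod = vmull_s8(vb_x0, va_x0);  /* first K-step: s8 x s8 -> s16 */
      vprod = vmlal_s8(vprod, vb_x1, va_x1);     /* second K-step fused in */
      return vpadalq_s16(vacc, vprod);           /* pairwise-add into s32 lanes */
    }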
D | 3x16c2s4-minmax-rndnu-neon-mull.c |
     81  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull() local
    131  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
    132  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
    133  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
|
D | 4x16c2s4-minmax-rndnu-neon-mull.c |
     92  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull() local
    155  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
    156  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
    157  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
    158  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
|
D | 2x16c2s4-minmax-rndnu-neon-mull.c |
     70  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull() local
    107  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
    108  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
|
D | 1x16c2s4-minmax-rndnu-neon-mull.c |
     59  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull() local
     83  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c |
     95  … const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local
    202  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
    203  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
    204  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
    205  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c |
    103  … const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local
    210  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
    211  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
    212  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
    213  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c |
     95  … const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local
    202  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
    203  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
    204  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
    205  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c |
    119  … const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local
    226  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
    227  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
    228  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
    229  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mlal-dup.c |
     83  … const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
    168  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
    169  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
    170  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
|
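In the c2 kernels above (dup/ld1r/ld2r/ld4r), the right-hand operands are va0c1x0 .. va3c1x0: one 16-bit column pair of a row's activations broadcast across the whole vector. A plausible construction for the "dup" variant, assumed from the naming convention rather than read from the generated files:

    #include <arm_neon.h>

    /* Assumed lane-broadcast for the "dup" variants: view the 8 activation
       bytes as four 16-bit column pairs and replicate pair 1 (hence ...c1x0). */
    static inline int8x8_t qs8_dup_column_pair1(int8x8_t va0x0) {
      const int16x4_t vpairs = vreinterpret_s16_s8(va0x0);
      return vreinterpret_s8_s16(vdup_lane_s16(vpairs, 1));
    }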
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c2s4-minmax-rndnu-neon-mlal.c |
     99  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
    176  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    177  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    178  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    302  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
    352  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    353  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
    354  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
|
D | 2x16c2s4-minmax-rndnu-neon-mlal.c |
     85  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
    142  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
    143  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
    233  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
    270  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
    271  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
|
D | 4x16c2s4-minmax-rndnu-neon-mlal.c |
    113  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
    210  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    211  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    212  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    213  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    371  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
    434  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    435  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    436  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
    437  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
|
D | 1x16c2s4-minmax-rndnu-neon-mlal.c |
     71  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
    108  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
    164  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
    188  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
|
D | 3x16c2s4-minmax-rndnu-neon-mull.c |
     96  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull() local
    146  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
    147  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
    148  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
|
D | 4x16c2s4-minmax-rndnu-neon-mull.c |
    109  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull() local
    172  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
    173  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
    174  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
    175  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
|
D | 2x16c2s4-minmax-rndnu-neon-mull.c |
     83  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull() local
    120  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
    121  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
|
D | 1x16c2s4-minmax-rndnu-neon-mull.c |
     70  const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull() local
     94  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c |
    121  … const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local
    228  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
    229  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
    230  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
    231  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c |
    137  … const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local
    244  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
    245  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
    246  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
    247  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c |
    113  … const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local
    220  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
    221  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
    222  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
    223  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c |
    113  … const int8x8_t vb89ABc1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local
    220  int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
    221  int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, va1c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
    222  int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
    223  int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, va3c1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
|
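Two spellings of the same pointer advance recur throughout the hits: the c2s4 kernels write w = (const int8_t*) w + 8; while the c2 kernels write w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));. Both step w past the eight weights just loaded, as this self-contained comparison shows:

    #include <stdint.h>

    /* Both idioms advance a void pointer by 8 bytes; only the spelling differs. */
    static const void* advance_c2s4(const void* w) {
      return (const int8_t*) w + 8;
    }
    static const void* advance_c2(const void* w) {
      return (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
    }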