/external/XNNPACK/src/qs8-gemm/gen/
D | 3x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal)
    80   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   115   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   116   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   117   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
   283   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   310   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   311   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   312   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
D | 2x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal)
    68   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
    97   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
    98   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   216   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   239   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   240   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
D | 4x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal)
    92   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   133   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   134   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   135   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
   136   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3x0);
   350   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   381   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   382   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   383   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
   384   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3x0);
D | 1x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal)
    56   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
    79   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   149   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   168   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
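Every hit above is the same generated inner-loop step: load 8 int8 weights for output columns 8..11 (channel pair c0), then one widening multiply per output row; the 2x/3x/4x kernels simply reuse the same vb89ABc0x0 against va1x0, va2x0, va3x0. Below is a minimal sketch of that step for the c2s4 mlal flavor for row 0. The helper name, the second-block load, and the vpadalq_s16 accumulation are assumptions based on XNNPACK's usual c2s4 kernel structure, not lines quoted from these files.

#include <arm_neon.h>

// Sketch only: one c0 step for row 0 of a c2s4 MLAL kernel. The intrinsic
// sequence mirrors the listed lines; the surrounding structure is assumed.
static inline int32x4_t qs8_c2s4_mlal_c0_step(
    int32x4_t vacc0x89AB,   // int32 accumulators for output columns 8..11
    int8x8_t va0x0,         // first 8 int8 activations of row 0
    int8x8_t va0x1,         // next 8 int8 activations of row 0
    const int8_t** w)       // cursor into the packed weights
{
  const int8x8_t vb89ABc0x0 = vld1_s8(*w); *w += 8;  // as on the lines above
  const int8x8_t vb89ABc0x1 = vld1_s8(*w); *w += 8;  // second block (assumed)
  int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);       // widening mul
  vprod0x89ABc0 = vmlal_s8(vprod0x89ABc0, vb89ABc0x1, va0x1);  // fused 2nd mul
  // Pairwise-accumulate the eight int16 products into four int32 lanes.
  return vpadalq_s16(vacc0x89AB, vprod0x89ABc0);
}

The mull flavors listed next stop after the single vmull_s8; the mlal flavors fuse a second weight block via vmlal_s8 before accumulating.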
D | 3x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull)
    77   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   104   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   105   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   106   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
D | 4x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull)
    88   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   119   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   120   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   121   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
   122   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3x0);
D | 2x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull)
    66   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
    89   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
    90   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
D | 1x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull)
    55   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
    74   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup)
    91   … const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
   141   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0c0x0);
   142   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0);
   143   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0);
   144   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3c0x0);
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r)
    99   … const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
   149   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0c0x0);
   150   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0);
   151   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0);
   152   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3c0x0);
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r)
    91   … const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
   141   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0c0x0);
   142   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0);
   143   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0);
   144   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3c0x0);
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r)
   115   … const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
   165   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0c0x0);
   166   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0);
   167   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0);
   168   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3c0x0);
D | 3x16c2-minmax-rndnu-neon-mlal-dup.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup)
    79   … const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
   121   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0c0x0);
   122   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0);
   123   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0);
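The c2 (non-s4) kernels above differ from the c2s4 ones mainly in how the activation operand is formed: instead of shuffling the activation vector between steps, they broadcast one 16-bit channel pair (va0c0x0) before the multiply. A sketch of the dup flavor follows, with names mirroring the listings; the exact broadcast idiom is an assumption about the generated code, and the ld1r/ld2r/ld4r flavors reach the same operand through duplicating loads instead.

#include <arm_neon.h>

// Sketch only: forming va0c0x0 for the vprod0x89ABc0 lines in the *_dup
// kernels above. The vdup_lane_s16 round-trip is assumed, not quoted.
static inline int16x8_t qs8_c2_dup_c0_step(int8x8_t va0x0, int8x8_t vb89ABc0x0)
{
  // View the 8 int8 activations as 4 int16 channel pairs and broadcast
  // pair 0, so the same two activations multiply all eight weight bytes.
  const int8x8_t va0c0x0 =
      vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0));
  return vmull_s8(vb89ABc0x0, va0c0x0);  // widening multiply, as listed above
}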
/external/XNNPACK/src/qs8-igemm/gen/
D | 3x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal)
    95   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   130   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   131   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   132   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
   298   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   325   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   326   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   327   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
D | 2x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal)
    81   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   110   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   111   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   229   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   252   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   253   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
D | 4x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal)
   109   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   150   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   151   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   152   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
   153   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3x0);
   367   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   398   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   399   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   400   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
   401   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3x0);
D | 1x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal)
    67   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
    90   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   160   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   179   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
D | 3x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull)
    92   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   119   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   120   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   121   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
D | 4x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull)
   105   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   136   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   137   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
   138   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2x0);
   139   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3x0);
D | 2x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull)
    79   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   102   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
   103   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1x0);
D | 1x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull)
    66   const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
    85   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0);
D | 4x16c2-minmax-rndnu-neon-mlal-ld2r.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r)
   117   … const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
   167   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0c0x0);
   168   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0);
   169   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0);
   170   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3c0x0);
D | 4x16c2-minmax-rndnu-neon-mlal-ld1r.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r)
   133   … const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
   183   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0c0x0);
   184   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0);
   185   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0);
   186   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3c0x0);
D | 4x16c2-minmax-rndnu-neon-mlal-ld4r.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r)
   109   … const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
   159   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0c0x0);
   160   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0);
   161   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0);
   162   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3c0x0);
D | 4x16c2-minmax-rndnu-neon-mlal-dup.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup)
   109   … const int8x8_t vb89ABc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
   159   int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0c0x0);
   160   int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, va1c0x0);
   161   int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, va2c0x0);
   162   int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, va3c0x0);
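The qs8-igemm hits are the same inner-product lines as their qs8-gemm counterparts; the igemm kernels differ outside the fragments shown, reading input rows through an indirection pointer array rather than a single strided pointer. The only stylistic split visible in the listings is the weight-pointer bump after vld1_s8. Both idioms advance w by the same 8 bytes, as the sketch below illustrates (function names here are illustrative, not from the files):

#include <stdint.h>

// The two equivalent pointer advances seen above: c2s4 kernels cast to
// int8_t* and add 8, c2 kernels round-trip through uintptr_t.
static const void* advance_c2s4(const void* w) {
  return (const int8_t*) w + 8;
}
static const void* advance_c2(const void* w) {
  return (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
}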