/external/XNNPACK/src/qs8-gemm/gen/  (occurrences of vbCDEFc0x0)

3x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal)
   81  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  125  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  126  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  127  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);
  284  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  316  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  317  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  318  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);

2x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal)
   69  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  104  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  105  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  217  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  243  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  244  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);

4x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal)
   93  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  146  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  147  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  148  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);
  149  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0);
  351  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  389  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  390  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  391  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);
  392  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0);

1x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal)
   57  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   83  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  150  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  170  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);

3x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull)
   78  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  110  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  111  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  112  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);

4x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull)
   89  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  127  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  128  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  129  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);
  130  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0);

2x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull)
   67  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   93  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
   94  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);

1x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull)
   56  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   76  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);

4x16c2-minmax-rndnu-neon-mlal-dup.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup)
   92  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
  154  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0);
  155  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0);
  156  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0);
  157  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0);

4x16c2-minmax-rndnu-neon-mlal-ld2r.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r)
  100  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
  162  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0);
  163  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0);
  164  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0);
  165  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0);

4x16c2-minmax-rndnu-neon-mlal-ld4r.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r)
   92  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
  154  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0);
  155  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0);
  156  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0);
  157  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0);

4x16c2-minmax-rndnu-neon-mlal-ld1r.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r)
  116  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
  178  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0);
  179  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0);
  180  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0);
  181  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0);

3x16c2-minmax-rndnu-neon-mlal-dup.c  (in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup)
   80  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
  131  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0);
  132  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0);
  133  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0);

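Every entry above repeats one pattern: load an 8-byte slice of packed int8 weights for output columns C..F with vld1_s8, advance the packed-weights cursor w, then form widening 8-bit x 8-bit -> 16-bit products against each output row's activation vector with vmull_s8. A minimal standalone sketch of that step follows; the function name, the two-row shape, and the vpadalq_s16 accumulation are illustrative assumptions, not lines from these files.

    #include <arm_neon.h>

    // Sketch only: one K-step for two output rows and one 8-wide column
    // slice. The real kernels unroll this across 16 columns and 1 to 4 rows.
    static void gemm_step_sketch(const void** w,
                                 int8x8_t va0, int8x8_t va1,
                                 int32x4_t* vacc0, int32x4_t* vacc1)
    {
      const int8x8_t vb = vld1_s8((const int8_t*) *w);  // 8 packed weights
      *w = (const int8_t*) *w + 8;                      // advance the cursor

      const int16x8_t vprod0 = vmull_s8(vb, va0);  // row 0 partial products
      const int16x8_t vprod1 = vmull_s8(vb, va1);  // row 1 partial products

      // Fold the 16-bit products into 32-bit accumulators (assumed step,
      // shown here only to make the sketch self-contained).
      *vacc0 = vpadalq_s16(*vacc0, vprod0);
      *vacc1 = vpadalq_s16(*vacc1, vprod1);
    }
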
/external/XNNPACK/src/qs8-igemm/gen/  (occurrences of vbCDEFc0x0)

3x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal)
   96  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  140  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  141  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  142  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);
  299  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  331  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  332  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  333  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);

2x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal)
   82  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  117  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  118  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  230  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  256  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  257  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);

4x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal)
  110  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  163  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  164  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  165  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);
  166  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0);
  368  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  406  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  407  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  408  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);
  409  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0);

1x16c2s4-minmax-rndnu-neon-mlal.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal)
   68  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   94  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  161  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  181  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);

3x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull)
   93  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  125  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  126  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  127  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);

4x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull)
  106  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  144  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  145  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);
  146  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0);
  147  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0);

2x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull)
   80  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
  106  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);
  107  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0);

1x16c2s4-minmax-rndnu-neon-mull.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull)
   67  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8;  (local)
   87  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0);

4x16c2-minmax-rndnu-neon-mlal-ld2r.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r)
  118  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
  180  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0);
  181  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0);
  182  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0);
  183  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0);

4x16c2-minmax-rndnu-neon-mlal-ld1r.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r)
  134  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
  196  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0);
  197  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0);
  198  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0);
  199  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0);

4x16c2-minmax-rndnu-neon-mlal-ld4r.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r)
  110  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
  172  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0);
  173  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0);
  174  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0);
  175  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0);

4x16c2-minmax-rndnu-neon-mlal-dup.c  (in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup)
  110  const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
  172  int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0);
  173  int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0);
  174  int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0);
  175  int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0);

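The igemm entries mirror the gemm ones. Across both sets, the "mull" kernels show a single vmull_s8 per product, while the "mlal" kernels define vbCDEFc0x0 twice per file (apparently a main loop and a remainder path) and, judging by the x0 suffix, pair each slice with a second x1 slice folded in by a widening multiply-accumulate. A hedged sketch of that pairing follows; the x1 operands and the vmlal_s8 step are inferred from the naming convention, not shown in the listing above.

    #include <arm_neon.h>

    // Assumed two-slice step of the "mlal" kernels: vmull_s8 starts the
    // 16-bit product for slice x0, vmlal_s8 accumulates slice x1 into it.
    static int16x8_t mlal_pair_sketch(int8x8_t vb_x0, int8x8_t va_x0,
                                      int8x8_t vb_x1, int8x8_t va_x1)
    {
      int16x8_t vprod = vmull_s8(vb_x0, va_x0);   // widening multiply
      vprod = vmlal_s8(vprod, vb_x1, va_x1);      // widening multiply-add
      return vprod;
    }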