/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | 94 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 137 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 138 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 139 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 140 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 269 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 278 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 287 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 296 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 305 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | 102 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 145 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 146 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 147 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 148 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 277 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 286 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 295 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 304 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 313 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
|
D | 4x16c2-minmax-rndnu-neon-mull-dup.c | 90 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 133 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 134 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 135 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 136 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 265 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 274 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 283 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 292 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 301 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | 90 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 133 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 134 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 135 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 136 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 265 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 274 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 283 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 292 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 301 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mull-dup.c | 79 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 115 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 116 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 117 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 217 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 226 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 235 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 244 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld2r.c | 82 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 118 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 119 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 120 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 220 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 229 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 238 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 247 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld4r.c | 79 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 115 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 116 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 117 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 217 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 226 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 235 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 244 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | 88 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 124 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 125 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 126 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 226 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 235 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 244 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 253 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-dup.c | 68 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 97 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 98 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 169 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 178 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 187 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | 74 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 103 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 104 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 175 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 184 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 193 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld4r.c | 68 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 97 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 98 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 169 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 178 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 187 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld2r.c | 70 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 99 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 100 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 171 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 180 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 189 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r()
|
D | 1x16c2-minmax-rndnu-neon-mull-ld2r.c | 58 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r() local 80 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r() 122 const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r() local 131 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c2-minmax-rndnu-neon-mull-dup.c | 107 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 150 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 151 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 152 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 153 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 282 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 291 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 300 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 309 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 318 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | 107 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 150 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 151 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 152 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 153 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 282 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 291 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 300 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 309 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 318 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | 111 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 154 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 155 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 156 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 157 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 286 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 295 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 304 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 313 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 322 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | 119 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 162 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 163 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 164 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 165 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 294 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 303 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 312 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 321 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 330 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | 103 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 139 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 140 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 141 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 241 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 250 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 259 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 268 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mull-dup.c | 94 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 130 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 131 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 132 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 232 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 241 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 250 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 259 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld4r.c | 94 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 130 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 131 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 132 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 232 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 241 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 250 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 259 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld2r.c | 97 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 133 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 134 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 135 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 235 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 244 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 253 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 262 const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | 87 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 116 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 117 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 188 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 197 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 206 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-dup.c | 81 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 110 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 111 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 182 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 191 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 200 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld4r.c | 81 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 110 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 111 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 182 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 191 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 200 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld2r.c | 83 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 112 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 113 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 184 … const int8x8_t vbCDEFc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 193 const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 202 const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r()
|