/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | 93 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 129 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 130 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 131 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 132 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 268 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 276 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 285 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 294 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 303 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | 101 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 137 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 138 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 139 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 140 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 276 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 284 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 293 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 302 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 311 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
|
D | 4x16c2-minmax-rndnu-neon-mull-dup.c | 89 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 125 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 126 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 127 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 128 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 264 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 272 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 281 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 290 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 299 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | 89 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 125 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 126 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 127 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 128 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 264 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 272 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 281 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 290 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 299 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mull-dup.c | 78 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 109 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 110 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 111 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 216 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 224 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 233 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 242 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld2r.c | 81 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 112 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 113 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 114 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 219 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 227 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 236 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 245 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld4r.c | 78 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 109 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 110 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 111 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 216 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 224 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 233 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 242 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | 87 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 118 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 119 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 120 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 225 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 233 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 242 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 251 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-dup.c | 67 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 93 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 94 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 168 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 176 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 185 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | 73 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 99 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 100 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 174 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 182 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 191 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld4r.c | 67 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 93 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 94 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 168 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 176 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 185 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld2r.c | 69 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 95 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 96 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 170 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 178 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 187 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r()
|
D | 1x16c2-minmax-rndnu-neon-mull-ld2r.c | 57 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r() local 78 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r() 121 const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r() local 129 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c2-minmax-rndnu-neon-mull-dup.c | 106 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 142 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 143 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 144 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 145 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 281 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 289 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 298 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 307 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 316 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | 106 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 142 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 143 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 144 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 145 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 281 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 289 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 298 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 307 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 316 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | 110 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 146 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 147 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 148 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 149 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 285 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 293 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 302 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 311 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 320 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | 118 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 154 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 155 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 156 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 157 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 293 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 301 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 310 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 319 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 328 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | 102 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 133 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 134 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 135 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 240 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 248 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 257 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 266 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mull-dup.c | 93 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 124 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 125 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 126 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 231 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 239 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 248 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 257 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld4r.c | 93 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 124 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 125 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 126 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 231 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 239 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 248 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 257 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld2r.c | 96 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 127 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 128 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 129 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 234 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 242 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 251 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 260 const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, va2c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | 86 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 112 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 113 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 187 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 195 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 204 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-dup.c | 80 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 106 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 107 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 181 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 189 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 198 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld4r.c | 80 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 106 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 107 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 181 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 189 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 198 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld2r.c | 82 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 108 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 109 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 183 … const int8x8_t vb89ABc0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 191 const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 200 const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, va1c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r()
|