/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | 102 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 211 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 212 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 213 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 214 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 355 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 364 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 373 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 382 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 391 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | 110 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 219 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 220 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 221 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 222 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 363 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 372 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 381 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 390 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 399 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
|
D | 4x16c2-minmax-rndnu-neon-mull-dup.c | 98 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 207 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 208 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 209 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 210 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 351 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 360 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 369 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 378 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 387 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | 98 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 207 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 208 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 209 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 210 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 351 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 360 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 369 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 378 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 387 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mull-dup.c | 87 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 171 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 172 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 173 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 285 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 294 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 303 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 312 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld2r.c | 90 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 174 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 175 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 176 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 288 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 297 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 306 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 315 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld4r.c | 87 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 171 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 172 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 173 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 285 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 294 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 303 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 312 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | 96 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 180 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 181 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 182 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 294 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 303 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 312 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 321 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-dup.c | 76 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 135 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 136 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 219 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 228 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 237 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | 82 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 141 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 142 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 225 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 234 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 243 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld4r.c | 76 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 135 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 136 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 219 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 228 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 237 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld2r.c | 78 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 137 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 138 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 221 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 230 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 239 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r()
|
D | 1x16c2-minmax-rndnu-neon-mull-ld2r.c | 66 const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r() local 100 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r() 154 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r() local 163 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c2-minmax-rndnu-neon-mull-dup.c | 115 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 224 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 225 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 226 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 227 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 368 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local 377 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 386 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 395 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() 404 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | 115 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 224 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 225 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 226 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 227 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 368 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() local 377 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 386 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 395 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r() 404 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | 119 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 228 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 229 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 230 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 231 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 372 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local 381 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 390 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 399 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() 408 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
|
D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | 127 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 236 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 237 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 238 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 239 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 380 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local 389 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 398 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 407 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() 416 const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, va3c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | 111 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 195 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 196 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 197 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 309 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() local 318 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 327 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r() 336 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r()
|
D | 3x16c2-minmax-rndnu-neon-mull-dup.c | 102 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 186 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 187 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 188 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 300 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() local 309 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 318 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup() 327 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld4r.c | 102 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 186 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 187 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 188 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 300 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() local 309 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 318 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r() 327 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r()
|
D | 3x16c2-minmax-rndnu-neon-mull-ld2r.c | 105 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 189 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 190 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 191 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 303 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() local 312 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 321 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r() 330 const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, va2c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | 95 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 154 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 155 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 238 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() local 247 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r() 256 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r()
|
D | 2x16c2-minmax-rndnu-neon-mull-dup.c | 89 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 148 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 149 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 232 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() local 241 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup() 250 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld4r.c | 89 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 148 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 149 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 232 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() local 241 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r() 250 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r()
|
D | 2x16c2-minmax-rndnu-neon-mull-ld2r.c | 91 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 150 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 151 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 234 … const int8x8_t vbCDEFc2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() local 243 const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, va0c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r() 252 const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, va1c2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r()
|