/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | 118 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local 243 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() 244 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() 245 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() 246 const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
|
D | 4x16c4-minmax-rndnu-neon-mull-dup.c | 118 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local 243 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() 244 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() 245 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() 246 const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
|
D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | 122 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local 247 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() 248 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() 249 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() 250 const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
|
D | 2x16c4-minmax-rndnu-neon-mull-dup.c | 88 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local 153 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() 154 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
|
D | 2x16c4-minmax-rndnu-neon-mull-ld1r.c | 90 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local 155 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() 156 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
|
D | 2x16c4-minmax-rndnu-neon-mull-ld2r.c | 88 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local 153 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() 154 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mull-ld1r.c | 106 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local 201 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() 202 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() 203 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
|
D | 3x16c4-minmax-rndnu-neon-mull-ld2r.c | 103 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local 198 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() 199 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() 200 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mull-dup.c | 103 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local 198 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() 199 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() 200 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
|
D | 1x16c4-minmax-rndnu-neon-mull-ld1r.c | 74 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r() local 109 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
|
D | 1x16c4-minmax-rndnu-neon-mull-ld2r.c | 73 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r() local 108 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
|
D | 1x16c4-minmax-rndnu-neon-mull-dup.c | 73 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup() local 108 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c4-minmax-rndnu-neon-mull-ld2r.c | 135 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local 260 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() 261 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() 262 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() 263 const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
|
D | 4x16c4-minmax-rndnu-neon-mull-ld1r.c | 139 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local 264 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() 265 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() 266 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() 267 const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mull-dup.c | 135 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local 260 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() 261 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() 262 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() 263 const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
|
D | 2x16c4-minmax-rndnu-neon-mull-ld2r.c | 101 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local 166 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() 167 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mull-dup.c | 118 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() local 213 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() 214 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup() 215 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup()
|
D | 3x16c4-minmax-rndnu-neon-mull-ld2r.c | 118 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() local 213 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() 214 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r() 215 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r()
|
D | 3x16c4-minmax-rndnu-neon-mull-ld1r.c | 121 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() local 216 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() 217 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r() 218 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r()
|
D | 1x16c4-minmax-rndnu-neon-mull-ld2r.c | 84 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r() local 119 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
|
D | 1x16c4-minmax-rndnu-neon-mull-ld1r.c | 85 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r() local 120 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
|
D | 1x16c4-minmax-rndnu-neon-mull-dup.c | 84 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup() local 119 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
|
D | 2x16c4-minmax-rndnu-neon-mull-dup.c | 101 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local 166 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() 167 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
|
D | 2x16c4-minmax-rndnu-neon-mull-ld1r.c | 103 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local 168 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() 169 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
|
D | 4x16c4-minmax-rndnu-neon-mlal-ld1r.c | 403 const int8x8_t vbCDc1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local 528 const int16x8_t vprod0xCDc1 = vmull_s8(vbCDc1, va0c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 529 const int16x8_t vprod1xCDc1 = vmull_s8(vbCDc1, va1c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 530 const int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1, va2c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() 531 const int16x8_t vprod3xCDc1 = vmull_s8(vbCDc1, va3c1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
|