/external/XNNPACK/src/qs8-gemm/gen/

D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r():
    105  const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    240  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    241  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    242  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);
    243  const int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3, va3c3);

D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r():
    113  const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    248  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    249  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    250  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);
    251  const int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3, va3c3);

D | 4x16c2-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup():
    101  const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    236  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    237  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    238  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);
    239  const int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3, va3c3);

D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r():
    101  const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    236  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    237  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    238  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);
    239  const int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3, va3c3);

D | 3x16c2-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup():
    90   const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    193  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    194  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    195  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);

D | 3x16c2-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r():
    93   const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    196  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    197  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    198  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);

D | 3x16c2-minmax-rndnu-neon-mull-ld4r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r():
    90   const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    193  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    194  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    195  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);

D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r():
    99   const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    202  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    203  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    204  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);

D | 2x16c2-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup():
    79   const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    150  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    151  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);

D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r():
    85   const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    156  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    157  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);

D | 2x16c2-minmax-rndnu-neon-mull-ld4r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r():
    79   const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    150  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    151  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);

D | 2x16c2-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r():
    81   const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    152  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    153  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);

D | 1x16c2-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r():
    69   const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    108  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
/external/XNNPACK/src/qs8-igemm/gen/

D | 4x16c2-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup():
    118  … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    253  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    254  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    255  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);
    256  const int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3, va3c3);

D | 4x16c2-minmax-rndnu-neon-mull-ld4r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r():
    118  … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    253  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    254  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    255  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);
    256  const int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3, va3c3);

D | 4x16c2-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r():
    122  … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    257  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    258  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    259  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);
    260  const int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3, va3c3);

D | 4x16c2-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r():
    130  … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    265  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    266  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    267  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);
    268  const int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3, va3c3);

D | 3x16c2-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r():
    114  … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    217  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    218  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    219  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);

D | 3x16c2-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup():
    105  … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    208  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    209  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    210  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);

D | 3x16c2-minmax-rndnu-neon-mull-ld4r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r():
    105  … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    208  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    209  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    210  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);

D | 3x16c2-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r():
    108  … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    211  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    212  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
    213  const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, va2c3);

D | 2x16c2-minmax-rndnu-neon-mull-ld1r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r():
    98   … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    169  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    170  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);

D | 2x16c2-minmax-rndnu-neon-mull-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup():
    92   … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    163  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    164  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);

D | 2x16c2-minmax-rndnu-neon-mull-ld4r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r():
    92   … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    163  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    164  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);

D | 2x16c2-minmax-rndnu-neon-mull-ld2r.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r():
    94   … const int8x8_t vb89ABc3 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    165  const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);
    166  const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, va1c3);
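Every hit indexed above repeats the same two-step pattern in the 16-column, c2-packed QS8 GEMM and IGEMM microkernels: load eight packed signed 8-bit weights (vb89ABc3, apparently covering output columns 8..11 with two input channels per column) and advance the packed-weights cursor, then perform a widening vmull_s8 against the duplicated activation byte pair vaNc3 of each row; the 2x/3x/4x variants simply reuse the same vb89ABc3 for va1c3, va2c3, and va3c3, which is why each file shows one load and up to four products. The sketch below is a minimal stand-alone illustration of that step, not code taken from XNNPACK: the helper name qs8_c2_step_89AB_c3, the pointer-cursor handling, and the vpadalq_s16 accumulation into the 32-bit column sums are assumptions about the surrounding kernel code and are not among the lines listed here.

    #include <arm_neon.h>
    #include <stdint.h>

    /*
     * Hypothetical helper (not XNNPACK code) showing the vld1_s8 + vmull_s8
     * idiom that every hit above repeats for columns 8..11 ("89AB") and
     * input-channel pair 3 ("c3") of row 0.
     */
    static inline int32x4_t qs8_c2_step_89AB_c3(
        int32x4_t vacc0x89AB,   /* running 32-bit sums for columns 8..11, row 0 */
        int8x8_t va0c3,         /* row-0 activation byte pair c3, repeated 4x   */
        const int8_t** w)       /* packed-weights cursor, advanced by 8 bytes   */
    {
      /* "const int8x8_t vb89ABc3 = vld1_s8(w); w += 8" from the listing:
       * 4 output columns x 2 input channels of signed 8-bit weights.           */
      const int8x8_t vb89ABc3 = vld1_s8(*w);
      *w += 8;

      /* "vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3)": widening 8x8 -> 16-bit
       * multiply, producing two partial products per output column.           */
      const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, va0c3);

      /* Pairwise add-accumulate folds each channel pair into its 32-bit
       * column sum (assumed accumulation step, not part of the hits above).   */
      return vpadalq_s16(vacc0x89AB, vprod0x89ABc3);
    }

The ld1r/ld2r/ld4r/dup suffixes in the file names differ only in how the duplicated activation vectors such as va0c3 are produced (load-and-replicate vs. dup-lane); the weight load and widening multiply shown here are identical across all of the listed variants.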