/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x16c16-minmax-neon-mlal-padal.c | 120 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 201 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 202 int16x8_t vprod1x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 203 int16x8_t vprod2x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 204 vprod0x8 = vmlal_s8(vprod0x8, vget_high_s8(vb8), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 205 vprod1x8 = vmlal_s8(vprod1x8, vget_high_s8(vb8), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 206 vprod2x8 = vmlal_s8(vprod2x8, vget_high_s8(vb8), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 143 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 248 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 249 int16x8_t vprod1x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 250 int16x8_t vprod2x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 251 int16x8_t vprod3x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 252 vprod0x8 = vmlal_s8(vprod0x8, vget_high_s8(vb8), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 253 vprod1x8 = vmlal_s8(vprod1x8, vget_high_s8(vb8), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 254 vprod2x8 = vmlal_s8(vprod2x8, vget_high_s8(vb8), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 255 vprod3x8 = vmlal_s8(vprod3x8, vget_high_s8(vb8), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 97 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 154 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 155 int16x8_t vprod1x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 156 vprod0x8 = vmlal_s8(vprod0x8, vget_high_s8(vb8), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 157 vprod1x8 = vmlal_s8(vprod1x8, vget_high_s8(vb8), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 74 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 107 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 108 vprod0x8 = vmlal_s8(vprod0x8, vget_high_s8(vb8), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 91 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local 92 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 130 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local 131 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() 132 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 208 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 209 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 210 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 211 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 212 const int16x8_t vprod3x8 = vmull_s8(vb8, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 169 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 170 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 171 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 172 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 180 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 181 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 269 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 270 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 271 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 358 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 359 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 360 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 361 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 447 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 448 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 449 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 450 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 451 const int16x8_t vprod3x8 = vmull_s8(vb8, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x16c16-minmax-neon-mlal-padal.c | 135 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 216 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 217 int16x8_t vprod1x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 218 int16x8_t vprod2x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 219 vprod0x8 = vmlal_s8(vprod0x8, vget_high_s8(vb8), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 220 vprod1x8 = vmlal_s8(vprod1x8, vget_high_s8(vb8), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 221 vprod2x8 = vmlal_s8(vprod2x8, vget_high_s8(vb8), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 160 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 265 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 266 int16x8_t vprod1x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 267 int16x8_t vprod2x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 268 int16x8_t vprod3x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 269 vprod0x8 = vmlal_s8(vprod0x8, vget_high_s8(vb8), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 270 vprod1x8 = vmlal_s8(vprod1x8, vget_high_s8(vb8), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 271 vprod2x8 = vmlal_s8(vprod2x8, vget_high_s8(vb8), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 272 vprod3x8 = vmlal_s8(vprod3x8, vget_high_s8(vb8), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 110 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 167 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 168 int16x8_t vprod1x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 169 vprod0x8 = vmlal_s8(vprod0x8, vget_high_s8(vb8), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 170 vprod1x8 = vmlal_s8(vprod1x8, vget_high_s8(vb8), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 85 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 118 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 119 vprod0x8 = vmlal_s8(vprod0x8, vget_high_s8(vb8), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mull-padal.c | 143 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local 144 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() 145 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 225 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 226 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 227 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 228 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 229 const int16x8_t vprod3x8 = vmull_s8(vb8, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 1x16c8-minmax-neon-mull-padal.c | 102 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local 103 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mull-padal.c | 184 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 185 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 186 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 187 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 191 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 192 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 282 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 283 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 284 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 464 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 465 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 466 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 467 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 468 const int16x8_t vprod3x8 = vmull_s8(vb8, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 373 const int8x8_t vb8 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 374 const int16x8_t vprod0x8 = vmull_s8(vb8, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 375 const int16x8_t vprod1x8 = vmull_s8(vb8, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 376 const int16x8_t vprod2x8 = vmull_s8(vb8, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
/external/libcups/examples/ |
D | testppdx.ppd | 111 *EXData0005: "CM3WJ0OzASUA7kH27eSgtwNteZJqmDklRnN9ajTTYf3wHjpxeTXYkGK4F30W4zaV1lI1bm+vb8+4+Llw8LagM…
|