Home
last modified time | relevance | path

Searched refs:vb13 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c16-minmax-neon-mlal-padal.c125 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
246 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
247 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
248 int16x8_t vprod2x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
249 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
250 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
251 vprod2x13 = vmlal_s8(vprod2x13, vget_high_s8(vb13), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c148 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
308 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
309 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
310 int16x8_t vprod2x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
311 int16x8_t vprod3x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
312 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
313 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
314 vprod2x13 = vmlal_s8(vprod2x13, vget_high_s8(vb13), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
315 vprod3x13 = vmlal_s8(vprod3x13, vget_high_s8(vb13), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c102 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
184 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
185 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
186 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
187 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c79 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
122 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
123 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D1x16c8-minmax-neon-mull-padal.c106 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
107 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c155 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
156 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
157 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c253 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
254 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
255 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
256 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
257 const int16x8_t vprod3x13 = vmull_s8(vb13, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c204 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
205 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
206 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
207 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c195 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
196 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c294 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
295 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
296 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c393 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
394 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
395 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
396 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c492 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
493 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
494 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
495 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
496 const int16x8_t vprod3x13 = vmull_s8(vb13, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c16-minmax-neon-mlal-padal.c140 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
261 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
262 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
263 int16x8_t vprod2x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
264 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
265 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
266 vprod2x13 = vmlal_s8(vprod2x13, vget_high_s8(vb13), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c165 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
325 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
326 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
327 int16x8_t vprod2x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
328 int16x8_t vprod3x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
329 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
330 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
331 vprod2x13 = vmlal_s8(vprod2x13, vget_high_s8(vb13), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
332 vprod3x13 = vmlal_s8(vprod3x13, vget_high_s8(vb13), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c115 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
197 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
198 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
199 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
200 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c90 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
133 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
134 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D2x16c8-minmax-neon-mull-padal.c168 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
169 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
170 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c270 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
271 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
272 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
273 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
274 const int16x8_t vprod3x13 = vmull_s8(vb13, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mull-padal.c117 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
118 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c219 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
220 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
221 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
222 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c206 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
207 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c307 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
308 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
309 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c509 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
510 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
511 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
512 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
513 const int16x8_t vprod3x13 = vmull_s8(vb13, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c408 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
409 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
410 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
411 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()