Home
last modified time | relevance | path

Searched refs: vb13 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c16-minmax-rndnu-neon-mlal.c125 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
246 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
247 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
248 int16x8_t vprod2x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
249 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
250 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
251 vprod2x13 = vmlal_s8(vprod2x13, vget_high_s8(vb13), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c148 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
308 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
309 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
310 int16x8_t vprod2x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
311 int16x8_t vprod3x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
312 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
313 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
314 vprod2x13 = vmlal_s8(vprod2x13, vget_high_s8(vb13), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
315 vprod3x13 = vmlal_s8(vprod3x13, vget_high_s8(vb13), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D2x16c16-minmax-rndnu-neon-mlal.c102 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
184 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
185 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
186 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
187 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
D1x16c16-minmax-rndnu-neon-mlal.c79 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local
122 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
123 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
D1x16c8-minmax-rndnu-neon-mull.c106 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local
107 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull()
D2x16c8-minmax-rndnu-neon-mull.c155 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
156 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
157 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
D4x16c8-minmax-rndnu-neon-mull.c253 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
254 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
255 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
256 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
257 const int16x8_t vprod3x13 = vmull_s8(vb13, va3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mull.c204 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
205 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
206 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
207 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mlal.c393 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
394 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
395 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
396 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D2x16c8-minmax-rndnu-neon-mlal.c294 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
295 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
296 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
D1x16c8-minmax-rndnu-neon-mlal.c195 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local
196 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
D4x16c8-minmax-rndnu-neon-mlal.c492 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
493 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
494 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
495 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
496 const int16x8_t vprod3x13 = vmull_s8(vb13, va3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c16-minmax-rndnu-neon-mlal.c140 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
261 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
262 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
263 int16x8_t vprod2x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
264 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
265 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
266 vprod2x13 = vmlal_s8(vprod2x13, vget_high_s8(vb13), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c165 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
325 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
326 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
327 int16x8_t vprod2x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
328 int16x8_t vprod3x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
329 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
330 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
331 vprod2x13 = vmlal_s8(vprod2x13, vget_high_s8(vb13), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
332 vprod3x13 = vmlal_s8(vprod3x13, vget_high_s8(vb13), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D2x16c16-minmax-rndnu-neon-mlal.c115 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
197 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
198 int16x8_t vprod1x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
199 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
200 vprod1x13 = vmlal_s8(vprod1x13, vget_high_s8(vb13), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
D1x16c16-minmax-rndnu-neon-mlal.c90 const int8x16_t vb13 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local
133 int16x8_t vprod0x13 = vmull_s8(vget_low_s8(vb13), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
134 vprod0x13 = vmlal_s8(vprod0x13, vget_high_s8(vb13), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
D2x16c8-minmax-rndnu-neon-mull.c168 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
169 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
170 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
D4x16c8-minmax-rndnu-neon-mull.c270 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
271 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
272 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
273 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
274 const int16x8_t vprod3x13 = vmull_s8(vb13, va3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mull.c219 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
220 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
221 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
222 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D1x16c8-minmax-rndnu-neon-mull.c117 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local
118 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mlal.c408 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
409 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
410 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
411 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D2x16c8-minmax-rndnu-neon-mlal.c307 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
308 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
309 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
D1x16c8-minmax-rndnu-neon-mlal.c206 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local
207 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
D4x16c8-minmax-rndnu-neon-mlal.c509 const int8x8_t vb13 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
510 const int16x8_t vprod0x13 = vmull_s8(vb13, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
511 const int16x8_t vprod1x13 = vmull_s8(vb13, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
512 const int16x8_t vprod2x13 = vmull_s8(vb13, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
513 const int16x8_t vprod3x13 = vmull_s8(vb13, va3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()