Searched refs:vbCDEFc3x0 (Results 1 – 25 of 48) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
3x16c2s4-minmax-rndnu-neon-mlal.c
93   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
263 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
264 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
265 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
296 const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
397 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
398 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
399 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
2x16c2s4-minmax-rndnu-neon-mlal.c
81   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
200 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
201 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
229 const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
297 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
298 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
4x16c2s4-minmax-rndnu-neon-mlal.c
105   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
326 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
327 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
328 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
329 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
363 const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
497 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
498 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
499 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
500 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
1x16c2s4-minmax-rndnu-neon-mlal.c
69   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
137 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
162 const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
197 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
3x16c2s4-minmax-rndnu-neon-mull.c
90   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull() local
191 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
192 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
193 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
4x16c2s4-minmax-rndnu-neon-mull.c
101   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull() local
235 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
236 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
237 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
238 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
2x16c2s4-minmax-rndnu-neon-mull.c
79   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull() local
147 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
148 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
1x16c2s4-minmax-rndnu-neon-mull.c
68   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull() local
103 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
4x16c2-minmax-rndnu-neon-mlal-dup.c
104   … const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local
337 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
338 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
339 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
340 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
4x16c2-minmax-rndnu-neon-mlal-ld2r.c
112   … const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local
345 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
346 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
347 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
348 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
4x16c2-minmax-rndnu-neon-mlal-ld4r.c
104   … const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local
337 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
338 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
339 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
340 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
4x16c2-minmax-rndnu-neon-mlal-ld1r.c
128   … const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local
361 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
362 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
363 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
364 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
3x16c2-minmax-rndnu-neon-mlal-dup.c
92   … const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
272 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
273 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
274 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2c3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
/external/XNNPACK/src/qs8-igemm/gen/
3x16c2s4-minmax-rndnu-neon-mlal.c
108   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
278 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
279 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
280 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
311 const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
412 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
413 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
414 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
2x16c2s4-minmax-rndnu-neon-mlal.c
94   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
213 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
214 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
242 const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
310 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
311 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
4x16c2s4-minmax-rndnu-neon-mlal.c
122   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
343 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
344 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
345 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
346 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
380 const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
514 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
515 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
516 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
517 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
1x16c2s4-minmax-rndnu-neon-mlal.c
80   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
148 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
173 const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
208 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
3x16c2s4-minmax-rndnu-neon-mull.c
105   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull() local
206 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
207 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
208 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
4x16c2s4-minmax-rndnu-neon-mull.c
118   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull() local
252 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
253 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
254 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
255 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
2x16c2s4-minmax-rndnu-neon-mull.c
92   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull() local
160 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
161 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
1x16c2s4-minmax-rndnu-neon-mull.c
79   const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull() local
114 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
4x16c2-minmax-rndnu-neon-mlal-ld2r.c
130   … const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local
363 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
364 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
365 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
366 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
4x16c2-minmax-rndnu-neon-mlal-ld1r.c
146   … const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local
379 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
380 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
381 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
382 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
4x16c2-minmax-rndnu-neon-mlal-ld4r.c
122   … const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local
355 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
356 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
357 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
358 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
4x16c2-minmax-rndnu-neon-mlal-dup.c
122   … const int8x8_t vbCDEFc3x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local
355 int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
356 int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, va1c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
357 int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, va2c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
358 int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, va3c3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
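
Note on the pattern shared by the matches above (not part of the search output): in each of these qs8-gemm and qs8-igemm micro-kernels, vbCDEFc3x0 holds an 8-byte slice of packed int8 weights. Per the kernels' naming convention this appears to cover output channels 0xC..0xF of the 16-channel tile, the fourth pair of K values (c3), and the first unrolled buffer (x0). Every hit loads the slice with vld1_s8, advances the packed-weight pointer w by 8 bytes (the spellings "(const int8_t*) w + 8" and "(const void*) ((uintptr_t) w + 8 * sizeof(int8_t))" are equivalent), and forms widening products (int8 inputs, int16 results) against one activation vector per row with vmull_s8. The sketch below is a minimal illustration of that step for a single row, assuming the usual accumulation into int32 accumulators with vpadalq_s16 that these kernels perform a few lines later; the helper name is hypothetical and the code is not taken from XNNPACK.

  #include <arm_neon.h>

  // One "load weights, widening multiply, accumulate" step for row 0 and the
  // CDEF channel group. Assumption: the int16 products are folded into an
  // int32x4_t accumulator with vpadalq_s16, as in the surrounding kernels.
  static inline int32x4_t qs8_cdef_c3_step(const int8_t** w, int8x8_t va0x0, int32x4_t vacc0xCDEF)
  {
    const int8x8_t vbCDEFc3x0 = vld1_s8(*w);                      // load 8 packed int8 weights
    *w += 8;                                                      // advance the packed-weight pointer
    const int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, va0x0);  // 8 widening int8*int8 -> int16 products
    return vpadalq_s16(vacc0xCDEF, vprod0xCDEFc3);                // pairwise-add adjacent pairs into 4 int32 lanes
  }

The c2s4 hits multiply against the row's activation vector directly (va0x0, va1x0, ...), while the c2 dup/ld1r/ld2r/ld4r hits multiply against a lane-duplicated copy (va0c3x0, ...); the load-and-vmull_s8 step matched by this search is otherwise the same in both families.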
