Home
last modified time | relevance | path

Searched refs:vbCDEFc0x0 (Results 1 – 25 of 48) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c2s4-minmax-rndnu-neon-mlal.c81 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
125 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
126 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
127 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
284 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
316 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
317 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
318 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
D2x16c2s4-minmax-rndnu-neon-mlal.c69 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
104 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
105 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
217 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
243 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
244 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
D4x16c2s4-minmax-rndnu-neon-mlal.c93 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
146 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
147 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
148 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
149 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
351 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
389 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
390 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
391 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
392 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
D1x16c2s4-minmax-rndnu-neon-mlal.c57 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
83 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
150 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
170 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
D3x16c2s4-minmax-rndnu-neon-mull.c78 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull() local
110 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
111 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
112 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
D4x16c2s4-minmax-rndnu-neon-mull.c89 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull() local
127 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
128 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
129 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
130 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
D2x16c2s4-minmax-rndnu-neon-mull.c67 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull() local
93 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
94 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
D1x16c2s4-minmax-rndnu-neon-mull.c56 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull() local
76 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
D4x16c2-minmax-rndnu-neon-mlal-dup.c92 … const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local
154 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
155 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
156 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
157 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
D4x16c2-minmax-rndnu-neon-mlal-ld2r.c100 … const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local
162 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
163 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
164 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
165 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
D4x16c2-minmax-rndnu-neon-mlal-ld4r.c92 … const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local
154 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
155 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
156 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
157 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
D4x16c2-minmax-rndnu-neon-mlal-ld1r.c116 … const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local
178 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
179 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
180 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
181 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
D3x16c2-minmax-rndnu-neon-mlal-dup.c80 … const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
131 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
132 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
133 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c2s4-minmax-rndnu-neon-mlal.c96 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
140 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
141 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
142 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
299 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal() local
331 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
332 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
333 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal()
D2x16c2s4-minmax-rndnu-neon-mlal.c82 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
117 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
118 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
230 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal() local
256 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
257 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal()
D4x16c2s4-minmax-rndnu-neon-mlal.c110 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
163 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
164 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
165 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
166 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
368 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal() local
406 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
407 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
408 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
409 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal()
D1x16c2s4-minmax-rndnu-neon-mlal.c68 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
94 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
161 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
181 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
D3x16c2s4-minmax-rndnu-neon-mull.c93 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull() local
125 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
126 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
127 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull()
D4x16c2s4-minmax-rndnu-neon-mull.c106 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull() local
144 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
145 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
146 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
147 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull()
D2x16c2s4-minmax-rndnu-neon-mull.c80 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull() local
106 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
107 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull()
D1x16c2s4-minmax-rndnu-neon-mull.c67 const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull() local
87 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
D4x16c2-minmax-rndnu-neon-mlal-ld2r.c118 … const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r() local
180 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
181 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
182 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
183 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r()
D4x16c2-minmax-rndnu-neon-mlal-ld1r.c134 … const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r() local
196 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
197 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
198 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
199 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r()
D4x16c2-minmax-rndnu-neon-mlal-ld4r.c110 … const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r() local
172 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
173 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
174 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
175 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r()
D4x16c2-minmax-rndnu-neon-mlal-dup.c110 … const int8x8_t vbCDEFc0x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup() local
172 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
173 int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, va1c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
174 int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, va2c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()
175 int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, va3c0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup()

12