Home
last modified time | relevance | path

Searched refs:vb12 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c16-minmax-rndnu-neon-mlal.c124 const int8x16_t vb12 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
237 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
238 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
239 int16x8_t vprod2x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
240 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
241 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
242 vprod2x12 = vmlal_s8(vprod2x12, vget_high_s8(vb12), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c147 const int8x16_t vb12 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
296 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
297 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
298 int16x8_t vprod2x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
299 int16x8_t vprod3x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
300 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
301 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
302 vprod2x12 = vmlal_s8(vprod2x12, vget_high_s8(vb12), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
303 vprod3x12 = vmlal_s8(vprod3x12, vget_high_s8(vb12), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D2x16c16-minmax-rndnu-neon-mlal.c101 const int8x16_t vb12 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
178 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
179 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
180 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
181 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
D1x16c16-minmax-rndnu-neon-mlal.c78 const int8x16_t vb12 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local
119 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
120 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
D1x16c8-minmax-rndnu-neon-mull.c103 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local
104 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull()
D2x16c8-minmax-rndnu-neon-mull.c150 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
151 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
152 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
D4x16c8-minmax-rndnu-neon-mull.c244 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
245 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
246 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
247 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
248 const int16x8_t vprod3x12 = vmull_s8(vb12, va3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mull.c197 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
198 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
199 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
200 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mlal.c386 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
387 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
388 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
389 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D2x16c8-minmax-rndnu-neon-mlal.c289 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
290 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
291 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
D1x16c8-minmax-rndnu-neon-mlal.c192 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local
193 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
D4x16c8-minmax-rndnu-neon-mlal.c483 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
484 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
485 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
486 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
487 const int16x8_t vprod3x12 = vmull_s8(vb12, va3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c16-minmax-rndnu-neon-mlal.c139 const int8x16_t vb12 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
252 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
253 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
254 int16x8_t vprod2x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
255 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
256 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
257 vprod2x12 = vmlal_s8(vprod2x12, vget_high_s8(vb12), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c164 const int8x16_t vb12 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
313 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
314 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
315 int16x8_t vprod2x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
316 int16x8_t vprod3x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
317 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
318 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
319 vprod2x12 = vmlal_s8(vprod2x12, vget_high_s8(vb12), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
320 vprod3x12 = vmlal_s8(vprod3x12, vget_high_s8(vb12), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D2x16c16-minmax-rndnu-neon-mlal.c114 const int8x16_t vb12 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal() local
191 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
192 int16x8_t vprod1x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
193 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
194 vprod1x12 = vmlal_s8(vprod1x12, vget_high_s8(vb12), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal()
D1x16c16-minmax-rndnu-neon-mlal.c89 const int8x16_t vb12 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal() local
130 int16x8_t vprod0x12 = vmull_s8(vget_low_s8(vb12), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
131 vprod0x12 = vmlal_s8(vprod0x12, vget_high_s8(vb12), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal()
D2x16c8-minmax-rndnu-neon-mull.c163 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull() local
164 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
165 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull()
D4x16c8-minmax-rndnu-neon-mull.c261 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
262 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
263 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
264 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
265 const int16x8_t vprod3x12 = vmull_s8(vb12, va3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mull.c212 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
213 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
214 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
215 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D1x16c8-minmax-rndnu-neon-mull.c114 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull() local
115 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mlal.c401 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
402 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
403 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
404 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D2x16c8-minmax-rndnu-neon-mlal.c302 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal() local
303 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
304 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal()
D1x16c8-minmax-rndnu-neon-mlal.c203 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local
204 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
D4x16c8-minmax-rndnu-neon-mlal.c500 const int8x8_t vb12 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
501 const int16x8_t vprod0x12 = vmull_s8(vb12, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
502 const int16x8_t vprod1x12 = vmull_s8(vb12, va1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
503 const int16x8_t vprod2x12 = vmull_s8(vb12, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
504 const int16x8_t vprod3x12 = vmull_s8(vb12, va3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()