Home
last modified time | relevance | path

Searched refs:vb11 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c16-minmax-neon-mlal-padal.c123 const int8x16_t vb11 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
228 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
229 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
230 int16x8_t vprod2x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
231 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
232 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
233 vprod2x11 = vmlal_s8(vprod2x11, vget_high_s8(vb11), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c146 const int8x16_t vb11 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
284 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
285 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
286 int16x8_t vprod2x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
287 int16x8_t vprod3x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
288 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
289 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
290 vprod2x11 = vmlal_s8(vprod2x11, vget_high_s8(vb11), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
291 vprod3x11 = vmlal_s8(vprod3x11, vget_high_s8(vb11), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c100 const int8x16_t vb11 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
172 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
173 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
174 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
175 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c77 const int8x16_t vb11 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
116 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
117 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D1x16c8-minmax-neon-mull-padal.c100 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
101 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c145 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
146 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
147 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c235 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
236 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
237 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
238 const int16x8_t vprod2x11 = vmull_s8(vb11, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
239 const int16x8_t vprod3x11 = vmull_s8(vb11, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c190 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
191 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
192 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
193 const int16x8_t vprod2x11 = vmull_s8(vb11, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c189 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
190 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c284 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
285 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
286 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c379 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
380 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
381 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
382 const int16x8_t vprod2x11 = vmull_s8(vb11, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c474 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
475 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
476 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
477 const int16x8_t vprod2x11 = vmull_s8(vb11, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
478 const int16x8_t vprod3x11 = vmull_s8(vb11, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c16-minmax-neon-mlal-padal.c138 const int8x16_t vb11 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
243 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
244 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
245 int16x8_t vprod2x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
246 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
247 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
248 vprod2x11 = vmlal_s8(vprod2x11, vget_high_s8(vb11), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c163 const int8x16_t vb11 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
301 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
302 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
303 int16x8_t vprod2x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
304 int16x8_t vprod3x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
305 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
306 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
307 vprod2x11 = vmlal_s8(vprod2x11, vget_high_s8(vb11), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
308 vprod3x11 = vmlal_s8(vprod3x11, vget_high_s8(vb11), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c113 const int8x16_t vb11 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
185 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
186 int16x8_t vprod1x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
187 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
188 vprod1x11 = vmlal_s8(vprod1x11, vget_high_s8(vb11), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c88 const int8x16_t vb11 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
127 int16x8_t vprod0x11 = vmull_s8(vget_low_s8(vb11), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
128 vprod0x11 = vmlal_s8(vprod0x11, vget_high_s8(vb11), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D2x16c8-minmax-neon-mull-padal.c158 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
159 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
160 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c252 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
253 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
254 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
255 const int16x8_t vprod2x11 = vmull_s8(vb11, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
256 const int16x8_t vprod3x11 = vmull_s8(vb11, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mull-padal.c111 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
112 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c205 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
206 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
207 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
208 const int16x8_t vprod2x11 = vmull_s8(vb11, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c200 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
201 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c297 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
298 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
299 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c491 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
492 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
493 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
494 const int16x8_t vprod2x11 = vmull_s8(vb11, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
495 const int16x8_t vprod3x11 = vmull_s8(vb11, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c394 const int8x8_t vb11 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
395 const int16x8_t vprod0x11 = vmull_s8(vb11, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
396 const int16x8_t vprod1x11 = vmull_s8(vb11, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
397 const int16x8_t vprod2x11 = vmull_s8(vb11, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()