Home
last modified time | relevance | path

Searched refs:vb9 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D3x16c16-minmax-neon-mlal-padal.c121 const int8x16_t vb9 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
210 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
211 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
212 int16x8_t vprod2x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
213 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
214 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
215 vprod2x9 = vmlal_s8(vprod2x9, vget_high_s8(vb9), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c144 const int8x16_t vb9 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
260 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
261 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
262 int16x8_t vprod2x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
263 int16x8_t vprod3x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
264 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
265 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
266 vprod2x9 = vmlal_s8(vprod2x9, vget_high_s8(vb9), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
267 vprod3x9 = vmlal_s8(vprod3x9, vget_high_s8(vb9), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c98 const int8x16_t vb9 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
160 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
161 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
162 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
163 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c75 const int8x16_t vb9 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
110 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
111 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D1x16c8-minmax-neon-mull-padal.c94 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
95 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c135 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
136 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
137 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c217 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
218 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
219 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
220 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
221 const int16x8_t vprod3x9 = vmull_s8(vb9, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c176 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
177 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
178 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
179 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c183 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
184 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c274 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
275 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
276 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c365 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
366 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
367 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
368 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c456 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
457 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
458 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
459 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
460 const int16x8_t vprod3x9 = vmull_s8(vb9, va3); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D3x16c16-minmax-neon-mlal-padal.c136 const int8x16_t vb9 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
225 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
226 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
227 int16x8_t vprod2x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
228 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
229 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
230 vprod2x9 = vmlal_s8(vprod2x9, vget_high_s8(vb9), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c161 const int8x16_t vb9 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
277 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
278 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
279 int16x8_t vprod2x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
280 int16x8_t vprod3x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
281 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
282 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
283 vprod2x9 = vmlal_s8(vprod2x9, vget_high_s8(vb9), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
284 vprod3x9 = vmlal_s8(vprod3x9, vget_high_s8(vb9), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c111 const int8x16_t vb9 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
173 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
174 int16x8_t vprod1x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
175 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
176 vprod1x9 = vmlal_s8(vprod1x9, vget_high_s8(vb9), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c86 const int8x16_t vb9 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
121 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
122 vprod0x9 = vmlal_s8(vprod0x9, vget_high_s8(vb9), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D2x16c8-minmax-neon-mull-padal.c148 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
149 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
150 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D4x16c8-minmax-neon-mull-padal.c234 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
235 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
236 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
237 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
238 const int16x8_t vprod3x9 = vmull_s8(vb9, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mull-padal.c105 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
106 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mull-padal.c191 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
192 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
193 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
194 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c194 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
195 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c287 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
288 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
289 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c473 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
474 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
475 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
476 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
477 const int16x8_t vprod3x9 = vmull_s8(vb9, va3); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c380 const int8x8_t vb9 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
381 const int16x8_t vprod0x9 = vmull_s8(vb9, va0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
382 const int16x8_t vprod1x9 = vmull_s8(vb9, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
383 const int16x8_t vprod2x9 = vmull_s8(vb9, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()