Home
last modified time | relevance | path

Searched refs:vst1_s8 (Results 1 – 25 of 180) sorted by relevance

12345678

/external/XNNPACK/src/qs8-gemm/gen/
D8x8c4-minmax-neondot.c296 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
297 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
298 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
299 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
300 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
301 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
302 vst1_s8(c6 + 0, vget_low_s8(vout6x01234567_7x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
303 vst1_s8(c7 + 0, vget_high_s8(vout6x01234567_7x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
D6x8c4-minmax-neondot.c244 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
245 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
246 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
247 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
248 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
249 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
D4x8c4-minmax-neondot.c192 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
193 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
194 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
195 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
D8x16c4-minmax-neondot.c482 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
483 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
484 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
485 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
486 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
487 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
488 vst1_s8(c6, vget_low_s8(vout6x01234567_7x01234567)); c6 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
489 vst1_s8(c7, vget_high_s8(vout6x01234567_7x01234567)); c7 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
D6x16c4-minmax-neondot.c387 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
388 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
389 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
390 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
391 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
392 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
D4x16c4-minmax-neondot.c292 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
293 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
294 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
295 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
/external/XNNPACK/src/qs8-igemm/gen/
D8x8c4-minmax-neondot.c319 vst1_s8(c7 + 0, vget_high_s8(vout6x01234567_7x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
320 vst1_s8(c6 + 0, vget_low_s8(vout6x01234567_7x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
321 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
322 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
323 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
324 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
325 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
326 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
D6x8c4-minmax-neondot.c263 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
264 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
265 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
266 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
267 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
268 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
D4x8c4-minmax-neondot.c207 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
208 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
209 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
210 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
D8x16c4-minmax-neondot.c496 vst1_s8(c7, vget_high_s8(vout6x01234567_7x01234567)); c7 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
497 vst1_s8(c6, vget_low_s8(vout6x01234567_7x01234567)); c6 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
498 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
499 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
500 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
501 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
502 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
503 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
D6x16c4-minmax-neondot.c399 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
400 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
401 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
402 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
403 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
404 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
D4x16c4-minmax-neondot.c302 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
303 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
304 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
305 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/
Dpooling.h109 vst1_s8(acc + channel, acc_reg); in MaxPool()
132 vst1_s8(output_ptr + channel, a); in MaxPool()
255 vst1_s8(output_ptr + channel, buf8); in AveragePool()
/external/XNNPACK/src/qs8-igemm/
DMRxNRc4-neondot.c.in194 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
195 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
197 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
215 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
216vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
218 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
Dc16-neon-mlal-padal.c.in197 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
198 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
200 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
218 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
219vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
221 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
Dc8-neon-mull-padal.c.in215 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
216 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
218 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
236 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
237vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
239 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
Dc2-neon-mull-padal-dup.c.in233 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
234 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
236 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
254 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
255vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
257 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
Dneon-mull-addw-dup.c.in252 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
253 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
255 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
273 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
274vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
276 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
/external/XNNPACK/src/qs8-gemm/
DMRxNRc4-neondot.c.in188 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
189 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
191 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
212vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
213 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
215 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
Dc16-neon-mlal-padal.c.in186 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
187 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
189 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
208vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
209 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
211 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
Dc8-neon-mull-padal.c.in204 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
205 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
207 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
226vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
227 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
229 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
Dc2-neon-mull-padal-dup.c.in222 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
223 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
225 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
244vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
245 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
247 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
Dneon-mull-addw-dup.c.in241 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
242 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]}));
244 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]});
263vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8;
264 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8;
266 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-neon-ld64-x24.c83 vst1_s8(output, voutGHIJKLMN); output += 8; in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
107 vst1_s8(output, vout01234567); output += 8; in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-neon-ld64-x24.c91 vst1_s8(output, voutGHIJKLMN); output += 8; in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
120 vst1_s8(output, vout01234567); output += 8; in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()

12345678