/external/XNNPACK/src/qs8-gemm/gen/ |
D | 8x8c4-minmax-neondot.c | 296 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 297 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 298 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 299 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 300 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 301 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 302 vst1_s8(c6 + 0, vget_low_s8(vout6x01234567_7x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 303 vst1_s8(c7 + 0, vget_high_s8(vout6x01234567_7x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
|
D | 6x8c4-minmax-neondot.c | 244 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 245 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 246 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 247 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 248 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 249 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
|
D | 4x8c4-minmax-neondot.c | 192 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 193 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 194 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 195 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 482 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 483 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 484 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 485 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 486 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 487 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 488 vst1_s8(c6, vget_low_s8(vout6x01234567_7x01234567)); c6 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 489 vst1_s8(c7, vget_high_s8(vout6x01234567_7x01234567)); c7 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
|
D | 6x16c4-minmax-neondot.c | 387 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 388 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 389 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 390 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 391 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 392 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
|
D | 4x16c4-minmax-neondot.c | 292 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 293 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 294 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 295 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 8x8c4-minmax-neondot.c | 319 vst1_s8(c7 + 0, vget_high_s8(vout6x01234567_7x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 320 vst1_s8(c6 + 0, vget_low_s8(vout6x01234567_7x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 321 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 322 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 323 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 324 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 325 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 326 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
|
D | 6x8c4-minmax-neondot.c | 263 vst1_s8(c5 + 0, vget_high_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 264 vst1_s8(c4 + 0, vget_low_s8(vout4x01234567_5x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 265 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 266 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 267 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 268 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
|
D | 4x8c4-minmax-neondot.c | 207 vst1_s8(c3 + 0, vget_high_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 208 vst1_s8(c2 + 0, vget_low_s8(vout2x01234567_3x01234567)); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 209 vst1_s8(c1 + 0, vget_high_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 210 vst1_s8(c0 + 0, vget_low_s8(vout0x01234567_1x01234567)); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 496 vst1_s8(c7, vget_high_s8(vout6x01234567_7x01234567)); c7 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 497 vst1_s8(c6, vget_low_s8(vout6x01234567_7x01234567)); c6 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 498 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 499 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 500 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 501 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 502 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 503 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
|
D | 6x16c4-minmax-neondot.c | 399 vst1_s8(c5, vget_high_s8(vout4x01234567_5x01234567)); c5 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 400 vst1_s8(c4, vget_low_s8(vout4x01234567_5x01234567)); c4 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 401 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 402 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 403 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 404 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
|
D | 4x16c4-minmax-neondot.c | 302 vst1_s8(c3, vget_high_s8(vout2x01234567_3x01234567)); c3 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 303 vst1_s8(c2, vget_low_s8(vout2x01234567_3x01234567)); c2 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 304 vst1_s8(c1, vget_high_s8(vout0x01234567_1x01234567)); c1 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 305 vst1_s8(c0, vget_low_s8(vout0x01234567_1x01234567)); c0 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/ |
D | pooling.h | 109 vst1_s8(acc + channel, acc_reg); in MaxPool() 132 vst1_s8(output_ptr + channel, a); in MaxPool() 255 vst1_s8(output_ptr + channel, buf8); in AveragePool()
|
/external/XNNPACK/src/qs8-igemm/ |
D | MRxNRc4-neondot.c.in | 194 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 195 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 197 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 215 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 216 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 218 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
D | c16-neon-mlal-padal.c.in | 197 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 198 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 200 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 218 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 219 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 221 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
D | c8-neon-mull-padal.c.in | 215 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 216 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 218 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 236 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 237 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 239 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
D | c2-neon-mull-padal-dup.c.in | 233 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 234 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 236 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 254 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 255 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 257 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
D | neon-mull-addw-dup.c.in | 252 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 253 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 255 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 273 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 274 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 276 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
/external/XNNPACK/src/qs8-gemm/ |
D | MRxNRc4-neondot.c.in | 188 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 189 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 191 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 212 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 213 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 215 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
D | c16-neon-mlal-padal.c.in | 186 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 187 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 189 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 208 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 209 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 211 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
D | c8-neon-mull-padal.c.in | 204 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 205 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 207 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 226 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 227 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 229 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
D | c2-neon-mull-padal-dup.c.in | 222 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 223 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 225 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 244 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 245 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 247 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
D | neon-mull-addw-dup.c.in | 241 vst1_s8(c${M-1} + ${N}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 242 vst1_s8(c${M} + ${N}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); 244 vst1_s8(c${M} + ${N}, vout${M}x${ABC[N:N+8]}); 263 … vst1_s8(c${M-1}, vget_low_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M-1} += 8; 264 vst1_s8(c${M}, vget_high_s8(vout${M-1}x${ABC[N:N+8]}_${M}x${ABC[N:N+8]})); c${M} += 8; 266 vst1_s8(c${M}, vout${M}x${ABC[N:N+8]}); c${M} += 8;
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-neon-ld64-x24.c | 83 vst1_s8(output, voutGHIJKLMN); output += 8; in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24() 107 vst1_s8(output, vout01234567); output += 8; in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-neon-ld64-x24.c | 91 vst1_s8(output, voutGHIJKLMN); output += 8; in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24() 120 vst1_s8(output, vout01234567); output += 8; in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
|