/external/tensorflow/tensorflow/lite/kernels/internal/optimized/

D | depthwiseconv_uint8_3x3_filter.h
    6150  vst1q_s8(scratch_data_0, work_reg_a);
    6151  vst1q_s8(scratch_data_0 + 16, work_reg_b);
    6164  vst1q_s8(scratch_data_0, work_reg_a_sp);
    6165  vst1q_s8(scratch_data_0 + 16, work_reg_b_sp);
    6180  vst1q_s8(scratch_data_0, work_reg_a);
    6181  vst1q_s8(scratch_data_0 + 16, work_reg_b);
    6195  vst1q_s8(scratch_data_0, work_reg_a_sp);
    6196  vst1q_s8(scratch_data_0 + 16, work_reg_b_sp);
    6221  vst1q_s8(scratch_data_0, work_reg_a);
    6222  vst1q_s8(scratch_data_0 + 16, work_reg_b);
    [all …]

D | depthwiseconv_uint8_transitional.h
    381   vst1q_s8(shuffled_filter_data, filter_0_a);
    383   vst1q_s8(shuffled_filter_data, filter_0_b);
    385   vst1q_s8(shuffled_filter_data, filter_1_a);
    387   vst1q_s8(shuffled_filter_data, filter_1_b);
    389   vst1q_s8(shuffled_filter_data, filter_2_a);
    391   vst1q_s8(shuffled_filter_data, filter_2_b);
    1340  vst1q_s8(scratch_data_0, work_reg_a);
    1341  vst1q_s8(scratch_data_0 + 16, work_reg_b);
    1352  vst1q_s8(scratch_data_0, work_reg_a_sp);
    1353  vst1q_s8(scratch_data_0 + 16, work_reg_b_sp);
    [all …]
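The depthwise-conv hits above all follow one pattern: a pair of 16-lane int8 registers is written into a contiguous 32-byte slot of a scratch/workspace buffer, one vst1q_s8 at offset 0 and one at offset +16. A minimal sketch of that pattern is below; the buffer and register names mirror the listing but the function itself is illustrative, not the TFLite code.

```c
#include <arm_neon.h>
#include <stdint.h>

// Illustrative sketch: copy one 32-byte block into a scratch buffer using
// two 16-lane int8 stores, mirroring the scratch_data_0 / work_reg_a/b
// pattern in the depthwise-conv packing code (this helper is hypothetical).
static void copy_block_to_scratch(int8_t* scratch_data_0, const int8_t* src) {
  const int8x16_t work_reg_a = vld1q_s8(src);        // first 16 int8 lanes
  const int8x16_t work_reg_b = vld1q_s8(src + 16);   // next 16 int8 lanes
  vst1q_s8(scratch_data_0, work_reg_a);              // bytes 0..15
  vst1q_s8(scratch_data_0 + 16, work_reg_b);         // bytes 16..31
}
```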
/external/ruy/ruy/

D | pack_arm.cc | in Pack8bitRowMajorForNeon()
    2430  vst1q_s8(dst_ptr, vcombine_s8(val0, val8));
    2431  vst1q_s8(dst_ptr + 16, vcombine_s8(val1, val9));
    2433  vst1q_s8(dst_ptr, vcombine_s8(val2, val10));
    2434  vst1q_s8(dst_ptr + 16, vcombine_s8(val3, val11));
    2437  vst1q_s8(dst_ptr, vcombine_s8(val4, val12));
    2438  vst1q_s8(dst_ptr + 16, vcombine_s8(val5, val13));
    2440  vst1q_s8(dst_ptr, vcombine_s8(val6, val14));
    2441  vst1q_s8(dst_ptr + 16, vcombine_s8(val7, val15));
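In ruy's Pack8bitRowMajorForNeon() each store fuses two 8-lane halves into one 16-lane register with vcombine_s8 before writing it out. A hedged sketch of that combine-then-store step (the helper and its parameters are made up):

```c
#include <arm_neon.h>
#include <stdint.h>

// Sketch of the combine-then-store step seen in pack_arm.cc: two int8x8_t
// halves are fused into one int8x16_t and written with a single vst1q_s8.
// Names are illustrative only.
static void store_two_halves(int8_t* dst_ptr, int8x8_t lo_half, int8x8_t hi_half) {
  const int8x16_t packed = vcombine_s8(lo_half, hi_half);  // lanes 0..7 = lo, 8..15 = hi
  vst1q_s8(dst_ptr, packed);                               // one 16-byte store
}
```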
/external/XNNPACK/src/qs8-requantization/

D | fp32-neon.c | in xnn_qs8_requantize_fp32__neon()
    90   vst1q_s8(output, xyzw_clamped); output += 16;
    131  vst1q_s8(output, xyzw_packed); output += 16;

D | q31-neon.c | in xnn_qs8_requantize_q31__neon()
    122  vst1q_s8(output, xyzw_clamped);

D | precise-neon.c | in xnn_qs8_requantize_precise__neon()
    163  vst1q_s8(output, xyzw_clamped);
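The requantization kernels above end in the same way: the requantized values are narrowed to int8, typically clamped to the output range, and written 16 at a time (the fp32 variant also advances the output pointer in the same statement). A rough sketch of such a tail, assuming precomputed qmin/qmax and two int16 halves already in registers; the helper itself is hypothetical, not XNNPACK's code:

```c
#include <arm_neon.h>
#include <stdint.h>

// Sketch of a clamp-and-store tail: saturating-narrow two int16x8_t halves
// to one int8x16_t, clamp to [qmin, qmax], store 16 outputs, bump the
// pointer. Variable names are illustrative.
static int8_t* clamp_and_store(int8_t* output, int16x8_t lo, int16x8_t hi,
                               int8_t qmin, int8_t qmax) {
  const int8x16_t vqmin = vdupq_n_s8(qmin);
  const int8x16_t vqmax = vdupq_n_s8(qmax);
  const int8x16_t xyzw_packed = vcombine_s8(vqmovn_s16(lo), vqmovn_s16(hi));
  const int8x16_t xyzw_clamped = vminq_s8(vmaxq_s8(xyzw_packed, vqmin), vqmax);
  vst1q_s8(output, xyzw_clamped);
  return output + 16;
}
```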
/external/XNNPACK/src/qs8-igemm/gen/

D | 8x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
    469  vst1q_s8(c7 + 0, vout7x0123456789ABCDEF);
    470  vst1q_s8(c6 + 0, vout6x0123456789ABCDEF);
    471  vst1q_s8(c5 + 0, vout5x0123456789ABCDEF);
    472  vst1q_s8(c4 + 0, vout4x0123456789ABCDEF);
    473  vst1q_s8(c3 + 0, vout3x0123456789ABCDEF);
    474  vst1q_s8(c2 + 0, vout2x0123456789ABCDEF);
    475  vst1q_s8(c1 + 0, vout1x0123456789ABCDEF);
    476  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);

D | 6x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
    377  vst1q_s8(c5 + 0, vout5x0123456789ABCDEF);
    378  vst1q_s8(c4 + 0, vout4x0123456789ABCDEF);
    379  vst1q_s8(c3 + 0, vout3x0123456789ABCDEF);
    380  vst1q_s8(c2 + 0, vout2x0123456789ABCDEF);
    381  vst1q_s8(c1 + 0, vout1x0123456789ABCDEF);
    382  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);

D | 4x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
    285  vst1q_s8(c3 + 0, vout3x0123456789ABCDEF);
    286  vst1q_s8(c2 + 0, vout2x0123456789ABCDEF);
    287  vst1q_s8(c1 + 0, vout1x0123456789ABCDEF);
    288  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);

D | 4x16c2-minmax-neon-mull-padal-dup.c | in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
    479  vst1q_s8(c3 + 0, vout3x0123456789ABCDEF);
    480  vst1q_s8(c2 + 0, vout2x0123456789ABCDEF);
    481  vst1q_s8(c1 + 0, vout1x0123456789ABCDEF);
    482  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);

D | 1x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
    147  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);
/external/XNNPACK/src/qs8-gemm/gen/

D | 8x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
    446  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);
    447  vst1q_s8(c1 + 0, vout1x0123456789ABCDEF);
    448  vst1q_s8(c2 + 0, vout2x0123456789ABCDEF);
    449  vst1q_s8(c3 + 0, vout3x0123456789ABCDEF);
    450  vst1q_s8(c4 + 0, vout4x0123456789ABCDEF);
    451  vst1q_s8(c5 + 0, vout5x0123456789ABCDEF);
    452  vst1q_s8(c6 + 0, vout6x0123456789ABCDEF);
    453  vst1q_s8(c7 + 0, vout7x0123456789ABCDEF);

D | 6x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
    358  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);
    359  vst1q_s8(c1 + 0, vout1x0123456789ABCDEF);
    360  vst1q_s8(c2 + 0, vout2x0123456789ABCDEF);
    361  vst1q_s8(c3 + 0, vout3x0123456789ABCDEF);
    362  vst1q_s8(c4 + 0, vout4x0123456789ABCDEF);
    363  vst1q_s8(c5 + 0, vout5x0123456789ABCDEF);

D | 4x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
    270  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);
    271  vst1q_s8(c1 + 0, vout1x0123456789ABCDEF);
    272  vst1q_s8(c2 + 0, vout2x0123456789ABCDEF);
    273  vst1q_s8(c3 + 0, vout3x0123456789ABCDEF);

D | 4x16c2-minmax-neon-mull-padal-dup.c | in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup()
    459  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);
    460  vst1q_s8(c1 + 0, vout1x0123456789ABCDEF);
    461  vst1q_s8(c2 + 0, vout2x0123456789ABCDEF);
    462  vst1q_s8(c3 + 0, vout3x0123456789ABCDEF);

D | 1x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
    138  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);

D | 3x16c2-minmax-neon-mull-padal-dup.c | in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup()
    371  vst1q_s8(c0 + 0, vout0x0123456789ABCDEF);
    372  vst1q_s8(c1 + 0, vout1x0123456789ABCDEF);
    373  vst1q_s8(c2 + 0, vout2x0123456789ABCDEF);
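The qs8-gemm and qs8-igemm microkernels above share the same epilogue shape: once the accumulators for an MRx16 tile have been requantized and packed into one int8x16_t per row, each row is written with a single vst1q_s8 through its own output pointer (c0, c1, …; the igemm variants walk the rows in reverse order). A simplified 4-row sketch, assuming the per-row vectors are already computed; the function and parameter names are hypothetical:

```c
#include <arm_neon.h>
#include <stdint.h>

// Sketch of a 4x16 microkernel epilogue: each row of the output tile already
// sits in one int8x16_t, and each row has its own destination pointer.
// This mirrors the c0..c3 / vout0x0123456789ABCDEF stores above.
static void store_4x16_tile(int8_t* c0, int8_t* c1, int8_t* c2, int8_t* c3,
                            int8x16_t vout0, int8x16_t vout1,
                            int8x16_t vout2, int8x16_t vout3) {
  vst1q_s8(c0, vout0);  // row 0: 16 int8 outputs
  vst1q_s8(c1, vout1);  // row 1
  vst1q_s8(c2, vout2);  // row 2
  vst1q_s8(c3, vout3);  // row 3
}
```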
/external/XNNPACK/src/qs8-vaddc/gen/

D | minmax-neon-ld64-x32.c | in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
    91  vst1q_s8(output, vout0123456789ABCDEF); output += 16;
    92  vst1q_s8(output, voutGHIJKLMNOPQRSTUV); output += 16;

D | minmax-neon-ld64-x16.c | in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
    70  vst1q_s8(output, vout0123456789ABCDEF); output += 16;

D | minmax-neon-ld64-x24.c | in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
    82  vst1q_s8(output, vout0123456789ABCDEF); output += 16;
/external/XNNPACK/src/qs8-vadd/gen/

D | minmax-neon-ld64-x32.c | in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
    103  vst1q_s8(output, vout0123456789ABCDEF); output += 16;
    104  vst1q_s8(output, voutGHIJKLMNOPQRSTUV); output += 16;

D | minmax-neon-ld64-x16.c | in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
    74  vst1q_s8(output, vout0123456789ABCDEF); output += 16;
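The qs8-vadd and qs8-vaddc main loops store their results 16 bytes at a time and advance the output pointer in the same statement; the x32 variants simply do it twice per iteration. A minimal sketch of that store pattern, assuming the clamped result vectors are already in registers (the helper is illustrative):

```c
#include <arm_neon.h>
#include <stdint.h>

// Sketch of the store step in an ld64-x32 add kernel: two ready-made
// int8x16_t results are written back-to-back, advancing the output pointer
// by 16 after each store. Names are illustrative.
static int8_t* store_x32(int8_t* output, int8x16_t vout_lo, int8x16_t vout_hi) {
  vst1q_s8(output, vout_lo); output += 16;  // first 16 outputs
  vst1q_s8(output, vout_hi); output += 16;  // next 16 outputs
  return output;
}
```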
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/

D | mul.h
    121  vst1q_s8(output_data + i, clamped);   in MulElementwise()
    214  vst1q_s8(output_data + i, clamped);   in MulSimpleBroadcast()

D | pooling.h | in MaxPool()
    101  vst1q_s8(acc + channel, acc_reg);
    126  vst1q_s8(output_ptr + channel, a);
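In the int8 MaxPool path, line 101 updates a running per-channel maximum held in an accumulator buffer, and line 126 later writes the result to the output. A hedged sketch of the accumulator-update step; the helper and buffer names are assumptions, not the TFLite code:

```c
#include <arm_neon.h>
#include <stdint.h>

// Sketch of an int8 max-pooling accumulator update: load the current running
// maximum for 16 channels, take the lane-wise max with a new input slice, and
// store it back. Names are illustrative.
static void maxpool_accumulate_16(int8_t* acc, const int8_t* input_row) {
  int8x16_t acc_reg = vld1q_s8(acc);             // current per-channel max
  const int8x16_t in_reg = vld1q_s8(input_row);  // new 16-channel slice
  acc_reg = vmaxq_s8(acc_reg, in_reg);           // lane-wise maximum
  vst1q_s8(acc, acc_reg);                        // write the running max back
}
```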
/external/webp/src/dsp/

D | lossless_enc_neon.c | in TransformColor_NEON()
    122  vst1q_s8((int8_t*)(argb_data + i), out);
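The libwebp hit is the only one in this list that stores through a cast: argb_data is a buffer of packed 32-bit pixels, so the int8x16_t result is written via an (int8_t*) cast of the pixel pointer, covering 4 ARGB pixels per store. A small sketch of that idea, with hypothetical names apart from the cast itself:

```c
#include <arm_neon.h>
#include <stdint.h>

// Sketch of storing a 16-lane int8 result over 4 packed 32-bit pixels, as in
// the TransformColor_NEON hit: the pixel pointer is reinterpreted as int8_t*
// for the byte-wise store. The helper is illustrative.
static void store_over_4_pixels(uint32_t* argb_data, int i, int8x16_t out) {
  vst1q_s8((int8_t*)(argb_data + i), out);  // writes pixels i .. i+3
}
```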