/external/libgav1/libgav1/src/dsp/arm/ |
D | loop_filter_neon.cc |
    36  inline uint8x8_t Hev(const uint8x8_t abd_p0p1_q0q1, const uint8_t thresh) {  in Hev()
    37  const uint8x8_t a = vcgt_u8(abd_p0p1_q0q1, vdup_n_u8(thresh));  in Hev()
    42  inline uint8x8_t OuterThreshold(const uint8x8_t p0q0, const uint8x8_t p1q1,  in OuterThreshold()
    45  const uint8x8_t b = vabd_u8(a.val[0], a.val[1]);  in OuterThreshold()
    46  const uint8x8_t p0q0_double = vqadd_u8(b, b);  in OuterThreshold()
    47  const uint8x8_t p1q1_half = RightShiftVector<32>(vshr_n_u8(b, 1));  in OuterThreshold()
    48  const uint8x8_t c = vqadd_u8(p0q0_double, p1q1_half);  in OuterThreshold()
    54  inline uint8x8_t NeedsFilter4(const uint8x8_t abd_p0p1_q0q1,  in NeedsFilter4()
    55  const uint8x8_t p0q0, const uint8x8_t p1q1,  in NeedsFilter4()
    58  const uint8x8_t a = vcle_u8(abd_p0p1_q0q1, vdup_n_u8(inner_thresh));  in NeedsFilter4()
    [all …]
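These libgav1 hits implement the filter4 decisions: Hev flags high edge variance, and OuterThreshold checks |p0 - q0| * 2 + |p1 - q1| / 2 <= blimit. A minimal sketch of that outer test, assuming unpacked p/q vectors rather than the file's combined p0q0/p1q1 layout (outer_threshold is a hypothetical helper):

```c
#include <arm_neon.h>

// Sketch of the filter4 outer threshold: |p0 - q0| * 2 + |p1 - q1| / 2
// <= blimit, per lane. Saturating adds keep an overflowing sum above
// blimit, so overflow still disables filtering.
static inline uint8x8_t outer_threshold(uint8x8_t p0, uint8x8_t q0,
                                        uint8x8_t p1, uint8x8_t q1,
                                        uint8_t blimit) {
  const uint8x8_t abd_p0q0 = vabd_u8(p0, q0);  // |p0 - q0|
  const uint8x8_t abd_p1q1 = vabd_u8(p1, q1);  // |p1 - q1|
  const uint8x8_t sum = vqadd_u8(vqadd_u8(abd_p0q0, abd_p0q0),
                                 vshr_n_u8(abd_p1q1, 1));
  return vcle_u8(sum, vdup_n_u8(blimit));      // 0xFF where filtering applies
}
```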
|
D | obmc_neon.cc |
    40  const uint8x8_t pred_mask,  in WriteObmcLine4()
    41  const uint8x8_t obmc_pred_mask) {  in WriteObmcLine4()
    42  const uint8x8_t pred_val = Load4(pred);  in WriteObmcLine4()
    43  const uint8x8_t obmc_pred_val = Load4(obmc_pred);  in WriteObmcLine4()
    45  const uint8x8_t result =  in WriteObmcLine4()
    57  const uint8x8_t mask_inverter = vdup_n_u8(64);  in OverlapBlend2xH_NEON()
    59  uint8x8_t pred_mask;  in OverlapBlend2xH_NEON()
    60  uint8x8_t obmc_pred_mask;  in OverlapBlend2xH_NEON()
    71  uint8x8_t pred_val = vdup_n_u8(0);  in OverlapBlend2xH_NEON()
    72  uint8x8_t obmc_pred_val = vdup_n_u8(0);  in OverlapBlend2xH_NEON()
    [all …]
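The 64-valued mask_inverter implies the two masks sum to 64, so each output pixel is (pred * m + obmc_pred * (64 - m) + 32) >> 6. A hedged sketch of that per-lane blend (obmc_blend8 is a hypothetical name):

```c
#include <arm_neon.h>

// Overlapped block motion compensation blend for 8 pixels:
// result = (pred * m + obmc_pred * (64 - m) + 32) >> 6.
static inline uint8x8_t obmc_blend8(uint8x8_t pred, uint8x8_t obmc_pred,
                                    uint8x8_t pred_mask,        // m
                                    uint8x8_t obmc_pred_mask) { // 64 - m
  uint16x8_t acc = vmull_u8(pred, pred_mask);      // widen to 16 bits
  acc = vmlal_u8(acc, obmc_pred, obmc_pred_mask);  // accumulate second term
  return vrshrn_n_u16(acc, 6);                     // rounded narrowing >> 6
}
```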
|
D | intrapred_smooth_neon.cc |
    80  uint8x8_t top_v;  in Smooth4Or8xN_NEON()
    86  const uint8x8_t top_right_v = vdup_n_u8(top_right);  in Smooth4Or8xN_NEON()
    87  const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left);  in Smooth4Or8xN_NEON()
    89  const uint8x8_t weights_x_v = vld1_u8(kSmoothWeights + width - 4);  in Smooth4Or8xN_NEON()
    91  const uint8x8_t scaled_weights_x =  in Smooth4Or8xN_NEON()
    95  const uint8x8_t left_v = vdup_n_u8(left[y]);  in Smooth4Or8xN_NEON()
    96  const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]);  in Smooth4Or8xN_NEON()
    97  const uint8x8_t scaled_weights_y = vdup_n_u8(256 - weights_y[y]);  in Smooth4Or8xN_NEON()
    120  const uint8x16_t top, const uint8x8_t left, const uint8x8_t top_right,  in CalculateWeightsAndPred()
    121  const uint8x8_t weights_y, const uint8x16_t weights_x,  in CalculateWeightsAndPred()
    [all …]
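The AV1 SMOOTH predictor mixes top/left samples with their opposing corners: pred = (w_y*top + (256 - w_y)*bottom_left + w_x*left + (256 - w_x)*top_right + 256) >> 9. A hedged sketch of one 8-pixel row (smooth8 is a hypothetical helper; like the file's vdup_n_u8(256 - weights_y[y]) truncation, it assumes all weights are >= 1 so 256 - w fits in a byte):

```c
#include <arm_neon.h>

// SMOOTH prediction for one row of 8 pixels. Each vmull/vmlal pair
// stays within uint16 (max 256 * 255), but their sum can exceed 16
// bits, so the final add widens to 32 bits before the rounded >> 9.
static inline uint8x8_t smooth8(uint8x8_t top, uint8x8_t left,
                                uint8x8_t w_x, uint8x8_t w_y,
                                uint8x8_t top_right, uint8x8_t bottom_left) {
  const uint8x8_t sw_x = vsub_u8(vdup_n_u8(0), w_x);  // 256 - w_x (mod 256)
  const uint8x8_t sw_y = vsub_u8(vdup_n_u8(0), w_y);
  uint16x8_t vert = vmull_u8(w_y, top);
  vert = vmlal_u8(vert, sw_y, bottom_left);
  uint16x8_t horz = vmull_u8(w_x, left);
  horz = vmlal_u8(horz, sw_x, top_right);
  const uint32x4_t lo = vaddl_u16(vget_low_u16(vert), vget_low_u16(horz));
  const uint32x4_t hi = vaddl_u16(vget_high_u16(vert), vget_high_u16(horz));
  return vmovn_u16(vcombine_u16(vrshrn_n_u32(lo, 9), vrshrn_n_u32(hi, 9)));
}
```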
|
D | mask_blend_neon.cc |
    61  const uint8x8_t mask_val0 = Load4(mask);  in GetMask4x2()
    62  const uint8x8_t mask_val = Load4<1>(mask + mask_stride, mask_val0);  in GetMask4x2()
    78  const uint8x8_t mask_val = vld1_u8(mask);  in GetMask8()
    103  const uint8x8_t result =  in WriteMaskBlendLine4x2()
    217  uint8x8_t result;  in MaskBlend_NEON()
    252  inline uint8x8_t GetInterIntraMask4x2(const uint8_t* mask,  in GetInterIntraMask4x2()
    255  const uint8x8_t mask_val =  in GetInterIntraMask4x2()
    258  const uint8x8_t next_mask_val = vpadd_u8(vld1_u8(mask + mask_stride),  in GetInterIntraMask4x2()
    264  const uint8x8_t sum = vqadd_u8(mask_val, next_mask_val);  in GetInterIntraMask4x2()
    272  const uint8x8_t mask_val0 = Load4(mask);  in GetInterIntraMask4x2()
    [all …]
|
D | common_neon.h |
    126  inline void PrintReg(const uint8x8_t val, const char* name) {
    197  inline uint8x8_t Load2(const void* const buf) {  in Load2()
    206  inline uint8x8_t Load2(const void* const buf, uint8x8_t val) {  in Load2()
    216  inline uint8x8_t Load4(const void* const buf) {  in Load4()
    225  inline uint8x8_t Load4(const void* const buf, uint8x8_t val) {  in Load4()
    249  inline void StoreLo4(void* const buf, const uint8x8_t val) {  in StoreLo4()
    254  inline void StoreHi4(void* const buf, const uint8x8_t val) {  in StoreHi4()
    261  inline void Store2(void* const buf, const uint8x8_t val) {  in Store2()
    294  inline uint8x8_t LeftShiftVector(const uint8x8_t vector) {  in LeftShiftVector()
    299  inline uint8x8_t RightShiftVector(const uint8x8_t vector) {  in RightShiftVector()
    [all …]
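These are libgav1's sub-vector load/store helpers. One plausible shape for Load4 and StoreLo4, matching the signatures above (the file's exact bodies are not shown in the hits): move 4 bytes between memory and half of a 64-bit vector through a 32-bit lane, with memcpy standing in for a potentially unaligned access.

```c
#include <string.h>
#include <arm_neon.h>

// Load 4 bytes into a uint8x8_t (duplicated across both halves).
static inline uint8x8_t Load4(const void* const buf) {
  uint32_t val;
  memcpy(&val, buf, sizeof(val));
  return vreinterpret_u8_u32(vdup_n_u32(val));
}

// Store the low 4 bytes of a uint8x8_t.
static inline void StoreLo4(void* const buf, const uint8x8_t val) {
  const uint32_t lo = vget_lane_u32(vreinterpret_u32_u8(val), 0);
  memcpy(buf, &lo, sizeof(lo));
}
```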
|
/external/llvm-project/clang/test/CodeGen/ |
D | aarch64-neon-tbl.c |
    19  int8x8_t test_vqtbl1_s8(int8x16_t a, uint8x8_t b) {  in test_vqtbl1_s8()
    62  int8x8_t test_vqtbl2_s8(int8x16x2_t a, uint8x8_t b) {  in test_vqtbl2_s8()
    112  int8x8_t test_vqtbl3_s8(int8x16x3_t a, uint8x8_t b) {  in test_vqtbl3_s8()
    168  int8x8_t test_vqtbl4_s8(int8x16x4_t a, uint8x8_t b) {  in test_vqtbl4_s8()
    351  int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, uint8x8_t c) {  in test_vqtbx1_s8()
    372  int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, uint8x8_t c) {  in test_vqtbx2_s8()
    396  int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, uint8x8_t c) {  in test_vqtbx3_s8()
    423  int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, uint8x8_t c) {  in test_vqtbx4_s8()
    510  uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {  in test_vtbl1_u8()
    517  uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {  in test_vqtbl1_u8()
    [all …]
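These clang tests cover the table-lookup intrinsics. vqtbl1_u8 treats each byte of the index vector as an index into the 16-byte table, producing 0 for indices >= 16; the vtbx/vqtbx forms instead leave the destination byte unchanged for out-of-range indices. The vqtbl*/vqtbx* intrinsics are AArch64-only. A small demo (reverse_low8 is a made-up name):

```c
#include <arm_neon.h>

// Reverse the low 8 bytes of a 16-byte table with one TBL lookup.
static inline uint8x8_t reverse_low8(uint8x16_t table) {
  // vcreate_u8 fills lanes from the low byte up, so lane 0 = 7 ... lane 7 = 0.
  const uint8x8_t idx = vcreate_u8(0x0001020304050607ULL);
  return vqtbl1_u8(table, idx);  // selects table bytes 7,6,5,4,3,2,1,0
}
```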
|
D | arm-neon-shifts.c |
    10  uint8x8_t test_shift_vshr(uint8x8_t a) {  in test_shift_vshr()
    22  uint8x8_t test_shift_vshr_umax(uint8x8_t a) {  in test_shift_vshr_umax()
    28  uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {  in test_shift_vsra()
    42  uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) {  in test_shift_vsra_umax()
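vshr_n_u8 is a logical right shift by an immediate in 1..8; the "_umax" variants exercise the maximum shift of 8, which yields zero for unsigned lanes. vsra_n_u8 shifts its second operand and accumulates into the first. A sketch mirroring the tests (names and immediates here are illustrative, not copied from the file):

```c
#include <arm_neon.h>

uint8x8_t shr_example(uint8x8_t a) {
  return vshr_n_u8(a, 5);     // a >> 5 in each lane
}

uint8x8_t sra_example(uint8x8_t a, uint8x8_t b) {
  return vsra_n_u8(a, b, 5);  // a + (b >> 5) in each lane
}
```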
|
D | aarch64-neon-shifts.c |
    6  uint8x8_t test_shift_vshr(uint8x8_t a) {  in test_shift_vshr()
    18  uint8x8_t test_shift_vshr_umax(uint8x8_t a) {  in test_shift_vshr_umax()
    24  uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {  in test_shift_vsra()
    38  uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) {  in test_shift_vsra_umax()
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-tbl.c |
    510  uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {  in test_vtbl1_u8()
    517  uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {  in test_vqtbl1_u8()
    539  uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {  in test_vtbl2_u8()
    560  uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {  in test_vqtbl2_u8()
    586  uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {  in test_vtbl3_u8()
    610  uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {  in test_vqtbl3_u8()
    639  uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {  in test_vtbl4_u8()
    666  uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {  in test_vqtbl4_u8()
    759  uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {  in test_vtbx1_u8()
    781  uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {  in test_vtbx2_u8()
    [all …]
|
D | aarch64-neon-shifts.c |
    6  uint8x8_t test_shift_vshr(uint8x8_t a) {  in test_shift_vshr()
    18  uint8x8_t test_shift_vshr_umax(uint8x8_t a) {  in test_shift_vshr_umax()
    24  uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {  in test_shift_vsra()
    38  uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) {  in test_shift_vsra_umax()
|
D | arm-neon-shifts.c |
    9  uint8x8_t test_shift_vshr(uint8x8_t a) {  in test_shift_vshr()
    21  uint8x8_t test_shift_vshr_umax(uint8x8_t a) {  in test_shift_vshr_umax()
    27  uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {  in test_shift_vsra()
    41  uint8x8_t test_shift_vsra_umax(uint8x8_t a, uint8x8_t b) {  in test_shift_vsra_umax()
|
/external/XNNPACK/src/qu8-dwconv/ |
D | up8x9-minmax-neon.c |
    27  …const uint8x8_t vkernel_zero_point = vld1_dup_u8((const uint8_t*) &params->neon.kernel_zero_point);  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    31  const uint8x8_t voutput_min = vld1_dup_u8(&params->neon.output_min);  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    32  const uint8x8_t voutput_max = vld1_dup_u8(&params->neon.output_max);  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    80  const uint8x8_t vk0 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t));  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    81  const uint8x8_t vi0 = vld1_u8(i0); i0 += 8;  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    87  const uint8x8_t vk1 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t));  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    88  const uint8x8_t vi1 = vld1_u8(i1); i1 += 8;  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    94  const uint8x8_t vk2 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t));  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    95  const uint8x8_t vi2 = vld1_u8(i2); i2 += 8;  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    101  const uint8x8_t vk3 = vld1_u8(w); w = (void*) ((uintptr_t) w + sizeof(uint8x8_t));  in xnn_qu8_dwconv_minmax_ukernel_up8x9__neon()
    [all …]
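The quantized depthwise kernel loads eight input bytes and eight kernel bytes per tap, subtracts the kernel zero point, and accumulates widened products. A hedged sketch of one of the nine taps (dwconv_tap is a hypothetical helper; the real kernel inlines this per tap and is assumed to fold the input zero point into the bias that initializes the accumulators):

```c
#include <arm_neon.h>

// One depthwise-conv tap over 8 channels: widen input and zero-point-
// adjusted kernel to 16 bits, multiply-accumulate into 32-bit lanes.
static inline void dwconv_tap(int32x4_t* vacc_lo, int32x4_t* vacc_hi,
                              uint8x8_t vi, uint8x8_t vk,
                              uint8x8_t vkernel_zero_point) {
  const int16x8_t vxi = vreinterpretq_s16_u16(vmovl_u8(vi));
  // vsubl_u8 wraps modulo 2^16; reinterpreting as s16 recovers the
  // signed difference k - kzp, which lies in [-255, 255].
  const int16x8_t vxk =
      vreinterpretq_s16_u16(vsubl_u8(vk, vkernel_zero_point));
  *vacc_lo = vmlal_s16(*vacc_lo, vget_low_s16(vxi), vget_low_s16(vxk));
  *vacc_hi = vmlal_s16(*vacc_hi, vget_high_s16(vxi), vget_high_s16(vxk));
}
```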
|
/external/libjpeg-turbo/simd/arm/ |
D | jcphuff-neon.c |
    207  uint8x8_t row0_eq0 = vmovn_u16(vceqq_s16(row0, vdupq_n_s16(0)));  in jsimd_encode_mcu_AC_first_prepare_neon()
    208  uint8x8_t row1_eq0 = vmovn_u16(vceqq_s16(row1, vdupq_n_s16(0)));  in jsimd_encode_mcu_AC_first_prepare_neon()
    209  uint8x8_t row2_eq0 = vmovn_u16(vceqq_s16(row2, vdupq_n_s16(0)));  in jsimd_encode_mcu_AC_first_prepare_neon()
    210  uint8x8_t row3_eq0 = vmovn_u16(vceqq_s16(row3, vdupq_n_s16(0)));  in jsimd_encode_mcu_AC_first_prepare_neon()
    211  uint8x8_t row4_eq0 = vmovn_u16(vceqq_s16(row4, vdupq_n_s16(0)));  in jsimd_encode_mcu_AC_first_prepare_neon()
    212  uint8x8_t row5_eq0 = vmovn_u16(vceqq_s16(row5, vdupq_n_s16(0)));  in jsimd_encode_mcu_AC_first_prepare_neon()
    213  uint8x8_t row6_eq0 = vmovn_u16(vceqq_s16(row6, vdupq_n_s16(0)));  in jsimd_encode_mcu_AC_first_prepare_neon()
    214  uint8x8_t row7_eq0 = vmovn_u16(vceqq_s16(row7, vdupq_n_s16(0)));  in jsimd_encode_mcu_AC_first_prepare_neon()
    217  const uint8x8_t bitmap_mask =  in jsimd_encode_mcu_AC_first_prepare_neon()
    229  uint8x8_t bitmap_rows_01 = vpadd_u8(row0_eq0, row1_eq0);  in jsimd_encode_mcu_AC_first_prepare_neon()
    [all …]
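The progressive Huffman encoder turns per-lane zero/nonzero comparison results into a packed bitmap: compare eight int16 coefficients to zero, narrow the 0xFFFF/0x0000 lanes to bytes, mask each lane to a distinct bit, then pairwise-add until all eight bits land in one byte. A hedged one-row sketch (row_zero_bitmap is a made-up name and the bit order is an assumption):

```c
#include <arm_neon.h>

static inline uint8_t row_zero_bitmap(int16x8_t row) {
  const uint8x8_t eq0 = vmovn_u16(vceqq_s16(row, vdupq_n_s16(0)));
  // One distinct bit per lane; vcreate fills from the low byte up,
  // so lane 0 maps to bit 7.
  const uint8x8_t bitmap_mask =
      vreinterpret_u8_u64(vcreate_u64(0x0102040810204080ULL));
  uint8x8_t bits = vand_u8(eq0, bitmap_mask);
  // The bits are disjoint, so pairwise addition acts as bitwise OR.
  bits = vpadd_u8(bits, bits);  // 8 -> 4 partial sums
  bits = vpadd_u8(bits, bits);  // 4 -> 2
  bits = vpadd_u8(bits, bits);  // 2 -> 1
  return vget_lane_u8(bits, 0);
}
```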
|
/external/libvpx/libvpx/vpx_dsp/arm/ |
D | deblock_neon.c |
    20  static uint8x8_t average_k_out(const uint8x8_t a2, const uint8x8_t a1,  in average_k_out()
    21  const uint8x8_t v0, const uint8x8_t b1,  in average_k_out()
    22  const uint8x8_t b2) {  in average_k_out()
    23  const uint8x8_t k1 = vrhadd_u8(a2, a1);  in average_k_out()
    24  const uint8x8_t k2 = vrhadd_u8(b2, b1);  in average_k_out()
    25  const uint8x8_t k3 = vrhadd_u8(k1, k2);  in average_k_out()
    29  static uint8x8_t generate_mask(const uint8x8_t a2, const uint8x8_t a1,  in generate_mask()
    30  const uint8x8_t v0, const uint8x8_t b1,  in generate_mask()
    31  const uint8x8_t b2, const uint8x8_t filter) {  in generate_mask()
    32  const uint8x8_t a2_v0 = vabd_u8(a2, v0);  in generate_mask()
    [all …]
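average_k_out builds a rounded neighbourhood average out of vrhadd_u8 halving adds, and generate_mask compares neighbour distances against the filter level. A hedged sketch combining both ideas; the final averaging tap and the exact mask combination are assumptions, and deblock8 is a made-up name:

```c
#include <arm_neon.h>

// Blur v0 with its four neighbours, then keep the original pixel
// wherever any neighbour differs from v0 by more than the filter level.
static inline uint8x8_t deblock8(uint8x8_t a2, uint8x8_t a1, uint8x8_t v0,
                                 uint8x8_t b1, uint8x8_t b2,
                                 uint8x8_t filter) {
  const uint8x8_t k1 = vrhadd_u8(a2, a1);   // (a2 + a1 + 1) >> 1
  const uint8x8_t k2 = vrhadd_u8(b2, b1);
  const uint8x8_t k3 = vrhadd_u8(k1, k2);
  const uint8x8_t avg = vrhadd_u8(k3, v0);  // assumed final tap
  uint8x8_t diff = vmax_u8(vabd_u8(a2, v0), vabd_u8(a1, v0));
  diff = vmax_u8(diff, vmax_u8(vabd_u8(b1, v0), vabd_u8(b2, v0)));
  const uint8x8_t mask = vcle_u8(diff, filter);  // 0xFF: take the average
  return vbsl_u8(mask, avg, v0);
}
```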
|
D | intrapred_neon.c |
    21  const uint8x8_t ref_u8 = vld1_u8(ref);  in dc_sum_4()
    27  const uint8x8_t dc) {  in dc_store_4x4()
    28  const uint8x8_t dc_dup = vdup_lane_u8(dc, 0);  in dc_store_4x4()
    37  const uint8x8_t a = vld1_u8(above);  in vpx_dc_predictor_4x4_neon()
    38  const uint8x8_t l = vld1_u8(left);  in vpx_dc_predictor_4x4_neon()
    41  uint8x8_t dc;  in vpx_dc_predictor_4x4_neon()
    51  const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 2));  in vpx_dc_left_predictor_4x4_neon()
    59  const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 2));  in vpx_dc_top_predictor_4x4_neon()
    66  const uint8x8_t dc = vdup_n_u8(0x80);  in vpx_dc_128_predictor_4x4_neon()
    76  const uint8x8_t ref_u8 = vld1_u8(ref);  in dc_sum_8()
    [all …]
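The DC predictor averages the reference pixels and broadcasts the result across the block. A hedged sketch of the 4x4 case: sum the four above and four left pixels with pairwise widening adds, round-shift by 3, broadcast lane 0, and write four bytes per row (load4 and dc_predictor_4x4 are made-up names; the library's dc_sum_4/dc_store_4x4 differ in detail):

```c
#include <stddef.h>
#include <string.h>
#include <arm_neon.h>

static inline uint8x8_t load4(const uint8_t *p) {
  uint32_t v;
  memcpy(&v, p, sizeof(v));
  return vreinterpret_u8_u32(vdup_n_u32(v));  // 4 bytes, duplicated
}

static void dc_predictor_4x4(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *above, const uint8_t *left) {
  // vpaddl on the duplicated loads gives {a01, a23, a01, a23} etc.
  uint16x4_t sum = vadd_u16(vpaddl_u8(load4(above)), vpaddl_u8(load4(left)));
  sum = vpadd_u16(sum, sum);  // every lane now holds the 8-pixel total
  const uint8x8_t dc =        // (total + 4) >> 3, broadcast to all lanes
      vdup_lane_u8(vreinterpret_u8_u16(vrshr_n_u16(sum, 3)), 0);
  for (int i = 0; i < 4; ++i) {
    const uint32_t row = vget_lane_u32(vreinterpret_u32_u8(dc), 0);
    memcpy(dst, &row, sizeof(row));
    dst += stride;
  }
}
```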
|
D | transpose_neon.h |
    67  static INLINE void transpose_u8_4x4(uint8x8_t *a0, uint8x8_t *a1) {  in transpose_u8_4x4()
    191  static INLINE void transpose_u8_4x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,  in transpose_u8_4x8()
    192  uint8x8_t *a3, const uint8x8_t a4,  in transpose_u8_4x8()
    193  const uint8x8_t a5, const uint8x8_t a6,  in transpose_u8_4x8()
    194  const uint8x8_t a7) {  in transpose_u8_4x8()
    396  static INLINE void transpose_u8_8x4(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,  in transpose_u8_8x4()
    397  uint8x8_t *a3) {  in transpose_u8_8x4()
    517  static INLINE void transpose_u8_8x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,  in transpose_u8_8x8()
    518  uint8x8_t *a3, uint8x8_t *a4, uint8x8_t *a5,  in transpose_u8_8x8()
    519  uint8x8_t *a6, uint8x8_t *a7) {  in transpose_u8_8x8()
    [all …]
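The classic pattern behind transpose_u8_8x8 is three rounds of vtrn at 8-, 16-, then 32-bit granularity. A self-contained sketch of that idea, assuming an array-based signature rather than the header's eight pointers (the header's version may also combine pairs into q registers):

```c
#include <arm_neon.h>

// Transpose an 8x8 byte matrix held in eight uint8x8_t row vectors.
static inline void transpose_8x8(uint8x8_t r[8]) {
  // Round 1: swap bytes between adjacent row pairs.
  const uint8x8x2_t b0 = vtrn_u8(r[0], r[1]);
  const uint8x8x2_t b1 = vtrn_u8(r[2], r[3]);
  const uint8x8x2_t b2 = vtrn_u8(r[4], r[5]);
  const uint8x8x2_t b3 = vtrn_u8(r[6], r[7]);
  // Round 2: swap 16-bit pairs between row groups of four.
  const uint16x4x2_t c0 = vtrn_u16(vreinterpret_u16_u8(b0.val[0]),
                                   vreinterpret_u16_u8(b1.val[0]));
  const uint16x4x2_t c1 = vtrn_u16(vreinterpret_u16_u8(b0.val[1]),
                                   vreinterpret_u16_u8(b1.val[1]));
  const uint16x4x2_t c2 = vtrn_u16(vreinterpret_u16_u8(b2.val[0]),
                                   vreinterpret_u16_u8(b3.val[0]));
  const uint16x4x2_t c3 = vtrn_u16(vreinterpret_u16_u8(b2.val[1]),
                                   vreinterpret_u16_u8(b3.val[1]));
  // Round 3: swap 32-bit halves between the top and bottom groups.
  const uint32x2x2_t d0 = vtrn_u32(vreinterpret_u32_u16(c0.val[0]),
                                   vreinterpret_u32_u16(c2.val[0]));
  const uint32x2x2_t d1 = vtrn_u32(vreinterpret_u32_u16(c1.val[0]),
                                   vreinterpret_u32_u16(c3.val[0]));
  const uint32x2x2_t d2 = vtrn_u32(vreinterpret_u32_u16(c0.val[1]),
                                   vreinterpret_u32_u16(c2.val[1]));
  const uint32x2x2_t d3 = vtrn_u32(vreinterpret_u32_u16(c1.val[1]),
                                   vreinterpret_u32_u16(c3.val[1]));
  r[0] = vreinterpret_u8_u32(d0.val[0]);
  r[1] = vreinterpret_u8_u32(d1.val[0]);
  r[2] = vreinterpret_u8_u32(d2.val[0]);
  r[3] = vreinterpret_u8_u32(d3.val[0]);
  r[4] = vreinterpret_u8_u32(d0.val[1]);
  r[5] = vreinterpret_u8_u32(d1.val[1]);
  r[6] = vreinterpret_u8_u32(d2.val[1]);
  r[7] = vreinterpret_u8_u32(d3.val[1]);
}
```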
|
D | vpx_convolve8_neon.h |
    20  uint8x8_t *const s0, uint8x8_t *const s1,  in load_u8_8x4()
    21  uint8x8_t *const s2, uint8x8_t *const s3) {  in load_u8_8x4()
    32  uint8x8_t *const s0, uint8x8_t *const s1,  in load_u8_8x8()
    33  uint8x8_t *const s2, uint8x8_t *const s3,  in load_u8_8x8()
    34  uint8x8_t *const s4, uint8x8_t *const s5,  in load_u8_8x8()
    35  uint8x8_t *const s6, uint8x8_t *const s7) {  in load_u8_8x8()
    97  static INLINE uint8x8_t convolve8_8(const int16x8_t s0, const int16x8_t s1,  in convolve8_8()
    119  static INLINE uint8x8_t scale_filter_8(const uint8x8_t *const s,  in scale_filter_8()
|
D | idct8x8_1_add_neon.c |
    16  static INLINE uint8x8_t create_dcd(const int16_t dc) {  in create_dcd()
    22  const uint8x8_t res) {  in idct8x8_1_add_pos_kernel()
    23  const uint8x8_t a = vld1_u8(*dest);  in idct8x8_1_add_pos_kernel()
    24  const uint8x8_t b = vqadd_u8(a, res);  in idct8x8_1_add_pos_kernel()
    30  const uint8x8_t res) {  in idct8x8_1_add_neg_kernel()
    31  const uint8x8_t a = vld1_u8(*dest);  in idct8x8_1_add_neg_kernel()
    32  const uint8x8_t b = vqsub_u8(a, res);  in idct8x8_1_add_neg_kernel()
    45  const uint8x8_t dc = create_dcd(a1);  in vpx_idct8x8_1_add_neon()
    55  const uint8x8_t dc = create_dcd(-a1);  in vpx_idct8x8_1_add_neon()
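A DC-only inverse transform adds one constant to every pixel of the block, and the unsigned saturating forms clamp to [0, 255] without a separate clip: vqadd_u8 for a positive DC, vqsub_u8 with |dc| for a negative one, which is why the caller above invokes create_dcd(a1) or create_dcd(-a1). A sketch of one row step (add_dc_row is a made-up name):

```c
#include <stddef.h>
#include <arm_neon.h>

// Add a broadcast non-negative DC residual to 8 pixels of one row.
static inline void add_dc_row(uint8_t **dest, ptrdiff_t stride,
                              uint8x8_t res) {
  const uint8x8_t a = vld1_u8(*dest);
  vst1_u8(*dest, vqadd_u8(a, res));  // saturates at 255
  *dest += stride;
}
```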
|
D | subpel_variance_neon.c |
    32  const uint8x8_t f0 = vdup_n_u8(filter[0]);  in var_filter_block2d_bil_w4()
    33  const uint8x8_t f1 = vdup_n_u8(filter[1]);  in var_filter_block2d_bil_w4()
    36  const uint8x8_t src_0 = load_unaligned_u8(src_ptr, src_pixels_per_line);  in var_filter_block2d_bil_w4()
    37  const uint8x8_t src_1 =  in var_filter_block2d_bil_w4()
    41  const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS);  in var_filter_block2d_bil_w4()
    55  const uint8x8_t f0 = vdup_n_u8(filter[0]);  in var_filter_block2d_bil_w8()
    56  const uint8x8_t f1 = vdup_n_u8(filter[1]);  in var_filter_block2d_bil_w8()
    59  const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);  in var_filter_block2d_bil_w8()
    60  const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]);  in var_filter_block2d_bil_w8()
    63  const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS);  in var_filter_block2d_bil_w8()
    [all …]
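The bilinear first pass computes out = (src[x] * filter[0] + src[x + step] * filter[1] + 64) >> 7, where the two taps sum to 128 (FILTER_BITS is 7 in libvpx). A sketch of the width-8 inner step (bilinear8 is a made-up name):

```c
#include <arm_neon.h>

static inline uint8x8_t bilinear8(const uint8_t *src_ptr, int pixel_step,
                                  const uint8_t filter[2]) {
  const uint8x8_t f0 = vdup_n_u8(filter[0]);
  const uint8x8_t f1 = vdup_n_u8(filter[1]);
  const uint8x8_t src_0 = vld1_u8(src_ptr);
  const uint8x8_t src_1 = vld1_u8(src_ptr + pixel_step);
  uint16x8_t b = vmull_u8(src_0, f0);  // widen: 255 * 128 fits in uint16
  b = vmlal_u8(b, src_1, f1);
  return vrshrn_n_u16(b, 7);           // rounded narrowing shift
}
```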
|
/external/libaom/libaom/aom_dsp/arm/ |
D | loopfilter_neon.c |
    21  static INLINE uint8x8_t lpf_mask(uint8x8_t p3q3, uint8x8_t p2q2, uint8x8_t p1q1,  in lpf_mask()
    22  uint8x8_t p0q0, const uint8_t blimit,  in lpf_mask()
    28  uint8x8_t mask_8x8, temp_8x8;  in lpf_mask()
    29  const uint8x8_t limit_8x8 = vdup_n_u8(limit);  in lpf_mask()
    55  static INLINE uint8x8_t lpf_mask2(uint8x8_t p1q1, uint8x8_t p0q0,  in lpf_mask2()
    61  const uint8x8_t limit_8x8 = vdup_n_u8(limit);  in lpf_mask2()
    62  uint8x8_t mask_8x8, temp_8x8;  in lpf_mask2()
    85  static INLINE uint8x8_t lpf_flat_mask4(uint8x8_t p3q3, uint8x8_t p2q2,  in lpf_flat_mask4()
    86  uint8x8_t p1q1, uint8x8_t p0q0) {  in lpf_flat_mask4()
    87  const uint8x8_t thresh_8x8 = vdup_n_u8(1);  // for bd==8 threshold is always 1  in lpf_flat_mask4()
    [all …]
|
/external/XNNPACK/src/qu8-gavgpool/ |
D | 7p7x-minmax-neon-c8.c |
    44  const uint8x8_t vi0 = vld1_u8(i0); i0 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    45  const uint8x8_t vi1 = vld1_u8(i1); i1 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    46  const uint8x8_t vi2 = vld1_u8(i2); i2 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    47  const uint8x8_t vi3 = vld1_u8(i3); i3 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    48  const uint8x8_t vi4 = vld1_u8(i4); i4 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    49  const uint8x8_t vi5 = vld1_u8(i5); i5 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    50  const uint8x8_t vi6 = vld1_u8(i6); i6 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    79  const uint8x8_t vi0 = vld1_u8(i0); i0 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    80  const uint8x8_t vi1 = vld1_u8(i1); i1 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    81  const uint8x8_t vi2 = vld1_u8(i2); i2 += 8;  in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8()
    [all …]
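The global-average-pool pass sums seven rows at a time: widen once with vaddl_u8, then fold the remaining rows in with vaddw_u8. Seven uint8 rows sum to at most 7 * 255 = 1785, so 16-bit lanes cannot overflow. A sketch mirroring the vi0..vi6 loads above (sum7_rows is a hypothetical helper):

```c
#include <arm_neon.h>

static inline uint16x8_t sum7_rows(const uint8_t* i0, const uint8_t* i1,
                                   const uint8_t* i2, const uint8_t* i3,
                                   const uint8_t* i4, const uint8_t* i5,
                                   const uint8_t* i6) {
  uint16x8_t vsum = vaddl_u8(vld1_u8(i0), vld1_u8(i1));  // widen 8 -> 16 bits
  vsum = vaddw_u8(vsum, vld1_u8(i2));
  vsum = vaddw_u8(vsum, vld1_u8(i3));
  vsum = vaddw_u8(vsum, vld1_u8(i4));
  vsum = vaddw_u8(vsum, vld1_u8(i5));
  vsum = vaddw_u8(vsum, vld1_u8(i6));
  return vsum;
}
```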
|
/external/XNNPACK/src/qu8-avgpool/ |
D | 9p8x-minmax-neon-c8.c |
    42  const uint8x8_t voutput_min = vld1_dup_u8(&params->neon.output_min);  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    43  const uint8x8_t voutput_max = vld1_dup_u8(&params->neon.output_max);  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    95  const uint8x8_t vi0 = vld1_u8(i0); i0 += 8;  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    96  const uint8x8_t vi1 = vld1_u8(i1); i1 += 8;  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    97  const uint8x8_t vi2 = vld1_u8(i2); i2 += 8;  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    98  const uint8x8_t vi3 = vld1_u8(i3); i3 += 8;  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    99  const uint8x8_t vi4 = vld1_u8(i4); i4 += 8;  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    100  const uint8x8_t vi5 = vld1_u8(i5); i5 += 8;  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    101  const uint8x8_t vi6 = vld1_u8(i6); i6 += 8;  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    102  const uint8x8_t vi7 = vld1_u8(i7); i7 += 8;  in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
    [all …]
|
/external/webp/src/dsp/ |
D | upsampling_neon.c |
    32  const uint8x8_t a = vld1_u8(r1 + 0);  \
    33  const uint8x8_t b = vld1_u8(r1 + 1);  \
    34  const uint8x8_t c = vld1_u8(r2 + 0);  \
    35  const uint8x8_t d = vld1_u8(r2 + 1);  \
    45  const uint8x8_t diag2 = vshrn_n_u16(al, 3);  \
    46  const uint8x8_t diag1 = vshrn_n_u16(bl, 3);  \
    48  const uint8x8_t A = vrhadd_u8(a, diag1);  \
    49  const uint8x8_t B = vrhadd_u8(b, diag2);  \
    50  const uint8x8_t C = vrhadd_u8(c, diag2);  \
    51  const uint8x8_t D = vrhadd_u8(d, diag1);  \
    [all …]
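WebP's "fancy" chroma upsampler weights the four neighbours of each output pixel as A = (9a + 3b + 3c + d + 8) >> 4. The macro above computes this cheaply through shared diag1/diag2 terms and vrhadd_u8; the direct 16-bit form below is a hedged sketch of the same arithmetic (fancy_upsample_px is a made-up name):

```c
#include <arm_neon.h>

// One output of the fancy 2x upsampler: (9a + 3b + 3c + d + 8) >> 4.
// The worst-case sum is 16 * 255 = 4080, so uint16 lanes suffice.
static inline uint8x8_t fancy_upsample_px(uint8x8_t a, uint8x8_t b,
                                          uint8x8_t c, uint8x8_t d) {
  const uint16x8_t bc = vaddl_u8(b, c);              // b + c
  uint16x8_t acc = vaddw_u8(vshlq_n_u16(bc, 1), d);  // 2(b + c) + d
  acc = vaddq_u16(acc, bc);                          // 3(b + c) + d
  acc = vmlal_u8(acc, a, vdup_n_u8(9));              // + 9a
  return vrshrn_n_u16(acc, 4);                       // (sum + 8) >> 4
}
```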
|
/external/libjpeg-turbo/simd/arm/aarch32/ |
D | jchuff-neon.c |
    99  uint8x8_t row0_nbits = vsub_u8(vdup_n_u8(16),  in jsimd_huff_encode_one_block_neon()
    101  uint8x8_t row1_nbits = vsub_u8(vdup_n_u8(16),  in jsimd_huff_encode_one_block_neon()
    103  uint8x8_t row2_nbits = vsub_u8(vdup_n_u8(16),  in jsimd_huff_encode_one_block_neon()
    105  uint8x8_t row3_nbits = vsub_u8(vdup_n_u8(16),  in jsimd_huff_encode_one_block_neon()
    185  uint8x8_t row4_nbits = vsub_u8(vdup_n_u8(16),  in jsimd_huff_encode_one_block_neon()
    187  uint8x8_t row5_nbits = vsub_u8(vdup_n_u8(16),  in jsimd_huff_encode_one_block_neon()
    189  uint8x8_t row6_nbits = vsub_u8(vdup_n_u8(16),  in jsimd_huff_encode_one_block_neon()
    191  uint8x8_t row7_nbits = vsub_u8(vdup_n_u8(16),  in jsimd_huff_encode_one_block_neon()
    226  uint8x8_t row0_nbits_gt0 = vcgt_u8(row0_nbits, vdup_n_u8(0));  in jsimd_huff_encode_one_block_neon()
    227  uint8x8_t row1_nbits_gt0 = vcgt_u8(row1_nbits, vdup_n_u8(0));  in jsimd_huff_encode_one_block_neon()
    [all …]
|
/external/libjpeg-turbo/simd/arm/aarch64/ |
D | jchuff-neon.c |
    191  uint8x8_t abs_row0_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row0),  in jsimd_huff_encode_one_block_neon()
    193  uint8x8_t abs_row1_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row1),  in jsimd_huff_encode_one_block_neon()
    195  uint8x8_t abs_row2_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row2),  in jsimd_huff_encode_one_block_neon()
    197  uint8x8_t abs_row3_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row3),  in jsimd_huff_encode_one_block_neon()
    199  uint8x8_t abs_row4_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row4),  in jsimd_huff_encode_one_block_neon()
    201  uint8x8_t abs_row5_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row5),  in jsimd_huff_encode_one_block_neon()
    203  uint8x8_t abs_row6_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row6),  in jsimd_huff_encode_one_block_neon()
    205  uint8x8_t abs_row7_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row7),  in jsimd_huff_encode_one_block_neon()
    209  const uint8x8_t bitmap_mask =  in jsimd_huff_encode_one_block_neon()
    221  uint8x8_t bitmap_rows_10 = vpadd_u8(abs_row1_gt0, abs_row0_gt0);  in jsimd_huff_encode_one_block_neon()
    [all …]
|