/external/libvpx/libvpx/vpx_dsp/arm/ |
D | transpose_neon.h | 58 static INLINE uint16x8x2_t vpx_vtrnq_u64_to_u16(uint32x4_t a0, uint32x4_t a1) { in vpx_vtrnq_u64_to_u16() 59 uint16x8x2_t b0; in vpx_vtrnq_u64_to_u16() 184 const uint16x8x2_t d0 = in transpose_u16_4x4q() 442 const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1); in transpose_u16_8x4() 443 const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3); in transpose_u16_8x4() 546 const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]), in transpose_u8_8x8() 548 const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]), in transpose_u8_8x8() 665 const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1); in transpose_u16_8x8() 666 const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3); in transpose_u16_8x8() 667 const uint16x8x2_t b2 = vtrnq_u16(*a4, *a5); in transpose_u16_8x8() [all …]
|
D | highbd_intrapred_neon.c | 146 const uint16x8x2_t ref_u16 = vld2q_u16(ref); in dc_sum_16() 155 uint16x8x2_t dc_dup; in dc_store_16x16() 166 const uint16x8x2_t a = vld2q_u16(above); in vpx_highbd_dc_predictor_16x16_neon() 167 const uint16x8x2_t l = vld2q_u16(left); in vpx_highbd_dc_predictor_16x16_neon() 225 uint16x8x2_t dc_dup; in dc_store_32x32() 728 const uint16x8x2_t row = vld2q_u16(above); in vpx_highbd_v_predictor_16x16_neon() 741 const uint16x8x2_t row0 = vld2q_u16(above); in vpx_highbd_v_predictor_32x32_neon() 742 const uint16x8x2_t row1 = vld2q_u16(above + 16); in vpx_highbd_v_predictor_32x32_neon()
|
/external/libgav1/libgav1/src/dsp/arm/ |
D | common_neon.h | 440 inline uint16x8x2_t VtrnqU64(uint32x4_t a0, uint32x4_t a1) { in VtrnqU64() 441 uint16x8x2_t b0; in VtrnqU64() 570 const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]), in Transpose8x8() 572 const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]), in Transpose8x8() 641 const uint16x8x2_t b0 = vtrnq_u16(a[0], a[1]); in Transpose8x8() 642 const uint16x8x2_t b1 = vtrnq_u16(a[2], a[3]); in Transpose8x8() 643 const uint16x8x2_t b2 = vtrnq_u16(a[4], a[5]); in Transpose8x8() 644 const uint16x8x2_t b3 = vtrnq_u16(a[6], a[7]); in Transpose8x8() 655 const uint16x8x2_t d0 = VtrnqU64(c0.val[0], c2.val[0]); in Transpose8x8() 656 const uint16x8x2_t d1 = VtrnqU64(c1.val[0], c3.val[0]); in Transpose8x8() [all …]
|
D | loop_restoration_neon.cc | 43 inline uint16x8_t VshrU128(const uint16x8x2_t src) { in VshrU128() 584 inline void Prepare3_16(const uint16x8x2_t src, uint16x4_t low[3], in Prepare3_16() 606 inline void Prepare5_16(const uint16x8x2_t src, uint16x4_t low[5], in Prepare5_16() 688 inline uint32x4x2_t Sum3WHorizontal(const uint16x8x2_t src) { in Sum3WHorizontal() 706 inline uint32x4x2_t Sum5WHorizontal(const uint16x8x2_t src) { in Sum5WHorizontal() 723 void SumHorizontal(const uint8x8x2_t src, const uint16x8x2_t sq, in SumHorizontal() 752 inline uint32x4x2_t Sum343W(const uint16x8x2_t src) { in Sum343W() 777 inline uint32x4x2_t Sum565W(const uint16x8x2_t src) { in Sum565W() 786 inline void Store343_444(const uint8x8x2_t ma3, const uint16x8x2_t b3, in Store343_444() 818 inline void Store343_444(const uint8x8x2_t ma3, const uint16x8x2_t b3, in Store343_444() [all …]
|
D | loop_filter_neon.cc | 1000 const uint16x8x2_t in02 = vtrnq_u16(vreinterpretq_u16_u8(in01.val[0]), in Vertical14_NEON() 1002 const uint16x8x2_t in13 = vtrnq_u16(vreinterpretq_u16_u8(in01.val[1]), in Vertical14_NEON() 1107 const uint16x8x2_t out02 = vtrnq_u16(vreinterpretq_u16_u8(p0q0_p4q4), in Vertical14_NEON() 1109 const uint16x8x2_t out13 = vtrnq_u16(vreinterpretq_u16_u8(p1q1_p5q5), in Vertical14_NEON()
|
D | intrapred_directional_neon.cc | 364 const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]), in DirectionalZone3_WxH() 366 const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]), in DirectionalZone3_WxH()
|
D | film_grain_neon.cc | 218 const uint16x8x2_t src = vld2q_u16(luma); in GetAverageLuma()
|
/external/libaom/libaom/av1/common/arm/ |
D | cfl_neon.c | 72 uint16x8x2_t sum; in cfl_luma_subsampling_420_lbd_neon() 98 uint16x8x2_t sum; in cfl_luma_subsampling_422_lbd_neon() 180 uint16x8x2_t sum; in cfl_luma_subsampling_420_hbd_neon() 205 const uint16x8x2_t top = vld2q_u16(input); in cfl_luma_subsampling_422_hbd_neon() 215 uint16x8x2_t result = { { vshlq_n_u16(hsum_0, 2), in cfl_luma_subsampling_422_hbd_neon() 236 uint16x8x2_t top = vld2q_u16(input); in cfl_luma_subsampling_444_hbd_neon() 525 static INLINE uint16x8x2_t clamp2q_s16(int16x8x2_t a, int16x8_t max) { in clamp2q_s16() 526 uint16x8x2_t result; in clamp2q_s16()
|
D | transpose_neon.h | 45 const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]), in transpose_u8_8x8() 47 const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]), in transpose_u8_8x8() 341 const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1); in transpose_u16_8x8() 342 const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3); in transpose_u16_8x8() 343 const uint16x8x2_t b2 = vtrnq_u16(*a4, *a5); in transpose_u16_8x8() 344 const uint16x8x2_t b3 = vtrnq_u16(*a6, *a7); in transpose_u16_8x8()
|
/external/swiftshader/third_party/llvm-7.0/llvm/test/CodeGen/ARM/ |
D | arm-vlddup.ll | 20 %struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> } 48 declare %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0i8(i8*, i32) 158 define %struct.uint16x8x2_t @test_vld2q_dup_u16(i8* %src) { 160 %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* %src, i32 2) 161 ret %struct.uint16x8x2_t %tmp
|
D | arm-vld1.ll | 20 %struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> } 52 declare %struct.uint16x8x2_t @llvm.arm.neon.vld1x2.v8i16.p0i16(i16*) nounwind readonly 154 define %struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) nounwind { 155 %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld1x2.v8i16.p0i16(i16* %a) 156 ret %struct.uint16x8x2_t %tmp
|
D | arm-vst1.ll | 20 ; %struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> } 48 %struct.uint16x8x2_t = type { [2 x <8 x i16>] } 227 define void @test_vst1q_u16_x2(i16* %a, %struct.uint16x8x2_t %b) nounwind { 229 %b0 = extractvalue %struct.uint16x8x2_t %b, 0, 0 230 %b1 = extractvalue %struct.uint16x8x2_t %b, 0, 1
|
/external/gemmlowp/internal/ |
D | pack_neon.h | 176 uint16x8x2_t src_lines_intertwined_2x[2 * kCells]; in Pack() 183 uint16x8x2_t src_lines_intertwined_4x[2 * kCells]; in Pack()
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-perm.c | 1245 uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { in test_vuzpq_u16() 1709 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { in test_vzipq_u16() 2173 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { in test_vtrnq_u16()
|
D | aarch64-neon-ldst-one.c | 335 uint16x8x2_t test_vld2q_dup_u16(uint16_t *a) { in test_vld2q_dup_u16() 2304 uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { in test_vld2q_lane_u16() 5626 void test_vst2q_lane_u16(uint16_t *a, uint16x8x2_t b) { in test_vst2q_lane_u16()
|
D | arm_neon_intrinsics.c | 4858 uint16x8x2_t test_vld2q_u16(uint16_t const * a) { in test_vld2q_u16() 5425 uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) { in test_vld2q_lane_u16() 18478 void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) { in test_vst2q_u16() 19000 void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) { in test_vst2q_lane_u16() 23019 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { in test_vtrnq_u16() 23559 uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { in test_vuzpq_u16() 23914 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { in test_vzipq_u16()
|
D | aarch64-neon-intrinsics.c | 10148 uint16x8x2_t test_vld2q_u16(uint16_t const *a) { in test_vld2q_u16() 11812 void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) { in test_vst2q_u16() 14015 uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) { in test_vld1q_u16_x2() 15503 void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) { in test_vst1q_u16_x2()
|
/external/webp/src/dsp/ |
D | dec_neon.c | 99 const uint16x8x2_t row02 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[0]), in Load4x16_NEON() 101 const uint16x8x2_t row13 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[1]), in Load4x16_NEON() 194 const uint16x8x2_t row02 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[0]), in Load8x8x2T_NEON() 196 const uint16x8x2_t row13 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[1]), in Load8x8x2T_NEON() 198 const uint16x8x2_t row46 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[0]), in Load8x8x2T_NEON() 200 const uint16x8x2_t row57 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[1]), in Load8x8x2T_NEON()
|
/external/libvpx/libvpx/vp8/common/arm/neon/ |
D | vp8_loopfilter_neon.c | 327 uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; in vp8_loop_filter_vertical_edge_y_neon() 439 uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; in vp8_loop_filter_vertical_edge_uv_neon()
|
D | mbloopfilter_neon.c | 299 uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; in vp8_mbloop_filter_vertical_edge_y_neon() 463 uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; in vp8_mbloop_filter_vertical_edge_uv_neon()
|
/external/llvm/test/CodeGen/AArch64/ |
D | neon-perm.ll | 16 %struct.uint16x8x2_t = type { [2 x <8 x i16>] } 2634 define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) { 2641 %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 2642 %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 2643 ret %struct.uint16x8x2_t %.fca.0.1.insert 2850 define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) { 2857 %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 2858 %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 2859 ret %struct.uint16x8x2_t %.fca.0.1.insert 3066 define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) { [all …]
|
/external/swiftshader/third_party/llvm-7.0/llvm/test/CodeGen/AArch64/ |
D | neon-perm.ll | 16 %struct.uint16x8x2_t = type { [2 x <8 x i16>] } 2634 define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) { 2641 %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 2642 %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 2643 ret %struct.uint16x8x2_t %.fca.0.1.insert 2850 define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) { 2857 %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 2858 %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 2859 ret %struct.uint16x8x2_t %.fca.0.1.insert 3066 define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) { [all …]
|
/external/libaom/libaom/aom_dsp/simd/ |
D | v128_intrinsics_arm.h | 524 uint16x8x2_t r = in v128_unziplo_16() 535 uint16x8x2_t r = in v128_unziphi_16()
|
/external/neon_2_sse/ |
D | NEON_2_SSE.h | 234 typedef struct int16x8x2_t uint16x8x2_t; typedef 1329 _NEON2SSESTORAGE uint16x8x2_t vld2q_u16(__transfersize(16) uint16_t const * ptr); // VLD2.16 {d0, d… 1433 _NEON2SSESTORAGE uint16x8x2_t vld2q_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x8x2… 1486 _NEON2SSESTORAGE void vst2q_u16_ptr(__transfersize(16) uint16_t * ptr, uint16x8x2_t * val); // VST2… 1552 _NEON2SSESTORAGE void vst2q_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x8x2_t * val, __co… 2231 _NEON2SSESTORAGE uint16x8x2_t vtrnq_u16(uint16x8_t a, uint16x8_t b); // VTRN.16 q0,q0 2250 _NEON2SSESTORAGE uint16x8x2_t vzipq_u16(uint16x8_t a, uint16x8_t b); // VZIP.16 q0,q0 2269 _NEON2SSESTORAGE uint16x8x2_t vuzpq_u16(uint16x8_t a, uint16x8_t b); // VUZP.16 q0,q0 9795 _NEON2SSESTORAGE uint16x8x2_t vld2q_u16(__transfersize(16) uint16_t const * ptr); // VLD2.16 {d0, d… 9796 _NEON2SSE_INLINE uint16x8x2_t vld2q_u16(__transfersize(16) uint16_t const * ptr) // VLD2.16 {d0, d2… [all …]
|