/external/libgav1/libgav1/src/dsp/arm/ |
D | distance_weighted_blend_neon.cc | 197 inline uint16x4x2_t ComputeWeightedAverage8(const uint16x4x2_t pred0, in ComputeWeightedAverage8() 198 const uint16x4x2_t pred1, in ComputeWeightedAverage8() 209 uint16x4x2_t result; in ComputeWeightedAverage8() 250 inline uint16x4x2_t LoadU16x4_x2(uint16_t const* ptr) { in LoadU16x4_x2() 251 uint16x4x2_t x; in LoadU16x4_x2() 284 const uint16x4x2_t src0 = LoadU16x4_x2(pred_0); in DistanceWeightedBlend_NEON() 285 const uint16x4x2_t src1 = LoadU16x4_x2(pred_1); in DistanceWeightedBlend_NEON() 286 const uint16x4x2_t res = ComputeWeightedAverage8(src0, src1, weights); in DistanceWeightedBlend_NEON()
|
D | common_neon.h | 476 const uint16x4x2_t b = vtrn_u16(a[0], a[1]); in Transpose4x4() 480 const uint16x4x2_t c = vtrn_u16(a[2], a[3]); in Transpose4x4() 505 const uint16x4x2_t c = in Transpose4x4() 531 const uint16x4x2_t c0 = in Transpose8x4() 533 const uint16x4x2_t c1 = in Transpose8x4()
|
D | intrapred_directional_neon.cc | 856 const uint16x4x2_t c0 = vtrn_u16(vreinterpret_u16_u8(b0.val[0]), in DirectionalIntraPredictorZone3_NEON() 858 const uint16x4x2_t c1 = vtrn_u16(vreinterpret_u16_u8(b0.val[1]), in DirectionalIntraPredictorZone3_NEON() 937 inline void LoadEdgeVals(uint16x4x2_t* dest, const uint16_t* const source, in LoadEdgeVals() 990 uint16x4x2_t sampled_top_row; in DirectionalZone1_4xH() 1241 uint16x4x2_t sampled_left_col; in DirectionalZone3_4x4()
|
D | inverse_transform_neon.cc | 180 uint16x4x2_t b0 = vtrn_u16(vget_low_u16(in[0]), vget_low_u16(in[1])); in Transpose4x8To8x4() 181 uint16x4x2_t b1 = vtrn_u16(vget_low_u16(in[2]), vget_low_u16(in[3])); in Transpose4x8To8x4() 182 uint16x4x2_t b2 = vtrn_u16(vget_low_u16(in[4]), vget_low_u16(in[5])); in Transpose4x8To8x4() 183 uint16x4x2_t b3 = vtrn_u16(vget_low_u16(in[6]), vget_low_u16(in[7])); in Transpose4x8To8x4()
|
/external/libaom/libaom/av1/common/arm/ |
D | transpose_neon.h | 92 const uint16x4x2_t c0 = in transpose_u8_8x4() 94 const uint16x4x2_t c1 = in transpose_u8_8x4() 111 const uint16x4x2_t b0 = in transpose_u8_4x4() 166 const uint16x4x2_t c0 = vtrn_u16(vreinterpret_u16_u32(b0.val[0]), in transpose_u8_4x8() 168 const uint16x4x2_t c1 = vtrn_u16(vreinterpret_u16_u32(b1.val[0]), in transpose_u8_4x8() 213 uint16x4x2_t b0 = vtrn_u16(*a0, *a1); in transpose_u16_4x8() 214 uint16x4x2_t b1 = vtrn_u16(*a2, *a3); in transpose_u16_4x8() 215 uint16x4x2_t b2 = vtrn_u16(*a4, *a5); in transpose_u16_4x8() 216 uint16x4x2_t b3 = vtrn_u16(*a6, *a7); in transpose_u16_4x8()
|
D | blend_a64_vmask_neon.c | 116 const uint16x4x2_t m_trn = in aom_blend_a64_vmask_neon() 121 const uint16x4x2_t max_minus_m_trn = vtrn_u16( in aom_blend_a64_vmask_neon()
|
/external/XNNPACK/src/x8-zip/ |
D | xm-neon.c | 44 …const uint16x4x2_t vxyzw_lo = vzip_u16(vreinterpret_u16_u8(vxy.val[0]), vreinterpret_u16_u8(vzw.va… in xnn_x8_zip_xm_ukernel__neon() 45 …const uint16x4x2_t vxyzw_hi = vzip_u16(vreinterpret_u16_u8(vxy.val[1]), vreinterpret_u16_u8(vzw.va… in xnn_x8_zip_xm_ukernel__neon() 87 …const uint16x4x2_t vxyzw_lo = vzip_u16(vreinterpret_u16_u8(vxy.val[0]), vreinterpret_u16_u8(vzw.va… in xnn_x8_zip_xm_ukernel__neon() 88 …const uint16x4x2_t vxyzw_hi = vzip_u16(vreinterpret_u16_u8(vxy.val[1]), vreinterpret_u16_u8(vzw.va… in xnn_x8_zip_xm_ukernel__neon()
|
/external/libjpeg-turbo/simd/arm/aarch32/ |
D | jccolext-neon.c | 60 const uint16x4x2_t consts = vld1_u16_x2(jsimd_rgb_ycc_neon_consts); in jsimd_rgb_ycc_convert_neon() 65 const uint16x4x2_t consts = { { consts1, consts2 } }; in jsimd_rgb_ycc_convert_neon()
|
/external/llvm-project/llvm/test/CodeGen/ARM/ |
D | arm-vlddup.ll | 4 %struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> } 33 declare %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0i8(i8*, i32) 61 define %struct.uint16x4x2_t @test_vld2_dup_u16(i8* %src) { 63 %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* %src, i32 2) 64 ret %struct.uint16x4x2_t %tmp
|
D | arm-vld1.ll | 4 %struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> } 36 declare %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0i16(i16*) nounwind readonly 70 define %struct.uint16x4x2_t @test_vld1_u16_x2(i16* %a) nounwind { 71 %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0i16(i16* %a) 72 ret %struct.uint16x4x2_t %tmp
|
D | arm-vst1.ll | 4 ; %struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> } 36 %struct.uint16x4x2_t = type { [2 x <4 x i16>] } 95 define void @test_vst1_u16_x2(i16* %a, %struct.uint16x4x2_t %b) nounwind { 97 %b0 = extractvalue %struct.uint16x4x2_t %b, 0, 0 98 %b1 = extractvalue %struct.uint16x4x2_t %b, 0, 1
|
/external/libvpx/libvpx/vp8/common/arm/neon/ |
D | loopfiltersimpleverticaledge_neon.c | 114 const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]), in read_4x8() 116 const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]), in read_4x8()
|
D | vp8_loopfilter_neon.c | 271 const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]), in write_4x8() 273 const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]), in write_4x8()
|
/external/libvpx/libvpx/vpx_dsp/arm/ |
D | transpose_neon.h | 75 const uint16x4x2_t b0 = in transpose_u8_4x4() 225 const uint16x4x2_t c0 = vtrn_u16(vreinterpret_u16_u32(b0.val[0]), in transpose_u8_4x8() 227 const uint16x4x2_t c1 = vtrn_u16(vreinterpret_u16_u32(b1.val[0]), in transpose_u8_4x8() 418 const uint16x4x2_t c0 = in transpose_u8_8x4() 420 const uint16x4x2_t c1 = in transpose_u8_8x4()
|
/external/llvm-project/clang/test/CodeGen/ |
D | arm-neon-vld.c | 493 uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) { in test_vld1_u16_x2() 1541 void test_vld2_dup_u16(uint16x4x2_t *dest, const uint16_t *src) { in test_vld2_dup_u16()
|
D | aarch64-neon-perm.c | 991 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { in test_vuzp_u16() 1375 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { in test_vzip_u16() 1759 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { in test_vtrn_u16()
|
D | arm-neon-vst.c | 803 void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) { in test_vst1_u16_x2()
|
D | aarch64-neon-ldst-one.c | 1529 uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { in test_vld2_lane_u16() 4783 void test_vst2_lane_u16(uint16_t *a, uint16x4x2_t b) { in test_vst2_lane_u16()
|
/external/clang/test/CodeGen/ |
D | aarch64-neon-perm.c | 1014 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { in test_vuzp_u16() 1478 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { in test_vzip_u16() 1942 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { in test_vtrn_u16()
|
D | aarch64-neon-ldst-one.c | 581 uint16x4x2_t test_vld2_dup_u16(uint16_t *a) { in test_vld2_dup_u16() 2707 uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { in test_vld2_lane_u16() 5961 void test_vst2_lane_u16(uint16_t *a, uint16x4x2_t b) { in test_vst2_lane_u16()
|
/external/libaom/libaom/aom_dsp/simd/ |
D | v64_intrinsics_arm.h | 490 uint16x4x2_t r = vuzp_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)); in v64_unziplo_16() 500 uint16x4x2_t r = vuzp_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)); in v64_unziphi_16()
|
/external/llvm/test/CodeGen/AArch64/ |
D | neon-perm.ll | 7 %struct.uint16x4x2_t = type { [2 x <4 x i16>] } 2526 define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) { 2533 %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 2534 %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 2535 ret %struct.uint16x4x2_t %.fca.0.1.insert 2742 define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) { 2749 %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 2750 %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 2751 ret %struct.uint16x4x2_t %.fca.0.1.insert 2958 define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) { [all …]
|
/external/llvm-project/llvm/test/CodeGen/AArch64/ |
D | neon-perm.ll | 7 %struct.uint16x4x2_t = type { [2 x <4 x i16>] } 2526 define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) { 2533 %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 2534 %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 2535 ret %struct.uint16x4x2_t %.fca.0.1.insert 2742 define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) { 2749 %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 2750 %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 2751 ret %struct.uint16x4x2_t %.fca.0.1.insert 2958 define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) { [all …]
|
/external/libjpeg-turbo/simd/arm/ |
D | jidctred-neon.c | 472 uint16x4x2_t output_01_23 = { { in jsimd_idct_4x4_neon()
|
/external/libhevc/encoder/arm/ |
D | ihevce_coarse_layer_sad_neon.c | 346 uint16x4x2_t tmp_a; in hme_store_4x4_sads_high_quality_neon()
|