; Codegen test for the ARM NEON vld2dup / vld3dup / vld4dup intrinsics.
;
; Every function in this file passes its pointer argument to one of the
; intrinsics and returns the resulting aggregate unchanged. The directives
; placed ahead of each function pin the load instruction(s) expected in the
; llc output for that function.
;
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -verify-machineinstrs \
; RUN:     -asm-verbose=false | FileCheck %s

; Aggregates of two, three and four 64-bit vectors.
%struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

%struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

%struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

%struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

; Aggregates of two, three and four 128-bit vectors.
%struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

%struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

%struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

; Intrinsic declarations. Each takes an i8* and an i32. In every call in this
; file the i32 equals the element size in bytes (1, 2, 4 or 8) -- presumably
; the alignment operand; TODO confirm against the intrinsic definition.

; Variants returning aggregates of 64-bit vectors.
declare %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0i8(i8*, i32)
declare %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0i8(i8*, i32)
declare %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0i8(i8*, i32)
declare %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0i8(i8*, i32)

declare %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0i8(i8*, i32)
declare %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0i8(i8*, i32)
declare %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0i8(i8*, i32)
declare %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0i8(i8*, i32)

declare %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0i8(i8*, i32)
declare %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0i8(i8*, i32)
declare %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0i8(i8*, i32)
declare %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0i8(i8*, i32)

; Variants returning aggregates of 128-bit vectors.
declare %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0i8(i8*, i32)
declare %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0i8(i8*, i32)
declare %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0i8(i8*, i32)

declare %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0i8(i8*, i32)
declare %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0i8(i8*, i32)
declare %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0i8(i8*, i32)

declare %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0i8(i8*, i32)
declare %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0i8(i8*, i32)
declare %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0i8(i8*, i32)

; ---------------------------------------------------------------------------
; 64-bit vector results.
;
; The 8/16/32-bit element tests expect a single vldN using the `dN[]` register
; list form; the i64 tests expect a plain multi-register vld1.64 instead.
;
; NOTE(review): the expected base register is r0 for the two-vector results
; and r1 for everything else; presumably the larger aggregates are returned
; through a hidden pointer occupying r0 -- confirm against the calling
; convention for this triple.
; ---------------------------------------------------------------------------

; CHECK-LABEL: test_vld2_dup_u16
; CHECK: vld2.16 {d16[], d17[]}, [r0]
define %struct.uint16x4x2_t @test_vld2_dup_u16(i8* %src) {
entry:
  %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0i8(i8* %src, i32 2)
  ret %struct.uint16x4x2_t %tmp
}

; CHECK-LABEL: test_vld2_dup_u32
; CHECK: vld2.32 {d16[], d17[]}, [r0]
define %struct.uint32x2x2_t @test_vld2_dup_u32(i8* %src) {
entry:
  %tmp = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0i8(i8* %src, i32 4)
  ret %struct.uint32x2x2_t %tmp
}

; CHECK-LABEL: test_vld2_dup_u64
; CHECK: vld1.64 {d16, d17}, [r0:64]
define %struct.uint64x1x2_t @test_vld2_dup_u64(i8* %src) {
entry:
  %tmp = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0i8(i8* %src, i32 8)
  ret %struct.uint64x1x2_t %tmp
}

; CHECK-LABEL: test_vld2_dup_u8
; CHECK: vld2.8 {d16[], d17[]}, [r0]
define %struct.uint8x8x2_t @test_vld2_dup_u8(i8* %src) {
entry:
  %tmp = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0i8(i8* %src, i32 1)
  ret %struct.uint8x8x2_t %tmp
}

; CHECK-LABEL: test_vld3_dup_u16
; CHECK: vld3.16 {d16[], d17[], d18[]}, [r1]
define %struct.uint16x4x3_t @test_vld3_dup_u16(i8* %src) {
entry:
  %tmp = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0i8(i8* %src, i32 2)
  ret %struct.uint16x4x3_t %tmp
}

; CHECK-LABEL: test_vld3_dup_u32
; CHECK: vld3.32 {d16[], d17[], d18[]}, [r1]
define %struct.uint32x2x3_t @test_vld3_dup_u32(i8* %src) {
entry:
  %tmp = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0i8(i8* %src, i32 4)
  ret %struct.uint32x2x3_t %tmp
}

; NOTE(review): unlike the two- and four-vector i64 cases, the expected
; address here carries no :64 qualifier -- confirm this is intentional.
; CHECK-LABEL: test_vld3_dup_u64
; CHECK: vld1.64 {d16, d17, d18}, [r1]
define %struct.uint64x1x3_t @test_vld3_dup_u64(i8* %src) {
entry:
  %tmp = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0i8(i8* %src, i32 8)
  ret %struct.uint64x1x3_t %tmp
}

; CHECK-LABEL: test_vld3_dup_u8
; CHECK: vld3.8 {d16[], d17[], d18[]}, [r1]
define %struct.uint8x8x3_t @test_vld3_dup_u8(i8* %src) {
entry:
  %tmp = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0i8(i8* %src, i32 1)
  ret %struct.uint8x8x3_t %tmp
}

; CHECK-LABEL: test_vld4_dup_u16
; CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]
define %struct.uint16x4x4_t @test_vld4_dup_u16(i8* %src) {
entry:
  %tmp = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0i8(i8* %src, i32 2)
  ret %struct.uint16x4x4_t %tmp
}

; CHECK-LABEL: test_vld4_dup_u32
; CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r1]
define %struct.uint32x2x4_t @test_vld4_dup_u32(i8* %src) {
entry:
  %tmp = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0i8(i8* %src, i32 4)
  ret %struct.uint32x2x4_t %tmp
}

; CHECK-LABEL: test_vld4_dup_u64
; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:64]
define %struct.uint64x1x4_t @test_vld4_dup_u64(i8* %src) {
entry:
  %tmp = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0i8(i8* %src, i32 8)
  ret %struct.uint64x1x4_t %tmp
}

; CHECK-LABEL: test_vld4_dup_u8
; CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1]
define %struct.uint8x8x4_t @test_vld4_dup_u8(i8* %src) {
entry:
  %tmp = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0i8(i8* %src, i32 1)
  ret %struct.uint8x8x4_t %tmp
}

; ---------------------------------------------------------------------------
; 128-bit vector results.
;
; Each test expects two vldN instructions from the same base register: the
; first lists the even-numbered d registers, the second the odd-numbered ones.
; ---------------------------------------------------------------------------

; CHECK-LABEL: test_vld2q_dup_u16
; CHECK: vld2.16 {d16[], d18[]}, [r1]
; CHECK: vld2.16 {d17[], d19[]}, [r1]
define %struct.uint16x8x2_t @test_vld2q_dup_u16(i8* %src) {
entry:
  %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0i8(i8* %src, i32 2)
  ret %struct.uint16x8x2_t %tmp
}

; CHECK-LABEL: test_vld2q_dup_u32
; CHECK: vld2.32 {d16[], d18[]}, [r1]
; CHECK: vld2.32 {d17[], d19[]}, [r1]
define %struct.uint32x4x2_t @test_vld2q_dup_u32(i8* %src) {
entry:
  %tmp = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0i8(i8* %src, i32 4)
  ret %struct.uint32x4x2_t %tmp
}

; CHECK-LABEL: test_vld2q_dup_u8
; CHECK: vld2.8 {d16[], d18[]}, [r1]
; CHECK: vld2.8 {d17[], d19[]}, [r1]
define %struct.uint8x16x2_t @test_vld2q_dup_u8(i8* %src) {
entry:
  %tmp = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0i8(i8* %src, i32 1)
  ret %struct.uint8x16x2_t %tmp
}

; CHECK-LABEL: test_vld3q_dup_u16
; CHECK: vld3.16 {d16[], d18[], d20[]}, [r1]
; CHECK: vld3.16 {d17[], d19[], d21[]}, [r1]
define %struct.uint16x8x3_t @test_vld3q_dup_u16(i8* %src) {
entry:
  %tmp = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0i8(i8* %src, i32 2)
  ret %struct.uint16x8x3_t %tmp
}

; CHECK-LABEL: test_vld3q_dup_u32
; CHECK: vld3.32 {d16[], d18[], d20[]}, [r1]
; CHECK: vld3.32 {d17[], d19[], d21[]}, [r1]
; Definition of test_vld3q_dup_u32; the directives that verify it immediately
; precede this point in the file.
define %struct.uint32x4x3_t @test_vld3q_dup_u32(i8* %src) {
entry:
  %tmp = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0i8(i8* %src, i32 4)
  ret %struct.uint32x4x3_t %tmp
}

; The remaining tests return aggregates of 128-bit vectors. Each expects two
; vldN instructions from r1: the first lists the even-numbered d registers,
; the second the odd-numbered ones. The trailing i32 passed to each intrinsic
; equals the element size in bytes (presumably the alignment operand -- TODO
; confirm against the intrinsic definition).

; CHECK-LABEL: test_vld3q_dup_u8
; CHECK: vld3.8 {d16[], d18[], d20[]}, [r1]
; CHECK: vld3.8 {d17[], d19[], d21[]}, [r1]
define %struct.uint8x16x3_t @test_vld3q_dup_u8(i8* %src) {
entry:
  %tmp = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0i8(i8* %src, i32 1)
  ret %struct.uint8x16x3_t %tmp
}

; CHECK-LABEL: test_vld4q_dup_u16
; CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK: vld4.16 {d17[], d19[], d21[], d23[]}, [r1]
define %struct.uint16x8x4_t @test_vld4q_dup_u16(i8* %src) {
entry:
  %tmp = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0i8(i8* %src, i32 2)
  ret %struct.uint16x8x4_t %tmp
}

; CHECK-LABEL: test_vld4q_dup_u32
; CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK: vld4.32 {d17[], d19[], d21[], d23[]}, [r1]
define %struct.uint32x4x4_t @test_vld4q_dup_u32(i8* %src) {
entry:
  %tmp = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0i8(i8* %src, i32 4)
  ret %struct.uint32x4x4_t %tmp
}

; CHECK-LABEL: test_vld4q_dup_u8
; CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK: vld4.8 {d17[], d19[], d21[], d23[]}, [r1]
define %struct.uint8x16x4_t @test_vld4q_dup_u8(i8* %src) {
entry:
  %tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0i8(i8* %src, i32 1)
  ret %struct.uint8x16x4_t %tmp
}