; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: < %s | FileCheck %s

; On future CPU with PC Relative addressing enabled, it is possible for the
; linker to optimize GOT indirect accesses. In order for the linker to do this
; the compiler needs to add a hint using the R_PPC64_PCREL_OPT relocation.
; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation
; correctly.
;
; In the expected output below, a hint shows up as two pieces:
;   * a local label (.Lpcrel, .Lpcrel0, .Lpcrel1, ...) placed directly after
;     the pld that loads the address from the GOT, and
;   * a `.reloc .LpcrelN-8,R_PPC64_PCREL_OPT,.-(.LpcrelN-8)` directive placed
;     directly before the load/store that uses that address.
; NOTE(review): `.LpcrelN-8` presumably points back at the pld itself (an
; 8-byte prefixed instruction) -- confirm against the ISA/ABI documents.
; Functions whose expected output has no such label/.reloc pair are the
; negative cases: the hint must NOT be emitted there.

@input8 = external local_unnamed_addr global i8, align 1
@output8 = external local_unnamed_addr global i8, align 1
@input16 = external local_unnamed_addr global i16, align 2
@output16 = external local_unnamed_addr global i16, align 2
@input32 = external global i32, align 4
@output32 = external local_unnamed_addr global i32, align 4
@input64 = external local_unnamed_addr global i64, align 8
@output64 = external local_unnamed_addr global i64, align 8
@input128 = external local_unnamed_addr global i128, align 16
@output128 = external local_unnamed_addr global i128, align 16
@inputf32 = external local_unnamed_addr global float, align 4
@outputf32 = external local_unnamed_addr global float, align 4
@inputf64 = external local_unnamed_addr global double, align 8
@outputf64 = external local_unnamed_addr global double, align 8
@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16
@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16
@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16
@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16
@ArrayIn = external global [10 x i32], align 4
@ArrayOut = external local_unnamed_addr global [10 x i32], align 4
@IntPtrIn = external local_unnamed_addr global i32*, align 8
@IntPtrOut = external local_unnamed_addr global i32*, align 8
@FuncPtrIn = external local_unnamed_addr global void (...)*, align 8
@FuncPtrOut = external local_unnamed_addr global void (...)*, align 8

; Copies an i8 from @input8 to @output8. A hint is expected for the pld of
; input8 (paired with the lbz) but not for the pld of output8.
define dso_local void @ReadWrite8() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWrite8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input8@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel:
; CHECK-NEXT: pld r4, output8@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
; CHECK-NEXT: lbz r3, 0(r3)
; In this test the stb r3, 0(r4) cannot be optimized because it
; uses the register r3, and that register is defined by lbz r3, 0(r3),
; which sits between the pld and the stb.
; CHECK-NEXT: stb r3, 0(r4)
; CHECK-NEXT: blr
entry:
  %0 = load i8, i8* @input8, align 1
  store i8 %0, i8* @output8, align 1
  ret void
}

; Copies an i16 from @input16 to @output16. A hint is expected for the pld of
; input16 (paired with the lhz) but not for the pld of output16.
define dso_local void @ReadWrite16() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWrite16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input16@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel0:
; CHECK-NEXT: pld r4, output16@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
; CHECK-NEXT: lhz r3, 0(r3)
; In this test the sth r3, 0(r4) cannot be optimized because it
; uses the register r3, and that register is defined by lhz r3, 0(r3),
; which sits between the pld and the sth.
; CHECK-NEXT: sth r3, 0(r4)
; CHECK-NEXT: blr
entry:
  %0 = load i16, i16* @input16, align 2
  store i16 %0, i16* @output16, align 2
  ret void
}

; Copies an i32 from @input32 to @output32. As in the 8- and 16-bit cases,
; only the pld of the input (paired with the lwz) is expected to carry a hint.
define dso_local void @ReadWrite32() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWrite32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel1:
; CHECK-NEXT: pld r4, output32@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
; CHECK-NEXT: lwz r3, 0(r3)
; CHECK-NEXT: stw r3, 0(r4)
; CHECK-NEXT: blr
entry:
  %0 = load i32, i32* @input32, align 4
  store i32 %0, i32* @output32, align 4
  ret void
}

; Copies an i64 from @input64 to @output64. Only the pld of the input (paired
; with the ld) is expected to carry a hint.
define dso_local void @ReadWrite64() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWrite64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input64@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel2:
; CHECK-NEXT: pld r4, output64@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
  %0 = load i64, i64* @input64, align 8
  store i64 %0, i64* @output64, align 8
  ret void
}

; FIXME: we should always convert X-Form instructions that use
; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
; Copies an i128 from @input128 to @output128. The expected code uses the
; X-Form lxvx/stxvx and contains no hint for either GOT access.
define dso_local void @ReadWrite128() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWrite128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input128@got@pcrel(0), 1
; CHECK-NEXT: lxvx vs0, 0, r3
; CHECK-NEXT: pld r3, output128@got@pcrel(0), 1
; CHECK-NEXT: stxvx vs0, 0, r3
; CHECK-NEXT: blr
entry:
  %0 = load i128, i128* @input128, align 16
  store i128 %0, i128* @output128, align 16
  ret void
}

; Loads a float from @inputf32, adds a constant and stores the sum to
; @outputf32. The pld of inputf32 is hinted and paired with the lfs; an
; unrelated xxspltidp sits between the label and the .reloc. The pld of
; outputf32 is expected to have no hint.
define dso_local void @ReadWritef32() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWritef32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputf32@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel3:
; CHECK-NEXT: xxspltidp vs1, 1078103900
; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
; CHECK-NEXT: lfs f0, 0(r3)
; CHECK-NEXT: pld r3, outputf32@got@pcrel(0), 1
; CHECK-NEXT: xsaddsp f0, f0, f1
; CHECK-NEXT: stfs f0, 0(r3)
; CHECK-NEXT: blr
entry:
  %0 = load float, float* @inputf32, align 4
  %add = fadd float %0, 0x400851EB80000000
  store float %add, float* @outputf32, align 4
  ret void
}

; Loads a double from @inputf64, adds a constant and stores the sum to
; @outputf64. The pld of inputf64 is hinted and paired with the lfd; the plfd
; of the constant (.LCPI6_0) sits between the label and the .reloc. The pld of
; outputf64 is expected to have no hint.
define dso_local void @ReadWritef64() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWritef64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputf64@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel4:
; CHECK-NEXT: plfd f1, .LCPI6_0@PCREL(0), 1
; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: pld r3, outputf64@got@pcrel(0), 1
; CHECK-NEXT: xsadddp f0, f0, f1
; CHECK-NEXT: stfd f0, 0(r3)
; CHECK-NEXT: blr
entry:
  %0 = load double, double* @inputf64, align 8
  %add = fadd double %0, 6.800000e+00
  store double %add, double* @outputf64, align 8
  ret void
}

; FIXME: we should always convert X-Form instructions that use
; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
; Loads a <4 x i32> from @inputVi32, replaces element 1 with 45 and stores the
; result to @outputVi32. The expected code uses the X-Form lxvx/stxvx and
; contains no hint for either GOT access.
define dso_local void @ReadWriteVi32() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWriteVi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1
; CHECK-NEXT: li r4, 45
; CHECK-NEXT: mtfprwz f1, r4
; CHECK-NEXT: lxvx vs0, 0, r3
; CHECK-NEXT: pld r3, outputVi32@got@pcrel(0), 1
; CHECK-NEXT: xxinsertw vs0, vs1, 8
; CHECK-NEXT: stxvx vs0, 0, r3
; CHECK-NEXT: blr
entry:
  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
  %vecins = insertelement <4 x i32> %0, i32 45, i32 1
  store <4 x i32> %vecins, <4 x i32>* @outputVi32, align 16
  ret void
}

; Copies a <2 x i64> from @inputVi64 to @outputVi64. The expected code uses
; lxvx/stxvx and contains no hint for either GOT access.
define dso_local void @ReadWriteVi64() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWriteVi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputVi64@got@pcrel(0), 1
; CHECK-NEXT: lxvx vs0, 0, r3
; CHECK-NEXT: pld r3, outputVi64@got@pcrel(0), 1
; CHECK-NEXT: stxvx vs0, 0, r3
; CHECK-NEXT: blr
entry:
  %0 = load <2 x i64>, <2 x i64>* @inputVi64, align 16
  store <2 x i64> %0, <2 x i64>* @outputVi64, align 16
  ret void
}

; Reads element 7 of @ArrayIn, adds 42 and writes element 2 of @ArrayOut.
; The pld of ArrayIn is hinted and paired with a load that has a non-zero
; displacement (28); the pld of ArrayOut is expected to have no hint.
define dso_local void @ReadWriteArray() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWriteArray:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel5:
; CHECK-NEXT: pld r4, ArrayOut@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
; CHECK-NEXT: lwz r3, 28(r3)
; CHECK-NEXT: addi r3, r3, 42
; CHECK-NEXT: stw r3, 8(r4)
; CHECK-NEXT: blr
entry:
  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 7), align 4
  %add = add nsw i32 %0, 42
  store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 2), align 4
  ret void
}

; Reads element 3 and writes element 6 of the same array, @ArrayIn. The single
; pld result (r3) feeds both the lwz and the stw, and no hint is expected.
define dso_local void @ReadWriteSameArray() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWriteSameArray:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1
; CHECK-NEXT: lwz r4, 12(r3)
; CHECK-NEXT: addi r4, r4, 8
; CHECK-NEXT: stw r4, 24(r3)
; CHECK-NEXT: blr
entry:
  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 3), align 4
  %add = add nsw i32 %0, 8
  store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 6), align 4
  ret void
}

; Loads the pointers stored in @IntPtrIn and @IntPtrOut and accesses memory
; through them. Both pld instructions are expected to get a hint, each paired
; with the ld that loads the pointer value.
define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWriteIntPtr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, IntPtrIn@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel6:
; CHECK-NEXT: pld r4, IntPtrOut@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel7:
; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
; CHECK-NEXT: ld r4, 0(r4)
; CHECK-NEXT: lwz r5, 216(r3)
; CHECK-NEXT: lwz r3, 48(r3)
; CHECK-NEXT: add r3, r3, r5
; CHECK-NEXT: stw r3, 136(r4)
; CHECK-NEXT: blr
entry:
  %0 = load i32*, i32** @IntPtrIn, align 8
  %arrayidx = getelementptr inbounds i32, i32* %0, i64 54
  %1 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 12
  %2 = load i32, i32* %arrayidx1, align 4
  %add = add nsw i32 %2, %1
  %3 = load i32*, i32** @IntPtrOut, align 8
  %arrayidx2 = getelementptr inbounds i32, i32* %3, i64 34
  store i32 %add, i32* %arrayidx2, align 4
  ret void
}

; Copies the function pointer held in @FuncPtrIn to @FuncPtrOut (as an i64).
; Only the pld of FuncPtrIn (paired with the ld) is expected to carry a hint.
define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 {
; CHECK-LABEL: ReadWriteFuncPtr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel8:
; CHECK-NEXT: pld r4, FuncPtrOut@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
  %0 = load i64, i64* bitcast (void (...)** @FuncPtrIn to i64*), align 8
  store i64 %0, i64* bitcast (void (...)** @FuncPtrOut to i64*), align 8
  ret void
}

; Stores the address of @Callee into @FuncPtrOut. The value loaded by the
; second pld is itself the data being stored; no hint is expected for either
; pld.
define dso_local void @FuncPtrCopy() local_unnamed_addr #0 {
; CHECK-LABEL: FuncPtrCopy:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, FuncPtrOut@got@pcrel(0), 1
; CHECK-NEXT: pld r4, Callee@got@pcrel(0), 1
; CHECK-NEXT: std r4, 0(r3)
; CHECK-NEXT: blr
entry:
  store void (...)* @Callee, void (...)** @FuncPtrOut, align 8
  ret void
}

declare void @Callee(...)

; Tail-calls through the function pointer stored in @FuncPtrIn. The pld is
; hinted and paired with the ld that loads the call target into r12.
define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
; CHECK-LABEL: FuncPtrCall:
; CHECK: .localentry FuncPtrCall, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel9:
; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
; CHECK-NEXT: ld r12, 0(r3)
; CHECK-NEXT: mtctr r12
; CHECK-NEXT: bctr
; CHECK-NEXT: #TC_RETURNr8 ctr 0
entry:
  %0 = load void ()*, void ()** bitcast (void (...)** @FuncPtrIn to void ()**), align 8
  tail call void %0()
  ret void
}

; Returns element 1 of @inputVi32. The expected code reads just that element
; with a scalar lwa at displacement 4, and the pld is hinted.
define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 {
; CHECK-LABEL: ReadVecElement:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel10:
; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
; CHECK-NEXT: lwa r3, 4(r3)
; CHECK-NEXT: blr
entry:
  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
  %vecext = extractelement <4 x i32> %0, i32 1
  ret i32 %vecext
}

; Reads elements of @inputVi32 before, between and after two calls to @Callee.
; The single pld result (kept in r30) is used by three separate lwz
; instructions, and no hint is expected.
define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
; CHECK-LABEL: VecMultiUse:
; CHECK: .localentry VecMultiUse, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: pld r30, inputVi32@got@pcrel(0), 1
; CHECK-NEXT: lwz r29, 4(r30)
; CHECK-NEXT: bl Callee@notoc
; CHECK-NEXT: lwz r3, 8(r30)
; CHECK-NEXT: add r29, r3, r29
; CHECK-NEXT: bl Callee@notoc
; CHECK-NEXT: lwz r3, 0(r30)
; CHECK-NEXT: add r3, r29, r3
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
  tail call void bitcast (void (...)* @Callee to void ()*)()
  %1 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
  %2 = extractelement <4 x i32> %1, i32 2
  %3 = extractelement <4 x i32> %0, i64 1
  %4 = add nsw i32 %2, %3
  tail call void bitcast (void (...)* @Callee to void ()*)()
  %5 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
  %vecext2 = extractelement <4 x i32> %5, i32 0
  %add3 = add nsw i32 %4, %vecext2
  ret i32 %add3
}

; Loads element 4 of @ArrayIn and also passes the address of @ArrayIn to
; @getAddr. The pld result is used both by the lwz and, via mr r3, r4, as the
; call argument; no hint is expected.
define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
; CHECK-LABEL: UseAddr:
; CHECK: .localentry UseAddr, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: pld r4, ArrayIn@got@pcrel(0), 1
; CHECK-NEXT: lwz r5, 16(r4)
; CHECK-NEXT: add r30, r5, r3
; CHECK-NEXT: mr r3, r4
; CHECK-NEXT: bl getAddr@notoc
; CHECK-NEXT: add r3, r30, r3
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 4), align 4
  %add = add nsw i32 %0, %a
  %call = tail call signext i32 @getAddr(i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 0))
  %add1 = add nsw i32 %add, %call
  ret i32 %add1
}

declare signext i32 @getAddr(i32*) local_unnamed_addr

; Returns the address of @input32. The pld result is the return value itself
; (there is no dependent load or store), and no hint is expected.
define dso_local nonnull i32* @AddrTaken32() local_unnamed_addr #0 {
; CHECK-LABEL: AddrTaken32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1
; CHECK-NEXT: blr
entry:
  ret i32* @input32
}

attributes #0 = { nounwind }