• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
4; RUN:   < %s | FileCheck %s
5
6; On future CPU with PC Relative addressing enabled, it is possible for the
7; linker to optimize GOT indirect accesses. In order for the linker to do this
8; the compiler needs to add a hint using the R_PPC64_PCREL_OPT relocation.
9; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation
10; correctly.
11
; External globals read and written by the tests below. The expected assembly
; reaches each of them through a GOT-indirect, PC-relative pld
; (`<symbol>@got@pcrel`).
; @input32 and @ArrayIn are the only ones declared without local_unnamed_addr;
; their addresses are used as values by AddrTaken32 and UseAddr respectively.
12@input8 = external local_unnamed_addr global i8, align 1
13@output8 = external local_unnamed_addr global i8, align 1
14@input16 = external local_unnamed_addr global i16, align 2
15@output16 = external local_unnamed_addr global i16, align 2
16@input32 = external global i32, align 4
17@output32 = external local_unnamed_addr global i32, align 4
18@input64 = external local_unnamed_addr global i64, align 8
19@output64 = external local_unnamed_addr global i64, align 8
20@input128 = external local_unnamed_addr global i128, align 16
21@output128 = external local_unnamed_addr global i128, align 16
22@inputf32 = external local_unnamed_addr global float, align 4
23@outputf32 = external local_unnamed_addr global float, align 4
24@inputf64 = external local_unnamed_addr global double, align 8
25@outputf64 = external local_unnamed_addr global double, align 8
26@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16
27@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16
28@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16
29@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16
30@ArrayIn = external global [10 x i32], align 4
31@ArrayOut = external local_unnamed_addr global [10 x i32], align 4
32@IntPtrIn = external local_unnamed_addr global i32*, align 8
33@IntPtrOut = external local_unnamed_addr global i32*, align 8
34@FuncPtrIn = external local_unnamed_addr global void (...)*, align 8
35@FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
36
; Copies one i8 from @input8 to @output8.
; Expected code: both GOT entries are fetched with pld, and a single
; R_PPC64_PCREL_OPT .reloc ties the pld of input8 (the instruction just before
; the .Lpcrel label) to the lbz that consumes its result. No .reloc is expected
; for the pld of output8.
37define dso_local void @ReadWrite8() local_unnamed_addr #0 {
38; CHECK-LABEL: ReadWrite8:
39; CHECK:       # %bb.0: # %entry
40; CHECK-NEXT:    pld r3, input8@got@pcrel(0), 1
41; CHECK-NEXT:  .Lpcrel:
42; CHECK-NEXT:    pld r4, output8@got@pcrel(0), 1
43; CHECK-NEXT:    .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
44; CHECK-NEXT:    lbz r3, 0(r3)
45; In this test the stb r3, 0(r4) cannot be optimized because it
46; uses the register r3 and that register is defined by lbz r3, 0(r3)
47; which is located between the pld and the stb.
48; CHECK-NEXT:    stb r3, 0(r4)
49; CHECK-NEXT:    blr
50entry:
51  %0 = load i8, i8* @input8, align 1
52  store i8 %0, i8* @output8, align 1
53  ret void
54}
55
; Copies one i16 from @input16 to @output16.
; Expected code: both GOT entries are fetched with pld, and a single
; R_PPC64_PCREL_OPT .reloc ties the pld of input16 (the instruction just before
; the .Lpcrel0 label) to the lhz that consumes its result. No .reloc is
; expected for the pld of output16.
56define dso_local void @ReadWrite16() local_unnamed_addr #0 {
57; CHECK-LABEL: ReadWrite16:
58; CHECK:       # %bb.0: # %entry
59; CHECK-NEXT:    pld r3, input16@got@pcrel(0), 1
60; CHECK-NEXT:  .Lpcrel0:
61; CHECK-NEXT:    pld r4, output16@got@pcrel(0), 1
62; CHECK-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
63; CHECK-NEXT:    lhz r3, 0(r3)
64; In this test the sth r3, 0(r4) cannot be optimized because it
65; uses the register r3 and that register is defined by lhz r3, 0(r3)
66; which is located between the pld and the sth.
67; CHECK-NEXT:    sth r3, 0(r4)
68; CHECK-NEXT:    blr
69entry:
70  %0 = load i16, i16* @input16, align 2
71  store i16 %0, i16* @output16, align 2
72  ret void
73}
74
; Copies one i32 from @input32 to @output32.
; Expected code: one R_PPC64_PCREL_OPT .reloc pairing the pld of input32 with
; the lwz that uses it; the pld of output32 (base of the stw) gets no .reloc.
75define dso_local void @ReadWrite32() local_unnamed_addr #0 {
76; CHECK-LABEL: ReadWrite32:
77; CHECK:       # %bb.0: # %entry
78; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
79; CHECK-NEXT:  .Lpcrel1:
80; CHECK-NEXT:    pld r4, output32@got@pcrel(0), 1
81; CHECK-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
82; CHECK-NEXT:    lwz r3, 0(r3)
83; CHECK-NEXT:    stw r3, 0(r4)
84; CHECK-NEXT:    blr
85entry:
86  %0 = load i32, i32* @input32, align 4
87  store i32 %0, i32* @output32, align 4
88  ret void
89}
90
; Copies one i64 from @input64 to @output64.
; Expected code: one R_PPC64_PCREL_OPT .reloc pairing the pld of input64 with
; the ld that uses it; the pld of output64 (base of the std) gets no .reloc.
91define dso_local void @ReadWrite64() local_unnamed_addr #0 {
92; CHECK-LABEL: ReadWrite64:
93; CHECK:       # %bb.0: # %entry
94; CHECK-NEXT:    pld r3, input64@got@pcrel(0), 1
95; CHECK-NEXT:  .Lpcrel2:
96; CHECK-NEXT:    pld r4, output64@got@pcrel(0), 1
97; CHECK-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
98; CHECK-NEXT:    ld r3, 0(r3)
99; CHECK-NEXT:    std r3, 0(r4)
100; CHECK-NEXT:    blr
101entry:
102  %0 = load i64, i64* @input64, align 8
103  store i64 %0, i64* @output64, align 8
104  ret void
105}
106
; Copies one i128 from @input128 to @output128.
; The expected code accesses memory with the X-Form lxvx/stxvx instructions and
; contains no R_PPC64_PCREL_OPT .reloc, i.e. the linker hint is currently not
; emitted for this case.
107; FIXME: we should always convert X-Form instructions that use
108; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
109define dso_local void @ReadWrite128() local_unnamed_addr #0 {
110; CHECK-LABEL: ReadWrite128:
111; CHECK:       # %bb.0: # %entry
112; CHECK-NEXT:    pld r3, input128@got@pcrel(0), 1
113; CHECK-NEXT:    lxvx vs0, 0, r3
114; CHECK-NEXT:    pld r3, output128@got@pcrel(0), 1
115; CHECK-NEXT:    stxvx vs0, 0, r3
116; CHECK-NEXT:    blr
117entry:
118  %0 = load i128, i128* @input128, align 16
119  store i128 %0, i128* @output128, align 16
120  ret void
121}
122
; Loads a float from @inputf32, adds a floating-point constant to it and
; stores the sum to @outputf32.
; Expected code: the .reloc still pairs the pld of inputf32 with the lfs even
; though an unrelated xxspltidp is placed between the two. The later pld of
; outputf32 (base of the stfs) gets no .reloc.
123define dso_local void @ReadWritef32() local_unnamed_addr #0 {
124; CHECK-LABEL: ReadWritef32:
125; CHECK:       # %bb.0: # %entry
126; CHECK-NEXT:    pld r3, inputf32@got@pcrel(0), 1
127; CHECK-NEXT:  .Lpcrel3:
128; CHECK-NEXT:    xxspltidp vs1, 1078103900
129; CHECK-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
130; CHECK-NEXT:    lfs f0, 0(r3)
131; CHECK-NEXT:    pld r3, outputf32@got@pcrel(0), 1
132; CHECK-NEXT:    xsaddsp f0, f0, f1
133; CHECK-NEXT:    stfs f0, 0(r3)
134; CHECK-NEXT:    blr
135entry:
136  %0 = load float, float* @inputf32, align 4
137  %add = fadd float %0, 0x400851EB80000000
138  store float %add, float* @outputf32, align 4
139  ret void
140}
141
; Loads a double from @inputf64, adds 6.8 to it and stores the sum to
; @outputf64.
; Expected code: the .reloc pairs the pld of inputf64 with the lfd even though
; a plfd of the constant-pool entry .LCPI6_0 is placed between the two. The
; later pld of outputf64 (base of the stfd) gets no .reloc.
142define dso_local void @ReadWritef64() local_unnamed_addr #0 {
143; CHECK-LABEL: ReadWritef64:
144; CHECK:       # %bb.0: # %entry
145; CHECK-NEXT:    pld r3, inputf64@got@pcrel(0), 1
146; CHECK-NEXT:  .Lpcrel4:
147; CHECK-NEXT:    plfd f1, .LCPI6_0@PCREL(0), 1
148; CHECK-NEXT:    .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
149; CHECK-NEXT:    lfd f0, 0(r3)
150; CHECK-NEXT:    pld r3, outputf64@got@pcrel(0), 1
151; CHECK-NEXT:    xsadddp f0, f0, f1
152; CHECK-NEXT:    stfd f0, 0(r3)
153; CHECK-NEXT:    blr
154entry:
155  %0 = load double, double* @inputf64, align 8
156  %add = fadd double %0, 6.800000e+00
157  store double %add, double* @outputf64, align 8
158  ret void
159}
160
; Loads a <4 x i32> from @inputVi32, replaces element 1 with the constant 45
; and stores the vector to @outputVi32.
; The expected code accesses memory with the X-Form lxvx/stxvx instructions and
; contains no R_PPC64_PCREL_OPT .reloc, i.e. the linker hint is currently not
; emitted for this case.
161; FIXME: we should always convert X-Form instructions that use
162; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
163define dso_local void @ReadWriteVi32() local_unnamed_addr #0 {
164; CHECK-LABEL: ReadWriteVi32:
165; CHECK:       # %bb.0: # %entry
166; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
167; CHECK-NEXT:    li r4, 45
168; CHECK-NEXT:    mtfprwz f1, r4
169; CHECK-NEXT:    lxvx vs0, 0, r3
170; CHECK-NEXT:    pld r3, outputVi32@got@pcrel(0), 1
171; CHECK-NEXT:    xxinsertw vs0, vs1, 8
172; CHECK-NEXT:    stxvx vs0, 0, r3
173; CHECK-NEXT:    blr
174entry:
175  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
176  %vecins = insertelement <4 x i32> %0, i32 45, i32 1
177  store <4 x i32> %vecins, <4 x i32>* @outputVi32, align 16
178  ret void
179}
180
; Copies one <2 x i64> from @inputVi64 to @outputVi64.
; The expected code uses the X-Form lxvx/stxvx accesses and, like the other
; lxvx/stxvx cases in this file, contains no R_PPC64_PCREL_OPT .reloc.
181define dso_local void @ReadWriteVi64() local_unnamed_addr #0 {
182; CHECK-LABEL: ReadWriteVi64:
183; CHECK:       # %bb.0: # %entry
184; CHECK-NEXT:    pld r3, inputVi64@got@pcrel(0), 1
185; CHECK-NEXT:    lxvx vs0, 0, r3
186; CHECK-NEXT:    pld r3, outputVi64@got@pcrel(0), 1
187; CHECK-NEXT:    stxvx vs0, 0, r3
188; CHECK-NEXT:    blr
189entry:
190  %0 = load <2 x i64>, <2 x i64>* @inputVi64, align 16
191  store <2 x i64> %0, <2 x i64>* @outputVi64, align 16
192  ret void
193}
194
; Loads element 7 of @ArrayIn, adds 42 and stores the result to element 2 of
; @ArrayOut.
; Expected code: the element offsets appear as displacements (28 and 8 bytes)
; on the lwz/stw, and the .reloc pairs the pld of ArrayIn with the lwz, so the
; hint is expected for a non-zero displacement too. The pld of ArrayOut gets
; no .reloc.
195define dso_local void @ReadWriteArray() local_unnamed_addr #0 {
196; CHECK-LABEL: ReadWriteArray:
197; CHECK:       # %bb.0: # %entry
198; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
199; CHECK-NEXT:  .Lpcrel5:
200; CHECK-NEXT:    pld r4, ArrayOut@got@pcrel(0), 1
201; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
202; CHECK-NEXT:    lwz r3, 28(r3)
203; CHECK-NEXT:    addi r3, r3, 42
204; CHECK-NEXT:    stw r3, 8(r4)
205; CHECK-NEXT:    blr
206entry:
207  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 7), align 4
208  %add = add nsw i32 %0, 42
209  store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayOut, i64 0, i64 2), align 4
210  ret void
211}
212
; Loads element 3 of @ArrayIn, adds 8 and stores the result to element 6 of
; the same array.
; Expected code: a single pld whose result (r3) is the base register of both
; the lwz and the stw. No R_PPC64_PCREL_OPT .reloc is expected here.
213define dso_local void @ReadWriteSameArray() local_unnamed_addr #0 {
214; CHECK-LABEL: ReadWriteSameArray:
215; CHECK:       # %bb.0: # %entry
216; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
217; CHECK-NEXT:    lwz r4, 12(r3)
218; CHECK-NEXT:    addi r4, r4, 8
219; CHECK-NEXT:    stw r4, 24(r3)
220; CHECK-NEXT:    blr
221entry:
222  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 3), align 4
223  %add = add nsw i32 %0, 8
224  store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 6), align 4
225  ret void
226}
227
; Loads the pointer stored in @IntPtrIn, sums elements 54 and 12 of the i32
; array it points to, and stores the sum to element 34 of the array pointed to
; by the pointer stored in @IntPtrOut.
; Expected code: two R_PPC64_PCREL_OPT .reloc directives, one per pld. Each
; pairs a pld (labels .Lpcrel6 / .Lpcrel7) with the ld that fetches the pointer
; value through it (r3 / r4). The subsequent lwz/stw accesses through the
; loaded pointers carry no .reloc.
228define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 {
229; CHECK-LABEL: ReadWriteIntPtr:
230; CHECK:       # %bb.0: # %entry
231; CHECK-NEXT:    pld r3, IntPtrIn@got@pcrel(0), 1
232; CHECK-NEXT:  .Lpcrel6:
233; CHECK-NEXT:    pld r4, IntPtrOut@got@pcrel(0), 1
234; CHECK-NEXT:  .Lpcrel7:
235; CHECK-NEXT:    .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
236; CHECK-NEXT:    ld r3, 0(r3)
237; CHECK-NEXT:    .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
238; CHECK-NEXT:    ld r4, 0(r4)
239; CHECK-NEXT:    lwz r5, 216(r3)
240; CHECK-NEXT:    lwz r3, 48(r3)
241; CHECK-NEXT:    add r3, r3, r5
242; CHECK-NEXT:    stw r3, 136(r4)
243; CHECK-NEXT:    blr
244entry:
245  %0 = load i32*, i32** @IntPtrIn, align 8
246  %arrayidx = getelementptr inbounds i32, i32* %0, i64 54
247  %1 = load i32, i32* %arrayidx, align 4
248  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 12
249  %2 = load i32, i32* %arrayidx1, align 4
250  %add = add nsw i32 %2, %1
251  %3 = load i32*, i32** @IntPtrOut, align 8
252  %arrayidx2 = getelementptr inbounds i32, i32* %3, i64 34
253  store i32 %add, i32* %arrayidx2, align 4
254  ret void
255}
256
; Copies the function-pointer value held in @FuncPtrIn to @FuncPtrOut; the IR
; performs the copy as an i64 load/store through bitcasts of the globals.
; Expected code: one R_PPC64_PCREL_OPT .reloc pairing the pld of FuncPtrIn with
; the ld that uses it; the pld of FuncPtrOut (base of the std) gets no .reloc.
257define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 {
258; CHECK-LABEL: ReadWriteFuncPtr:
259; CHECK:       # %bb.0: # %entry
260; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
261; CHECK-NEXT:  .Lpcrel8:
262; CHECK-NEXT:    pld r4, FuncPtrOut@got@pcrel(0), 1
263; CHECK-NEXT:    .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
264; CHECK-NEXT:    ld r3, 0(r3)
265; CHECK-NEXT:    std r3, 0(r4)
266; CHECK-NEXT:    blr
267entry:
268  %0 = load i64, i64* bitcast (void (...)** @FuncPtrIn to i64*), align 8
269  store i64 %0, i64* bitcast (void (...)** @FuncPtrOut to i64*), align 8
270  ret void
271}
272
; Stores the address of the external function @Callee into @FuncPtrOut.
; Expected code: two pld instructions (the GOT entries of FuncPtrOut and of
; Callee) followed by a std of the Callee address. No R_PPC64_PCREL_OPT .reloc
; is expected for either pld.
273define dso_local void @FuncPtrCopy() local_unnamed_addr #0 {
274; CHECK-LABEL: FuncPtrCopy:
275; CHECK:       # %bb.0: # %entry
276; CHECK-NEXT:    pld r3, FuncPtrOut@got@pcrel(0), 1
277; CHECK-NEXT:    pld r4, Callee@got@pcrel(0), 1
278; CHECK-NEXT:    std r4, 0(r3)
279; CHECK-NEXT:    blr
280entry:
281  store void (...)* @Callee, void (...)** @FuncPtrOut, align 8
282  ret void
283}
284
; External variadic function with no definition in this file. FuncPtrCopy
; stores its address and VecMultiUse calls it (through a bitcast to void ()).
285declare void @Callee(...)
286
; Loads a function pointer from @FuncPtrIn and tail-calls it.
; Expected code: a .localentry directive for the function, then a
; R_PPC64_PCREL_OPT .reloc pairing the pld of FuncPtrIn with the ld into r12,
; followed by the indirect tail call via mtctr/bctr.
287define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
288; CHECK-LABEL: FuncPtrCall:
289; CHECK:         .localentry FuncPtrCall, 1
290; CHECK-NEXT:  # %bb.0: # %entry
291; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
292; CHECK-NEXT:  .Lpcrel9:
293; CHECK-NEXT:    .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
294; CHECK-NEXT:    ld r12, 0(r3)
295; CHECK-NEXT:    mtctr r12
296; CHECK-NEXT:    bctr
297; CHECK-NEXT:    #TC_RETURNr8 ctr 0
298entry:
299  %0 = load void ()*, void ()** bitcast (void (...)** @FuncPtrIn to void ()**), align 8
300  tail call void %0()
301  ret void
302}
303
; Loads the <4 x i32> @inputVi32 and returns its element 1 (signext i32).
; Expected code: the vector load plus extract is emitted as a single lwa at
; displacement 4, and a R_PPC64_PCREL_OPT .reloc pairs the pld of inputVi32
; with that lwa.
304define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 {
305; CHECK-LABEL: ReadVecElement:
306; CHECK:       # %bb.0: # %entry
307; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
308; CHECK-NEXT:  .Lpcrel10:
309; CHECK-NEXT:    .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
310; CHECK-NEXT:    lwa r3, 4(r3)
311; CHECK-NEXT:    blr
312entry:
313  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
314  %vecext = extractelement <4 x i32> %0, i32 1
315  ret i32 %vecext
316}
317
; Loads @inputVi32 three times, with a call to @Callee between consecutive
; loads, and returns the sum of element 1 of the first load, element 2 of the
; second and element 0 of the third.
; Expected code: a single pld into r30 (a register this function spills and
; reloads around its body); its result is the base of all three lwz accesses
; on either side of the calls. With the pld result used more than once, no
; R_PPC64_PCREL_OPT .reloc is expected.
318define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
319; CHECK-LABEL: VecMultiUse:
320; CHECK:         .localentry VecMultiUse, 1
321; CHECK-NEXT:  # %bb.0: # %entry
322; CHECK-NEXT:    mflr r0
323; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
324; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
325; CHECK-NEXT:    std r0, 16(r1)
326; CHECK-NEXT:    stdu r1, -64(r1)
327; CHECK-NEXT:    pld r30, inputVi32@got@pcrel(0), 1
328; CHECK-NEXT:    lwz r29, 4(r30)
329; CHECK-NEXT:    bl Callee@notoc
330; CHECK-NEXT:    lwz r3, 8(r30)
331; CHECK-NEXT:    add r29, r3, r29
332; CHECK-NEXT:    bl Callee@notoc
333; CHECK-NEXT:    lwz r3, 0(r30)
334; CHECK-NEXT:    add r3, r29, r3
335; CHECK-NEXT:    extsw r3, r3
336; CHECK-NEXT:    addi r1, r1, 64
337; CHECK-NEXT:    ld r0, 16(r1)
338; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
339; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
340; CHECK-NEXT:    mtlr r0
341; CHECK-NEXT:    blr
342entry:
343  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
344  tail call void bitcast (void (...)* @Callee to void ()*)()
345  %1 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
346  %2 = extractelement <4 x i32> %1, i32 2
347  %3 = extractelement <4 x i32> %0, i64 1
348  %4 = add nsw i32 %2, %3
349  tail call void bitcast (void (...)* @Callee to void ()*)()
350  %5 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
351  %vecext2 = extractelement <4 x i32> %5, i32 0
352  %add3 = add nsw i32 %4, %vecext2
353  ret i32 %add3
354}
355
; Returns ArrayIn[4] + %a + getAddr(&ArrayIn[0]).
; Expected code: the pld result (r4) is used both as the base of the lwz and,
; copied into r3, as the pointer argument of the getAddr call. With the address
; itself needed as a value, no R_PPC64_PCREL_OPT .reloc is expected.
356define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
357; CHECK-LABEL: UseAddr:
358; CHECK:         .localentry UseAddr, 1
359; CHECK-NEXT:  # %bb.0: # %entry
360; CHECK-NEXT:    mflr r0
361; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
362; CHECK-NEXT:    std r0, 16(r1)
363; CHECK-NEXT:    stdu r1, -48(r1)
364; CHECK-NEXT:    pld r4, ArrayIn@got@pcrel(0), 1
365; CHECK-NEXT:    lwz r5, 16(r4)
366; CHECK-NEXT:    add r30, r5, r3
367; CHECK-NEXT:    mr r3, r4
368; CHECK-NEXT:    bl getAddr@notoc
369; CHECK-NEXT:    add r3, r30, r3
370; CHECK-NEXT:    extsw r3, r3
371; CHECK-NEXT:    addi r1, r1, 48
372; CHECK-NEXT:    ld r0, 16(r1)
373; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
374; CHECK-NEXT:    mtlr r0
375; CHECK-NEXT:    blr
376entry:
377  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 4), align 4
378  %add = add nsw i32 %0, %a
379  %call = tail call signext i32 @getAddr(i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 0))
380  %add1 = add nsw i32 %add, %call
381  ret i32 %add1
382}
383
; External function with no definition in this file; UseAddr passes it the
; address of @ArrayIn and adds its signext i32 result to the returned sum.
384declare signext i32 @getAddr(i32*) local_unnamed_addr
385
; Returns the address of @input32 without accessing the memory it names.
; Expected code: just the pld of the GOT entry followed by blr. With no load or
; store through the address, no R_PPC64_PCREL_OPT .reloc is expected.
386define dso_local nonnull i32* @AddrTaken32() local_unnamed_addr #0 {
387; CHECK-LABEL: AddrTaken32:
388; CHECK:       # %bb.0: # %entry
389; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
390; CHECK-NEXT:    blr
391entry:
392  ret i32* @input32
393}
394
; Attribute group referenced as #0 by every function defined in this file.
395attributes #0 = { nounwind }
396