; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST

; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST

declare void @bar(i32)

; Test a simple indirect call and tail call.
define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
entry:
  ; Direct calls to @bar stay direct; the indirect calls through %fp must be
  ; lowered via a retpoline thunk (attribute #0 enables retpoline-indirect-calls).
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  ret void
}

; X64-LABEL: icall_reg:
; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
; X64-DAG:   movl %esi, %[[x:[^ ]*]]
; X64:       movl %esi, %edi
; X64:       callq bar
; X64-DAG:   movl %[[x]], %edi
; X64-DAG:   movq %[[fp]], %r11
; X64:       callq __llvm_retpoline_r11
; X64:       movl %[[x]], %edi
; X64:       callq bar
; X64-DAG:   movl %[[x]], %edi
; X64-DAG:   movq %[[fp]], %r11
; X64:       jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_reg:
; X64FAST:       callq bar
; X64FAST:       callq __llvm_retpoline_r11
; X64FAST:       callq bar
; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_reg:
; X86-DAG:   movl 12(%esp), %[[fp:[^ ]*]]
; X86-DAG:   movl 16(%esp), %[[x:[^ ]*]]
; X86:       pushl %[[x]]
; X86:       calll bar
; X86:       movl %[[fp]], %eax
; X86:       pushl %[[x]]
; X86:       calll __llvm_retpoline_eax
; X86:       pushl %[[x]]
; X86:       calll bar
; X86:       movl %[[fp]], %eax
; X86:       pushl %[[x]]
; X86:       calll __llvm_retpoline_eax
; X86-NOT:   # TAILCALL

; X86FAST-LABEL: icall_reg:
; X86FAST:       calll bar
; X86FAST:       calll __llvm_retpoline_eax
; X86FAST:       calll bar
; X86FAST:       calll __llvm_retpoline_eax


@global_fp = external dso_local global void (i32)*

; Test an indirect call through a global variable.
define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
  ; Both the plain call and the tail call load the target out of @global_fp
  ; and must therefore go through the retpoline thunk.
  %fp1 = load void (i32)*, void (i32)** @global_fp
  call void %fp1(i32 %x)
  %fp2 = load void (i32)*, void (i32)** @global_fp
  tail call void %fp2(i32 %x)
  ret void
}

; X64-LABEL: icall_global_fp:
; X64-DAG:   movl %edi, %[[x:[^ ]*]]
; X64-DAG:   movq global_fp(%rip), %r11
; X64:       callq __llvm_retpoline_r11
; X64-DAG:   movl %[[x]], %edi
; X64-DAG:   movq global_fp(%rip), %r11
; X64:       jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_global_fp:
; X64FAST:       movq global_fp(%rip), %r11
; X64FAST:       callq __llvm_retpoline_r11
; X64FAST:       movq global_fp(%rip), %r11
; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_global_fp:
; X86:       movl global_fp, %eax
; X86:       pushl 4(%esp)
; X86:       calll __llvm_retpoline_eax
; X86:       addl $4, %esp
; X86:       movl global_fp, %eax
; X86:       jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: icall_global_fp:
; X86FAST:       calll __llvm_retpoline_eax
; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL


%struct.Foo = type { void (%struct.Foo*)** }

; Test an indirect call through a vtable.
define void @vcall(%struct.Foo* %obj) #0 {
  ; Load the second virtual-table slot and call it twice; both calls are
  ; indirect and must use the retpoline thunk.
  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
  tail call void %fp(%struct.Foo* %obj)
  tail call void %fp(%struct.Foo* %obj)
  ret void
}

; X64-LABEL: vcall:
; X64:       movq %rdi, %[[obj:[^ ]*]]
; X64:       movq (%rdi), %[[vptr:[^ ]*]]
; X64:       movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64:       movq %[[fp]], %r11
; X64:       callq __llvm_retpoline_r11
; X64-DAG:   movq %[[obj]], %rdi
; X64-DAG:   movq %[[fp]], %r11
; X64:       jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: vcall:
; X64FAST:       callq __llvm_retpoline_r11
; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: vcall:
; X86:       movl 8(%esp), %[[obj:[^ ]*]]
; X86:       movl (%[[obj]]), %[[vptr:[^ ]*]]
; X86:       movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86:       movl %[[fp]], %eax
; X86:       pushl %[[obj]]
; X86:       calll __llvm_retpoline_eax
; X86:       addl $4, %esp
; X86:       movl %[[fp]], %eax
; X86:       jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: vcall:
; X86FAST:       calll __llvm_retpoline_eax
; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL


declare void @direct_callee()

define void @direct_tail() #0 {
  ; A direct tail call needs no retpoline thunk — it may remain a plain jmp.
  tail call void @direct_callee()
  ret void
}

; X64-LABEL: direct_tail:
; X64:       jmp direct_callee@PLT # TAILCALL
; X64FAST-LABEL: direct_tail:
; X64FAST:   jmp direct_callee@PLT # TAILCALL
; X86-LABEL: direct_tail:
; X86:       jmp direct_callee@PLT # TAILCALL
; X86FAST-LABEL: direct_tail:
; X86FAST:   jmp direct_callee@PLT # TAILCALL


declare void @nonlazybind_callee() #2

define void @nonlazybind_caller() #0 {
  ; On x86-64 a nonlazybind callee is reached through the GOT, which makes
  ; these calls indirect and subject to retpoline lowering.
  call void @nonlazybind_callee()
  tail call void @nonlazybind_callee()
  ret void
}

; X64-LABEL: nonlazybind_caller:
; X64:       movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
; X64:       movq %[[REG]], %r11
; X64:       callq __llvm_retpoline_r11
; X64:       movq %[[REG]], %r11
; X64:       jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: nonlazybind_caller:
; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST:   callq __llvm_retpoline_r11
; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST:   jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: nonlazybind_caller:
; X86:       calll nonlazybind_callee@PLT
; X86:       jmp nonlazybind_callee@PLT # TAILCALL
; X86FAST-LABEL: nonlazybind_caller:
; X86FAST:   calll nonlazybind_callee@PLT
; X86FAST:   jmp nonlazybind_callee@PLT # TAILCALL


; Check that a switch gets lowered using a jump table when retpolines are only
; enabled for calls.
define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
; X64-LABEL: switch_jumptable:
; X64:         jmpq *
; X86-LABEL: switch_jumptable:
; X86:         jmpl *
entry:
  br label %header

header:
  ; Ten switch destinations so the lowering prefers a jump table; with only
  ; indirect-call retpolines (#0) the indirect jump through the table survives.
  %i = load volatile i32, i32* %ptr
  switch i32 %i, label %bb0 [
    i32 1, label %bb1
    i32 2, label %bb2
    i32 3, label %bb3
    i32 4, label %bb4
    i32 5, label %bb5
    i32 6, label %bb6
    i32 7, label %bb7
    i32 8, label %bb8
    i32 9, label %bb9
  ]

bb0:
  store volatile i64 0, i64* %sink
  br label %header

bb1:
  store volatile i64 1, i64* %sink
  br label %header

bb2:
  store volatile i64 2, i64* %sink
  br label %header

bb3:
  store volatile i64 3, i64* %sink
  br label %header

bb4:
  store volatile i64 4, i64* %sink
  br label %header

bb5:
  store volatile i64 5, i64* %sink
  br label %header

bb6:
  store volatile i64 6, i64* %sink
  br label %header

bb7:
  store volatile i64 7, i64* %sink
  br label %header

bb8:
  store volatile i64 8, i64* %sink
  br label %header

bb9:
  store volatile i64 9, i64* %sink
  br label %header
}


@indirectbr_preserved.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_preserved, %bb0),
                                                     i8* blockaddress(@indirectbr_preserved, %bb1),
                                                     i8* blockaddress(@indirectbr_preserved, %bb2),
                                                     i8* blockaddress(@indirectbr_preserved, %bb3),
                                                     i8* blockaddress(@indirectbr_preserved, %bb4),
                                                     i8* blockaddress(@indirectbr_preserved, %bb5),
                                                     i8* blockaddress(@indirectbr_preserved, %bb6),
                                                     i8* blockaddress(@indirectbr_preserved, %bb7),
                                                     i8* blockaddress(@indirectbr_preserved, %bb8),
                                                     i8* blockaddress(@indirectbr_preserved, %bb9)]

; Check that we preserve indirectbr when only calls are retpolined.
define void @indirectbr_preserved(i64* readonly %p, i64* %sink) #0 {
; X64-LABEL: indirectbr_preserved:
; X64:         jmpq *
; X86-LABEL: indirectbr_preserved:
; X86:         jmpl *
entry:
  %i0 = load i64, i64* %p
  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i0
  %target0 = load i8*, i8** %target.i0
  indirectbr i8* %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, i64* %sink
  br label %latch

bb1:
  store volatile i64 1, i64* %sink
  br label %latch

bb2:
  store volatile i64 2, i64* %sink
  br label %latch

bb3:
  store volatile i64 3, i64* %sink
  br label %latch

bb4:
  store volatile i64 4, i64* %sink
  br label %latch

bb5:
  store volatile i64 5, i64* %sink
  br label %latch

bb6:
  store volatile i64 6, i64* %sink
  br label %latch

bb7:
  store volatile i64 7, i64* %sink
  br label %latch

bb8:
  store volatile i64 8, i64* %sink
  br label %latch

bb9:
  store volatile i64 9, i64* %sink
  br label %latch

latch:
  %i.next = load i64, i64* %p
  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i.next
  %target.next = load i8*, i8** %target.i.next
  ; Potentially hit a full 10 successors here so that even if we rewrite as
  ; a switch it will try to be lowered with a jump table.
  indirectbr i8* %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb1),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb2),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb3),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb4),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb5),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb6),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb7),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb8),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb9)]

; Check that when retpolines are enabled for indirect branches the indirectbr
; instruction gets rewritten to use switch, and that in turn doesn't get lowered
; as a jump table.
define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #1 {
; X64-LABEL: indirectbr_rewrite:
; X64-NOT:     jmpq
; X86-LABEL: indirectbr_rewrite:
; X86-NOT:     jmpl
entry:
  %i0 = load i64, i64* %p
  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
  %target0 = load i8*, i8** %target.i0
  indirectbr i8* %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, i64* %sink
  br label %latch

bb1:
  store volatile i64 1, i64* %sink
  br label %latch

bb2:
  store volatile i64 2, i64* %sink
  br label %latch

bb3:
  store volatile i64 3, i64* %sink
  br label %latch

bb4:
  store volatile i64 4, i64* %sink
  br label %latch

bb5:
  store volatile i64 5, i64* %sink
  br label %latch

bb6:
  store volatile i64 6, i64* %sink
  br label %latch

bb7:
  store volatile i64 7, i64* %sink
  br label %latch

bb8:
  store volatile i64 8, i64* %sink
  br label %latch

bb9:
  store volatile i64 9, i64* %sink
  br label %latch

latch:
  %i.next = load i64, i64* %p
  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
  %target.next = load i8*, i8** %target.i.next
  ; Potentially hit a full 10 successors here so that even if we rewrite as
  ; a switch it will try to be lowered with a jump table.
  indirectbr i8* %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

; Lastly check that the necessary thunks were emitted.
;
; X64-LABEL:         .section        .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
; X64-NEXT:          .hidden __llvm_retpoline_r11
; X64-NEXT:          .weak   __llvm_retpoline_r11
; X64:       __llvm_retpoline_r11:
; X64-NEXT:  # {{.*}}                                # %entry
; X64-NEXT:          callq   [[CALL_TARGET:.*]]
; X64-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X64-NEXT:                                          # %entry
; X64-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X64-NEXT:          pause
; X64-NEXT:          lfence
; X64-NEXT:          jmp     [[CAPTURE_SPEC]]
; X64-NEXT:          .p2align        4, 0x90
; X64-NEXT:  {{.*}}                                  # Block address taken
; X64-NEXT:                                          # %entry
; X64-NEXT:  [[CALL_TARGET]]:
; X64-NEXT:          movq    %r11, (%rsp)
; X64-NEXT:          retq
;
; X86-LABEL:         .section        .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
; X86-NEXT:          .hidden __llvm_retpoline_eax
; X86-NEXT:          .weak   __llvm_retpoline_eax
; X86:       __llvm_retpoline_eax:
; X86-NEXT:  # {{.*}}                                # %entry
; X86-NEXT:          calll   [[CALL_TARGET:.*]]
; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X86-NEXT:          pause
; X86-NEXT:          lfence
; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
; X86-NEXT:          .p2align        4, 0x90
; X86-NEXT:  {{.*}}                                  # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:  [[CALL_TARGET]]:
; X86-NEXT:          movl    %eax, (%esp)
; X86-NEXT:          retl
;
; X86-LABEL:         .section        .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
; X86-NEXT:          .hidden __llvm_retpoline_ecx
; X86-NEXT:          .weak   __llvm_retpoline_ecx
; X86:       __llvm_retpoline_ecx:
; X86-NEXT:  # {{.*}}                                # %entry
; X86-NEXT:          calll   [[CALL_TARGET:.*]]
; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X86-NEXT:          pause
; X86-NEXT:          lfence
; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
; X86-NEXT:          .p2align        4, 0x90
; X86-NEXT:  {{.*}}                                  # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:  [[CALL_TARGET]]:
; X86-NEXT:          movl    %ecx, (%esp)
; X86-NEXT:          retl
;
; X86-LABEL:         .section        .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
; X86-NEXT:          .hidden __llvm_retpoline_edx
; X86-NEXT:          .weak   __llvm_retpoline_edx
; X86:       __llvm_retpoline_edx:
; X86-NEXT:  # {{.*}}                                # %entry
; X86-NEXT:          calll   [[CALL_TARGET:.*]]
; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X86-NEXT:          pause
; X86-NEXT:          lfence
; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
; X86-NEXT:          .p2align        4, 0x90
; X86-NEXT:  {{.*}}                                  # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:  [[CALL_TARGET]]:
; X86-NEXT:          movl    %edx, (%esp)
; X86-NEXT:          retl
;
; X86-LABEL:         .section        .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
; X86-NEXT:          .hidden __llvm_retpoline_edi
; X86-NEXT:          .weak   __llvm_retpoline_edi
; X86:       __llvm_retpoline_edi:
; X86-NEXT:  # {{.*}}                                # %entry
; X86-NEXT:          calll   [[CALL_TARGET:.*]]
; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X86-NEXT:          pause
; X86-NEXT:          lfence
; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
; X86-NEXT:          .p2align        4, 0x90
; X86-NEXT:  {{.*}}                                  # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:  [[CALL_TARGET]]:
; X86-NEXT:          movl    %edi, (%esp)
; X86-NEXT:          retl


attributes #0 = { "target-features"="+retpoline-indirect-calls" }
attributes #1 = { "target-features"="+retpoline-indirect-calls,+retpoline-indirect-branches" }
attributes #2 = { nonlazybind }