; Retpoline lowering test. Every llc invocation below pipes its assembly into
; FileCheck with two implicit-check-not patterns ("jmp.*\*" and "call.*\*"),
; so any output line matching either pattern fails the test even where no
; explicit directive covers it. The x86_64 and i686 triples are each run at
; the default optimization level and again with -O0.
; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST

; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST

; Directly called external function, used as the non-indirect baseline.
declare void @bar(i32)

; Test a simple indirect call and tail call.
; Attribute group #0 (defined at the end of this file) turns on the
; "+retpoline" target feature for this function.
define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
entry:
  ; Alternate direct calls to @bar with indirect calls through %fp; only the
  ; last call is immediately followed by the return.
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  ret void
}

; x86-64, default optimization: the function pointer and argument are copied
; into registers captured as fp / x, each indirect call loads fp into %r11 and
; goes through __llvm_retpoline_r11, and the final one is a jmp tagged
; TAILCALL.
; X64-LABEL: icall_reg:
; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
; X64-DAG: movl %esi, %[[x:[^ ]*]]
; X64: movl %esi, %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movl %[[x]], %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; x86-64 at -O0: only the call/jmp sequence is pinned, not register moves.
; X64FAST-LABEL: icall_reg:
; X64FAST: callq bar
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: callq bar
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; i686, default optimization: arguments are read from the stack, the thunk
; used is __llvm_retpoline_eax, and no line tagged TAILCALL may follow the
; last thunk call.
; X86-LABEL: icall_reg:
; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86-NOT: # TAILCALL

; i686 at -O0: four calls, none of them checked as a tail call.
; X86FAST-LABEL: icall_reg:
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax


; External global holding the function pointer loaded by @icall_global_fp.
@global_fp = external global void (i32)*

; Test an indirect call through a global variable.
; The %fpp parameter is not referenced in the body.
define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
  ; The pointer is loaded from @global_fp separately before each call: first a
  ; plain call, then a tail call.
  %fp1 = load void (i32)*, void (i32)** @global_fp
  call void %fp1(i32 %x)
  %fp2 = load void (i32)*, void (i32)** @global_fp
  tail call void %fp2(i32 %x)
  ret void
}

; x86-64: global_fp is loaded RIP-relative straight into %r11 before each
; thunk transfer.
; X64-LABEL: icall_global_fp:
; X64-DAG: movl %edi, %[[x:[^ ]*]]
; X64-DAG: movq global_fp(%rip), %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq global_fp(%rip), %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_global_fp:
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; i686: global_fp is loaded into %eax; the first call pushes the argument and
; pops it afterwards, the second transfer is a jmp tagged TAILCALL.
; X86-LABEL: icall_global_fp:
; X86: movl global_fp, %eax
; X86: pushl 4(%esp)
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl global_fp, %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: icall_global_fp:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL


; Object layout for the vtable test: a single field pointing at an array of
; function pointers that each take a %struct.Foo*.
%struct.Foo = type { void (%struct.Foo*)** }

; Test an indirect call through a vtable.
define void @vcall(%struct.Foo* %obj) #0 {
  ; Load the vtable pointer from field 0 of %obj, then the function pointer in
  ; slot 1 of that table, and call it twice with %obj as the argument.
  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
  tail call void %fp(%struct.Foo* %obj)
  tail call void %fp(%struct.Foo* %obj)
  ret void
}

; x86-64: slot 1 is read at offset 8 from the vtable pointer; the loaded
; pointer is kept in a register (captured as fp) and moved into %r11 before
; each transfer to the thunk.
; X64-LABEL: vcall:
; X64: movq %rdi, %[[obj:[^ ]*]]
; X64: movq (%rdi), %[[vptr:[^ ]*]]
; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movq %[[obj]], %rdi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: vcall:
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; i686: slot 1 is read at offset 4 from the vtable pointer and the thunk used
; is __llvm_retpoline_eax.
; X86-LABEL: vcall:
; X86: movl 8(%esp), %[[obj:[^ ]*]]
; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]
; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86: movl %[[fp]], %eax
; X86: pushl %[[obj]]
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl %[[fp]], %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: vcall:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL


declare void @direct_callee()

; A direct tail call in a retpoline-enabled function: all four configurations
; expect a plain jmp to the callee symbol rather than a thunk.
define void @direct_tail() #0 {
  tail call void @direct_callee()
  ret void
}

; X64-LABEL: direct_tail:
; X64: jmp direct_callee # TAILCALL
; X64FAST-LABEL: direct_tail:
; X64FAST: jmp direct_callee # TAILCALL
; X86-LABEL: direct_tail:
; X86: jmp direct_callee # TAILCALL
; X86FAST-LABEL: direct_tail:
; X86FAST: jmp direct_callee # TAILCALL


; Attribute group #1 (defined at the end of this file) is nonlazybind.
declare void @nonlazybind_callee() #1

; Calls to a nonlazybind callee. The x86-64 checks expect the callee address
; to be loaded from its GOTPCREL slot and the transfer to go through the r11
; thunk; the i686 checks expect direct calls to the @PLT symbol.
define void @nonlazybind_caller() #0 {
  call void @nonlazybind_callee()
  tail call void @nonlazybind_callee()
  ret void
}

; X64-LABEL: nonlazybind_caller:
; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
; X64: movq %[[REG]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movq %[[REG]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: nonlazybind_caller:
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: nonlazybind_caller:
; X86: calll nonlazybind_callee@PLT
; X86: jmp nonlazybind_callee@PLT # TAILCALL
; X86FAST-LABEL: nonlazybind_caller:
; X86FAST: calll nonlazybind_callee@PLT
; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL


; Table of the ten block addresses bb0..bb9 of @indirectbr_rewrite, indexed by
; the value that function loads from its %p argument.
@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb1),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb2),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb3),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb4),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb5),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb6),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb7),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb8),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb9)]

; Check that when retpolines are enabled a function with indirectbr gets
; rewritten to use switch, and that in turn doesn't get lowered as a jump
; table.
define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
; Between this function's label and the next checked label, the optimized
; x86-64 output must contain no jmpq and the optimized i686 output no jmpl.
; X64-LABEL: indirectbr_rewrite:
; X64-NOT: jmpq
; X86-LABEL: indirectbr_rewrite:
; X86-NOT: jmpl
entry:
  ; Load an index from %p, fetch the matching block address from the targets
  ; table, and branch to it. Only bb1 and bb3 are listed as destinations here.
  %i0 = load i64, i64* %p
  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
  %target0 = load i8*, i8** %target.i0
  indirectbr i8* %target0, [label %bb1, label %bb3]

; Each bbN block does a volatile store of its own number N to %sink and then
; falls into the shared latch.
bb0:
  store volatile i64 0, i64* %sink
  br label %latch

bb1:
  store volatile i64 1, i64* %sink
  br label %latch

bb2:
  store volatile i64 2, i64* %sink
  br label %latch

bb3:
  store volatile i64 3, i64* %sink
  br label %latch

bb4:
  store volatile i64 4, i64* %sink
  br label %latch

bb5:
  store volatile i64 5, i64* %sink
  br label %latch

bb6:
  store volatile i64 6, i64* %sink
  br label %latch

bb7:
  store volatile i64 7, i64* %sink
  br label %latch

bb8:
  store volatile i64 8, i64* %sink
  br label %latch

bb9:
  store volatile i64 9, i64* %sink
  br label %latch

latch:
  ; Reload the index from %p and dispatch again; the function has no ret, so
  ; control only ever leaves this block through the indirectbr.
  %i.next = load i64, i64* %p
  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
  %target.next = load i8*, i8** %target.i.next
  ; Potentially hit a full 10 successors here so that even if we rewrite as
  ; a switch it will try to be lowered with a jump table.
  indirectbr i8* %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

; Lastly check that the necessary thunks were emitted.
;
; Every thunk below is checked for the same shape: its own comdat text
; section, hidden and weak symbol directives, a call to an inner label, a
; capture loop (pause, lfence, jmp back to the loop label), and finally the
; inner label's block, which stores the thunk's register to the top-of-stack
; slot and returns. Only the optimized x86-64 and i686 outputs are checked
; for thunks; the two -O0 prefixes have no directives here.
;
; x86-64 thunk, target taken from %r11.
; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
; X64-NEXT: .hidden __llvm_retpoline_r11
; X64-NEXT: .weak __llvm_retpoline_r11
; X64: __llvm_retpoline_r11:
; X64-NEXT: # {{.*}} # %entry
; X64-NEXT: callq [[CALL_TARGET:.*]]
; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X64-NEXT: # %entry
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: pause
; X64-NEXT: lfence
; X64-NEXT: jmp [[CAPTURE_SPEC]]
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: [[CALL_TARGET]]: # Block address taken
; X64-NEXT: # %entry
; X64-NEXT: movq %r11, (%rsp)
; X64-NEXT: retq
;
; i686 thunk, target taken from %eax.
; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
; X86-NEXT: .hidden __llvm_retpoline_eax
; X86-NEXT: .weak __llvm_retpoline_eax
; X86: __llvm_retpoline_eax:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: retl
;
; i686 thunk, target taken from %ecx.
; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
; X86-NEXT: .hidden __llvm_retpoline_ecx
; X86-NEXT: .weak __llvm_retpoline_ecx
; X86: __llvm_retpoline_ecx:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %ecx, (%esp)
; X86-NEXT: retl
;
; i686 thunk, target taken from %edx.
; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
; X86-NEXT: .hidden __llvm_retpoline_edx
; X86-NEXT: .weak __llvm_retpoline_edx
; X86: __llvm_retpoline_edx:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: retl
;
; i686 thunk, target taken from %edi.
; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
; X86-NEXT: .hidden __llvm_retpoline_edi
; X86-NEXT: .weak __llvm_retpoline_edi
; X86: __llvm_retpoline_edi:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: movl %edi, (%esp)
; X86-NEXT: retl


; #0 is attached to every function definition in this file and enables the
; retpoline target feature; #1 is attached only to @nonlazybind_callee.
attributes #0 = { "target-features"="+retpoline" }
attributes #1 = { nonlazybind }