; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST

; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST

declare void @bar(i32)

; Test a simple indirect call and tail call.
define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
entry:
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  ret void
}

; X64-LABEL: icall_reg:
; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
; X64-DAG: movl %esi, %[[x:[^ ]*]]
; X64: movl %esi, %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movl %[[x]], %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_reg:
; X64FAST: callq bar
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: callq bar
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_reg:
; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86-NOT: # TAILCALL

; X86FAST-LABEL: icall_reg:
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax


@global_fp = external dso_local global void (i32)*

; Test an indirect call through a global variable.
define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
  %fp1 = load void (i32)*, void (i32)** @global_fp
  call void %fp1(i32 %x)
  %fp2 = load void (i32)*, void (i32)** @global_fp
  tail call void %fp2(i32 %x)
  ret void
}

; X64-LABEL: icall_global_fp:
; X64-DAG: movl %edi, %[[x:[^ ]*]]
; X64-DAG: movq global_fp(%rip), %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq global_fp(%rip), %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_global_fp:
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_global_fp:
; X86: movl global_fp, %eax
; X86: pushl 4(%esp)
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl global_fp, %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: icall_global_fp:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL


%struct.Foo = type { void (%struct.Foo*)** }

; Test an indirect call through a vtable.
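; The vtable pointer is the first field of %struct.Foo and the call goes
; through slot 1, so the checks below expect the vptr to be loaded from
; (%obj) and the target from 8(%vptr) on x86-64 (4(%vptr) on i686).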
define void @vcall(%struct.Foo* %obj) #0 {
  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
  tail call void %fp(%struct.Foo* %obj)
  tail call void %fp(%struct.Foo* %obj)
  ret void
}

; X64-LABEL: vcall:
; X64: movq %rdi, %[[obj:[^ ]*]]
; X64: movq (%rdi), %[[vptr:[^ ]*]]
; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movq %[[obj]], %rdi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: vcall:
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: vcall:
; X86: movl 8(%esp), %[[obj:[^ ]*]]
; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]
; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86: movl %[[fp]], %eax
; X86: pushl %[[obj]]
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl %[[fp]], %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: vcall:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL


declare void @direct_callee()

define void @direct_tail() #0 {
  tail call void @direct_callee()
  ret void
}

; X64-LABEL: direct_tail:
; X64: jmp direct_callee@PLT # TAILCALL
; X64FAST-LABEL: direct_tail:
; X64FAST: jmp direct_callee@PLT # TAILCALL
; X86-LABEL: direct_tail:
; X86: jmp direct_callee@PLT # TAILCALL
; X86FAST-LABEL: direct_tail:
; X86FAST: jmp direct_callee@PLT # TAILCALL


declare void @nonlazybind_callee() #2

define void @nonlazybind_caller() #0 {
  call void @nonlazybind_callee()
  tail call void @nonlazybind_callee()
  ret void
}

; X64-LABEL: nonlazybind_caller:
; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
; X64: movq %[[REG]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movq %[[REG]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: nonlazybind_caller:
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: nonlazybind_caller:
; X86: calll nonlazybind_callee@PLT
; X86: jmp nonlazybind_callee@PLT # TAILCALL
; X86FAST-LABEL: nonlazybind_caller:
; X86FAST: calll nonlazybind_callee@PLT
; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL


; Check that a switch gets lowered using a jump table when retpolines are only
; enabled for calls.
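; The jump table produces an indirect "jmpq *"/"jmpl *"; the explicit checks
; below are required because the implicit-check-not patterns in the RUN lines
; would otherwise reject any indirect jump in the output.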
define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
; X64-LABEL: switch_jumptable:
; X64: jmpq *
; X86-LABEL: switch_jumptable:
; X86: jmpl *
entry:
  br label %header

header:
  %i = load volatile i32, i32* %ptr
  switch i32 %i, label %bb0 [
    i32 1, label %bb1
    i32 2, label %bb2
    i32 3, label %bb3
    i32 4, label %bb4
    i32 5, label %bb5
    i32 6, label %bb6
    i32 7, label %bb7
    i32 8, label %bb8
    i32 9, label %bb9
  ]

bb0:
  store volatile i64 0, i64* %sink
  br label %header

bb1:
  store volatile i64 1, i64* %sink
  br label %header

bb2:
  store volatile i64 2, i64* %sink
  br label %header

bb3:
  store volatile i64 3, i64* %sink
  br label %header

bb4:
  store volatile i64 4, i64* %sink
  br label %header

bb5:
  store volatile i64 5, i64* %sink
  br label %header

bb6:
  store volatile i64 6, i64* %sink
  br label %header

bb7:
  store volatile i64 7, i64* %sink
  br label %header

bb8:
  store volatile i64 8, i64* %sink
  br label %header

bb9:
  store volatile i64 9, i64* %sink
  br label %header
}


@indirectbr_preserved.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_preserved, %bb0),
                                                     i8* blockaddress(@indirectbr_preserved, %bb1),
                                                     i8* blockaddress(@indirectbr_preserved, %bb2),
                                                     i8* blockaddress(@indirectbr_preserved, %bb3),
                                                     i8* blockaddress(@indirectbr_preserved, %bb4),
                                                     i8* blockaddress(@indirectbr_preserved, %bb5),
                                                     i8* blockaddress(@indirectbr_preserved, %bb6),
                                                     i8* blockaddress(@indirectbr_preserved, %bb7),
                                                     i8* blockaddress(@indirectbr_preserved, %bb8),
                                                     i8* blockaddress(@indirectbr_preserved, %bb9)]

; Check that we preserve indirectbr when only calls are retpolined.
define void @indirectbr_preserved(i64* readonly %p, i64* %sink) #0 {
; X64-LABEL: indirectbr_preserved:
; X64: jmpq *
; X86-LABEL: indirectbr_preserved:
; X86: jmpl *
entry:
  %i0 = load i64, i64* %p
  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i0
  %target0 = load i8*, i8** %target.i0
  indirectbr i8* %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, i64* %sink
  br label %latch

bb1:
  store volatile i64 1, i64* %sink
  br label %latch

bb2:
  store volatile i64 2, i64* %sink
  br label %latch

bb3:
  store volatile i64 3, i64* %sink
  br label %latch

bb4:
  store volatile i64 4, i64* %sink
  br label %latch

bb5:
  store volatile i64 5, i64* %sink
  br label %latch

bb6:
  store volatile i64 6, i64* %sink
  br label %latch

bb7:
  store volatile i64 7, i64* %sink
  br label %latch

bb8:
  store volatile i64 8, i64* %sink
  br label %latch

bb9:
  store volatile i64 9, i64* %sink
  br label %latch

latch:
  %i.next = load i64, i64* %p
  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i.next
  %target.next = load i8*, i8** %target.i.next
  ; Deliberately use all 10 successors here so that even if the indirectbr is
  ; rewritten as a switch, it will still want to be lowered with a jump table.
  indirectbr i8* %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb1),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb2),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb3),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb4),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb5),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb6),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb7),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb8),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb9)]

; Check that when retpolines are enabled for indirect branches, the indirectbr
; instruction gets rewritten into a switch, and that the switch in turn is not
; lowered as a jump table.
define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #1 {
; X64-LABEL: indirectbr_rewrite:
; X64-NOT: jmpq
; X86-LABEL: indirectbr_rewrite:
; X86-NOT: jmpl
entry:
  %i0 = load i64, i64* %p
  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
  %target0 = load i8*, i8** %target.i0
  indirectbr i8* %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, i64* %sink
  br label %latch

bb1:
  store volatile i64 1, i64* %sink
  br label %latch

bb2:
  store volatile i64 2, i64* %sink
  br label %latch

bb3:
  store volatile i64 3, i64* %sink
  br label %latch

bb4:
  store volatile i64 4, i64* %sink
  br label %latch

bb5:
  store volatile i64 5, i64* %sink
  br label %latch

bb6:
  store volatile i64 6, i64* %sink
  br label %latch

bb7:
  store volatile i64 7, i64* %sink
  br label %latch

bb8:
  store volatile i64 8, i64* %sink
  br label %latch

bb9:
  store volatile i64 9, i64* %sink
  br label %latch

latch:
  %i.next = load i64, i64* %p
  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
  %target.next = load i8*, i8** %target.i.next
  ; Deliberately use all 10 successors here so that even if the indirectbr is
  ; rewritten as a switch, it will still want to be lowered with a jump table.
  indirectbr i8* %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

; Lastly, check that the necessary thunks were emitted.
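; Each thunk uses the same trick: a direct call captures a return address, a
; pause/lfence loop traps any speculative execution of that return, and the
; call target overwrites the captured return address on the stack with the
; register holding the real destination before returning to it.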
;
; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
; X64-NEXT: .hidden __llvm_retpoline_r11
; X64-NEXT: .weak __llvm_retpoline_r11
; X64: __llvm_retpoline_r11:
; X64-NEXT: # {{.*}} # %entry
; X64-NEXT: callq [[CALL_TARGET:.*]]
; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X64-NEXT: # %entry
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: pause
; X64-NEXT: lfence
; X64-NEXT: jmp [[CAPTURE_SPEC]]
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: {{.*}} # Block address taken
; X64-NEXT: # %entry
; X64-NEXT: [[CALL_TARGET]]:
; X64-NEXT: movq %r11, (%rsp)
; X64-NEXT: retq
;
; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
; X86-NEXT: .hidden __llvm_retpoline_eax
; X86-NEXT: .weak __llvm_retpoline_eax
; X86: __llvm_retpoline_eax:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: {{.*}} # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: [[CALL_TARGET]]:
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
; X86-NEXT: .hidden __llvm_retpoline_ecx
; X86-NEXT: .weak __llvm_retpoline_ecx
; X86: __llvm_retpoline_ecx:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: {{.*}} # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: [[CALL_TARGET]]:
; X86-NEXT: movl %ecx, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
; X86-NEXT: .hidden __llvm_retpoline_edx
; X86-NEXT: .weak __llvm_retpoline_edx
; X86: __llvm_retpoline_edx:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: {{.*}} # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: [[CALL_TARGET]]:
; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
; X86-NEXT: .hidden __llvm_retpoline_edi
; X86-NEXT: .weak __llvm_retpoline_edi
; X86: __llvm_retpoline_edi:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: pause
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: {{.*}} # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: [[CALL_TARGET]]:
; X86-NEXT: movl %edi, (%esp)
; X86-NEXT: retl


attributes #0 = { "target-features"="+retpoline-indirect-calls" }
"target-features"="+retpoline-indirect-calls,+retpoline-indirect-branches" } 516attributes #2 = { nonlazybind } 517