; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; TLS functions were wrongly modeled and, after fixing that, shrink-wrapping
; cannot help here. To achieve the expected lowering, we need to play
; tricks similar to the AArch64 fast TLS calling convention (r255821).
; Applying tricks on x86-64 similar to r255821.
; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s --check-prefix=CHECK-O0
%struct.S = type { i8 }

@sg = internal thread_local global %struct.S zeroinitializer, align 1
@__dso_handle = external global i8
@__tls_guard = internal thread_local unnamed_addr global i1 false
@sum1 = internal thread_local global i32 0, align 4

declare void @_ZN1SC1Ev(%struct.S*)
declare void @_ZN1SD1Ev(%struct.S*)
declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)

; Every GPR should be saved - except rdi, rax, and rsp
; In the two optimized runs, none of the listed registers may be pushed before
; the first call or popped after the last one.
; CHECK-LABEL: _ZTW2sg
; CHECK-NOT: pushq %r11
; CHECK-NOT: pushq %r10
; CHECK-NOT: pushq %r9
; CHECK-NOT: pushq %r8
; CHECK-NOT: pushq %rsi
; CHECK-NOT: pushq %rdx
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
; CHECK: jne
; CHECK: callq
; CHECK: tlv_atexit
; CHECK: callq
; CHECK-NOT: popq %rbx
; CHECK-NOT: popq %rcx
; CHECK-NOT: popq %rdx
; CHECK-NOT: popq %rsi
; CHECK-NOT: popq %r8
; CHECK-NOT: popq %r9
; CHECK-NOT: popq %r10
; CHECK-NOT: popq %r11

; In the -O0 run the same function is expected to push r11..rcx on entry and
; pop them in the reverse order after the calls.
; CHECK-O0-LABEL: _ZTW2sg
; CHECK-O0: pushq %r11
; CHECK-O0: pushq %r10
; CHECK-O0: pushq %r9
; CHECK-O0: pushq %r8
; CHECK-O0: pushq %rsi
; CHECK-O0: pushq %rdx
; CHECK-O0: pushq %rcx
; CHECK-O0: callq
; CHECK-O0: jne
; CHECK-O0: callq
; CHECK-O0: tlv_atexit
; CHECK-O0: callq
; CHECK-O0: popq %rcx
; CHECK-O0: popq %rdx
; CHECK-O0: popq %rsi
; CHECK-O0: popq %r8
; CHECK-O0: popq %r9
; CHECK-O0: popq %r10
; CHECK-O0: popq %r11
define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
  %.b.i = load i1, i1* @__tls_guard, align 1
  br i1 %.b.i, label %__tls_init.exit, label %init.i

init.i:
  store i1 true, i1* @__tls_guard, align 1
  tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2
  %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2
  br label %__tls_init.exit

__tls_init.exit:
  ret %struct.S* @sg
}

; This function only returns the address of @sum1. No run may push any of the
; listed registers before the callq, and the -O0 run must not emit a movq
; naming them either.
; CHECK-LABEL: _ZTW4sum1
; CHECK-NOT: pushq %r11
; CHECK-NOT: pushq %r10
; CHECK-NOT: pushq %r9
; CHECK-NOT: pushq %r8
; CHECK-NOT: pushq %rsi
; CHECK-NOT: pushq %rdx
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
; CHECK-O0-LABEL: _ZTW4sum1
; CHECK-O0-NOT: pushq %r11
; CHECK-O0-NOT: pushq %r10
; CHECK-O0-NOT: pushq %r9
; CHECK-O0-NOT: pushq %r8
; CHECK-O0-NOT: pushq %rsi
; CHECK-O0-NOT: pushq %rdx
; CHECK-O0-NOT: pushq %rcx
; CHECK-O0-NOT: pushq %rbx
; CHECK-O0-NOT: movq %r11
; CHECK-O0-NOT: movq %r10
; CHECK-O0-NOT: movq %r9
; CHECK-O0-NOT: movq %r8
; CHECK-O0-NOT: movq %rsi
; CHECK-O0-NOT: movq %rdx
; CHECK-O0-NOT: movq %rcx
; CHECK-O0-NOT: movq %rbx
; CHECK-O0: callq
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
  ret i32* @sum1
}

; Make sure at O0 we don't overwrite RBP.
; After the frame is set up (pushq %rbp; movq %rsp, %rbp), no register may be
; stored to (%rbp).
; CHECK-O0-LABEL: _ZTW4sum2
; CHECK-O0: pushq %rbp
; CHECK-O0: movq %rsp, %rbp
; CHECK-O0-NOT: movq %r{{.*}}, (%rbp)
define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
  ret i32* @sum1
}

; Make sure at O0, we don't generate spilling/reloading of the CSRs.
; CHECK-O0-LABEL: tls_test2
; CHECK-O0-NOT: pushq %r11
; CHECK-O0-NOT: pushq %r10
; CHECK-O0-NOT: pushq %r9
; CHECK-O0-NOT: pushq %r8
; CHECK-O0-NOT: pushq %rsi
; CHECK-O0-NOT: pushq %rdx
; CHECK-O0: callq {{.*}}tls_helper
; CHECK-O0-NOT: popq %rdx
; CHECK-O0-NOT: popq %rsi
; CHECK-O0-NOT: popq %r8
; CHECK-O0-NOT: popq %r9
; CHECK-O0-NOT: popq %r10
; CHECK-O0-NOT: popq %r11
; CHECK-O0: ret
%class.C = type { i32 }
@tC = internal thread_local global %class.C zeroinitializer, align 4
declare cxx_fast_tlscc void @tls_helper()
; Caller and callee both use cxx_fast_tlscc here.
define cxx_fast_tlscc %class.C* @tls_test2() #1 {
  call cxx_fast_tlscc void @tls_helper()
  ret %class.C* @tC
}

; Make sure we do not allow tail call when caller and callee have different
; calling conventions.
declare %class.C* @_ZN1CD1Ev(%class.C* readnone returned %this)
; The label pattern ends with a colon so that it anchors on the definition of
; tls_test itself; a bare "tls_test" is also a prefix of tls_test2, which is
; defined earlier in this file.
; CHECK-LABEL: tls_test:
; CHECK: callq {{.*}}tlv_atexit
define cxx_fast_tlscc void @tls_test() {
entry:
  store i32 0, i32* getelementptr inbounds (%class.C, %class.C* @tC, i64 0, i32 0), align 4
  %0 = tail call i32 @_tlv_atexit(void (i8*)* bitcast (%class.C* (%class.C*)* @_ZN1CD1Ev to void (i8*)*), i8* bitcast (%class.C* @tC to i8*), i8* nonnull @__dso_handle) #1
  ret void
}

@ssp_var = internal thread_local global i8 0, align 1

; Same no-push expectations as above, for a function carrying the sspreq
; attribute (attribute group #2).
; CHECK-LABEL: test_ssp
; CHECK-NOT: pushq %r11
; CHECK-NOT: pushq %r10
; CHECK-NOT: pushq %r9
; CHECK-NOT: pushq %r8
; CHECK-NOT: pushq %rsi
; CHECK-NOT: pushq %rdx
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
define cxx_fast_tlscc nonnull i8* @test_ssp() #2 {
  ret i8* @ssp_var
}
attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
attributes #1 = { nounwind }
attributes #2 = { nounwind sspreq }