; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr | FileCheck %s -check-prefix=WIN_X64
; RUN: llc < %s -mtriple=x86_64-pc-linux         | FileCheck %s -check-prefix=LINUX

; By default, windows CoreCLR requires an inline prologue stack expansion check
; if 4096 bytes or more are allocated on the stack.
;
; Throughout this file the load `movq %gs:16, ...` is used as the marker for
; the inline probe sequence: WIN_X64 expects it where a probe is required and
; LINUX never expects it. Every function is anchored with a -LABEL directive
; so that each group of checks (in particular the -NOT checks) is matched
; against the output of the function it belongs to rather than against the
; tail of the preceding one.

; Prolog stack allocation >= 4096 bytes will require the probe sequence.
; This is the only function whose full probe sequence is checked instruction
; by instruction.
define i32 @main4k() nounwind {
entry:
; WIN_X64-LABEL: main4k:
; WIN_X64: # BB#0:
; WIN_X64:      movl    $4096, %eax
; WIN_X64:      movq    %rcx, 8(%rsp)
; WIN_X64:      movq    %rdx, 16(%rsp)
; WIN_X64:      xorq    %rcx, %rcx
; WIN_X64:      movq    %rsp, %rdx
; WIN_X64:      subq    %rax, %rdx
; WIN_X64:      cmovbq  %rcx, %rdx
; WIN_X64:      movq    %gs:16, %rcx
; WIN_X64:      cmpq    %rcx, %rdx
; WIN_X64:      jae     .LBB0_3
; WIN_X64: # BB#1:
; WIN_X64:      andq    $-4096, %rdx
; WIN_X64: .LBB0_2:
; WIN_X64:      leaq    -4096(%rcx), %rcx
; WIN_X64:      movb    $0, (%rcx)
; WIN_X64:      cmpq    %rcx, %rdx
; WIN_X64:      jne     .LBB0_2
; WIN_X64: .LBB0_3:
; WIN_X64:      movq    8(%rsp), %rcx
; WIN_X64:      movq    16(%rsp), %rdx
; WIN_X64:      subq    %rax, %rsp
; WIN_X64:      xorl    %eax, %eax
; WIN_X64:      addq    $4096, %rsp
; WIN_X64:      retq
; LINUX-LABEL: main4k:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with frame pointer
define i32 @main4k_frame() nounwind "no-frame-pointer-elim"="true" {
entry:
; WIN_X64-LABEL: main4k_frame:
; WIN_X64:      movq    %rcx, 16(%rsp)
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_frame:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with INT args
define i32 @main4k_intargs(i32 %x, i32 %y) nounwind {
entry:
; WIN_X64-LABEL: main4k_intargs:
; WIN_X64:      movq    %rcx, 8(%rsp)
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_intargs:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  %t = add i32 %x, %y
  ret i32 %t
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with FP regs
define i32 @main4k_fpargs(double %x, double %y) nounwind {
entry:
; WIN_X64-LABEL: main4k_fpargs:
; WIN_X64:      movq    %rcx, 8(%rsp)
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_fpargs:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with mixed regs
define i32 @main4k_mixargs(double %x, i32 %y) nounwind {
entry:
; WIN_X64-LABEL: main4k_mixargs:
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_mixargs:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 %y
}

; Make sure we don't emit the probe for a smaller prolog stack allocation.
define i32 @main128() nounwind {
entry:
; WIN_X64-LABEL: main128:
; WIN_X64-NOT:  movq    %gs:16, %rcx
; WIN_X64:      retq
; LINUX-LABEL: main128:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [128 x i8]
  ret i32 0
}

; Make sure the probe sequence follows the target rather than the calling
; convention: a function using the Win64 calling convention still gets the
; probe on windows CoreCLR, and does not get it when not on windows.
define x86_64_win64cc i32 @main4k_win64() nounwind {
entry:
; WIN_X64-LABEL: main4k_win64:
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_win64:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; External callee; taking the address of each alloca keeps it alive.
declare i32 @bar(i8*) nounwind

; Within-body inline probe expansion
; The first alloca has a constant size (1024); the second is sized by %n. On
; WIN_X64 the probe marker is expected between the two calls to bar, on LINUX
; it must not appear there.
define x86_64_win64cc i32 @main4k_alloca(i64 %n) nounwind {
entry:
; WIN_X64-LABEL: main4k_alloca:
; WIN_X64:      callq   bar
; WIN_X64:      movq    %gs:16, {{%r.*}}
; WIN_X64:      callq   bar
; LINUX-LABEL: main4k_alloca:
; LINUX:        callq   bar
; LINUX-NOT:    movq    %gs:16, {{%r.*}}
; LINUX:        callq   bar
  %a = alloca i8, i64 1024
  %ra = call i32 @bar(i8* %a) nounwind
  %b = alloca i8, i64 %n
  %rb = call i32 @bar(i8* %b) nounwind
  %r = add i32 %ra, %rb
  ret i32 %r
}

; Influence of stack-probe-size attribute
; Note this is not exposed in coreclr
; With "stack-probe-size"="8192" a 4096-byte allocation must not be probed.
define i32 @test_probe_size() "stack-probe-size"="8192" nounwind {
; WIN_X64-LABEL: test_probe_size:
; WIN_X64-NOT:  movq    %gs:16, %rcx
; WIN_X64:      retq
; LINUX-LABEL: test_probe_size:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}