; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=NORMAL -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=NOPUSH
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=NOPUSH -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH

; External callees used by the tests below. They are only declared here, so
; every call in this file is a call to an undefined external function.
declare void @seven_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g)
declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
declare void @eightparams16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g, i16 %h)
declare void @eightparams64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
declare void @ten_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, i64 %h, i32 %i, i64 %j)
declare void @ten_params_ptr(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, i8* %h, i32 %i, i64 %j)
declare void @cannot_push(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i)

; We should get pushes for the last 4 parameters. Test that the
; in-register parameters are all in the right places, and check
; that the stack manipulations are correct and correctly
; described by the DWARF directives. Test that the switch
; to disable the optimization works and that the optimization
; doesn't kick in on Windows64 where it is not allowed.
; NORMAL-LABEL: test1
; NORMAL: pushq
; NORMAL-DAG: movl $1, %edi
; NORMAL-DAG: movl $2, %esi
; NORMAL-DAG: movl $3, %edx
; NORMAL-DAG: movl $4, %ecx
; NORMAL-DAG: movl $5, %r8d
; NORMAL-DAG: movl $6, %r9d
; NORMAL: pushq $10
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $9
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $8
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $7
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: callq ten_params
; NORMAL: addq $32, %rsp
; NORMAL: .cfi_adjust_cfa_offset -32
; NORMAL: popq
; NORMAL: retq
; NOPUSH-LABEL: test1
; NOPUSH-NOT: pushq
; NOPUSH: retq
define void @test1() {
entry:
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  ret void
}

; The presence of a frame pointer should not prevent pushes. But we
; don't need the CFI directives in that case.
; Also check that we generate the right pushes for >8bit immediates.
; NORMALFP-LABEL: test2
; NORMALFP: pushq $10000
; NORMALFP-NEXT: pushq $9000
; NORMALFP-NEXT: pushq $8000
; NORMALFP-NEXT: pushq $7000
; NORMALFP-NEXT: callq {{_?}}ten_params
define void @test2(i32 %k) {
entry:
  %a = alloca i32, i32 %k
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7000, i64 8000, i32 9000, i64 10000)
  ret void
}

; Parameters 7 & 8 should push a 64-bit register.
; TODO: Note that the regular expressions disallow r8 and r9. That's fine for
;       now, because the pushes will always follow the moves into r8 and r9.
;       Eventually, though, we want to be able to schedule the pushes better.
;       In this example, it will save two copies, because we have to move the
;       incoming parameters out of %rdi and %rsi to make room for the outgoing
;       parameters.
; NORMAL-LABEL: test3
; NORMAL: pushq $10000
; NORMAL: pushq $9000
; NORMAL: pushq %r{{..}}
; NORMAL: pushq %r{{..}}
; NORMAL: callq ten_params
define void @test3(i32 %a, i64 %b) {
entry:
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %a, i64 %b, i32 9000, i64 10000)
  ret void
}

; Check that we avoid the optimization for just one push.
; NORMAL-LABEL: test4
; NORMAL: movl $7, (%rsp)
; NORMAL: callq seven_params
define void @test4() {
entry:
  call void @seven_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7)
  ret void
}

; Check that pushing link-time constant addresses works correctly
; NORMAL-LABEL: test5
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $ext
; NORMAL: pushq $7
; NORMAL: callq ten_params_ptr
@ext = external dso_local constant i8
define void @test5() {
entry:
  call void @ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i8* @ext, i32 9, i64 10)
  ret void
}

; Check that we fuse 64-bit loads but not 32-bit loads into PUSH mem.
; NORMAL-LABEL: test6
; NORMAL: movq %rsi, [[REG64:%.+]]
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq ([[REG64]])
; NORMAL: pushq {{%r..}}
; NORMAL: callq ten_params
define void @test6(i32* %p32, i64* %p64) {
entry:
  %v32 = load i32, i32* %p32
  %v64 = load i64, i64* %p64
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %v32, i64 %v64, i32 9, i64 10)
  ret void
}

; Fold stack-relative loads into the push with correct offsets.
; Do the same for an indirect call whose address is loaded from the stack.
; On entry, %p7 is at 8(%rsp) and %p8 is at 16(%rsp). Prior to the call
; sequence, 72 bytes are allocated to the stack, 48 for register saves and
; 24 for local storage and alignment, so %p7 is at 80(%rsp) and %p8 is at
; 88(%rsp). The call address can be stored anywhere in the local space but
; happens to be stored at 8(%rsp). Each push bumps these offsets up by
; 8 bytes.
; NORMAL-LABEL: test7
; NORMAL: movq %r{{.*}}, 8(%rsp) {{.*Spill$}}
; NORMAL: pushq 88(%rsp)
; NORMAL: pushq $9
; NORMAL: pushq 96(%rsp)
; NORMAL: pushq $7
; NORMAL: callq *40(%rsp)
define void @test7(i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8) {
entry:
  %stack_fptr = alloca void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*
  store void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)* @ten_params, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %stack_fptr
  %ten_params_ptr = load volatile void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %stack_fptr
  ; The inline asm clobbers the listed general-purpose registers, so the
  ; loaded call address cannot simply stay in a register across it.
  call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  call void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64) %ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %p7, i32 9, i64 %p8)
  ret void
}

; We can't fold the load from the global into the push because of
; interference from the store
; NORMAL-LABEL: test8
; NORMAL: movq the_global(%rip), [[REG:%r.+]]
; NORMAL: movq $42, the_global
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq [[REG]]
; NORMAL: pushq $7
; NORMAL: callq ten_params
@the_global = external dso_local global i64
define void @test8() {
  %myload = load i64, i64* @the_global
  store i64 42, i64* @the_global
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %myload, i32 9, i64 10)
  ret void
}


; Converting one function call to use pushes negatively affects
; other calls that pass arguments on the stack without pushes.
; If the cost outweighs the benefit, avoid using pushes.
; NORMAL-LABEL: test9
; NORMAL: callq cannot_push
; NORMAL-NOT: push
; NORMAL: callq ten_params
define void @test9(float %p1) {
  call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
  ret void
}

; But if the benefit outweighs the cost, use pushes.
; NORMAL-LABEL: test10
; NORMAL: callq cannot_push
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $8
; NORMAL: pushq $7
; NORMAL: callq ten_params
define void @test10(float %p1) {
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  ret void
}

; pr34863_16: in a minsize function, the two trailing constant arguments
; (0 and -1) are expected to be pushed as immediates directly before the
; call. The pattern accepts the all-ones value printed as -1 or as 65535.
; NORMAL-LABEL: pr34863_16
; NORMAL: pushq ${{-1|65535}}
; NORMAL-NEXT: pushq $0
; NORMAL-NEXT: call
define void @pr34863_16(i16 %x) minsize nounwind {
entry:
  tail call void @eightparams16(i16 %x, i16 %x, i16 %x, i16 %x, i16 %x, i16 %x, i16 0, i16 -1)
  ret void
}

; pr34863_32: the same expectation with i32 arguments.
; NORMAL-LABEL: pr34863_32
; NORMAL: pushq ${{-1|65535}}
; NORMAL-NEXT: pushq $0
; NORMAL-NEXT: call
define void @pr34863_32(i32 %x) minsize nounwind {
entry:
  tail call void @eightparams(i32 %x, i32 %x, i32 %x, i32 %x, i32 %x, i32 %x, i32 0, i32 -1)
  ret void
}

; pr34863_64: the same expectation with i64 arguments.
; NORMAL-LABEL: pr34863_64
; NORMAL: pushq ${{-1|65535}}
; NORMAL-NEXT: pushq $0
; NORMAL-NEXT: call
define void @pr34863_64(i64 %x) minsize nounwind {
entry:
  tail call void @eightparams64(i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 0, i64 -1)
  ret void
}