; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=NORMAL -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=NOPUSH
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=NOPUSH -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH

; External callees. The tests call these with more arguments than are passed
; in registers, so the trailing arguments end up on the stack.
declare void @seven_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g)
declare void @ten_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, i64 %h, i32 %i, i64 %j)
declare void @ten_params_ptr(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, i8* %h, i32 %i, i64 %j)
declare void @cannot_push(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i)

; We should get pushes for the last 4 parameters. Test that the
; in-register parameters are all in the right places, and check
; that the stack manipulations are correct and correctly
; described by the DWARF directives. Test that the switch
; to disable the optimization works and that the optimization
; doesn't kick in on Windows64 where it is not allowed.
; NORMAL-LABEL: test1
; NORMAL: pushq
; NORMAL-DAG: movl $1, %edi
; NORMAL-DAG: movl $2, %esi
; NORMAL-DAG: movl $3, %edx
; NORMAL-DAG: movl $4, %ecx
; NORMAL-DAG: movl $5, %r8d
; NORMAL-DAG: movl $6, %r9d
; NORMAL: pushq $10
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $9
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $8
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $7
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: callq ten_params
; NORMAL: addq $32, %rsp
; NORMAL: .cfi_adjust_cfa_offset -32
; NORMAL: popq
; NORMAL: retq
; NOPUSH-LABEL: test1
; NOPUSH-NOT: pushq
; NOPUSH: retq
define void @test1() {
entry:
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  ret void
}

; The presence of a frame pointer should not prevent pushes. But we
; don't need the CFI directives in that case.
; Also check that we generate the right pushes for >8bit immediates.
; NORMALFP-LABEL: test2
; NORMALFP: pushq $10000
; NORMALFP-NEXT: pushq $9000
; NORMALFP-NEXT: pushq $8000
; NORMALFP-NEXT: pushq $7000
; NORMALFP-NEXT: callq {{_?}}ten_params
define void @test2(i32 %k) {
entry:
  ; The dynamically sized alloca is what brings a frame pointer into play here.
  %a = alloca i32, i32 %k
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7000, i64 8000, i32 9000, i64 10000)
  ret void
}

; Parameters 7 & 8 should push a 64-bit register.
; TODO: Note that the regular expressions disallow r8 and r9. That's fine for
;       now, because the pushes will always follow the moves into r8 and r9.
;       Eventually, though, we want to be able to schedule the pushes better.
;       In this example, it will save two copies, because we have to move the
;       incoming parameters out of %rdi and %rsi to make room for the outgoing
;       parameters.
; NORMAL-LABEL: test3
; NORMAL: pushq $10000
; NORMAL: pushq $9000
; NORMAL: pushq %r{{..}}
; NORMAL: pushq %r{{..}}
; NORMAL: callq ten_params
define void @test3(i32 %a, i64 %b) {
entry:
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %a, i64 %b, i32 9000, i64 10000)
  ret void
}

; Check that we avoid the optimization for just one push.
; NORMAL-LABEL: test4
; NORMAL: movl $7, (%rsp)
; NORMAL: callq seven_params
define void @test4() {
entry:
  call void @seven_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7)
  ret void
}

; Check that pushing link-time constant addresses works correctly.
; NORMAL-LABEL: test5
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $ext
; NORMAL: pushq $7
; NORMAL: callq ten_params_ptr
@ext = external constant i8
define void @test5() {
entry:
  call void @ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i8* @ext, i32 9, i64 10)
  ret void
}

; Check that we fuse 64-bit loads but not 32-bit loads into PUSH mem.
; NORMAL-LABEL: test6
; NORMAL: movq %rsi, [[REG64:%.+]]
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq ([[REG64]])
; NORMAL: pushq {{%r..}}
; NORMAL: callq ten_params
define void @test6(i32* %p32, i64* %p64) {
entry:
  %v32 = load i32, i32* %p32
  %v64 = load i64, i64* %p64
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %v32, i64 %v64, i32 9, i64 10)
  ret void
}

; Fold stack-relative loads into the push with correct offsets.
; Do the same for an indirect call whose address is loaded from the stack.
; On entry, %p7 is at 8(%rsp) and %p8 is at 16(%rsp). Prior to the call
; sequence, 72 bytes are allocated to the stack, 48 for register saves and
; 24 for local storage and alignment, so %p7 is at 80(%rsp) and %p8 is at
; 88(%rsp).
; The call address can be stored anywhere in the local space but
; happens to be stored at 8(%rsp). Each push bumps these offsets up by
; 8 bytes.
; NORMAL-LABEL: test7
; NORMAL: movq %r{{.*}}, 8(%rsp) {{.*Spill$}}
; NORMAL: pushq 88(%rsp)
; NORMAL: pushq $9
; NORMAL: pushq 96(%rsp)
; NORMAL: pushq $7
; NORMAL: callq *40(%rsp)
define void @test7(i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8) {
entry:
  %stack_fptr = alloca void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*
  store void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)* @ten_params, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %stack_fptr
  %ten_params_ptr = load volatile void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %stack_fptr
  ; The asm clobbers every general-purpose register other than the stack
  ; pointer, so the loaded call address has to live in a stack slot across it.
  call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  call void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64) %ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %p7, i32 9, i64 %p8)
  ret void
}

; We can't fold the load from the global into the push because of
; interference from the store.
; NORMAL-LABEL: test8
; NORMAL: movq the_global(%rip), [[REG:%r.+]]
; NORMAL: movq $42, the_global
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq [[REG]]
; NORMAL: pushq $7
; NORMAL: callq ten_params
@the_global = external global i64
define void @test8() {
  %myload = load i64, i64* @the_global
  store i64 42, i64* @the_global
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %myload, i32 9, i64 10)
  ret void
}


; Converting one function call to use pushes negatively affects
; other calls that pass arguments on the stack without pushes.
; If the cost outweighs the benefit, avoid using pushes.
; NORMAL-LABEL: test9
; NORMAL: callq cannot_push
; NORMAL-NOT: push
; NORMAL: callq ten_params
define void @test9(float %p1) {
  ; Two calls that cannot use pushes surround a single call that could.
  call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
  ret void
}

; But if the benefit outweighs the cost, use pushes.
; NORMAL-LABEL: test10
; NORMAL: callq cannot_push
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $8
; NORMAL: pushq $7
; NORMAL: callq ten_params
define void @test10(float %p1) {
  ; Here the ratio is reversed: two push-eligible calls around one that is not.
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  ret void
}