; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=NORMAL -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=NOPUSH
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=NOPUSH -check-prefix=NORMALFP
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH

; Check prefixes selected by the invocations above:
;   NORMAL    linux, default options
;   NORMALFP  linux and darwin (used by the frame-pointer test)
;   NOPUSH    windows, darwin, and linux with -no-x86-call-frame-opt

; External callees used by the tests below. The checks in this file expect the
; first six integer arguments in registers, so @seven_params has one stack
; argument and the ten-parameter callees have four.
declare void @seven_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g)
declare void @ten_params(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, i64 %h, i32 %i, i64 %j)
; Same shape as @ten_params, but the eighth parameter is a pointer.
declare void @ten_params_ptr(i32 %a, i64 %b, i32 %c, i64 %d, i32 %e, i64 %f, i32 %g, i8* %h, i32 %i, i64 %j)
; Nine float parameters; the cost/benefit tests use it as the call whose stack
; argument is passed without pushes.
declare void @cannot_push(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i)

; We should get pushes for the last 4 parameters. Test that the
; in-register parameters are all in the right places, and check
; that the stack manipulations are correct and correctly
; described by the DWARF directives. Test that the switch
; to disable the optimization works and that the optimization
; doesn't kick in on Windows64 where it is not allowed.
; NORMAL-LABEL: test1
; NORMAL: pushq
; NORMAL-DAG: movl $1, %edi
; NORMAL-DAG: movl $2, %esi
; NORMAL-DAG: movl $3, %edx
; NORMAL-DAG: movl $4, %ecx
; NORMAL-DAG: movl $5, %r8d
; NORMAL-DAG: movl $6, %r9d
; NORMAL: pushq $10
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $9
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $8
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $7
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: callq ten_params
; NORMAL: addq $32, %rsp
; NORMAL: .cfi_adjust_cfa_offset -32
; NORMAL: popq
; NORMAL: retq
; NOPUSH-LABEL: test1
; NOPUSH-NOT: pushq
; NOPUSH: retq
define void @test1() {
entry:
  ; All ten arguments are small constants: 1-6 are expected in registers,
  ; 7-10 are the stack arguments, pushed last argument first.
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  ret void
}

; The presence of a frame pointer should not prevent pushes. But we
; don't need the CFI directives in that case.
; Also check that we generate the right pushes for >8bit immediates.
; NORMALFP-LABEL: test2
; NORMALFP: pushq $10000
; NORMALFP-NEXT: pushq $9000
; NORMALFP-NEXT: pushq $8000
; NORMALFP-NEXT: pushq $7000
; NORMALFP-NEXT: callq {{_?}}ten_params
define void @test2(i32 %k) {
entry:
  ; Variable-sized alloca: its only purpose is to force a frame pointer.
  ; The result is intentionally unused.
  %a = alloca i32, i32 %k
  ; Stack arguments 7000..10000 do not fit in an 8-bit immediate.
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7000, i64 8000, i32 9000, i64 10000)
  ret void
}

; Parameters 7 & 8 should push a 64-bit register.
; TODO: Note that the regular expressions disallow r8 and r9. That's fine for
;       now, because the pushes will always follow the moves into r8 and r9.
;       Eventually, though, we want to be able to schedule the pushes better.
;       In this example, it will save two copies, because we have to move the
;       incoming parameters out of %rdi and %rsi to make room for the outgoing
;       parameters.
; NORMAL-LABEL: test3
; NORMAL: pushq $10000
; NORMAL: pushq $9000
; NORMAL: pushq %r{{..}}
; NORMAL: pushq %r{{..}}
; NORMAL: callq ten_params
define void @test3(i32 %a, i64 %b) {
entry:
  ; Outgoing arguments 7 and 8 are this function's own incoming %a and %b;
  ; arguments 9 and 10 are immediates.
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %a, i64 %b, i32 9000, i64 10000)
  ret void
}

; Check that we avoid the optimization for just one push.
; NORMAL-LABEL: test4
; NORMAL: movl $7, (%rsp)
; NORMAL: callq seven_params
define void @test4() {
entry:
  ; Only the seventh argument is passed on the stack, so it is expected to be
  ; stored with a mov rather than pushed.
  call void @seven_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7)
  ret void
}

; Check that pushing link-time constant addresses works correctly.
; NORMAL-LABEL: test5
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $ext
; NORMAL: pushq $7
; NORMAL: callq ten_params_ptr
@ext = external constant i8
define void @test5() {
entry:
  ; The eighth argument is the address of the external symbol @ext, which the
  ; checks above expect to be pushed as an immediate.
  call void @ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i8* @ext, i32 9, i64 10)
  ret void
}

; Check that we fuse 64-bit loads but not 32-bit loads into PUSH mem.
; NORMAL-LABEL: test6
; NORMAL: movq %rsi, [[REG64:%.+]]
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq ([[REG64]])
; NORMAL: pushq {{%r..}}
; NORMAL: callq ten_params
define void @test6(i32* %p32, i64* %p64) {
entry:
  ; 32-bit load: expected to go through a register, which is then pushed.
  %v32 = load i32, i32* %p32
  ; 64-bit load: expected to be folded into the push as a memory operand.
  %v64 = load i64, i64* %p64
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %v32, i64 %v64, i32 9, i64 10)
  ret void
}

; Fold stack-relative loads into the push with correct offsets.
; Do the same for an indirect call whose address is loaded from the stack.
; On entry, %p7 is at 8(%rsp) and %p8 is at 16(%rsp). Prior to the call
; sequence, 72 bytes are allocated to the stack, 48 for register saves and
; 24 for local storage and alignment, so %p7 is at 80(%rsp) and %p8 is at
; 88(%rsp). The call address can be stored anywhere in the local space but
; happens to be stored at 8(%rsp). Each push bumps these offsets up by
; 8 bytes.
; NORMAL-LABEL: test7
; NORMAL: movq %r{{.*}}, 8(%rsp) {{.*Spill$}}
; NORMAL: pushq 88(%rsp)
; NORMAL: pushq $9
; NORMAL: pushq 96(%rsp)
; NORMAL: pushq $7
; NORMAL: callq *40(%rsp)
define void @test7(i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8) {
entry:
  ; Round-trip the callee address through a stack slot so the call below is
  ; indirect.
  %stack_fptr = alloca void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*
  store void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)* @ten_params, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %stack_fptr
  ; NOTE(review): the load is volatile, presumably so it is not forwarded from
  ; the store above and turned into a direct call -- confirm.
  %ten_params_ptr = load volatile void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %stack_fptr
  ; The clobber list names every general-purpose register except the stack
  ; pointer, so the loaded callee address cannot stay in a register across this
  ; point; the checks above expect it to be spilled and called from the stack.
  call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ; %p7 and %p8 are the incoming stack arguments, re-passed as outgoing
  ; arguments 8 and 10.
  call void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64) %ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %p7, i32 9, i64 %p8)
  ret void
}

; We can't fold the load from the global into the push because of
; interference from the store.
; NORMAL-LABEL: test8
; NORMAL: movq the_global(%rip), [[REG:%r.+]]
; NORMAL: movq $42, the_global
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq [[REG]]
; NORMAL: pushq $7
; NORMAL: callq ten_params
@the_global = external global i64
define void @test8() {
  ; The value must be read before the store below overwrites @the_global, so
  ; it is expected in a register rather than as a memory operand of the push.
  %myload = load i64, i64* @the_global
  store i64 42, i64* @the_global
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %myload, i32 9, i64 10)
  ret void
}


; Converting one function call to use pushes negatively affects
; other calls that pass arguments on the stack without pushes.
; If the cost outweighs the benefit, avoid using pushes.
; NORMAL-LABEL: test9
; NORMAL: callq cannot_push
; NORMAL-NOT: push
; NORMAL: callq ten_params
define void @test9(float %p1) {
  ; Two calls to @cannot_push surround a single call to @ten_params; with this
  ; ratio the checks above expect no push before the @ten_params call.
  call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
  ret void
}

; But if the benefit outweighs the cost, use pushes.
; NORMAL-LABEL: test10
; NORMAL: callq cannot_push
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $8
; NORMAL: pushq $7
; NORMAL: callq ten_params
define void @test10(float %p1) {
  ; Inverse of the previous test: two calls to @ten_params surround a single
  ; call to @cannot_push. The checks above cover the pushes for the second
  ; @ten_params call.
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  call void @cannot_push(float 1.0e0, float 2.0e0, float 3.0e0, float 4.0e0, float 5.0e0, float 6.0e0, float 7.0e0, float 8.0e0, float %p1)
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  ret void
}
