; RUN: llc < %s -mcpu=generic -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN
; rdar://11496434

; no VLAs or dynamic alignment
define i32 @t1() nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  call void @t1_helper(i32* %a) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t1
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
; CHECK: callq _t1_helper
; CHECK: movl [[OFFSET]](%rsp), %eax
; CHECK: addl $13, %eax
}

declare void @t1_helper(i32*)

; dynamic realignment
define i32 @t2() nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %v = alloca <8 x float>, align 32
  call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t2
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
; CHECK: callq _t2_helper
;
; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}

declare void @t2_helper(i32*, <8 x float>*)

; VLAs
define i32 @t3(i64 %sz) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %vla = alloca i32, i64 %sz, align 16
  call void @t3_helper(i32* %a, i32* %vla) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t3
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %rbx
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %rbp
}

declare void @t3_helper(i32*, i32*)

; VLAs + Dynamic realignment
define i32 @t4(i64 %sz) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %v = alloca <8 x float>, align 32
  %vla = alloca i32, i64 %sz, align 16
  call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t4
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %r14
; CHECK: pushq %rbx
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
; CHECK: movq %rsp, %rbx
;
; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
; CHECK: callq _t4_helper
;
; CHECK: leaq -16(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %r14
; CHECK: popq %rbp
}

declare void @t4_helper(i32*, i32*, <8 x float>*)

; Dynamic realignment + Spill
define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %0 = bitcast float* %f to <8 x float>*
  %1 = load <8 x float>* %0, align 32
  call void @t5_helper1(i32* %a) nounwind
  call void @t5_helper2(<8 x float> %1) nounwind
  %2 = load i32* %a, align 4
  %add = add nsw i32 %2, 13
  ret i32 %add

; CHECK: _t5
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
; CHECK: vmovaps [[AVXREG]], (%rsp)
; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK: callq _t5_helper1
; CHECK: vmovaps (%rsp), %ymm0
; CHECK: callq _t5_helper2
; CHECK: movl {{[0-9]+}}(%rsp), %eax
;
; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}

declare void @t5_helper1(i32*)

declare void @t5_helper2(<8 x float>)

; VLAs + Dynamic realignment + Spill
; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
entry:
; CHECK: _t6
  %a = alloca i32, align 4
  %0 = bitcast float* %f to <8 x float>*
  %1 = load <8 x float>* %0, align 32
  %vla = alloca i32, i64 %sz, align 16
  call void @t6_helper1(i32* %a, i32* %vla) nounwind
  call void @t6_helper2(<8 x float> %1) nounwind
  %2 = load i32* %a, align 4
  %add = add nsw i32 %2, 13
  ret i32 %add
}

declare void @t6_helper1(i32*, i32*)

declare void @t6_helper2(<8 x float>)

; VLAs + Dynamic realignment + byval
; The byval adjusts the sp after the prolog, but if we're restoring the sp
; from the base pointer, we use the original adjustment.
%struct.struct_t = type { [5 x i32] }

define void @t7(i32 %size, %struct.struct_t* byval align 8 %arg1) nounwind uwtable {
entry:
  %x = alloca i32, align 32
  store i32 0, i32* %x, align 32
  %0 = zext i32 %size to i64
  %vla = alloca i32, i64 %0, align 16
  %1 = load i32* %x, align 32
  call void @bar(i32 %1, i32* %vla, %struct.struct_t* byval align 8 %arg1)
  ret void

; CHECK: _t7
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %rbx
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
; CHECK: movq %rsp, %rbx

; Stack adjustment for byval
; CHECK: subq {{.*}}, %rsp
; CHECK: callq _bar
; CHECK-NOT: addq {{.*}}, %rsp
; CHECK: leaq -8(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %rbp
}

declare i8* @llvm.stacksave() nounwind

declare void @bar(i32, i32*, %struct.struct_t* byval align 8)

declare void @llvm.stackrestore(i8*) nounwind


; Test when forcing stack alignment
define i32 @t8() nounwind uwtable {
entry:
  %a = alloca i32, align 4
  call void @t1_helper(i32* %a) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; FORCE-ALIGN: _t8
; FORCE-ALIGN: movq %rsp, %rbp
; FORCE-ALIGN: andq $-32, %rsp
; FORCE-ALIGN-NEXT: subq $32, %rsp
; FORCE-ALIGN: movq %rbp, %rsp
; FORCE-ALIGN: popq %rbp
}

; VLAs
define i32 @t9(i64 %sz) nounwind uwtable {
entry:
  %a = alloca i32, align 4
  %vla = alloca i32, i64 %sz, align 16
  call void @t3_helper(i32* %a, i32* %vla) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; FORCE-ALIGN: _t9
; FORCE-ALIGN: pushq %rbp
; FORCE-ALIGN: movq %rsp, %rbp
; FORCE-ALIGN: pushq %rbx
; FORCE-ALIGN: andq $-32, %rsp
; FORCE-ALIGN: subq $32, %rsp
; FORCE-ALIGN: movq %rsp, %rbx

; FORCE-ALIGN: leaq -8(%rbp), %rsp
; FORCE-ALIGN: popq %rbx
; FORCE-ALIGN: popq %rbp
}