; We specify -mcpu explicitly to avoid instruction reordering that happens on
; some setups (e.g., Atom) from affecting the output.
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
; RUN: llc < %s -mcpu=core2 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX

; The SysV ABI used by most Unixes on x86 specifies that an sret pointer is
; callee-cleanup (checked below for the cygwin and linux triples as "retl $4").
; However, in MSVC's cdecl calling convention, sret pointer arguments are
; caller-cleanup like normal arguments; the checks below expect the same bare
; "retl" for both the win32 and the mingw32 triples.

; sret pointer as the only argument.
define void @sret1(i8* sret %x) nounwind {
entry:
; WIN32-LABEL: _sret1:
; WIN32: movb $42, (%eax)
; WIN32-NOT: popl %eax
; WIN32: {{retl$}}

; MINGW_X86-LABEL: _sret1:
; MINGW_X86: {{retl$}}

; CYGWIN-LABEL: _sret1:
; CYGWIN: retl $4

; LINUX-LABEL: sret1:
; LINUX: retl $4

  store i8 42, i8* %x, align 4
  ret void
}

; sret pointer followed by a by-value argument that is stored through it.
define void @sret2(i8* sret %x, i8 %y) nounwind {
entry:
; WIN32-LABEL: _sret2:
; WIN32: movb {{.*}}, (%eax)
; WIN32-NOT: popl %eax
; WIN32: {{retl$}}

; MINGW_X86-LABEL: _sret2:
; MINGW_X86: {{retl$}}

; CYGWIN-LABEL: _sret2:
; CYGWIN: retl $4

; LINUX-LABEL: sret2:
; LINUX: retl $4

  store i8 %y, i8* %x
  ret void
}

; sret pointer followed by a second pointer argument; the store through the
; second pointer must not go through %eax on win32.
define void @sret3(i8* sret %x, i8* %y) nounwind {
entry:
; WIN32-LABEL: _sret3:
; WIN32: movb $42, (%eax)
; WIN32-NOT: movb $13, (%eax)
; WIN32-NOT: popl %eax
; WIN32: {{retl$}}

; MINGW_X86-LABEL: _sret3:
; MINGW_X86: {{retl$}}

; CYGWIN-LABEL: _sret3:
; CYGWIN: retl $4

; LINUX-LABEL: sret3:
; LINUX: retl $4

  store i8 42, i8* %x
  store i8 13, i8* %y
  ret void
}

; PR15556
%struct.S4 = type { i32, i32, i32 }

; Struct returned through a noalias sret pointer.
define void @sret4(%struct.S4* noalias sret %agg.result) {
entry:
; WIN32-LABEL: _sret4:
; WIN32: movl $42, (%eax)
; WIN32-NOT: popl %eax
; WIN32: {{retl$}}

; MINGW_X86-LABEL: _sret4:
; MINGW_X86: {{retl$}}

; CYGWIN-LABEL: _sret4:
; CYGWIN: retl $4

; LINUX-LABEL: sret4:
; LINUX: retl $4

  %x = getelementptr inbounds %struct.S4, %struct.S4* %agg.result, i32 0, i32 0
  store i32 42, i32* %x, align 4
  ret void
}

%struct.S5 = type { i32 }
%class.C5 = type { i8 }

; thiscall method that returns a struct through sret. Only the win32 runs check
; the body; the other triples only check that the label is emitted.
define x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* noalias sret %agg.result, %class.C5* %this) {
entry:
  %this.addr = alloca %class.C5*, align 4
  store %class.C5* %this, %class.C5** %this.addr, align 4
  %this1 = load %class.C5*, %class.C5** %this.addr
  %x = getelementptr inbounds %struct.S5, %struct.S5* %agg.result, i32 0, i32 0
  store i32 42, i32* %x, align 4
  ret void
; WIN32-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
; MINGW_X86-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
; CYGWIN-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
; LINUX-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":

; The address of the return structure is passed as an implicit parameter.
; In the -O0 build, %eax is spilled at the beginning of the function, hence we
; should match both 4(%esp) and 8(%esp).
; WIN32: {{[48]}}(%esp), %eax
; WIN32: movl $42, (%eax)
; WIN32: retl $4
}

; Caller side of the thiscall + sret method above.
define void @call_foo5() {
entry:
  %c = alloca %class.C5, align 1
  %s = alloca %struct.S5, align 4
  call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* sret %s, %class.C5* %c)
; WIN32-LABEL: {{^}}_call_foo5:
; MINGW_X86-LABEL: {{^}}_call_foo5:
; CYGWIN-LABEL: {{^}}_call_foo5:
; LINUX-LABEL: {{^}}call_foo5:


; Load the address of the result and put it onto stack
; The this pointer goes to ECX.
; (through %ecx in the -O0 build).
; WIN32: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x
; WIN32: leal {{[0-9]*}}(%esp), %ecx
; WIN32: {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}}
; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
; WIN32: retl
  ret void
}


; Call to a thiscall function whose first IR parameter is the sret pointer.
%struct.test6 = type { i32, i32, i32 }
define void @test6_f(%struct.test6* %x) nounwind {
; WIN32-LABEL: _test6_f:
; MINGW_X86-LABEL: _test6_f:
; CYGWIN-LABEL: _test6_f:
; LINUX-LABEL: test6_f:

; The %x argument is moved to %ecx. It will be the this pointer.
; WIN32: movl {{16|20}}(%esp), %ecx


; The sret pointer is (%esp)
; WIN32: leal {{4?}}(%esp), %eax
; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}

; The sret pointer is %ecx
; The %x argument is moved to (%esp). It will be the this pointer.
; MINGW_X86: leal {{4?}}(%esp), %ecx
; MINGW_X86-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
; MINGW_X86-NEXT: calll _test6_g

; CYGWIN: leal {{4?}}(%esp), %ecx
; CYGWIN-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
; CYGWIN-NEXT: calll _test6_g

  %tmp = alloca %struct.test6, align 4
  call x86_thiscallcc void @test6_g(%struct.test6* sret %tmp, %struct.test6* %x)
  ret void
}
declare x86_thiscallcc void @test6_g(%struct.test6* sret, %struct.test6*)

; Flipping the parameters at the IR level generates the same code.
%struct.test7 = type { i32, i32, i32 }
define void @test7_f(%struct.test7* %x) nounwind {
; WIN32-LABEL: _test7_f:
; MINGW_X86-LABEL: _test7_f:
; CYGWIN-LABEL: _test7_f:
; LINUX-LABEL: test7_f:

; The %x argument is moved to %ecx on all OSs. It will be the this pointer.
; WIN32: movl {{16|20}}(%esp), %ecx
; MINGW_X86: movl {{16|20}}(%esp), %ecx
; CYGWIN: movl {{16|20}}(%esp), %ecx

; The sret pointer is (%esp)
; WIN32: leal {{4?}}(%esp), %eax
; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
; MINGW_X86: leal {{4?}}(%esp), %eax
; MINGW_X86-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
; CYGWIN: leal {{4?}}(%esp), %eax
; CYGWIN-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}

  %tmp = alloca %struct.test7, align 4
  call x86_thiscallcc void @test7_g(%struct.test7* %x, %struct.test7* sret %tmp)
  ret void
}

; Callee with the sret pointer as the second IR parameter; it copies the first
; i32 field of %in to %out and then calls a function that may clobber %eax.
define x86_thiscallcc void @test7_g(%struct.test7* %in, %struct.test7* sret %out) {
  %s = getelementptr %struct.test7, %struct.test7* %in, i32 0, i32 0
  %d = getelementptr %struct.test7, %struct.test7* %out, i32 0, i32 0
  %v = load i32, i32* %s
  store i32 %v, i32* %d
  call void @clobber_eax()
  ret void

; Make sure we return the second parameter in %eax.
; WIN32-LABEL: _test7_g:
; WIN32: calll _clobber_eax
; WIN32: movl {{.*}}, %eax
; WIN32: retl
}

declare void @clobber_eax()

; Test what happens if the first parameter has to be split by codegen.
; Realistically, no frontend will generate code like this, but here it is for
; completeness.
define void @test8_f(i64 inreg %a, i64* sret %out) {
  store i64 %a, i64* %out
  call void @clobber_eax()
  ret void

; WIN32-LABEL: _test8_f:
; WIN32: movl {{[0-9]+}}(%esp), %[[out:[a-z]+]]
; WIN32-DAG: movl %edx, 4(%[[out]])
; WIN32-DAG: movl %eax, (%[[out]])
; WIN32: calll _clobber_eax
; WIN32: movl {{.*}}, %eax
; WIN32: retl
}