• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-pc-win32       -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs  | FileCheck %s --check-prefix=X32
3; RUN: llc < %s -mtriple=x86_64-win32        -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs  | FileCheck %s --check-prefix=WIN64
4; RUN: llc < %s -mtriple=x86_64-linux-gnu    -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs  | FileCheck %s --check-prefix=LINUXOSX64
5
6; Test regcall when receiving/returning i1
; Callee side for i1: returns %a + 1.
; Every prefix expects a single `incb %al` followed by the return, so the
; checks pin %al as both the incoming-argument and the return register.
7define x86_regcallcc i1 @test_argReti1(i1 %a)  {
8; X32-LABEL: test_argReti1:
9; X32:       # %bb.0:
10; X32-NEXT:    incb %al
11; X32-NEXT:    # kill: def $al killed $al killed $eax
12; X32-NEXT:    retl
13;
14; WIN64-LABEL: test_argReti1:
15; WIN64:       # %bb.0:
16; WIN64-NEXT:    incb %al
17; WIN64-NEXT:    # kill: def $al killed $al killed $eax
18; WIN64-NEXT:    retq
19;
20; LINUXOSX64-LABEL: test_argReti1:
21; LINUXOSX64:       # %bb.0:
22; LINUXOSX64-NEXT:    incb %al
23; LINUXOSX64-NEXT:    # kill: def $al killed $al killed $eax
24; LINUXOSX64-NEXT:    retq
25  %add = add i1 %a, 1
26  ret i1 %add
27}
28
29; Test regcall when passing/retrieving i1
; Caller side for i1: increments %a, passes it to test_argReti1 with the
; regcall convention, then increments the returned value.
; The checks expect the outgoing argument to be zero-extended from %al into
; %eax (`movzbl`) before the call, and the stack-pointer register to be
; pushed on entry and popped before the return on every target.
30define x86_regcallcc i1 @test_CallargReti1(i1 %a)  {
31; X32-LABEL: test_CallargReti1:
32; X32:       # %bb.0:
33; X32-NEXT:    pushl %esp
34; X32-NEXT:    incb %al
35; X32-NEXT:    movzbl %al, %eax
36; X32-NEXT:    calll _test_argReti1
37; X32-NEXT:    incb %al
38; X32-NEXT:    popl %esp
39; X32-NEXT:    retl
40;
41; WIN64-LABEL: test_CallargReti1:
42; WIN64:       # %bb.0:
43; WIN64-NEXT:    pushq %rsp
44; WIN64-NEXT:    .seh_pushreg %rsp
45; WIN64-NEXT:    .seh_endprologue
46; WIN64-NEXT:    incb %al
47; WIN64-NEXT:    movzbl %al, %eax
48; WIN64-NEXT:    callq test_argReti1
49; WIN64-NEXT:    incb %al
50; WIN64-NEXT:    popq %rsp
51; WIN64-NEXT:    retq
52; WIN64-NEXT:    .seh_endproc
53;
54; LINUXOSX64-LABEL: test_CallargReti1:
55; LINUXOSX64:       # %bb.0:
56; LINUXOSX64-NEXT:    pushq %rsp
57; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
58; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
59; LINUXOSX64-NEXT:    incb %al
60; LINUXOSX64-NEXT:    movzbl %al, %eax
61; LINUXOSX64-NEXT:    callq test_argReti1
62; LINUXOSX64-NEXT:    incb %al
63; LINUXOSX64-NEXT:    popq %rsp
64; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
65; LINUXOSX64-NEXT:    retq
66  %b = add i1 %a, 1
67  %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
68  %d = add i1 %c, 1
69  ret i1 %d
70}
71
72; Test regcall when receiving/returning i8
; Callee side for i8: returns %a + 1.
; The expected code is the same as for the i1 callee: one `incb %al` and a
; return, i.e. the i8 value arrives in and leaves through %al.
73define x86_regcallcc i8 @test_argReti8(i8 %a)  {
74; X32-LABEL: test_argReti8:
75; X32:       # %bb.0:
76; X32-NEXT:    incb %al
77; X32-NEXT:    # kill: def $al killed $al killed $eax
78; X32-NEXT:    retl
79;
80; WIN64-LABEL: test_argReti8:
81; WIN64:       # %bb.0:
82; WIN64-NEXT:    incb %al
83; WIN64-NEXT:    # kill: def $al killed $al killed $eax
84; WIN64-NEXT:    retq
85;
86; LINUXOSX64-LABEL: test_argReti8:
87; LINUXOSX64:       # %bb.0:
88; LINUXOSX64-NEXT:    incb %al
89; LINUXOSX64-NEXT:    # kill: def $al killed $al killed $eax
90; LINUXOSX64-NEXT:    retq
91  %add = add i8 %a, 1
92  ret i8 %add
93}
94
95; Test regcall when passing/retrieving i8
; Caller side for i8: increments %a, calls test_argReti8 with the result,
; and increments the value that comes back.
; The checks expect `incb %al` + `movzbl %al, %eax` before the call, a second
; `incb %al` after it, and a push/pop of the stack-pointer register around
; the body on every target.
96define x86_regcallcc i8 @test_CallargReti8(i8 %a)  {
97; X32-LABEL: test_CallargReti8:
98; X32:       # %bb.0:
99; X32-NEXT:    pushl %esp
100; X32-NEXT:    incb %al
101; X32-NEXT:    movzbl %al, %eax
102; X32-NEXT:    calll _test_argReti8
103; X32-NEXT:    incb %al
104; X32-NEXT:    popl %esp
105; X32-NEXT:    retl
106;
107; WIN64-LABEL: test_CallargReti8:
108; WIN64:       # %bb.0:
109; WIN64-NEXT:    pushq %rsp
110; WIN64-NEXT:    .seh_pushreg %rsp
111; WIN64-NEXT:    .seh_endprologue
112; WIN64-NEXT:    incb %al
113; WIN64-NEXT:    movzbl %al, %eax
114; WIN64-NEXT:    callq test_argReti8
115; WIN64-NEXT:    incb %al
116; WIN64-NEXT:    popq %rsp
117; WIN64-NEXT:    retq
118; WIN64-NEXT:    .seh_endproc
119;
120; LINUXOSX64-LABEL: test_CallargReti8:
121; LINUXOSX64:       # %bb.0:
122; LINUXOSX64-NEXT:    pushq %rsp
123; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
124; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
125; LINUXOSX64-NEXT:    incb %al
126; LINUXOSX64-NEXT:    movzbl %al, %eax
127; LINUXOSX64-NEXT:    callq test_argReti8
128; LINUXOSX64-NEXT:    incb %al
129; LINUXOSX64-NEXT:    popq %rsp
130; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
131; LINUXOSX64-NEXT:    retq
132  %b = add i8 %a, 1
133  %c = call x86_regcallcc i8 @test_argReti8(i8 %b)
134  %d = add i8 %c, 1
135  ret i8 %d
136}
137
138; Test regcall when receiving/returning i16
; Callee side for i16: returns %a + 1.
; The checks expect the increment to be done on the full 32-bit register
; (`incl %eax`), with the `# kill` annotation recording that only $ax is the
; live result.
139define x86_regcallcc i16 @test_argReti16(i16 %a)  {
140; X32-LABEL: test_argReti16:
141; X32:       # %bb.0:
142; X32-NEXT:    incl %eax
143; X32-NEXT:    # kill: def $ax killed $ax killed $eax
144; X32-NEXT:    retl
145;
146; WIN64-LABEL: test_argReti16:
147; WIN64:       # %bb.0:
148; WIN64-NEXT:    incl %eax
149; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
150; WIN64-NEXT:    retq
151;
152; LINUXOSX64-LABEL: test_argReti16:
153; LINUXOSX64:       # %bb.0:
154; LINUXOSX64-NEXT:    incl %eax
155; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
156; LINUXOSX64-NEXT:    retq
157  %add = add i16 %a, 1
158  ret i16 %add
159}
160
161; Test regcall when passing/retrieving i16
; Caller side for i16: increments %a, calls test_argReti16 with the result,
; and increments the returned value.
; Both increments are expected as `incl %eax`; unlike the i1/i8 callers, no
; explicit zero-extension instruction is expected before the call. The
; stack-pointer register is pushed/popped around the body on every target.
162define x86_regcallcc i16 @test_CallargReti16(i16 %a)  {
163; X32-LABEL: test_CallargReti16:
164; X32:       # %bb.0:
165; X32-NEXT:    pushl %esp
166; X32-NEXT:    incl %eax
167; X32-NEXT:    calll _test_argReti16
168; X32-NEXT:    # kill: def $ax killed $ax def $eax
169; X32-NEXT:    incl %eax
170; X32-NEXT:    # kill: def $ax killed $ax killed $eax
171; X32-NEXT:    popl %esp
172; X32-NEXT:    retl
173;
174; WIN64-LABEL: test_CallargReti16:
175; WIN64:       # %bb.0:
176; WIN64-NEXT:    pushq %rsp
177; WIN64-NEXT:    .seh_pushreg %rsp
178; WIN64-NEXT:    .seh_endprologue
179; WIN64-NEXT:    incl %eax
180; WIN64-NEXT:    callq test_argReti16
181; WIN64-NEXT:    # kill: def $ax killed $ax def $eax
182; WIN64-NEXT:    incl %eax
183; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
184; WIN64-NEXT:    popq %rsp
185; WIN64-NEXT:    retq
186; WIN64-NEXT:    .seh_endproc
187;
188; LINUXOSX64-LABEL: test_CallargReti16:
189; LINUXOSX64:       # %bb.0:
190; LINUXOSX64-NEXT:    pushq %rsp
191; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
192; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
193; LINUXOSX64-NEXT:    incl %eax
194; LINUXOSX64-NEXT:    callq test_argReti16
195; LINUXOSX64-NEXT:    # kill: def $ax killed $ax def $eax
196; LINUXOSX64-NEXT:    incl %eax
197; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
198; LINUXOSX64-NEXT:    popq %rsp
199; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
200; LINUXOSX64-NEXT:    retq
201  %b = add i16 %a, 1
202  %c = call x86_regcallcc i16 @test_argReti16(i16 %b)
203  %d = add i16 %c, 1
204  ret i16 %d
205}
206
207; Test regcall when receiving/returning i32
; Callee side for i32: returns %a + 1.
; Every prefix expects exactly `incl %eax` followed by the return, so %eax
; is pinned as both the argument and the return register.
208define x86_regcallcc i32 @test_argReti32(i32 %a)  {
209; X32-LABEL: test_argReti32:
210; X32:       # %bb.0:
211; X32-NEXT:    incl %eax
212; X32-NEXT:    retl
213;
214; WIN64-LABEL: test_argReti32:
215; WIN64:       # %bb.0:
216; WIN64-NEXT:    incl %eax
217; WIN64-NEXT:    retq
218;
219; LINUXOSX64-LABEL: test_argReti32:
220; LINUXOSX64:       # %bb.0:
221; LINUXOSX64-NEXT:    incl %eax
222; LINUXOSX64-NEXT:    retq
223  %add = add i32 %a, 1
224  ret i32 %add
225}
226
227; Test regcall when passing/retrieving i32
; Caller side for i32: increments %a, calls test_argReti32 with the result,
; and increments the returned value.
; The checks expect `incl %eax` on each side of the call with no register
; shuffling in between, plus a push/pop of the stack-pointer register around
; the body.
228define x86_regcallcc i32 @test_CallargReti32(i32 %a)  {
229; X32-LABEL: test_CallargReti32:
230; X32:       # %bb.0:
231; X32-NEXT:    pushl %esp
232; X32-NEXT:    incl %eax
233; X32-NEXT:    calll _test_argReti32
234; X32-NEXT:    incl %eax
235; X32-NEXT:    popl %esp
236; X32-NEXT:    retl
237;
238; WIN64-LABEL: test_CallargReti32:
239; WIN64:       # %bb.0:
240; WIN64-NEXT:    pushq %rsp
241; WIN64-NEXT:    .seh_pushreg %rsp
242; WIN64-NEXT:    .seh_endprologue
243; WIN64-NEXT:    incl %eax
244; WIN64-NEXT:    callq test_argReti32
245; WIN64-NEXT:    incl %eax
246; WIN64-NEXT:    popq %rsp
247; WIN64-NEXT:    retq
248; WIN64-NEXT:    .seh_endproc
249;
250; LINUXOSX64-LABEL: test_CallargReti32:
251; LINUXOSX64:       # %bb.0:
252; LINUXOSX64-NEXT:    pushq %rsp
253; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
254; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
255; LINUXOSX64-NEXT:    incl %eax
256; LINUXOSX64-NEXT:    callq test_argReti32
257; LINUXOSX64-NEXT:    incl %eax
258; LINUXOSX64-NEXT:    popq %rsp
259; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
260; LINUXOSX64-NEXT:    retq
261  %b = add i32 %a, 1
262  %c = call x86_regcallcc i32 @test_argReti32(i32 %b)
263  %d = add i32 %c, 1
264  ret i32 %d
265}
266
267; Test regcall when receiving/returning i64
; Callee side for i64: returns %a + 4294967299 (0x100000003), a constant that
; is non-zero in both 32-bit halves.
; X32 expects the value split across two registers: `addl $3` on %eax and
; `adcl $1` on %ecx. The 64-bit prefixes expect the constant materialised
; with `movabsq` into %rcx and added into %rax.
268define x86_regcallcc i64 @test_argReti64(i64 %a)  {
269; X32-LABEL: test_argReti64:
270; X32:       # %bb.0:
271; X32-NEXT:    addl $3, %eax
272; X32-NEXT:    adcl $1, %ecx
273; X32-NEXT:    retl
274;
275; WIN64-LABEL: test_argReti64:
276; WIN64:       # %bb.0:
277; WIN64-NEXT:    movabsq $4294967299, %rcx # imm = 0x100000003
278; WIN64-NEXT:    addq %rcx, %rax
279; WIN64-NEXT:    retq
280;
281; LINUXOSX64-LABEL: test_argReti64:
282; LINUXOSX64:       # %bb.0:
283; LINUXOSX64-NEXT:    movabsq $4294967299, %rcx # imm = 0x100000003
284; LINUXOSX64-NEXT:    addq %rcx, %rax
285; LINUXOSX64-NEXT:    retq
286  %add = add i64 %a, 4294967299
287  ret i64 %add
288}
289
290; Test regcall when passing/retrieving i64
; Caller side for i64: adds 1 to %a, calls test_argReti64 with the result,
; and adds 1 to the returned value.
; X32 expects each +1 as an `addl $1, %eax` / `adcl $0, %ecx` pair; the
; 64-bit prefixes expect `incq %rax`. The stack-pointer register is
; pushed/popped around the body on every target.
291define x86_regcallcc i64 @test_CallargReti64(i64 %a)  {
292; X32-LABEL: test_CallargReti64:
293; X32:       # %bb.0:
294; X32-NEXT:    pushl %esp
295; X32-NEXT:    addl $1, %eax
296; X32-NEXT:    adcl $0, %ecx
297; X32-NEXT:    calll _test_argReti64
298; X32-NEXT:    addl $1, %eax
299; X32-NEXT:    adcl $0, %ecx
300; X32-NEXT:    popl %esp
301; X32-NEXT:    retl
302;
303; WIN64-LABEL: test_CallargReti64:
304; WIN64:       # %bb.0:
305; WIN64-NEXT:    pushq %rsp
306; WIN64-NEXT:    .seh_pushreg %rsp
307; WIN64-NEXT:    .seh_endprologue
308; WIN64-NEXT:    incq %rax
309; WIN64-NEXT:    callq test_argReti64
310; WIN64-NEXT:    incq %rax
311; WIN64-NEXT:    popq %rsp
312; WIN64-NEXT:    retq
313; WIN64-NEXT:    .seh_endproc
314;
315; LINUXOSX64-LABEL: test_CallargReti64:
316; LINUXOSX64:       # %bb.0:
317; LINUXOSX64-NEXT:    pushq %rsp
318; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
319; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
320; LINUXOSX64-NEXT:    incq %rax
321; LINUXOSX64-NEXT:    callq test_argReti64
322; LINUXOSX64-NEXT:    incq %rax
323; LINUXOSX64-NEXT:    popq %rsp
324; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
325; LINUXOSX64-NEXT:    retq
326  %b = add i64 %a, 1
327  %c = call x86_regcallcc i64 @test_argReti64(i64 %b)
328  %d = add i64 %c, 1
329  ret i64 %d
330}
331
332; Test regcall when receiving/returning float
; Callee side for float: returns 1.0 + %a.
; Every prefix expects one `vaddss` with a memory operand, using %xmm0 as
; both source and destination. X32 names the constant-pool symbol directly;
; the 64-bit prefixes match a %rip-relative operand through a FileCheck
; regex.
333define x86_regcallcc float @test_argRetFloat(float %a)  {
334; X32-LABEL: test_argRetFloat:
335; X32:       # %bb.0:
336; X32-NEXT:    vaddss __real@3f800000, %xmm0, %xmm0
337; X32-NEXT:    retl
338;
339; WIN64-LABEL: test_argRetFloat:
340; WIN64:       # %bb.0:
341; WIN64-NEXT:    vaddss __real@{{.*}}(%rip), %xmm0, %xmm0
342; WIN64-NEXT:    retq
343;
344; LINUXOSX64-LABEL: test_argRetFloat:
345; LINUXOSX64:       # %bb.0:
346; LINUXOSX64-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
347; LINUXOSX64-NEXT:    retq
348  %add = fadd float 1.0, %a
349  ret float %add
350}
351
352; Test regcall when passing/retrieving float
; Caller side for float: adds 1.0 to %a, calls test_argRetFloat with the
; result, and adds 1.0 to the returned value.
; The checks expect the scalar operand to be loaded once and kept in a
; register across the call (%xmm4 on X32, %xmm8 on the 64-bit targets).
; That register is spilled to the stack in the prologue and reloaded before
; returning. X32 expects an unaligned spill (`vmovups`); the 64-bit targets
; expect an aligned one (`vmovaps`).
353define x86_regcallcc float @test_CallargRetFloat(float %a)  {
354; X32-LABEL: test_CallargRetFloat:
355; X32:       # %bb.0:
356; X32-NEXT:    pushl %esp
357; X32-NEXT:    subl $24, %esp
358; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
359; X32-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
360; X32-NEXT:    vaddss %xmm4, %xmm0, %xmm0
361; X32-NEXT:    calll _test_argRetFloat
362; X32-NEXT:    vaddss %xmm4, %xmm0, %xmm0
363; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
364; X32-NEXT:    addl $24, %esp
365; X32-NEXT:    popl %esp
366; X32-NEXT:    retl
367;
368; WIN64-LABEL: test_CallargRetFloat:
369; WIN64:       # %bb.0:
370; WIN64-NEXT:    pushq %rsp
371; WIN64-NEXT:    .seh_pushreg %rsp
372; WIN64-NEXT:    subq $16, %rsp
373; WIN64-NEXT:    .seh_stackalloc 16
374; WIN64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
375; WIN64-NEXT:    .seh_savexmm %xmm8, 0
376; WIN64-NEXT:    .seh_endprologue
377; WIN64-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
378; WIN64-NEXT:    vaddss %xmm0, %xmm8, %xmm0
379; WIN64-NEXT:    callq test_argRetFloat
380; WIN64-NEXT:    vaddss %xmm0, %xmm8, %xmm0
381; WIN64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
382; WIN64-NEXT:    addq $16, %rsp
383; WIN64-NEXT:    popq %rsp
384; WIN64-NEXT:    retq
385; WIN64-NEXT:    .seh_endproc
386;
387; LINUXOSX64-LABEL: test_CallargRetFloat:
388; LINUXOSX64:       # %bb.0:
389; LINUXOSX64-NEXT:    pushq %rsp
390; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
391; LINUXOSX64-NEXT:    subq $16, %rsp
392; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
393; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
394; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
395; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -32
396; LINUXOSX64-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
397; LINUXOSX64-NEXT:    vaddss %xmm0, %xmm8, %xmm0
398; LINUXOSX64-NEXT:    callq test_argRetFloat
399; LINUXOSX64-NEXT:    vaddss %xmm0, %xmm8, %xmm0
400; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
401; LINUXOSX64-NEXT:    addq $16, %rsp
402; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
403; LINUXOSX64-NEXT:    popq %rsp
404; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
405; LINUXOSX64-NEXT:    retq
406  %b = fadd float 1.0, %a
407  %c = call x86_regcallcc float @test_argRetFloat(float %b)
408  %d = fadd float 1.0, %c
409  ret float %d
410}
411
412; Test regcall when receiving/returning double
; Callee side for double: returns %a + 1.0.
; Every prefix expects one `vaddsd` with a memory operand, using %xmm0 as
; both source and destination. X32 names the constant-pool symbol directly;
; the 64-bit prefixes match a %rip-relative operand through a FileCheck
; regex.
413define x86_regcallcc double @test_argRetDouble(double %a)  {
414; X32-LABEL: test_argRetDouble:
415; X32:       # %bb.0:
416; X32-NEXT:    vaddsd __real@3ff0000000000000, %xmm0, %xmm0
417; X32-NEXT:    retl
418;
419; WIN64-LABEL: test_argRetDouble:
420; WIN64:       # %bb.0:
421; WIN64-NEXT:    vaddsd __real@{{.*}}(%rip), %xmm0, %xmm0
422; WIN64-NEXT:    retq
423;
424; LINUXOSX64-LABEL: test_argRetDouble:
425; LINUXOSX64:       # %bb.0:
426; LINUXOSX64-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
427; LINUXOSX64-NEXT:    retq
428  %add = fadd double %a, 1.0
429  ret double %add
430}
431
432; Test regcall when passing/retrieving double
; Caller side for double: adds 1.0 to %a, calls test_argRetDouble with the
; result, and adds 1.0 to the returned value.
; The expected code has the same shape as the float caller: the scalar
; operand is loaded once and kept across the call in %xmm4 (X32) or %xmm8
; (64-bit targets), and that register is spilled in the prologue and
; reloaded before returning.
433define x86_regcallcc double @test_CallargRetDouble(double %a)  {
434; X32-LABEL: test_CallargRetDouble:
435; X32:       # %bb.0:
436; X32-NEXT:    pushl %esp
437; X32-NEXT:    subl $24, %esp
438; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
439; X32-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
440; X32-NEXT:    vaddsd %xmm4, %xmm0, %xmm0
441; X32-NEXT:    calll _test_argRetDouble
442; X32-NEXT:    vaddsd %xmm4, %xmm0, %xmm0
443; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
444; X32-NEXT:    addl $24, %esp
445; X32-NEXT:    popl %esp
446; X32-NEXT:    retl
447;
448; WIN64-LABEL: test_CallargRetDouble:
449; WIN64:       # %bb.0:
450; WIN64-NEXT:    pushq %rsp
451; WIN64-NEXT:    .seh_pushreg %rsp
452; WIN64-NEXT:    subq $16, %rsp
453; WIN64-NEXT:    .seh_stackalloc 16
454; WIN64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
455; WIN64-NEXT:    .seh_savexmm %xmm8, 0
456; WIN64-NEXT:    .seh_endprologue
457; WIN64-NEXT:    vmovsd {{.*#+}} xmm8 = mem[0],zero
458; WIN64-NEXT:    vaddsd %xmm0, %xmm8, %xmm0
459; WIN64-NEXT:    callq test_argRetDouble
460; WIN64-NEXT:    vaddsd %xmm0, %xmm8, %xmm0
461; WIN64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
462; WIN64-NEXT:    addq $16, %rsp
463; WIN64-NEXT:    popq %rsp
464; WIN64-NEXT:    retq
465; WIN64-NEXT:    .seh_endproc
466;
467; LINUXOSX64-LABEL: test_CallargRetDouble:
468; LINUXOSX64:       # %bb.0:
469; LINUXOSX64-NEXT:    pushq %rsp
470; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
471; LINUXOSX64-NEXT:    subq $16, %rsp
472; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
473; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
474; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
475; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -32
476; LINUXOSX64-NEXT:    vmovsd {{.*#+}} xmm8 = mem[0],zero
477; LINUXOSX64-NEXT:    vaddsd %xmm0, %xmm8, %xmm0
478; LINUXOSX64-NEXT:    callq test_argRetDouble
479; LINUXOSX64-NEXT:    vaddsd %xmm0, %xmm8, %xmm0
480; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
481; LINUXOSX64-NEXT:    addq $16, %rsp
482; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
483; LINUXOSX64-NEXT:    popq %rsp
484; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
485; LINUXOSX64-NEXT:    retq
486  %b = fadd double 1.0, %a
487  %c = call x86_regcallcc double @test_argRetDouble(double %b)
488  %d = fadd double 1.0, %c
489  ret double %d
490}
491
492; Test regcall when receiving/returning long double
; Callee side for x86_fp80: returns %a0 + %a0.
; Every prefix expects a single x87 `fadd %st, %st(0)` and a return, with no
; loads or stores, so the value is expected on the x87 register stack on
; entry and on exit.
493define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind {
494; X32-LABEL: test_argRetf80:
495; X32:       # %bb.0:
496; X32-NEXT:    fadd %st, %st(0)
497; X32-NEXT:    retl
498;
499; WIN64-LABEL: test_argRetf80:
500; WIN64:       # %bb.0:
501; WIN64-NEXT:    fadd %st, %st(0)
502; WIN64-NEXT:    retq
503;
504; LINUXOSX64-LABEL: test_argRetf80:
505; LINUXOSX64:       # %bb.0:
506; LINUXOSX64-NEXT:    fadd %st, %st(0)
507; LINUXOSX64-NEXT:    retq
508  %r0 = fadd x86_fp80 %a0, %a0
509  ret x86_fp80 %r0
510}
511
512; Test regcall when receiving long double and returning double
; Takes an x86_fp80 argument and returns it truncated to double.
; The checks expect the conversion to go through memory: an x87 `fstpl` to a
; stack slot followed by a `vmovsd` load into %xmm0. The three targets
; differ only in how the slot is obtained: X32 sets up an %ebp frame and
; aligns %esp to 8, WIN64 pushes/pops %rax, and LINUXOSX64 stores at a
; negative offset from %rsp without adjusting it.
513define x86_regcallcc double @test_argParamf80(x86_fp80 %a0) nounwind {
514; X32-LABEL: test_argParamf80:
515; X32:       # %bb.0:
516; X32-NEXT:    pushl %ebp
517; X32-NEXT:    movl %esp, %ebp
518; X32-NEXT:    andl $-8, %esp
519; X32-NEXT:    subl $8, %esp
520; X32-NEXT:    fstpl (%esp)
521; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
522; X32-NEXT:    movl %ebp, %esp
523; X32-NEXT:    popl %ebp
524; X32-NEXT:    retl
525;
526; WIN64-LABEL: test_argParamf80:
527; WIN64:       # %bb.0:
528; WIN64-NEXT:    pushq %rax
529; WIN64-NEXT:    fstpl (%rsp)
530; WIN64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
531; WIN64-NEXT:    popq %rax
532; WIN64-NEXT:    retq
533;
534; LINUXOSX64-LABEL: test_argParamf80:
535; LINUXOSX64:       # %bb.0:
536; LINUXOSX64-NEXT:    fstpl -{{[0-9]+}}(%rsp)
537; LINUXOSX64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
538; LINUXOSX64-NEXT:    retq
539  %r0 = fptrunc x86_fp80 %a0 to double
540  ret double %r0
541}
542
543; Test regcall when passing/retrieving long double
; Caller side for x86_fp80: doubles %a, calls test_argRetf80 with the
; result, and doubles the returned value.
; The checks expect `fadd %st, %st(0)` on each side of the call with no
; stores or loads of the x87 value in between, plus a push/pop of the
; stack-pointer register around the body.
544define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a)  {
545; X32-LABEL: test_CallargRetf80:
546; X32:       # %bb.0:
547; X32-NEXT:    pushl %esp
548; X32-NEXT:    fadd %st, %st(0)
549; X32-NEXT:    calll _test_argRetf80
550; X32-NEXT:    fadd %st, %st(0)
551; X32-NEXT:    popl %esp
552; X32-NEXT:    retl
553;
554; WIN64-LABEL: test_CallargRetf80:
555; WIN64:       # %bb.0:
556; WIN64-NEXT:    pushq %rsp
557; WIN64-NEXT:    .seh_pushreg %rsp
558; WIN64-NEXT:    .seh_endprologue
559; WIN64-NEXT:    fadd %st, %st(0)
560; WIN64-NEXT:    callq test_argRetf80
561; WIN64-NEXT:    fadd %st, %st(0)
562; WIN64-NEXT:    popq %rsp
563; WIN64-NEXT:    retq
564; WIN64-NEXT:    .seh_endproc
565;
566; LINUXOSX64-LABEL: test_CallargRetf80:
567; LINUXOSX64:       # %bb.0:
568; LINUXOSX64-NEXT:    pushq %rsp
569; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
570; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
571; LINUXOSX64-NEXT:    fadd %st, %st(0)
572; LINUXOSX64-NEXT:    callq test_argRetf80
573; LINUXOSX64-NEXT:    fadd %st, %st(0)
574; LINUXOSX64-NEXT:    popq %rsp
575; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
576; LINUXOSX64-NEXT:    retq
577  %b = fadd x86_fp80 %a, %a
578  %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b)
579  %d = fadd x86_fp80 %c, %c
580  ret x86_fp80 %d
581}
582
; Test regcall when passing long double and retrieving double
; Doubles the x86_fp80 argument, passes it to test_argParamf80, and doubles
; the double that comes back.
; The checks expect an x87 `fadd %st, %st(0)` before the call and an AVX
; `vaddsd %xmm0, %xmm0, %xmm0` after it, i.e. the argument goes out on the
; x87 stack and the result comes back in %xmm0.
583define x86_regcallcc double @test_CallargParamf80(x86_fp80 %a)  {
584; X32-LABEL: test_CallargParamf80:
585; X32:       # %bb.0:
586; X32-NEXT:    pushl %esp
587; X32-NEXT:    fadd %st, %st(0)
588; X32-NEXT:    calll _test_argParamf80
589; X32-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
590; X32-NEXT:    popl %esp
591; X32-NEXT:    retl
592;
593; WIN64-LABEL: test_CallargParamf80:
594; WIN64:       # %bb.0:
595; WIN64-NEXT:    pushq %rsp
596; WIN64-NEXT:    .seh_pushreg %rsp
597; WIN64-NEXT:    .seh_endprologue
598; WIN64-NEXT:    fadd %st, %st(0)
599; WIN64-NEXT:    callq test_argParamf80
600; WIN64-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
601; WIN64-NEXT:    popq %rsp
602; WIN64-NEXT:    retq
603; WIN64-NEXT:    .seh_endproc
604;
605; LINUXOSX64-LABEL: test_CallargParamf80:
606; LINUXOSX64:       # %bb.0:
607; LINUXOSX64-NEXT:    pushq %rsp
608; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
609; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
610; LINUXOSX64-NEXT:    fadd %st, %st(0)
611; LINUXOSX64-NEXT:    callq test_argParamf80
612; LINUXOSX64-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
613; LINUXOSX64-NEXT:    popq %rsp
614; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
615; LINUXOSX64-NEXT:    retq
616  %b = fadd x86_fp80 %a, %a
617  %c = call x86_regcallcc double @test_argParamf80(x86_fp80 %b)
618  %d = fadd double %c, %c
619  ret double %d
620}
621
622; Test regcall when receiving/returning pointer
; Callee side for pointers: converts %a to i32, adds 1, and converts the
; result back to a pointer.
; Every prefix expects a single `incl %eax`. The 64-bit targets also use the
; 32-bit form, consistent with the IR doing the arithmetic in i32 rather
; than in a pointer-sized integer.
623define x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %a)  {
624; X32-LABEL: test_argRetPointer:
625; X32:       # %bb.0:
626; X32-NEXT:    incl %eax
627; X32-NEXT:    retl
628;
629; WIN64-LABEL: test_argRetPointer:
630; WIN64:       # %bb.0:
631; WIN64-NEXT:    incl %eax
632; WIN64-NEXT:    retq
633;
634; LINUXOSX64-LABEL: test_argRetPointer:
635; LINUXOSX64:       # %bb.0:
636; LINUXOSX64-NEXT:    incl %eax
637; LINUXOSX64-NEXT:    retq
638  %b = ptrtoint [4 x i32]* %a to i32
639  %c = add i32 %b, 1
640  %d = inttoptr i32 %c to [4 x i32]*
641  ret [4 x i32]* %d
642}
643
644; Test regcall when passing/retrieving pointer
; Caller side for pointers: applies the i32 round-trip "+1" to %a, calls
; test_argRetPointer with the result, and applies the same round-trip "+1"
; to the returned pointer.
; The checks expect `incl %eax` on each side of the call on every target,
; plus a push/pop of the stack-pointer register around the body.
645define x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a)  {
646; X32-LABEL: test_CallargRetPointer:
647; X32:       # %bb.0:
648; X32-NEXT:    pushl %esp
649; X32-NEXT:    incl %eax
650; X32-NEXT:    calll _test_argRetPointer
651; X32-NEXT:    incl %eax
652; X32-NEXT:    popl %esp
653; X32-NEXT:    retl
654;
655; WIN64-LABEL: test_CallargRetPointer:
656; WIN64:       # %bb.0:
657; WIN64-NEXT:    pushq %rsp
658; WIN64-NEXT:    .seh_pushreg %rsp
659; WIN64-NEXT:    .seh_endprologue
660; WIN64-NEXT:    incl %eax
661; WIN64-NEXT:    callq test_argRetPointer
662; WIN64-NEXT:    incl %eax
663; WIN64-NEXT:    popq %rsp
664; WIN64-NEXT:    retq
665; WIN64-NEXT:    .seh_endproc
666;
667; LINUXOSX64-LABEL: test_CallargRetPointer:
668; LINUXOSX64:       # %bb.0:
669; LINUXOSX64-NEXT:    pushq %rsp
670; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
671; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
672; LINUXOSX64-NEXT:    incl %eax
673; LINUXOSX64-NEXT:    callq test_argRetPointer
674; LINUXOSX64-NEXT:    incl %eax
675; LINUXOSX64-NEXT:    popq %rsp
676; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
677; LINUXOSX64-NEXT:    retq
678  %b = ptrtoint [4 x i32]* %a to i32
679  %c = add i32 %b, 1
680  %d = inttoptr i32 %c to [4 x i32]*
681  %e = call x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %d)
682  %f = ptrtoint [4 x i32]* %e to i32
683  %g = add i32 %f, 1
684  %h = inttoptr i32 %g to [4 x i32]*
685  ret [4 x i32]* %h
686}
687
688; Test regcall when receiving/returning 128 bit vector
; Callee side for 128-bit vectors: returns select(%x, %a, %b) lane-wise.
; The checks expect the <4 x i1> mask to arrive in %xmm0 and be turned into
; a k-register (`vpslld $31` + `vpmovd2m`), then a masked blend of %xmm1 and
; %xmm2 written to %xmm0.
689define x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %b)  {
690; X32-LABEL: test_argRet128Vector:
691; X32:       # %bb.0:
692; X32-NEXT:    vpslld $31, %xmm0, %xmm0
693; X32-NEXT:    vpmovd2m %xmm0, %k1
694; X32-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
695; X32-NEXT:    retl
696;
697; WIN64-LABEL: test_argRet128Vector:
698; WIN64:       # %bb.0:
699; WIN64-NEXT:    vpslld $31, %xmm0, %xmm0
700; WIN64-NEXT:    vpmovd2m %xmm0, %k1
701; WIN64-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
702; WIN64-NEXT:    retq
703;
704; LINUXOSX64-LABEL: test_argRet128Vector:
705; LINUXOSX64:       # %bb.0:
706; LINUXOSX64-NEXT:    vpslld $31, %xmm0, %xmm0
707; LINUXOSX64-NEXT:    vpmovd2m %xmm0, %k1
708; LINUXOSX64-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
709; LINUXOSX64-NEXT:    retq
710  %d = select <4 x i1> %x, <4 x i32> %a, <4 x i32> %b
711  ret <4 x i32> %d
712}
713
714; Test regcall when passing/retrieving 128 bit vector
; Caller side for 128-bit vectors: calls test_argRet128Vector(%x, %a, %a)
; and then selects between %a and the call result using the same mask %x.
; The checks expect %a to be copied into a register that this function
; spills in its prologue and reloads before returning (%xmm4 on X32, %xmm8
; on the 64-bit targets). The computed mask %k1 is expected to be spilled
; to the stack before the call and reloaded after it for the final masked
; move. Stack offsets are matched with FileCheck regexes rather than literal
; values.
715define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i1> %x, <4 x i32> %a)  {
716; X32-LABEL: test_CallargRet128Vector:
717; X32:       # %bb.0:
718; X32-NEXT:    pushl %esp
719; X32-NEXT:    subl $40, %esp
720; X32-NEXT:    vmovups %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
721; X32-NEXT:    vmovdqa %xmm1, %xmm4
722; X32-NEXT:    vpslld $31, %xmm0, %xmm1
723; X32-NEXT:    vpmovd2m %xmm1, %k1
724; X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
725; X32-NEXT:    vmovdqa %xmm4, %xmm1
726; X32-NEXT:    vmovdqa %xmm4, %xmm2
727; X32-NEXT:    calll _test_argRet128Vector
728; X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
729; X32-NEXT:    vmovdqa32 %xmm4, %xmm0 {%k1}
730; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm4 # 16-byte Reload
731; X32-NEXT:    addl $40, %esp
732; X32-NEXT:    popl %esp
733; X32-NEXT:    retl
734;
735; WIN64-LABEL: test_CallargRet128Vector:
736; WIN64:       # %bb.0:
737; WIN64-NEXT:    pushq %rsp
738; WIN64-NEXT:    .seh_pushreg %rsp
739; WIN64-NEXT:    subq $32, %rsp
740; WIN64-NEXT:    .seh_stackalloc 32
741; WIN64-NEXT:    vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
742; WIN64-NEXT:    .seh_savexmm %xmm8, 16
743; WIN64-NEXT:    .seh_endprologue
744; WIN64-NEXT:    vmovdqa %xmm1, %xmm8
745; WIN64-NEXT:    vpslld $31, %xmm0, %xmm1
746; WIN64-NEXT:    vpmovd2m %xmm1, %k1
747; WIN64-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
748; WIN64-NEXT:    vmovdqa %xmm8, %xmm1
749; WIN64-NEXT:    vmovdqa %xmm8, %xmm2
750; WIN64-NEXT:    callq test_argRet128Vector
751; WIN64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
752; WIN64-NEXT:    vmovdqa32 %xmm8, %xmm0 {%k1}
753; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
754; WIN64-NEXT:    addq $32, %rsp
755; WIN64-NEXT:    popq %rsp
756; WIN64-NEXT:    retq
757; WIN64-NEXT:    .seh_endproc
758;
759; LINUXOSX64-LABEL: test_CallargRet128Vector:
760; LINUXOSX64:       # %bb.0:
761; LINUXOSX64-NEXT:    pushq %rsp
762; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
763; LINUXOSX64-NEXT:    subq $32, %rsp
764; LINUXOSX64-NEXT:    vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
765; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 48
766; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
767; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -32
768; LINUXOSX64-NEXT:    vmovdqa %xmm1, %xmm8
769; LINUXOSX64-NEXT:    vpslld $31, %xmm0, %xmm1
770; LINUXOSX64-NEXT:    vpmovd2m %xmm1, %k1
771; LINUXOSX64-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
772; LINUXOSX64-NEXT:    vmovdqa %xmm8, %xmm1
773; LINUXOSX64-NEXT:    vmovdqa %xmm8, %xmm2
774; LINUXOSX64-NEXT:    callq test_argRet128Vector
775; LINUXOSX64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
776; LINUXOSX64-NEXT:    vmovdqa32 %xmm8, %xmm0 {%k1}
777; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
778; LINUXOSX64-NEXT:    addq $32, %rsp
779; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
780; LINUXOSX64-NEXT:    popq %rsp
781; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
782; LINUXOSX64-NEXT:    retq
783  %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %a)
784  %c = select <4 x i1> %x, <4 x i32> %a, <4 x i32> %b
785  ret <4 x i32> %c
786}
787
788; Test regcall when receiving/returning 256 bit vector
; Callee side for 256-bit vectors: returns select(%x, %a, %b) lane-wise.
; Unlike the 128-bit case, the checks expect the <8 x i1> mask to arrive in
; a general-purpose register (`kmovd %eax, %k1`), with the two vector
; operands in %ymm0 and %ymm1 and the blended result in %ymm0.
789define x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %b)  {
790; X32-LABEL: test_argRet256Vector:
791; X32:       # %bb.0:
792; X32-NEXT:    kmovd %eax, %k1
793; X32-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
794; X32-NEXT:    retl
795;
796; WIN64-LABEL: test_argRet256Vector:
797; WIN64:       # %bb.0:
798; WIN64-NEXT:    kmovd %eax, %k1
799; WIN64-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
800; WIN64-NEXT:    retq
801;
802; LINUXOSX64-LABEL: test_argRet256Vector:
803; LINUXOSX64:       # %bb.0:
804; LINUXOSX64-NEXT:    kmovd %eax, %k1
805; LINUXOSX64-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
806; LINUXOSX64-NEXT:    retq
807  %d = select <8 x i1> %x, <8 x i32> %a, <8 x i32> %b
808  ret <8 x i32> %d
809}
810
811; Test regcall when passing/retrieving 256 bit vector
; Caller side for 256-bit vectors: calls test_argRet256Vector(%x, %a, %a)
; and then selects between %a and the call result using the same mask %x.
; The checks expect both %a (%ymm0) and the mask (%k1, built from %eax) to
; be spilled to the stack before the call. After the call both are
; reloaded, %a into %ymm1, for the final masked move into %ymm0. Stack
; offsets are matched with FileCheck regexes rather than literal values.
812define x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i1> %x, <8 x i32> %a)  {
813; X32-LABEL: test_CallargRet256Vector:
814; X32:       # %bb.0:
815; X32-NEXT:    pushl %esp
816; X32-NEXT:    subl $88, %esp
817; X32-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
818; X32-NEXT:    kmovd %eax, %k1
819; X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
820; X32-NEXT:    vmovdqa %ymm0, %ymm1
821; X32-NEXT:    calll _test_argRet256Vector
822; X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
823; X32-NEXT:    vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %ymm1 # 32-byte Reload
824; X32-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
825; X32-NEXT:    addl $88, %esp
826; X32-NEXT:    popl %esp
827; X32-NEXT:    retl
828;
829; WIN64-LABEL: test_CallargRet256Vector:
830; WIN64:       # %bb.0:
831; WIN64-NEXT:    pushq %rsp
832; WIN64-NEXT:    .seh_pushreg %rsp
833; WIN64-NEXT:    subq $80, %rsp
834; WIN64-NEXT:    .seh_stackalloc 80
835; WIN64-NEXT:    .seh_endprologue
836; WIN64-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
837; WIN64-NEXT:    kmovd %eax, %k1
838; WIN64-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
839; WIN64-NEXT:    vmovdqa %ymm0, %ymm1
840; WIN64-NEXT:    callq test_argRet256Vector
841; WIN64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
842; WIN64-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
843; WIN64-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
844; WIN64-NEXT:    addq $80, %rsp
845; WIN64-NEXT:    popq %rsp
846; WIN64-NEXT:    retq
847; WIN64-NEXT:    .seh_endproc
848;
849; LINUXOSX64-LABEL: test_CallargRet256Vector:
850; LINUXOSX64:       # %bb.0:
851; LINUXOSX64-NEXT:    pushq %rsp
852; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
853; LINUXOSX64-NEXT:    subq $80, %rsp
854; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 96
855; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
856; LINUXOSX64-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
857; LINUXOSX64-NEXT:    kmovd %eax, %k1
858; LINUXOSX64-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
859; LINUXOSX64-NEXT:    vmovdqa %ymm0, %ymm1
860; LINUXOSX64-NEXT:    callq test_argRet256Vector
861; LINUXOSX64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
862; LINUXOSX64-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
863; LINUXOSX64-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
864; LINUXOSX64-NEXT:    addq $80, %rsp
865; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
866; LINUXOSX64-NEXT:    popq %rsp
867; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
868; LINUXOSX64-NEXT:    retq
869  %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %a)
870  %c = select <8 x i1> %x, <8 x i32> %a, <8 x i32> %b
871  ret <8 x i32> %c
872}
873
874; Test regcall when receiving/returning 512 bit vector
; Callee side for 512-bit vectors: returns select(%x, %a, %b) lane-wise.
; The expected code mirrors the 256-bit callee with %zmm registers: the
; <16 x i1> mask is moved from %eax into %k1, and %zmm0/%zmm1 are blended
; under that mask into %zmm0.
875define x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %b)  {
876; X32-LABEL: test_argRet512Vector:
877; X32:       # %bb.0:
878; X32-NEXT:    kmovd %eax, %k1
879; X32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
880; X32-NEXT:    retl
881;
882; WIN64-LABEL: test_argRet512Vector:
883; WIN64:       # %bb.0:
884; WIN64-NEXT:    kmovd %eax, %k1
885; WIN64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
886; WIN64-NEXT:    retq
887;
888; LINUXOSX64-LABEL: test_argRet512Vector:
889; LINUXOSX64:       # %bb.0:
890; LINUXOSX64-NEXT:    kmovd %eax, %k1
891; LINUXOSX64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
892; LINUXOSX64-NEXT:    retq
893  %d = select <16 x i1> %x, <16 x i32> %a, <16 x i32> %b
894  ret <16 x i32> %d
895}
896
897; Test regcall when passing/retrieving 512 bit vector
; Calls test_argRet512Vector with %a supplied as both vector operands, then
; selects between %a and the call result using the same mask %x.
; Both the mask and %a are used again after the call, so every expected
; output spills them to the stack before the call (a 64-byte and a 2-byte
; spill) and reloads them afterwards.
898define x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i1> %x, <16 x i32> %a)  {
899; X32-LABEL: test_CallargRet512Vector:
900; X32:       # %bb.0:
901; X32-NEXT:    pushl %esp
902; X32-NEXT:    subl $184, %esp
903; X32-NEXT:    vmovdqu64 %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill
904; X32-NEXT:    kmovd %eax, %k1
905; X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
906; X32-NEXT:    vmovdqa64 %zmm0, %zmm1
907; X32-NEXT:    calll _test_argRet512Vector
908; X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
909; X32-NEXT:    vmovdqu64 {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 # 64-byte Reload
910; X32-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
911; X32-NEXT:    addl $184, %esp
912; X32-NEXT:    popl %esp
913; X32-NEXT:    retl
914;
915; WIN64-LABEL: test_CallargRet512Vector:
916; WIN64:       # %bb.0:
917; WIN64-NEXT:    pushq %rsp
918; WIN64-NEXT:    .seh_pushreg %rsp
919; WIN64-NEXT:    subq $176, %rsp
920; WIN64-NEXT:    .seh_stackalloc 176
921; WIN64-NEXT:    .seh_endprologue
922; WIN64-NEXT:    vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
923; WIN64-NEXT:    kmovd %eax, %k1
924; WIN64-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
925; WIN64-NEXT:    vmovdqa64 %zmm0, %zmm1
926; WIN64-NEXT:    callq test_argRet512Vector
927; WIN64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
928; WIN64-NEXT:    vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
929; WIN64-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
930; WIN64-NEXT:    addq $176, %rsp
931; WIN64-NEXT:    popq %rsp
932; WIN64-NEXT:    retq
933; WIN64-NEXT:    .seh_endproc
934;
935; LINUXOSX64-LABEL: test_CallargRet512Vector:
936; LINUXOSX64:       # %bb.0:
937; LINUXOSX64-NEXT:    pushq %rsp
938; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
939; LINUXOSX64-NEXT:    subq $176, %rsp
940; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 192
941; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
942; LINUXOSX64-NEXT:    vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
943; LINUXOSX64-NEXT:    kmovd %eax, %k1
944; LINUXOSX64-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
945; LINUXOSX64-NEXT:    vmovdqa64 %zmm0, %zmm1
946; LINUXOSX64-NEXT:    callq test_argRet512Vector
947; LINUXOSX64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
948; LINUXOSX64-NEXT:    vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
949; LINUXOSX64-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
950; LINUXOSX64-NEXT:    addq $176, %rsp
951; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
952; LINUXOSX64-NEXT:    popq %rsp
953; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
954; LINUXOSX64-NEXT:    retq
; %a is deliberately passed twice so the callee's select yields %a regardless
; of the mask value.
955  %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %a)
956  %c = select <16 x i1> %x, <16 x i32> %a, <16 x i32> %b
957  ret <16 x i32> %c
958}
959
960; Test regcall when running multiple input parameters - callee saved xmms
; Computes ((%a + %b) - (%a * %b)) + %c on <32 x float> operands. The expected
; code handles every operand as a pair of zmm registers, so each IR operation
; appears as two instructions.
; The i386 expected output spills %xmm6/%xmm7 before using %zmm6/%zmm7 as
; temporaries and reloads them before returning; the two 64-bit expected
; outputs use %zmm6/%zmm7 without any spill.
961define x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
962; X32-LABEL: testf32_inp:
963; X32:       # %bb.0:
964; X32-NEXT:    subl $44, %esp
965; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
966; X32-NEXT:    vmovups %xmm6, (%esp) # 16-byte Spill
967; X32-NEXT:    vaddps %zmm2, %zmm0, %zmm6
968; X32-NEXT:    vaddps %zmm3, %zmm1, %zmm7
969; X32-NEXT:    vmulps %zmm2, %zmm0, %zmm0
970; X32-NEXT:    vsubps %zmm0, %zmm6, %zmm0
971; X32-NEXT:    vmulps %zmm3, %zmm1, %zmm1
972; X32-NEXT:    vsubps %zmm1, %zmm7, %zmm1
973; X32-NEXT:    vaddps %zmm4, %zmm0, %zmm0
974; X32-NEXT:    vaddps %zmm5, %zmm1, %zmm1
975; X32-NEXT:    vmovups (%esp), %xmm6 # 16-byte Reload
976; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
977; X32-NEXT:    addl $44, %esp
978; X32-NEXT:    retl
979;
980; WIN64-LABEL: testf32_inp:
981; WIN64:       # %bb.0:
982; WIN64-NEXT:    vaddps %zmm2, %zmm0, %zmm6
983; WIN64-NEXT:    vaddps %zmm3, %zmm1, %zmm7
984; WIN64-NEXT:    vmulps %zmm2, %zmm0, %zmm0
985; WIN64-NEXT:    vsubps %zmm0, %zmm6, %zmm0
986; WIN64-NEXT:    vmulps %zmm3, %zmm1, %zmm1
987; WIN64-NEXT:    vsubps %zmm1, %zmm7, %zmm1
988; WIN64-NEXT:    vaddps %zmm4, %zmm0, %zmm0
989; WIN64-NEXT:    vaddps %zmm5, %zmm1, %zmm1
990; WIN64-NEXT:    retq
991;
992; LINUXOSX64-LABEL: testf32_inp:
993; LINUXOSX64:       # %bb.0:
994; LINUXOSX64-NEXT:    vaddps %zmm2, %zmm0, %zmm6
995; LINUXOSX64-NEXT:    vaddps %zmm3, %zmm1, %zmm7
996; LINUXOSX64-NEXT:    vmulps %zmm2, %zmm0, %zmm0
997; LINUXOSX64-NEXT:    vsubps %zmm0, %zmm6, %zmm0
998; LINUXOSX64-NEXT:    vmulps %zmm3, %zmm1, %zmm1
999; LINUXOSX64-NEXT:    vsubps %zmm1, %zmm7, %zmm1
1000; LINUXOSX64-NEXT:    vaddps %zmm4, %zmm0, %zmm0
1001; LINUXOSX64-NEXT:    vaddps %zmm5, %zmm1, %zmm1
1002; LINUXOSX64-NEXT:    retq
; Both the sum and the product of %a and %b are live at the same time, which
; is what forces the extra temporaries seen in the expected output.
1003  %x1 = fadd <32 x float> %a, %b
1004  %x2 = fmul <32 x float> %a, %b
1005  %x3 = fsub <32 x float> %x1, %x2
1006  %x4 = fadd <32 x float> %x3, %c
1007  ret <32 x float> %x4
1008}
1009
1010; Test regcall when running multiple input parameters - callee saved GPRs
; Takes twelve i32 arguments grouped as pairs (a1,a2) (a3,a4) (a5,a6) and
; (b1,b2) (b3,b4) (b5,b6). For each of the three pair positions it multiplies
; the a-difference by the b-difference and the a-sum by the b-sum, then
; returns the total of all six products.
; The expected prologues save %ebp/%ebx on i386, %r13/%rbp/%rbx on Win64 and
; %rbp/%rbx on Linux. The i386 and Linux expected outputs also load some
; operands from the stack; the Win64 expected output does not.
1011define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
1012; X32-LABEL: testi32_inp:
1013; X32:       # %bb.0:
1014; X32-NEXT:    pushl %ebp
1015; X32-NEXT:    pushl %ebx
1016; X32-NEXT:    subl $20, %esp
1017; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1018; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1019; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill
1020; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1021; X32-NEXT:    movl %eax, %ebx
1022; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1023; X32-NEXT:    subl %ecx, %ebx
1024; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1025; X32-NEXT:    movl %esi, %ebp
1026; X32-NEXT:    subl {{[0-9]+}}(%esp), %ebp
1027; X32-NEXT:    imull %ebp, %ebx
1028; X32-NEXT:    movl %edx, %ebp
1029; X32-NEXT:    subl %edi, %ebp
1030; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1031; X32-NEXT:    movl %edx, %ecx
1032; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1033; X32-NEXT:    imull %ebp, %ecx
1034; X32-NEXT:    addl %ecx, %ebx
1035; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
1036; X32-NEXT:    movl %edi, %ebp
1037; X32-NEXT:    subl {{[0-9]+}}(%esp), %ebp
1038; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1039; X32-NEXT:    movl %ecx, %eax
1040; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
1041; X32-NEXT:    imull %ebp, %eax
1042; X32-NEXT:    addl %eax, %ebx
1043; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1044; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
1045; X32-NEXT:    movl (%esp), %ebp # 4-byte Reload
1046; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
1047; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi
1048; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi
1049; X32-NEXT:    imull %eax, %esi
1050; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx
1051; X32-NEXT:    imull %ebp, %edx
1052; X32-NEXT:    addl %esi, %edx
1053; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
1054; X32-NEXT:    imull %edi, %ecx
1055; X32-NEXT:    addl %edx, %ecx
1056; X32-NEXT:    addl %ecx, %ebx
1057; X32-NEXT:    movl %ebx, %eax
1058; X32-NEXT:    addl $20, %esp
1059; X32-NEXT:    popl %ebx
1060; X32-NEXT:    popl %ebp
1061; X32-NEXT:    retl
1062;
1063; WIN64-LABEL: testi32_inp:
1064; WIN64:       # %bb.0:
1065; WIN64-NEXT:    pushq %r13
1066; WIN64-NEXT:    pushq %rbp
1067; WIN64-NEXT:    pushq %rbx
1068; WIN64-NEXT:    movl %eax, %r13d
1069; WIN64-NEXT:    subl %ecx, %eax
1070; WIN64-NEXT:    movl %edx, %ebp
1071; WIN64-NEXT:    subl %edi, %ebp
1072; WIN64-NEXT:    movl %r9d, %ebx
1073; WIN64-NEXT:    subl %r10d, %ebx
1074; WIN64-NEXT:    imull %ebx, %eax
1075; WIN64-NEXT:    movl %r11d, %ebx
1076; WIN64-NEXT:    subl %r12d, %ebx
1077; WIN64-NEXT:    imull %ebp, %ebx
1078; WIN64-NEXT:    movl %esi, %ebp
1079; WIN64-NEXT:    subl %r8d, %ebp
1080; WIN64-NEXT:    addl %ebx, %eax
1081; WIN64-NEXT:    movl %r14d, %ebx
1082; WIN64-NEXT:    subl %r15d, %ebx
1083; WIN64-NEXT:    imull %ebp, %ebx
1084; WIN64-NEXT:    addl %ebx, %eax
1085; WIN64-NEXT:    addl %ecx, %r13d
1086; WIN64-NEXT:    addl %edi, %edx
1087; WIN64-NEXT:    addl %r8d, %esi
1088; WIN64-NEXT:    addl %r10d, %r9d
1089; WIN64-NEXT:    imull %r13d, %r9d
1090; WIN64-NEXT:    addl %r12d, %r11d
1091; WIN64-NEXT:    imull %edx, %r11d
1092; WIN64-NEXT:    addl %r9d, %r11d
1093; WIN64-NEXT:    addl %r15d, %r14d
1094; WIN64-NEXT:    imull %esi, %r14d
1095; WIN64-NEXT:    addl %r11d, %r14d
1096; WIN64-NEXT:    addl %r14d, %eax
1097; WIN64-NEXT:    popq %rbx
1098; WIN64-NEXT:    popq %rbp
1099; WIN64-NEXT:    popq %r13
1100; WIN64-NEXT:    retq
1101;
1102; LINUXOSX64-LABEL: testi32_inp:
1103; LINUXOSX64:       # %bb.0:
1104; LINUXOSX64-NEXT:    pushq %rbp
1105; LINUXOSX64-NEXT:    pushq %rbx
1106; LINUXOSX64-NEXT:    movl %eax, %r10d
1107; LINUXOSX64-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
1108; LINUXOSX64-NEXT:    subl %ecx, %eax
1109; LINUXOSX64-NEXT:    movl %edx, %ebx
1110; LINUXOSX64-NEXT:    subl %edi, %ebx
1111; LINUXOSX64-NEXT:    movl %r9d, %ebp
1112; LINUXOSX64-NEXT:    subl %r12d, %ebp
1113; LINUXOSX64-NEXT:    imull %ebp, %eax
1114; LINUXOSX64-NEXT:    movl %r13d, %ebp
1115; LINUXOSX64-NEXT:    subl %r14d, %ebp
1116; LINUXOSX64-NEXT:    imull %ebx, %ebp
1117; LINUXOSX64-NEXT:    movl %esi, %ebx
1118; LINUXOSX64-NEXT:    subl %r8d, %ebx
1119; LINUXOSX64-NEXT:    addl %ebp, %eax
1120; LINUXOSX64-NEXT:    movl %r15d, %ebp
1121; LINUXOSX64-NEXT:    subl %r11d, %ebp
1122; LINUXOSX64-NEXT:    imull %ebx, %ebp
1123; LINUXOSX64-NEXT:    addl %ebp, %eax
1124; LINUXOSX64-NEXT:    addl %ecx, %r10d
1125; LINUXOSX64-NEXT:    addl %edi, %edx
1126; LINUXOSX64-NEXT:    addl %r8d, %esi
1127; LINUXOSX64-NEXT:    addl %r12d, %r9d
1128; LINUXOSX64-NEXT:    imull %r10d, %r9d
1129; LINUXOSX64-NEXT:    addl %r14d, %r13d
1130; LINUXOSX64-NEXT:    imull %edx, %r13d
1131; LINUXOSX64-NEXT:    addl %r9d, %r13d
1132; LINUXOSX64-NEXT:    addl %r11d, %r15d
1133; LINUXOSX64-NEXT:    imull %esi, %r15d
1134; LINUXOSX64-NEXT:    addl %r13d, %r15d
1135; LINUXOSX64-NEXT:    addl %r15d, %eax
1136; LINUXOSX64-NEXT:    popq %rbx
1137; LINUXOSX64-NEXT:    popq %rbp
1138; LINUXOSX64-NEXT:    retq
; Pairwise differences of the a-arguments (x*) and of the b-arguments (y*).
1139  %x1 = sub i32 %a1, %a2
1140  %x2 = sub i32 %a3, %a4
1141  %x3 = sub i32 %a5, %a6
1142  %y1 = sub i32 %b1, %b2
1143  %y2 = sub i32 %b3, %b4
1144  %y3 = sub i32 %b5, %b6
; Pairwise sums of the a-arguments (v*) and of the b-arguments (w*). Every
; argument is used in both a difference and a sum.
1145  %v1 = add i32 %a1, %a2
1146  %v2 = add i32 %a3, %a4
1147  %v3 = add i32 %a5, %a6
1148  %w1 = add i32 %b1, %b2
1149  %w2 = add i32 %b3, %b4
1150  %w3 = add i32 %b5, %b6
; Difference products (s*) and sum products (t*), one per pair position.
1151  %s1 = mul i32 %x1, %y1
1152  %s2 = mul i32 %x2, %y2
1153  %s3 = mul i32 %x3, %y3
1154  %t1 = mul i32 %v1, %w1
1155  %t2 = mul i32 %v2, %w2
1156  %t3 = mul i32 %v3, %w3
; Reduce the six products to the single i32 result.
1157  %m1 = add i32 %s1, %s2
1158  %m2 = add i32 %m1, %s3
1159  %n1 = add i32 %t1, %t2
1160  %n2 = add i32 %n1, %t3
1161  %r1 = add i32 %m2, %n2
1162  ret i32 %r1
1163}
1164
1165; Test that parameters, overflowing register capacity, are passed through the stack
; Sums nine <32 x float> arguments; the expected code handles each one as two
; 64-byte halves. In the i386 expected output the operands held in registers
; are %zmm0-%zmm7 and the remaining halves are read from the frame at
; 8(%ebp) through 584(%ebp). In both 64-bit expected outputs %zmm0-%zmm15
; hold operands and only two halves are read from memory, at 16(%rbp) and
; 80(%rbp).
; Every expected prologue sets up a frame pointer and aligns the stack
; pointer to 64 bytes (the and-with-$-64 instruction).
1166define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> %b0, <32 x float> %c0, <32 x float> %a1, <32 x float> %b1, <32 x float> %c1, <32 x float> %a2, <32 x float> %b2, <32 x float> %c2) nounwind {
1167; X32-LABEL: testf32_stack:
1168; X32:       # %bb.0:
1169; X32-NEXT:    pushl %ebp
1170; X32-NEXT:    movl %esp, %ebp
1171; X32-NEXT:    andl $-64, %esp
1172; X32-NEXT:    subl $64, %esp
1173; X32-NEXT:    vaddps %zmm3, %zmm1, %zmm1
1174; X32-NEXT:    vaddps %zmm2, %zmm0, %zmm0
1175; X32-NEXT:    vaddps %zmm0, %zmm4, %zmm0
1176; X32-NEXT:    vaddps %zmm1, %zmm5, %zmm1
1177; X32-NEXT:    vaddps %zmm1, %zmm7, %zmm1
1178; X32-NEXT:    vaddps %zmm0, %zmm6, %zmm0
1179; X32-NEXT:    vaddps 8(%ebp), %zmm0, %zmm0
1180; X32-NEXT:    vaddps 72(%ebp), %zmm1, %zmm1
1181; X32-NEXT:    vaddps 200(%ebp), %zmm1, %zmm1
1182; X32-NEXT:    vaddps 136(%ebp), %zmm0, %zmm0
1183; X32-NEXT:    vaddps 264(%ebp), %zmm0, %zmm0
1184; X32-NEXT:    vaddps 328(%ebp), %zmm1, %zmm1
1185; X32-NEXT:    vaddps 456(%ebp), %zmm1, %zmm1
1186; X32-NEXT:    vaddps 392(%ebp), %zmm0, %zmm0
1187; X32-NEXT:    vaddps 520(%ebp), %zmm0, %zmm0
1188; X32-NEXT:    vaddps 584(%ebp), %zmm1, %zmm1
1189; X32-NEXT:    movl %ebp, %esp
1190; X32-NEXT:    popl %ebp
1191; X32-NEXT:    retl
1192;
1193; WIN64-LABEL: testf32_stack:
1194; WIN64:       # %bb.0:
1195; WIN64-NEXT:    pushq %rbp
1196; WIN64-NEXT:    subq $48, %rsp
1197; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
1198; WIN64-NEXT:    andq $-64, %rsp
1199; WIN64-NEXT:    vaddps %zmm3, %zmm1, %zmm1
1200; WIN64-NEXT:    vaddps %zmm2, %zmm0, %zmm0
1201; WIN64-NEXT:    vaddps %zmm0, %zmm4, %zmm0
1202; WIN64-NEXT:    vaddps %zmm1, %zmm5, %zmm1
1203; WIN64-NEXT:    vaddps %zmm1, %zmm7, %zmm1
1204; WIN64-NEXT:    vaddps %zmm0, %zmm6, %zmm0
1205; WIN64-NEXT:    vaddps %zmm0, %zmm8, %zmm0
1206; WIN64-NEXT:    vaddps %zmm1, %zmm9, %zmm1
1207; WIN64-NEXT:    vaddps %zmm1, %zmm11, %zmm1
1208; WIN64-NEXT:    vaddps %zmm0, %zmm10, %zmm0
1209; WIN64-NEXT:    vaddps %zmm0, %zmm12, %zmm0
1210; WIN64-NEXT:    vaddps %zmm1, %zmm13, %zmm1
1211; WIN64-NEXT:    vaddps %zmm1, %zmm15, %zmm1
1212; WIN64-NEXT:    vaddps %zmm0, %zmm14, %zmm0
1213; WIN64-NEXT:    vaddps 16(%rbp), %zmm0, %zmm0
1214; WIN64-NEXT:    vaddps 80(%rbp), %zmm1, %zmm1
1215; WIN64-NEXT:    movq %rbp, %rsp
1216; WIN64-NEXT:    popq %rbp
1217; WIN64-NEXT:    retq
1218;
1219; LINUXOSX64-LABEL: testf32_stack:
1220; LINUXOSX64:       # %bb.0:
1221; LINUXOSX64-NEXT:    pushq %rbp
1222; LINUXOSX64-NEXT:    movq %rsp, %rbp
1223; LINUXOSX64-NEXT:    andq $-64, %rsp
1224; LINUXOSX64-NEXT:    subq $64, %rsp
1225; LINUXOSX64-NEXT:    vaddps %zmm3, %zmm1, %zmm1
1226; LINUXOSX64-NEXT:    vaddps %zmm2, %zmm0, %zmm0
1227; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm4, %zmm0
1228; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm5, %zmm1
1229; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm7, %zmm1
1230; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm6, %zmm0
1231; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm8, %zmm0
1232; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm9, %zmm1
1233; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm11, %zmm1
1234; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm10, %zmm0
1235; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm12, %zmm0
1236; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm13, %zmm1
1237; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm15, %zmm1
1238; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm14, %zmm0
1239; LINUXOSX64-NEXT:    vaddps 16(%rbp), %zmm0, %zmm0
1240; LINUXOSX64-NEXT:    vaddps 80(%rbp), %zmm1, %zmm1
1241; LINUXOSX64-NEXT:    movq %rbp, %rsp
1242; LINUXOSX64-NEXT:    popq %rbp
1243; LINUXOSX64-NEXT:    retq
; Running sum in argument order: each fadd folds the next argument into the
; previous partial result, so all nine arguments are used.
1244  %x1 = fadd <32 x float> %a0, %b0
1245  %x2 = fadd <32 x float> %c0, %x1
1246  %x3 = fadd <32 x float> %a1, %x2
1247  %x4 = fadd <32 x float> %b1, %x3
1248  %x5 = fadd <32 x float> %c1, %x4
1249  %x6 = fadd <32 x float> %a2, %x5
1250  %x7 = fadd <32 x float> %b2, %x6
1251  %x8 = fadd <32 x float> %c2, %x7
1252  ret <32 x float> %x8
1253}
1254
1255; Test regcall when passing/retrieving mixed types
; The arguments are unnamed, so the body refers to them as %0 through %6:
; double, float, signext i8, i32, i64, signext i16 and an i32 pointer. Each
; value is converted to double and added to a running total; the pointer
; argument is loaded first. The total is truncated to i32 with fptosi and
; returned.
; In the i386 expected output the i64 value is assembled from %edx and %edi
; and converted with vcvtqq2pd, and the pointer is loaded from the stack into
; %ebx; the 64-bit expected outputs convert the i64 directly from %rdx and
; read the pointer from %rsi.
; Attribute group #0 is defined outside the visible part of this file.
1256define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 {
1257; X32-LABEL: test_argRetMixTypes:
1258; X32:       # %bb.0:
1259; X32-NEXT:    pushl %ebx
1260; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
1261; X32-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
1262; X32-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
1263; X32-NEXT:    vcvtsi2sd %eax, %xmm2, %xmm1
1264; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1265; X32-NEXT:    vcvtsi2sd %ecx, %xmm2, %xmm1
1266; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1267; X32-NEXT:    vmovd %edx, %xmm1
1268; X32-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
1269; X32-NEXT:    vcvtqq2pd %ymm1, %ymm1
1270; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1271; X32-NEXT:    vcvtsi2sd %esi, %xmm2, %xmm1
1272; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1273; X32-NEXT:    vcvtsi2sdl (%ebx), %xmm2, %xmm1
1274; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1275; X32-NEXT:    vcvttsd2si %xmm0, %eax
1276; X32-NEXT:    popl %ebx
1277; X32-NEXT:    vzeroupper
1278; X32-NEXT:    retl
1279;
1280; WIN64-LABEL: test_argRetMixTypes:
1281; WIN64:       # %bb.0:
1282; WIN64-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
1283; WIN64-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
1284; WIN64-NEXT:    vcvtsi2sd %eax, %xmm2, %xmm1
1285; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1286; WIN64-NEXT:    vcvtsi2sd %ecx, %xmm2, %xmm1
1287; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1288; WIN64-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm1
1289; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1290; WIN64-NEXT:    vcvtsi2sd %edi, %xmm2, %xmm1
1291; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1292; WIN64-NEXT:    vcvtsi2sdl (%rsi), %xmm2, %xmm1
1293; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1294; WIN64-NEXT:    vcvttsd2si %xmm0, %eax
1295; WIN64-NEXT:    retq
1296;
1297; LINUXOSX64-LABEL: test_argRetMixTypes:
1298; LINUXOSX64:       # %bb.0:
1299; LINUXOSX64-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
1300; LINUXOSX64-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
1301; LINUXOSX64-NEXT:    vcvtsi2sd %eax, %xmm2, %xmm1
1302; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1303; LINUXOSX64-NEXT:    vcvtsi2sd %ecx, %xmm2, %xmm1
1304; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1305; LINUXOSX64-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm1
1306; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1307; LINUXOSX64-NEXT:    vcvtsi2sd %edi, %xmm2, %xmm1
1308; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1309; LINUXOSX64-NEXT:    vcvtsi2sdl (%rsi), %xmm2, %xmm1
1310; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
1311; LINUXOSX64-NEXT:    vcvttsd2si %xmm0, %eax
1312; LINUXOSX64-NEXT:    retq
; Value numbering starts at %8 because %0-%6 are the arguments and %7 is the
; implicit label of the entry block.
1313  %8 = fpext float %1 to double
1314  %9 = fadd double %8, %0
1315  %10 = sitofp i8 %2 to double
1316  %11 = fadd double %9, %10
1317  %12 = sitofp i32 %3 to double
1318  %13 = fadd double %11, %12
1319  %14 = sitofp i64 %4 to double
1320  %15 = fadd double %13, %14
1321  %16 = sitofp i16 %5 to double
1322  %17 = fadd double %15, %16
; The pointer argument is dereferenced rather than converted itself.
1323  %18 = load i32, i32* %6, align 4
1324  %19 = sitofp i32 %18 to double
1325  %20 = fadd double %17, %19
1326  %21 = fptosi double %20 to i32
1327  ret i32 %21
1328}
1329
; Aggregate with one field each of float, double, i32, i8 and i64; it is the
; return type of test_argMultiRet.
1330%struct.complex = type { float, double, i32, i8, i64}
1331
; Test regcall when returning a multi-field aggregate (%struct.complex).
; The returned struct carries the float argument %0 unchanged, the double
; argument %1 plus 5.0, and the constants 4 (i32), 7 (i8) and 999 (i64).
; Arguments %2, %3 and %4 are not read by the body.
; Every expected output places 4 in %eax, 7 in %cl and 999 in %edx; the i386
; expected output additionally zeroes %edi.
; NOTE(review): %edi is presumably the upper half of the i64 field on the
; 32-bit target - confirm against the regcall specification.
1332define x86_regcallcc %struct.complex @test_argMultiRet(float, double, i32, i8, i64) local_unnamed_addr #0 {
1333; X32-LABEL: test_argMultiRet:
1334; X32:       # %bb.0:
1335; X32-NEXT:    vaddsd __real@4014000000000000, %xmm1, %xmm1
1336; X32-NEXT:    movl $4, %eax
1337; X32-NEXT:    movb $7, %cl
1338; X32-NEXT:    movl $999, %edx # imm = 0x3E7
1339; X32-NEXT:    xorl %edi, %edi
1340; X32-NEXT:    retl
1341;
1342; WIN64-LABEL: test_argMultiRet:
1343; WIN64:       # %bb.0:
1344; WIN64-NEXT:    vaddsd __real@{{.*}}(%rip), %xmm1, %xmm1
1345; WIN64-NEXT:    movl $999, %edx # imm = 0x3E7
1346; WIN64-NEXT:    movl $4, %eax
1347; WIN64-NEXT:    movb $7, %cl
1348; WIN64-NEXT:    retq
1349;
1350; LINUXOSX64-LABEL: test_argMultiRet:
1351; LINUXOSX64:       # %bb.0:
1352; LINUXOSX64-NEXT:    vaddsd {{.*}}(%rip), %xmm1, %xmm1
1353; LINUXOSX64-NEXT:    movl $999, %edx # imm = 0x3E7
1354; LINUXOSX64-NEXT:    movl $4, %eax
1355; LINUXOSX64-NEXT:    movb $7, %cl
1356; LINUXOSX64-NEXT:    retq
; Value numbering starts at %6: %0-%4 are the arguments and %5 is the implicit
; entry-block label. The struct is built field by field from undef.
1357  %6 = fadd double %1, 5.000000e+00
1358  %7 = insertvalue %struct.complex undef, float %0, 0
1359  %8 = insertvalue %struct.complex %7, double %6, 1
1360  %9 = insertvalue %struct.complex %8, i32 4, 2
1361  %10 = insertvalue %struct.complex %9, i8 7, 3
1362  %11 = insertvalue %struct.complex %10, i64 999, 4
1363  ret %struct.complex %11
1364}
1365