; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32       -mattr=+avx512bw  | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32        -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu    -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64
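;
; This file checks lowering of the x86 'regcall' calling convention for
; AVX-512 mask vector types (<64 x i1>, <32 x i1>, <16 x i1> and <8 x i1>),
; both as arguments and as return values, on 32-bit Windows (X32), 64-bit
; Windows (WIN64) and 64-bit Linux/OSX (LINUXOSX64); the two 64-bit targets
; share the CHECK64 prefix.
;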
; Test regcall when receiving arguments of v64i1 type
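; On the 32-bit target each <64 x i1> argument is split across a pair of
; 32-bit registers or stack slots, so the i64 additions below lower to
; addl/adcl pairs; on the 64-bit targets each mask fits in a single GPR.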
define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12)  {
; X32-LABEL: test_argv64i1:
; X32:       # %bb.0:
; X32-NEXT:    addl %edx, %eax
; X32-NEXT:    adcl %edi, %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv64i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    addq %rcx, %rax
; WIN64-NEXT:    addq %rdx, %rax
; WIN64-NEXT:    addq %rdi, %rax
; WIN64-NEXT:    addq %rsi, %rax
; WIN64-NEXT:    addq %r8, %rax
; WIN64-NEXT:    addq %r9, %rax
; WIN64-NEXT:    addq %r10, %rax
; WIN64-NEXT:    addq %r11, %rax
; WIN64-NEXT:    addq %r12, %rax
; WIN64-NEXT:    addq %r14, %rax
; WIN64-NEXT:    addq %r15, %rax
; WIN64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; WIN64-NEXT:    retq
;
; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    addq %rcx, %rax
; LINUXOSX64-NEXT:    addq %rdx, %rax
; LINUXOSX64-NEXT:    addq %rdi, %rax
; LINUXOSX64-NEXT:    addq %rsi, %rax
; LINUXOSX64-NEXT:    addq %r8, %rax
; LINUXOSX64-NEXT:    addq %r9, %rax
; LINUXOSX64-NEXT:    addq %r12, %rax
; LINUXOSX64-NEXT:    addq %r13, %rax
; LINUXOSX64-NEXT:    addq %r14, %rax
; LINUXOSX64-NEXT:    addq %r15, %rax
; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT:    retq
  %y0 = bitcast <64 x i1> %x0 to i64
  %y1 = bitcast <64 x i1> %x1 to i64
  %y2 = bitcast <64 x i1> %x2 to i64
  %y3 = bitcast <64 x i1> %x3 to i64
  %y4 = bitcast <64 x i1> %x4 to i64
  %y5 = bitcast <64 x i1> %x5 to i64
  %y6 = bitcast <64 x i1> %x6 to i64
  %y7 = bitcast <64 x i1> %x7 to i64
  %y8 = bitcast <64 x i1> %x8 to i64
  %y9 = bitcast <64 x i1> %x9 to i64
  %y10 = bitcast <64 x i1> %x10 to i64
  %y11 = bitcast <64 x i1> %x11 to i64
  %y12 = bitcast <64 x i1> %x12 to i64
  %add1 = add i64 %y0, %y1
  %add2 = add i64 %add1, %y2
  %add3 = add i64 %add2, %y3
  %add4 = add i64 %add3, %y4
  %add5 = add i64 %add4, %y5
  %add6 = add i64 %add5, %y6
  %add7 = add i64 %add6, %y7
  %add8 = add i64 %add7, %y8
  %add9 = add i64 %add8, %y9
  %add10 = add i64 %add9, %y10
  %add11 = add i64 %add10, %y11
  %add12 = add i64 %add11, %y12
  ret i64 %add12
}

; Test regcall when passing arguments of v64i1 type
define i64 @caller_argv64i1() #0 {
; X32-LABEL: caller_argv64i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %edi
; X32-NEXT:    subl $88, %esp
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [2,1,2,1]
; X32-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovaps {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
; X32-NEXT:    vmovups %zmm0, (%esp)
; X32-NEXT:    movl $1, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $2, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $2, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $2, %edx
; X32-NEXT:    movl $1, %edi
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv64i1
; X32-NEXT:    movl %ecx, %edx
; X32-NEXT:    addl $88, %esp
; X32-NEXT:    popl %edi
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv64i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %r15
; WIN64-NEXT:    .seh_pushreg 15
; WIN64-NEXT:    pushq %r14
; WIN64-NEXT:    .seh_pushreg 14
; WIN64-NEXT:    pushq %r12
; WIN64-NEXT:    .seh_pushreg 12
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg 6
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg 7
; WIN64-NEXT:    subq $48, %rsp
; WIN64-NEXT:    .seh_stackalloc 48
; WIN64-NEXT:    vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 7, 32
; WIN64-NEXT:    vmovaps %xmm6, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 6, 16
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; WIN64-NEXT:    movq %rax, (%rsp)
; WIN64-NEXT:    movq %rax, %rcx
; WIN64-NEXT:    movq %rax, %rdx
; WIN64-NEXT:    movq %rax, %rdi
; WIN64-NEXT:    movq %rax, %rsi
; WIN64-NEXT:    movq %rax, %r8
; WIN64-NEXT:    movq %rax, %r9
; WIN64-NEXT:    movq %rax, %r10
; WIN64-NEXT:    movq %rax, %r11
; WIN64-NEXT:    movq %rax, %r12
; WIN64-NEXT:    movq %rax, %r14
; WIN64-NEXT:    movq %rax, %r15
; WIN64-NEXT:    callq test_argv64i1
; WIN64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $48, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    popq %r12
; WIN64-NEXT:    popq %r14
; WIN64-NEXT:    popq %r15
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv64i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %r15
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    pushq %r14
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT:    pushq %r13
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT:    pushq %r12
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 40
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT:    .cfi_offset %r12, -40
; LINUXOSX64-NEXT:    .cfi_offset %r13, -32
; LINUXOSX64-NEXT:    .cfi_offset %r14, -24
; LINUXOSX64-NEXT:    .cfi_offset %r15, -16
; LINUXOSX64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; LINUXOSX64-NEXT:    movq %rax, %rcx
; LINUXOSX64-NEXT:    movq %rax, %rdx
; LINUXOSX64-NEXT:    movq %rax, %rdi
; LINUXOSX64-NEXT:    movq %rax, %rsi
; LINUXOSX64-NEXT:    movq %rax, %r8
; LINUXOSX64-NEXT:    movq %rax, %r9
; LINUXOSX64-NEXT:    movq %rax, %r12
; LINUXOSX64-NEXT:    movq %rax, %r13
; LINUXOSX64-NEXT:    movq %rax, %r14
; LINUXOSX64-NEXT:    movq %rax, %r15
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT:    callq test_argv64i1
; LINUXOSX64-NEXT:    addq $24, %rsp
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset -24
; LINUXOSX64-NEXT:    popq %r12
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT:    popq %r13
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT:    popq %r14
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    popq %r15
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i64 4294967298 to <64 x i1>
  %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0)
  ret i64 %call
}

; Test regcall when returning v64i1 type
define x86_regcallcc <64 x i1> @test_retv64i1()  {
; X32-LABEL: test_retv64i1:
; X32:       # %bb.0:
; X32-NEXT:    movl $2, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv64i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; CHECK64-NEXT:    retq
  %a = bitcast i64 4294967298 to <64 x i1>
  ret <64 x i1> %a
}

; Test regcall when processing result of v64i1 type
define <64 x i1> @caller_retv64i1() #0 {
; X32-LABEL: caller_retv64i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv64i1
; X32-NEXT:    kmovd %eax, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kunpckdq %k0, %k1, %k0
; X32-NEXT:    vpmovm2b %k0, %zmm0
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv64i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg 6
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg 7
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv64i1
; WIN64-NEXT:    kmovq %rax, %k0
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv64i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv64i1
; LINUXOSX64-NEXT:    kmovq %rax, %k0
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm0
; LINUXOSX64-NEXT:    popq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <64 x i1> @test_retv64i1()
  ret <64 x i1> %call
}

; Test regcall when receiving arguments of v32i1 type
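; The helper below uses the default C calling convention, so XMM registers
; that regcall treats as callee-saved (XMM4-XMM7 on 32-bit, XMM8-XMM15 on
; 64-bit Linux/OSX) have to be spilled and reloaded around the call.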
declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)  {
; X32-LABEL: test_argv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esp
; X32-NEXT:    subl $72, %esp
; X32-NEXT:    vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2b %k2, %zmm0
; X32-NEXT:    vpmovm2b %k1, %zmm1
; X32-NEXT:    vpmovm2b %k0, %zmm2
; X32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; X32-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
; X32-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
; X32-NEXT:    calll _test_argv32i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $72, %esp
; X32-NEXT:    popl %esp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg 11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg 10
; WIN64-NEXT:    pushq %rsp
; WIN64-NEXT:    .seh_pushreg 4
; WIN64-NEXT:    subq $32, %rsp
; WIN64-NEXT:    .seh_stackalloc 32
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %ecx, %k1
; WIN64-NEXT:    kmovd %eax, %k2
; WIN64-NEXT:    vpmovm2b %k2, %zmm0
; WIN64-NEXT:    vpmovm2b %k1, %zmm1
; WIN64-NEXT:    vpmovm2b %k0, %zmm2
; WIN64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; WIN64-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
; WIN64-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
; WIN64-NEXT:    callq test_argv32i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    addq $32, %rsp
; WIN64-NEXT:    popq %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    subq $128, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
; LINUXOSX64-NEXT:    callq test_argv32i1helper
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $128, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    popq %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    retq
entry:
  %res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
  ret i32 %res
}

; Test regcall when passing arguments of v32i1 type
define i32 @caller_argv32i1() #0 {
; X32-LABEL: caller_argv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv32i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg 6
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg 7
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv32i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv32i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i32 1 to <32 x i1>
  %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
  ret i32 %call
}

; Test regcall when returning v32i1 type
define x86_regcallcc <32 x i1> @test_retv32i1()  {
; X32-LABEL: test_retv32i1:
; X32:       # %bb.0:
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv32i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl $1, %eax
; CHECK64-NEXT:    retq
  %a = bitcast i32 1 to <32 x i1>
  ret <32 x i1> %a
}

; Test regcall when processing result of v32i1 type
define i32 @caller_retv32i1() #0 {
; X32-LABEL: caller_retv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv32i1
; X32-NEXT:    incl %eax
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg 6
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg 7
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv32i1
; WIN64-NEXT:    incl %eax
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv32i1
; LINUXOSX64-NEXT:    incl %eax
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <32 x i1> @test_retv32i1()
  %c = bitcast <32 x i1> %call to i32
  %add = add i32 %c, 1
  ret i32 %add
}

; Test regcall when receiving arguments of v16i1 type
declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)  {
; X32-LABEL: test_argv16i1:
; X32:       # %bb.0:
; X32-NEXT:    pushl %esp
; X32-NEXT:    subl $72, %esp
; X32-NEXT:    vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2b %k2, %zmm0
; X32-NEXT:    vpmovm2b %k1, %zmm1
; X32-NEXT:    vpmovm2b %k0, %zmm2
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv16i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $72, %esp
; X32-NEXT:    popl %esp
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv16i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg 11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg 10
; WIN64-NEXT:    pushq %rsp
; WIN64-NEXT:    .seh_pushreg 4
; WIN64-NEXT:    subq $32, %rsp
; WIN64-NEXT:    .seh_stackalloc 32
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %ecx, %k1
; WIN64-NEXT:    kmovd %eax, %k2
; WIN64-NEXT:    vpmovm2b %k2, %zmm0
; WIN64-NEXT:    vpmovm2b %k1, %zmm1
; WIN64-NEXT:    vpmovm2b %k0, %zmm2
; WIN64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; WIN64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv16i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    addq $32, %rsp
; WIN64-NEXT:    popq %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv16i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    pushq %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    subq $128, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    callq test_argv16i1helper
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $128, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    popq %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
  %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
  ret i16 %res
}

; Test regcall when passing arguments of v16i1 type
define i16 @caller_argv16i1() #0 {
; X32-LABEL: caller_argv16i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv16i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv16i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg 6
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg 7
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv16i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv16i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv16i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i16 1 to <16 x i1>
  %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
  ret i16 %call
}

; Test regcall when returning v16i1 type
define x86_regcallcc <16 x i1> @test_retv16i1()  {
; X32-LABEL: test_retv16i1:
; X32:       # %bb.0:
; X32-NEXT:    movw $1, %ax
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv16i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movw $1, %ax
; CHECK64-NEXT:    retq
  %a = bitcast i16 1 to <16 x i1>
  ret <16 x i1> %a
}

; Test regcall when processing result of v16i1 type
define i16 @caller_retv16i1() #0 {
; X32-LABEL: caller_retv16i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv16i1
; X32-NEXT:    # kill: def $ax killed $ax def $eax
; X32-NEXT:    incl %eax
; X32-NEXT:    # kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv16i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg 6
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg 7
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv16i1
; WIN64-NEXT:    # kill: def $ax killed $ax def $eax
; WIN64-NEXT:    incl %eax
; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv16i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv16i1
; LINUXOSX64-NEXT:    # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT:    incl %eax
; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <16 x i1> @test_retv16i1()
  %c = bitcast <16 x i1> %call to i16
  %add = add i16 %c, 1
  ret i16 %add
}

; Test regcall when receiving arguments of v8i1 type
declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)  {
; X32-LABEL: test_argv8i1:
; X32:       # %bb.0:
; X32-NEXT:    pushl %esp
; X32-NEXT:    subl $72, %esp
; X32-NEXT:    vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2w %k2, %zmm0
; X32-NEXT:    vpmovm2w %k1, %zmm1
; X32-NEXT:    vpmovm2w %k0, %zmm2
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv8i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $72, %esp
; X32-NEXT:    popl %esp
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv8i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg 11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg 10
; WIN64-NEXT:    pushq %rsp
; WIN64-NEXT:    .seh_pushreg 4
; WIN64-NEXT:    subq $32, %rsp
; WIN64-NEXT:    .seh_stackalloc 32
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %ecx, %k1
; WIN64-NEXT:    kmovd %eax, %k2
; WIN64-NEXT:    vpmovm2w %k2, %zmm0
; WIN64-NEXT:    vpmovm2w %k1, %zmm1
; WIN64-NEXT:    vpmovm2w %k0, %zmm2
; WIN64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; WIN64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv8i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    addq $32, %rsp
; WIN64-NEXT:    popq %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv8i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    pushq %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    subq $128, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2w %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2w %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    callq test_argv8i1helper
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $128, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    popq %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
  %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
  ret i8 %res
}

; Test regcall when passing arguments of v8i1 type
define i8 @caller_argv8i1() #0 {
; X32-LABEL: caller_argv8i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv8i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv8i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg 6
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg 7
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv8i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv8i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv8i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i8 1 to <8 x i1>
  %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
  ret i8 %call
}

; Test regcall when returning v8i1 type
define x86_regcallcc <8 x i1> @test_retv8i1()  {
; X32-LABEL: test_retv8i1:
; X32:       # %bb.0:
; X32-NEXT:    movb $1, %al
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv8i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
  %a = bitcast i8 1 to <8 x i1>
  ret <8 x i1> %a
}

; Test regcall when processing result of v8i1 type
define <8 x i1> @caller_retv8i1() #0 {
; X32-LABEL: caller_retv8i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv8i1
; X32-NEXT:    # kill: def $al killed $al def $eax
; X32-NEXT:    kmovd %eax, %k0
; X32-NEXT:    vpmovm2w %k0, %zmm0
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv8i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg 6
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg 7
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm 6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv8i1
; WIN64-NEXT:    # kill: def $al killed $al def $eax
; WIN64-NEXT:    kmovd %eax, %k0
; WIN64-NEXT:    vpmovm2w %k0, %zmm0
; WIN64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_handlerdata
; WIN64-NEXT:    .text
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv8i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv8i1
; LINUXOSX64-NEXT:    # kill: def $al killed $al def $eax
; LINUXOSX64-NEXT:    kmovd %eax, %k0
; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    popq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <8 x i1> @test_retv8i1()
  ret <8 x i1> %call
}