• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BASELINE
3; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE1
4; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2
5; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP
6
7; https://bugs.llvm.org/show_bug.cgi?id=37104
8
9; All the advanced stuff (negative tests, commutativity) is handled in the
10; scalar version of the test only.
11
12; ============================================================================ ;
13; 8-bit vector width
14; ============================================================================ ;
15
; Masked merge in its "out" form on <1 x i8> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; A single shared set of assertions covers every run line: the expected code
; is a scalar and / not / and / or sequence in general-purpose registers.
16define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
17; CHECK-LABEL: out_v1i8:
18; CHECK:       # %bb.0:
19; CHECK-NEXT:    movl %edx, %eax
20; CHECK-NEXT:    andl %edx, %edi
21; CHECK-NEXT:    notb %al
22; CHECK-NEXT:    andb %sil, %al
23; CHECK-NEXT:    orb %dil, %al
24; CHECK-NEXT:    # kill: def $al killed $al killed $eax
25; CHECK-NEXT:    retq
26  %mx = and <1 x i8> %x, %mask
27  %notmask = xor <1 x i8> %mask, <i8 -1>
28  %my = and <1 x i8> %y, %notmask
29  %r = or <1 x i8> %mx, %my
30  ret <1 x i8> %r
31}
32
33; ============================================================================ ;
34; 16-bit vector width
35; ============================================================================ ;
36
; Masked merge in its "out" form on <2 x i8> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; Expected assembly differs per run line:
;   - the two runs without SSE2 expect identical per-lane scalar code, with
;     the two result lanes left in %al and %edx;
;   - the +sse2 run expects andps / andnps / orps on xmm registers;
;   - the +xop run expects a single vpcmov.
37define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
38; CHECK-BASELINE-LABEL: out_v2i8:
39; CHECK-BASELINE:       # %bb.0:
40; CHECK-BASELINE-NEXT:    movl %r8d, %eax
41; CHECK-BASELINE-NEXT:    andl %r9d, %esi
42; CHECK-BASELINE-NEXT:    andl %r8d, %edi
43; CHECK-BASELINE-NEXT:    notb %al
44; CHECK-BASELINE-NEXT:    notb %r9b
45; CHECK-BASELINE-NEXT:    andb %cl, %r9b
46; CHECK-BASELINE-NEXT:    andb %dl, %al
47; CHECK-BASELINE-NEXT:    orb %dil, %al
48; CHECK-BASELINE-NEXT:    orb %sil, %r9b
49; CHECK-BASELINE-NEXT:    # kill: def $al killed $al killed $eax
50; CHECK-BASELINE-NEXT:    movl %r9d, %edx
51; CHECK-BASELINE-NEXT:    retq
52;
53; CHECK-SSE1-LABEL: out_v2i8:
54; CHECK-SSE1:       # %bb.0:
55; CHECK-SSE1-NEXT:    movl %r8d, %eax
56; CHECK-SSE1-NEXT:    andl %r9d, %esi
57; CHECK-SSE1-NEXT:    andl %r8d, %edi
58; CHECK-SSE1-NEXT:    notb %al
59; CHECK-SSE1-NEXT:    notb %r9b
60; CHECK-SSE1-NEXT:    andb %cl, %r9b
61; CHECK-SSE1-NEXT:    andb %dl, %al
62; CHECK-SSE1-NEXT:    orb %dil, %al
63; CHECK-SSE1-NEXT:    orb %sil, %r9b
64; CHECK-SSE1-NEXT:    # kill: def $al killed $al killed $eax
65; CHECK-SSE1-NEXT:    movl %r9d, %edx
66; CHECK-SSE1-NEXT:    retq
67;
68; CHECK-SSE2-LABEL: out_v2i8:
69; CHECK-SSE2:       # %bb.0:
70; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
71; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
72; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
73; CHECK-SSE2-NEXT:    retq
74;
75; CHECK-XOP-LABEL: out_v2i8:
76; CHECK-XOP:       # %bb.0:
77; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
78; CHECK-XOP-NEXT:    retq
79  %mx = and <2 x i8> %x, %mask
80  %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1>
81  %my = and <2 x i8> %y, %notmask
82  %r = or <2 x i8> %mx, %my
83  ret <2 x i8> %r
84}
85
; Masked merge in its "out" form on <1 x i16> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; A single shared set of assertions covers every run line: the expected code
; is a scalar and / not / and / or sequence on 32-bit registers, with the
; result taken from %ax.
86define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
87; CHECK-LABEL: out_v1i16:
88; CHECK:       # %bb.0:
89; CHECK-NEXT:    movl %edx, %eax
90; CHECK-NEXT:    andl %edx, %edi
91; CHECK-NEXT:    notl %eax
92; CHECK-NEXT:    andl %esi, %eax
93; CHECK-NEXT:    orl %edi, %eax
94; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
95; CHECK-NEXT:    retq
96  %mx = and <1 x i16> %x, %mask
97  %notmask = xor <1 x i16> %mask, <i16 -1>
98  %my = and <1 x i16> %y, %notmask
99  %r = or <1 x i16> %mx, %my
100  ret <1 x i16> %r
101}
102
103; ============================================================================ ;
104; 32-bit vector width
105; ============================================================================ ;
106
; Masked merge in its "out" form on <4 x i8> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; Expected assembly differs per run line:
;   - the two runs without SSE2 expect identical scalar code that processes
;     the four lanes one byte at a time and stores each result byte through
;     %rax (copied from %rdi); %rbx is pushed and popped around the body;
;   - the +sse2 run expects andps / andnps / orps on xmm registers;
;   - the +xop run expects a single vpcmov.
107define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
108; CHECK-BASELINE-LABEL: out_v4i8:
109; CHECK-BASELINE:       # %bb.0:
110; CHECK-BASELINE-NEXT:    pushq %rbx
111; CHECK-BASELINE-NEXT:    movq %rdi, %rax
112; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
113; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
114; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
115; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
116; CHECK-BASELINE-NEXT:    andb %bl, %r8b
117; CHECK-BASELINE-NEXT:    andb %r11b, %cl
118; CHECK-BASELINE-NEXT:    andb %r10b, %dl
119; CHECK-BASELINE-NEXT:    andb %dil, %sil
120; CHECK-BASELINE-NEXT:    notb %r10b
121; CHECK-BASELINE-NEXT:    notb %r11b
122; CHECK-BASELINE-NEXT:    notb %bl
123; CHECK-BASELINE-NEXT:    notb %dil
124; CHECK-BASELINE-NEXT:    andb %r9b, %dil
125; CHECK-BASELINE-NEXT:    orb %sil, %dil
126; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
127; CHECK-BASELINE-NEXT:    orb %r8b, %bl
128; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
129; CHECK-BASELINE-NEXT:    orb %cl, %r11b
130; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
131; CHECK-BASELINE-NEXT:    orb %dl, %r10b
132; CHECK-BASELINE-NEXT:    movb %bl, 3(%rax)
133; CHECK-BASELINE-NEXT:    movb %r11b, 2(%rax)
134; CHECK-BASELINE-NEXT:    movb %r10b, 1(%rax)
135; CHECK-BASELINE-NEXT:    movb %dil, (%rax)
136; CHECK-BASELINE-NEXT:    popq %rbx
137; CHECK-BASELINE-NEXT:    retq
138;
139; CHECK-SSE1-LABEL: out_v4i8:
140; CHECK-SSE1:       # %bb.0:
141; CHECK-SSE1-NEXT:    pushq %rbx
142; CHECK-SSE1-NEXT:    movq %rdi, %rax
143; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
144; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
145; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
146; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
147; CHECK-SSE1-NEXT:    andb %bl, %r8b
148; CHECK-SSE1-NEXT:    andb %r11b, %cl
149; CHECK-SSE1-NEXT:    andb %r10b, %dl
150; CHECK-SSE1-NEXT:    andb %dil, %sil
151; CHECK-SSE1-NEXT:    notb %r10b
152; CHECK-SSE1-NEXT:    notb %r11b
153; CHECK-SSE1-NEXT:    notb %bl
154; CHECK-SSE1-NEXT:    notb %dil
155; CHECK-SSE1-NEXT:    andb %r9b, %dil
156; CHECK-SSE1-NEXT:    orb %sil, %dil
157; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
158; CHECK-SSE1-NEXT:    orb %r8b, %bl
159; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
160; CHECK-SSE1-NEXT:    orb %cl, %r11b
161; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
162; CHECK-SSE1-NEXT:    orb %dl, %r10b
163; CHECK-SSE1-NEXT:    movb %bl, 3(%rax)
164; CHECK-SSE1-NEXT:    movb %r11b, 2(%rax)
165; CHECK-SSE1-NEXT:    movb %r10b, 1(%rax)
166; CHECK-SSE1-NEXT:    movb %dil, (%rax)
167; CHECK-SSE1-NEXT:    popq %rbx
168; CHECK-SSE1-NEXT:    retq
169;
170; CHECK-SSE2-LABEL: out_v4i8:
171; CHECK-SSE2:       # %bb.0:
172; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
173; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
174; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
175; CHECK-SSE2-NEXT:    retq
176;
177; CHECK-XOP-LABEL: out_v4i8:
178; CHECK-XOP:       # %bb.0:
179; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
180; CHECK-XOP-NEXT:    retq
181  %mx = and <4 x i8> %x, %mask
182  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
183  %my = and <4 x i8> %y, %notmask
184  %r = or <4 x i8> %mx, %my
185  ret <4 x i8> %r
186}
187
; Variant of the <4 x i8> "out" masked merge in which the all-ones xor
; constant has an undef element at index 2:
;   r = (x & mask) | (y & (mask ^ <-1, -1, undef, -1>)).
; Expected assembly differs per run line:
;   - the two runs without SSE2 expect identical scalar code; the byte stored
;     at offset 2 goes through a single AND only (no NOT / OR), presumably
;     because the undef xor lane lets the y term be dropped for that lane
;     (NOTE(review): confirm against the lowering);
;   - the +sse2 run expects the same andps / andnps / orps sequence as the
;     fully-defined variant;
;   - the +xop run expects a single vpcmov.
188define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
189; CHECK-BASELINE-LABEL: out_v4i8_undef:
190; CHECK-BASELINE:       # %bb.0:
191; CHECK-BASELINE-NEXT:    movq %rdi, %rax
192; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
193; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
194; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
195; CHECK-BASELINE-NEXT:    andb %r11b, %r8b
196; CHECK-BASELINE-NEXT:    andb %r10b, %dl
197; CHECK-BASELINE-NEXT:    andb %dil, %sil
198; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
199; CHECK-BASELINE-NEXT:    notb %r10b
200; CHECK-BASELINE-NEXT:    notb %r11b
201; CHECK-BASELINE-NEXT:    notb %dil
202; CHECK-BASELINE-NEXT:    andb %r9b, %dil
203; CHECK-BASELINE-NEXT:    orb %sil, %dil
204; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
205; CHECK-BASELINE-NEXT:    orb %r8b, %r11b
206; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
207; CHECK-BASELINE-NEXT:    orb %dl, %r10b
208; CHECK-BASELINE-NEXT:    movb %cl, 2(%rax)
209; CHECK-BASELINE-NEXT:    movb %r11b, 3(%rax)
210; CHECK-BASELINE-NEXT:    movb %r10b, 1(%rax)
211; CHECK-BASELINE-NEXT:    movb %dil, (%rax)
212; CHECK-BASELINE-NEXT:    retq
213;
214; CHECK-SSE1-LABEL: out_v4i8_undef:
215; CHECK-SSE1:       # %bb.0:
216; CHECK-SSE1-NEXT:    movq %rdi, %rax
217; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
218; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
219; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
220; CHECK-SSE1-NEXT:    andb %r11b, %r8b
221; CHECK-SSE1-NEXT:    andb %r10b, %dl
222; CHECK-SSE1-NEXT:    andb %dil, %sil
223; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
224; CHECK-SSE1-NEXT:    notb %r10b
225; CHECK-SSE1-NEXT:    notb %r11b
226; CHECK-SSE1-NEXT:    notb %dil
227; CHECK-SSE1-NEXT:    andb %r9b, %dil
228; CHECK-SSE1-NEXT:    orb %sil, %dil
229; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
230; CHECK-SSE1-NEXT:    orb %r8b, %r11b
231; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
232; CHECK-SSE1-NEXT:    orb %dl, %r10b
233; CHECK-SSE1-NEXT:    movb %cl, 2(%rax)
234; CHECK-SSE1-NEXT:    movb %r11b, 3(%rax)
235; CHECK-SSE1-NEXT:    movb %r10b, 1(%rax)
236; CHECK-SSE1-NEXT:    movb %dil, (%rax)
237; CHECK-SSE1-NEXT:    retq
238;
239; CHECK-SSE2-LABEL: out_v4i8_undef:
240; CHECK-SSE2:       # %bb.0:
241; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
242; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
243; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
244; CHECK-SSE2-NEXT:    retq
245;
246; CHECK-XOP-LABEL: out_v4i8_undef:
247; CHECK-XOP:       # %bb.0:
248; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
249; CHECK-XOP-NEXT:    retq
250  %mx = and <4 x i8> %x, %mask
251  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
252  %my = and <4 x i8> %y, %notmask
253  %r = or <4 x i8> %mx, %my
254  ret <4 x i8> %r
255}
256
; Masked merge in its "out" form on <2 x i16> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; Expected assembly differs per run line:
;   - the two runs without SSE2 expect identical per-lane scalar code on
;     32-bit registers, with the two result lanes left in %ax and %edx;
;   - the +sse2 run expects andps / andnps / orps on xmm registers;
;   - the +xop run expects a single vpcmov.
257define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
258; CHECK-BASELINE-LABEL: out_v2i16:
259; CHECK-BASELINE:       # %bb.0:
260; CHECK-BASELINE-NEXT:    movl %r8d, %eax
261; CHECK-BASELINE-NEXT:    andl %r9d, %esi
262; CHECK-BASELINE-NEXT:    andl %r8d, %edi
263; CHECK-BASELINE-NEXT:    notl %eax
264; CHECK-BASELINE-NEXT:    notl %r9d
265; CHECK-BASELINE-NEXT:    andl %ecx, %r9d
266; CHECK-BASELINE-NEXT:    orl %esi, %r9d
267; CHECK-BASELINE-NEXT:    andl %edx, %eax
268; CHECK-BASELINE-NEXT:    orl %edi, %eax
269; CHECK-BASELINE-NEXT:    # kill: def $ax killed $ax killed $eax
270; CHECK-BASELINE-NEXT:    movl %r9d, %edx
271; CHECK-BASELINE-NEXT:    retq
272;
273; CHECK-SSE1-LABEL: out_v2i16:
274; CHECK-SSE1:       # %bb.0:
275; CHECK-SSE1-NEXT:    movl %r8d, %eax
276; CHECK-SSE1-NEXT:    andl %r9d, %esi
277; CHECK-SSE1-NEXT:    andl %r8d, %edi
278; CHECK-SSE1-NEXT:    notl %eax
279; CHECK-SSE1-NEXT:    notl %r9d
280; CHECK-SSE1-NEXT:    andl %ecx, %r9d
281; CHECK-SSE1-NEXT:    orl %esi, %r9d
282; CHECK-SSE1-NEXT:    andl %edx, %eax
283; CHECK-SSE1-NEXT:    orl %edi, %eax
284; CHECK-SSE1-NEXT:    # kill: def $ax killed $ax killed $eax
285; CHECK-SSE1-NEXT:    movl %r9d, %edx
286; CHECK-SSE1-NEXT:    retq
287;
288; CHECK-SSE2-LABEL: out_v2i16:
289; CHECK-SSE2:       # %bb.0:
290; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
291; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
292; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
293; CHECK-SSE2-NEXT:    retq
294;
295; CHECK-XOP-LABEL: out_v2i16:
296; CHECK-XOP:       # %bb.0:
297; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
298; CHECK-XOP-NEXT:    retq
299  %mx = and <2 x i16> %x, %mask
300  %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>
301  %my = and <2 x i16> %y, %notmask
302  %r = or <2 x i16> %mx, %my
303  ret <2 x i16> %r
304}
305
; Masked merge in its "out" form on <1 x i32> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; A single shared set of assertions covers every run line: the expected code
; is a scalar and / not / and / or sequence on 32-bit registers, with the
; result in %eax.
306define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
307; CHECK-LABEL: out_v1i32:
308; CHECK:       # %bb.0:
309; CHECK-NEXT:    movl %edx, %eax
310; CHECK-NEXT:    andl %edx, %edi
311; CHECK-NEXT:    notl %eax
312; CHECK-NEXT:    andl %esi, %eax
313; CHECK-NEXT:    orl %edi, %eax
314; CHECK-NEXT:    retq
315  %mx = and <1 x i32> %x, %mask
316  %notmask = xor <1 x i32> %mask, <i32 -1>
317  %my = and <1 x i32> %y, %notmask
318  %r = or <1 x i32> %mx, %my
319  ret <1 x i32> %r
320}
321
322; ============================================================================ ;
323; 64-bit vector width
324; ============================================================================ ;
325
; Masked merge in its "out" form on <8 x i8> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; Expected assembly differs per run line:
;   - the two runs without SSE2 expect identical scalar code that processes
;     the eight lanes one byte at a time, pushes/pops %rbp, %r15, %r14, %r13,
;     %r12 and %rbx, spills two partial results to the stack, and stores each
;     result byte through %rax (copied from %rdi);
;   - the +sse2 run expects andps / andnps / orps on xmm registers;
;   - the +xop run expects a single vpcmov.
326define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
327; CHECK-BASELINE-LABEL: out_v8i8:
328; CHECK-BASELINE:       # %bb.0:
329; CHECK-BASELINE-NEXT:    pushq %rbp
330; CHECK-BASELINE-NEXT:    pushq %r15
331; CHECK-BASELINE-NEXT:    pushq %r14
332; CHECK-BASELINE-NEXT:    pushq %r13
333; CHECK-BASELINE-NEXT:    pushq %r12
334; CHECK-BASELINE-NEXT:    pushq %rbx
335; CHECK-BASELINE-NEXT:    movq %rdi, %rax
336; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
337; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
338; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
339; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
340; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
341; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
342; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
343; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
344; CHECK-BASELINE-NEXT:    andb %bl, %r9b
345; CHECK-BASELINE-NEXT:    andb %r15b, %r8b
346; CHECK-BASELINE-NEXT:    andb %bpl, %cl
347; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
348; CHECK-BASELINE-NEXT:    andb %r11b, %dl
349; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
350; CHECK-BASELINE-NEXT:    andb %dil, %sil
351; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
352; CHECK-BASELINE-NEXT:    andb %r12b, %r13b
353; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
354; CHECK-BASELINE-NEXT:    andb %r14b, %cl
355; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
356; CHECK-BASELINE-NEXT:    andb %r10b, %dl
357; CHECK-BASELINE-NEXT:    notb %dil
358; CHECK-BASELINE-NEXT:    notb %r11b
359; CHECK-BASELINE-NEXT:    notb %bpl
360; CHECK-BASELINE-NEXT:    notb %r15b
361; CHECK-BASELINE-NEXT:    notb %bl
362; CHECK-BASELINE-NEXT:    notb %r10b
363; CHECK-BASELINE-NEXT:    notb %r14b
364; CHECK-BASELINE-NEXT:    notb %r12b
365; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
366; CHECK-BASELINE-NEXT:    orb %r13b, %r12b
367; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
368; CHECK-BASELINE-NEXT:    orb %cl, %r14b
369; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
370; CHECK-BASELINE-NEXT:    orb %dl, %r10b
371; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
372; CHECK-BASELINE-NEXT:    orb %r9b, %bl
373; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
374; CHECK-BASELINE-NEXT:    orb %r8b, %r15b
375; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
376; CHECK-BASELINE-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %bpl # 1-byte Folded Reload
377; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
378; CHECK-BASELINE-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload
379; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dil
380; CHECK-BASELINE-NEXT:    orb %sil, %dil
381; CHECK-BASELINE-NEXT:    movb %r12b, 7(%rax)
382; CHECK-BASELINE-NEXT:    movb %r14b, 6(%rax)
383; CHECK-BASELINE-NEXT:    movb %r10b, 5(%rax)
384; CHECK-BASELINE-NEXT:    movb %bl, 4(%rax)
385; CHECK-BASELINE-NEXT:    movb %r15b, 3(%rax)
386; CHECK-BASELINE-NEXT:    movb %bpl, 2(%rax)
387; CHECK-BASELINE-NEXT:    movb %r11b, 1(%rax)
388; CHECK-BASELINE-NEXT:    movb %dil, (%rax)
389; CHECK-BASELINE-NEXT:    popq %rbx
390; CHECK-BASELINE-NEXT:    popq %r12
391; CHECK-BASELINE-NEXT:    popq %r13
392; CHECK-BASELINE-NEXT:    popq %r14
393; CHECK-BASELINE-NEXT:    popq %r15
394; CHECK-BASELINE-NEXT:    popq %rbp
395; CHECK-BASELINE-NEXT:    retq
396;
397; CHECK-SSE1-LABEL: out_v8i8:
398; CHECK-SSE1:       # %bb.0:
399; CHECK-SSE1-NEXT:    pushq %rbp
400; CHECK-SSE1-NEXT:    pushq %r15
401; CHECK-SSE1-NEXT:    pushq %r14
402; CHECK-SSE1-NEXT:    pushq %r13
403; CHECK-SSE1-NEXT:    pushq %r12
404; CHECK-SSE1-NEXT:    pushq %rbx
405; CHECK-SSE1-NEXT:    movq %rdi, %rax
406; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
407; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
408; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
409; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
410; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
411; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
412; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
413; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
414; CHECK-SSE1-NEXT:    andb %bl, %r9b
415; CHECK-SSE1-NEXT:    andb %r15b, %r8b
416; CHECK-SSE1-NEXT:    andb %bpl, %cl
417; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
418; CHECK-SSE1-NEXT:    andb %r11b, %dl
419; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
420; CHECK-SSE1-NEXT:    andb %dil, %sil
421; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
422; CHECK-SSE1-NEXT:    andb %r12b, %r13b
423; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
424; CHECK-SSE1-NEXT:    andb %r14b, %cl
425; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
426; CHECK-SSE1-NEXT:    andb %r10b, %dl
427; CHECK-SSE1-NEXT:    notb %dil
428; CHECK-SSE1-NEXT:    notb %r11b
429; CHECK-SSE1-NEXT:    notb %bpl
430; CHECK-SSE1-NEXT:    notb %r15b
431; CHECK-SSE1-NEXT:    notb %bl
432; CHECK-SSE1-NEXT:    notb %r10b
433; CHECK-SSE1-NEXT:    notb %r14b
434; CHECK-SSE1-NEXT:    notb %r12b
435; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
436; CHECK-SSE1-NEXT:    orb %r13b, %r12b
437; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
438; CHECK-SSE1-NEXT:    orb %cl, %r14b
439; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
440; CHECK-SSE1-NEXT:    orb %dl, %r10b
441; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
442; CHECK-SSE1-NEXT:    orb %r9b, %bl
443; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
444; CHECK-SSE1-NEXT:    orb %r8b, %r15b
445; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
446; CHECK-SSE1-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %bpl # 1-byte Folded Reload
447; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
448; CHECK-SSE1-NEXT:    orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload
449; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dil
450; CHECK-SSE1-NEXT:    orb %sil, %dil
451; CHECK-SSE1-NEXT:    movb %r12b, 7(%rax)
452; CHECK-SSE1-NEXT:    movb %r14b, 6(%rax)
453; CHECK-SSE1-NEXT:    movb %r10b, 5(%rax)
454; CHECK-SSE1-NEXT:    movb %bl, 4(%rax)
455; CHECK-SSE1-NEXT:    movb %r15b, 3(%rax)
456; CHECK-SSE1-NEXT:    movb %bpl, 2(%rax)
457; CHECK-SSE1-NEXT:    movb %r11b, 1(%rax)
458; CHECK-SSE1-NEXT:    movb %dil, (%rax)
459; CHECK-SSE1-NEXT:    popq %rbx
460; CHECK-SSE1-NEXT:    popq %r12
461; CHECK-SSE1-NEXT:    popq %r13
462; CHECK-SSE1-NEXT:    popq %r14
463; CHECK-SSE1-NEXT:    popq %r15
464; CHECK-SSE1-NEXT:    popq %rbp
465; CHECK-SSE1-NEXT:    retq
466;
467; CHECK-SSE2-LABEL: out_v8i8:
468; CHECK-SSE2:       # %bb.0:
469; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
470; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
471; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
472; CHECK-SSE2-NEXT:    retq
473;
474; CHECK-XOP-LABEL: out_v8i8:
475; CHECK-XOP:       # %bb.0:
476; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
477; CHECK-XOP-NEXT:    retq
478  %mx = and <8 x i8> %x, %mask
479  %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
480  %my = and <8 x i8> %y, %notmask
481  %r = or <8 x i8> %mx, %my
482  ret <8 x i8> %r
483}
484
; Masked merge in its "out" form on <4 x i16> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; Expected assembly differs per run line:
;   - the two runs without SSE2 expect identical scalar code that processes
;     the four lanes individually and stores each 16-bit result through %rax
;     (copied from %rdi); %rbx is pushed and popped around the body;
;   - the +sse2 run expects andps / andnps / orps on xmm registers;
;   - the +xop run expects a single vpcmov.
485define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
486; CHECK-BASELINE-LABEL: out_v4i16:
487; CHECK-BASELINE:       # %bb.0:
488; CHECK-BASELINE-NEXT:    pushq %rbx
489; CHECK-BASELINE-NEXT:    movq %rdi, %rax
490; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
491; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
492; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %edi
493; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
494; CHECK-BASELINE-NEXT:    andl %ebx, %esi
495; CHECK-BASELINE-NEXT:    andl %edi, %r8d
496; CHECK-BASELINE-NEXT:    andl %r11d, %ecx
497; CHECK-BASELINE-NEXT:    andl %r10d, %edx
498; CHECK-BASELINE-NEXT:    notl %r10d
499; CHECK-BASELINE-NEXT:    notl %r11d
500; CHECK-BASELINE-NEXT:    notl %edi
501; CHECK-BASELINE-NEXT:    notl %ebx
502; CHECK-BASELINE-NEXT:    andl %r9d, %ebx
503; CHECK-BASELINE-NEXT:    orl %esi, %ebx
504; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %di
505; CHECK-BASELINE-NEXT:    orl %r8d, %edi
506; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
507; CHECK-BASELINE-NEXT:    orl %ecx, %r11d
508; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
509; CHECK-BASELINE-NEXT:    orl %edx, %r10d
510; CHECK-BASELINE-NEXT:    movw %bx, (%rax)
511; CHECK-BASELINE-NEXT:    movw %di, 6(%rax)
512; CHECK-BASELINE-NEXT:    movw %r11w, 4(%rax)
513; CHECK-BASELINE-NEXT:    movw %r10w, 2(%rax)
514; CHECK-BASELINE-NEXT:    popq %rbx
515; CHECK-BASELINE-NEXT:    retq
516;
517; CHECK-SSE1-LABEL: out_v4i16:
518; CHECK-SSE1:       # %bb.0:
519; CHECK-SSE1-NEXT:    pushq %rbx
520; CHECK-SSE1-NEXT:    movq %rdi, %rax
521; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
522; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
523; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %edi
524; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
525; CHECK-SSE1-NEXT:    andl %ebx, %esi
526; CHECK-SSE1-NEXT:    andl %edi, %r8d
527; CHECK-SSE1-NEXT:    andl %r11d, %ecx
528; CHECK-SSE1-NEXT:    andl %r10d, %edx
529; CHECK-SSE1-NEXT:    notl %r10d
530; CHECK-SSE1-NEXT:    notl %r11d
531; CHECK-SSE1-NEXT:    notl %edi
532; CHECK-SSE1-NEXT:    notl %ebx
533; CHECK-SSE1-NEXT:    andl %r9d, %ebx
534; CHECK-SSE1-NEXT:    orl %esi, %ebx
535; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %di
536; CHECK-SSE1-NEXT:    orl %r8d, %edi
537; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
538; CHECK-SSE1-NEXT:    orl %ecx, %r11d
539; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
540; CHECK-SSE1-NEXT:    orl %edx, %r10d
541; CHECK-SSE1-NEXT:    movw %bx, (%rax)
542; CHECK-SSE1-NEXT:    movw %di, 6(%rax)
543; CHECK-SSE1-NEXT:    movw %r11w, 4(%rax)
544; CHECK-SSE1-NEXT:    movw %r10w, 2(%rax)
545; CHECK-SSE1-NEXT:    popq %rbx
546; CHECK-SSE1-NEXT:    retq
547;
548; CHECK-SSE2-LABEL: out_v4i16:
549; CHECK-SSE2:       # %bb.0:
550; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
551; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
552; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
553; CHECK-SSE2-NEXT:    retq
554;
555; CHECK-XOP-LABEL: out_v4i16:
556; CHECK-XOP:       # %bb.0:
557; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
558; CHECK-XOP-NEXT:    retq
559  %mx = and <4 x i16> %x, %mask
560  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1>
561  %my = and <4 x i16> %y, %notmask
562  %r = or <4 x i16> %mx, %my
563  ret <4 x i16> %r
564}
565
; Variant of the <4 x i16> "out" masked merge in which the all-ones xor
; constant has an undef element at index 2:
;   r = (x & mask) | (y & (mask ^ <-1, -1, undef, -1>)).
; Expected assembly differs per run line:
;   - the two runs without SSE2 expect identical scalar code; the 16-bit
;     value stored at offset 4 goes through a single AND only (no NOT / OR),
;     presumably because the undef xor lane lets the y term be dropped for
;     that lane (NOTE(review): confirm against the lowering);
;   - the +sse2 run expects the same andps / andnps / orps sequence as the
;     fully-defined variant;
;   - the +xop run expects a single vpcmov.
566define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
567; CHECK-BASELINE-LABEL: out_v4i16_undef:
568; CHECK-BASELINE:       # %bb.0:
569; CHECK-BASELINE-NEXT:    movq %rdi, %rax
570; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
571; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
572; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %edi
573; CHECK-BASELINE-NEXT:    andl %edi, %esi
574; CHECK-BASELINE-NEXT:    andl %r11d, %r8d
575; CHECK-BASELINE-NEXT:    andl %r10d, %edx
576; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
577; CHECK-BASELINE-NEXT:    notl %r10d
578; CHECK-BASELINE-NEXT:    notl %r11d
579; CHECK-BASELINE-NEXT:    notl %edi
580; CHECK-BASELINE-NEXT:    andl %r9d, %edi
581; CHECK-BASELINE-NEXT:    orl %esi, %edi
582; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
583; CHECK-BASELINE-NEXT:    orl %r8d, %r11d
584; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
585; CHECK-BASELINE-NEXT:    orl %edx, %r10d
586; CHECK-BASELINE-NEXT:    movw %cx, 4(%rax)
587; CHECK-BASELINE-NEXT:    movw %di, (%rax)
588; CHECK-BASELINE-NEXT:    movw %r11w, 6(%rax)
589; CHECK-BASELINE-NEXT:    movw %r10w, 2(%rax)
590; CHECK-BASELINE-NEXT:    retq
591;
592; CHECK-SSE1-LABEL: out_v4i16_undef:
593; CHECK-SSE1:       # %bb.0:
594; CHECK-SSE1-NEXT:    movq %rdi, %rax
595; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
596; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
597; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %edi
598; CHECK-SSE1-NEXT:    andl %edi, %esi
599; CHECK-SSE1-NEXT:    andl %r11d, %r8d
600; CHECK-SSE1-NEXT:    andl %r10d, %edx
601; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
602; CHECK-SSE1-NEXT:    notl %r10d
603; CHECK-SSE1-NEXT:    notl %r11d
604; CHECK-SSE1-NEXT:    notl %edi
605; CHECK-SSE1-NEXT:    andl %r9d, %edi
606; CHECK-SSE1-NEXT:    orl %esi, %edi
607; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
608; CHECK-SSE1-NEXT:    orl %r8d, %r11d
609; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
610; CHECK-SSE1-NEXT:    orl %edx, %r10d
611; CHECK-SSE1-NEXT:    movw %cx, 4(%rax)
612; CHECK-SSE1-NEXT:    movw %di, (%rax)
613; CHECK-SSE1-NEXT:    movw %r11w, 6(%rax)
614; CHECK-SSE1-NEXT:    movw %r10w, 2(%rax)
615; CHECK-SSE1-NEXT:    retq
616;
617; CHECK-SSE2-LABEL: out_v4i16_undef:
618; CHECK-SSE2:       # %bb.0:
619; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
620; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
621; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
622; CHECK-SSE2-NEXT:    retq
623;
624; CHECK-XOP-LABEL: out_v4i16_undef:
625; CHECK-XOP:       # %bb.0:
626; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
627; CHECK-XOP-NEXT:    retq
628  %mx = and <4 x i16> %x, %mask
629  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1>
630  %my = and <4 x i16> %y, %notmask
631  %r = or <4 x i16> %mx, %my
632  ret <4 x i16> %r
633}
634
; Masked merge in its "out" form on <2 x i32> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; Expected assembly differs per run line:
;   - the two runs without SSE2 expect identical per-lane scalar code on
;     32-bit registers, with the two result lanes left in %eax and %edx;
;   - the +sse2 run expects andps / andnps / orps on xmm registers;
;   - the +xop run expects a single vpcmov.
635define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
636; CHECK-BASELINE-LABEL: out_v2i32:
637; CHECK-BASELINE:       # %bb.0:
638; CHECK-BASELINE-NEXT:    movl %r8d, %eax
639; CHECK-BASELINE-NEXT:    andl %r9d, %esi
640; CHECK-BASELINE-NEXT:    andl %r8d, %edi
641; CHECK-BASELINE-NEXT:    notl %eax
642; CHECK-BASELINE-NEXT:    notl %r9d
643; CHECK-BASELINE-NEXT:    andl %ecx, %r9d
644; CHECK-BASELINE-NEXT:    orl %esi, %r9d
645; CHECK-BASELINE-NEXT:    andl %edx, %eax
646; CHECK-BASELINE-NEXT:    orl %edi, %eax
647; CHECK-BASELINE-NEXT:    movl %r9d, %edx
648; CHECK-BASELINE-NEXT:    retq
649;
650; CHECK-SSE1-LABEL: out_v2i32:
651; CHECK-SSE1:       # %bb.0:
652; CHECK-SSE1-NEXT:    movl %r8d, %eax
653; CHECK-SSE1-NEXT:    andl %r9d, %esi
654; CHECK-SSE1-NEXT:    andl %r8d, %edi
655; CHECK-SSE1-NEXT:    notl %eax
656; CHECK-SSE1-NEXT:    notl %r9d
657; CHECK-SSE1-NEXT:    andl %ecx, %r9d
658; CHECK-SSE1-NEXT:    orl %esi, %r9d
659; CHECK-SSE1-NEXT:    andl %edx, %eax
660; CHECK-SSE1-NEXT:    orl %edi, %eax
661; CHECK-SSE1-NEXT:    movl %r9d, %edx
662; CHECK-SSE1-NEXT:    retq
663;
664; CHECK-SSE2-LABEL: out_v2i32:
665; CHECK-SSE2:       # %bb.0:
666; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
667; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
668; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
669; CHECK-SSE2-NEXT:    retq
670;
671; CHECK-XOP-LABEL: out_v2i32:
672; CHECK-XOP:       # %bb.0:
673; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
674; CHECK-XOP-NEXT:    retq
675  %mx = and <2 x i32> %x, %mask
676  %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1>
677  %my = and <2 x i32> %y, %notmask
678  %r = or <2 x i32> %mx, %my
679  ret <2 x i32> %r
680}
681
; Masked merge in its "out" form on <1 x i64> operands:
;   r = (x & mask) | (y & ~mask), with ~mask written as xor against all-ones.
; A single shared set of assertions covers every run line: the expected code
; is a scalar and / not / and / or sequence on 64-bit registers, with the
; result in %rax.
682define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
683; CHECK-LABEL: out_v1i64:
684; CHECK:       # %bb.0:
685; CHECK-NEXT:    movq %rdx, %rax
686; CHECK-NEXT:    andq %rdx, %rdi
687; CHECK-NEXT:    notq %rax
688; CHECK-NEXT:    andq %rsi, %rax
689; CHECK-NEXT:    orq %rdi, %rax
690; CHECK-NEXT:    retq
691  %mx = and <1 x i64> %x, %mask
692  %notmask = xor <1 x i64> %mask, <i64 -1>
693  %my = and <1 x i64> %y, %notmask
694  %r = or <1 x i64> %mx, %my
695  ret <1 x i64> %r
696}
697
698; ============================================================================ ;
699; 128-bit vector width
700; ============================================================================ ;
701
702define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
703; CHECK-BASELINE-LABEL: out_v16i8:
704; CHECK-BASELINE:       # %bb.0:
705; CHECK-BASELINE-NEXT:    pushq %rbp
706; CHECK-BASELINE-NEXT:    pushq %r15
707; CHECK-BASELINE-NEXT:    pushq %r14
708; CHECK-BASELINE-NEXT:    pushq %r13
709; CHECK-BASELINE-NEXT:    pushq %r12
710; CHECK-BASELINE-NEXT:    pushq %rbx
711; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
712; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
713; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
714; CHECK-BASELINE-NEXT:    movq %rdi, %rax
715; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
716; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
717; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
718; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
719; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
720; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
721; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
722; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
723; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
724; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
725; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
726; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
727; CHECK-BASELINE-NEXT:    andb %cl, %sil
728; CHECK-BASELINE-NEXT:    notb %cl
729; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
730; CHECK-BASELINE-NEXT:    orb %sil, %cl
731; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
732; CHECK-BASELINE-NEXT:    andb %dl, %sil
733; CHECK-BASELINE-NEXT:    notb %dl
734; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
735; CHECK-BASELINE-NEXT:    orb %sil, %dl
736; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
737; CHECK-BASELINE-NEXT:    andb %bl, %sil
738; CHECK-BASELINE-NEXT:    notb %bl
739; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
740; CHECK-BASELINE-NEXT:    orb %sil, %bl
741; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
742; CHECK-BASELINE-NEXT:    andb %r13b, %sil
743; CHECK-BASELINE-NEXT:    notb %r13b
744; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
745; CHECK-BASELINE-NEXT:    orb %sil, %r13b
746; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
747; CHECK-BASELINE-NEXT:    andb %r12b, %sil
748; CHECK-BASELINE-NEXT:    notb %r12b
749; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
750; CHECK-BASELINE-NEXT:    orb %sil, %r12b
751; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
752; CHECK-BASELINE-NEXT:    andb %r15b, %sil
753; CHECK-BASELINE-NEXT:    notb %r15b
754; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
755; CHECK-BASELINE-NEXT:    orb %sil, %r15b
756; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
757; CHECK-BASELINE-NEXT:    andb %r14b, %sil
758; CHECK-BASELINE-NEXT:    notb %r14b
759; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
760; CHECK-BASELINE-NEXT:    orb %sil, %r14b
761; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
762; CHECK-BASELINE-NEXT:    andb %bpl, %sil
763; CHECK-BASELINE-NEXT:    notb %bpl
764; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
765; CHECK-BASELINE-NEXT:    orb %sil, %bpl
766; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
767; CHECK-BASELINE-NEXT:    andb %r11b, %sil
768; CHECK-BASELINE-NEXT:    notb %r11b
769; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
770; CHECK-BASELINE-NEXT:    orb %sil, %r11b
771; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
772; CHECK-BASELINE-NEXT:    andb %r10b, %sil
773; CHECK-BASELINE-NEXT:    notb %r10b
774; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
775; CHECK-BASELINE-NEXT:    orb %sil, %r10b
776; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
777; CHECK-BASELINE-NEXT:    andb %dil, %sil
778; CHECK-BASELINE-NEXT:    notb %dil
779; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dil
780; CHECK-BASELINE-NEXT:    orb %sil, %dil
781; CHECK-BASELINE-NEXT:    movb %cl, 15(%rax)
782; CHECK-BASELINE-NEXT:    movb %dl, 14(%rax)
783; CHECK-BASELINE-NEXT:    movb %bl, 13(%rax)
784; CHECK-BASELINE-NEXT:    movb %r13b, 12(%rax)
785; CHECK-BASELINE-NEXT:    movb %r12b, 11(%rax)
786; CHECK-BASELINE-NEXT:    movb %r15b, 10(%rax)
787; CHECK-BASELINE-NEXT:    movb %r14b, 9(%rax)
788; CHECK-BASELINE-NEXT:    movb %bpl, 8(%rax)
789; CHECK-BASELINE-NEXT:    movb %r11b, 7(%rax)
790; CHECK-BASELINE-NEXT:    movb %r10b, 6(%rax)
791; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
792; CHECK-BASELINE-NEXT:    andb %cl, %r9b
793; CHECK-BASELINE-NEXT:    notb %cl
794; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
795; CHECK-BASELINE-NEXT:    orb %r9b, %cl
796; CHECK-BASELINE-NEXT:    movb %dil, 5(%rax)
797; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
798; CHECK-BASELINE-NEXT:    andb %dl, %r8b
799; CHECK-BASELINE-NEXT:    notb %dl
800; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
801; CHECK-BASELINE-NEXT:    orb %r8b, %dl
802; CHECK-BASELINE-NEXT:    movb %cl, 4(%rax)
803; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
804; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
805; CHECK-BASELINE-NEXT:    andb %cl, %sil
806; CHECK-BASELINE-NEXT:    notb %cl
807; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
808; CHECK-BASELINE-NEXT:    orb %sil, %cl
809; CHECK-BASELINE-NEXT:    movb %dl, 3(%rax)
810; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
811; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
812; CHECK-BASELINE-NEXT:    andb %dl, %sil
813; CHECK-BASELINE-NEXT:    notb %dl
814; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
815; CHECK-BASELINE-NEXT:    orb %sil, %dl
816; CHECK-BASELINE-NEXT:    movb %cl, 2(%rax)
817; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
818; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
819; CHECK-BASELINE-NEXT:    andb %cl, %sil
820; CHECK-BASELINE-NEXT:    notb %cl
821; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
822; CHECK-BASELINE-NEXT:    orb %sil, %cl
823; CHECK-BASELINE-NEXT:    movb %dl, 1(%rax)
824; CHECK-BASELINE-NEXT:    movb %cl, (%rax)
825; CHECK-BASELINE-NEXT:    popq %rbx
826; CHECK-BASELINE-NEXT:    popq %r12
827; CHECK-BASELINE-NEXT:    popq %r13
828; CHECK-BASELINE-NEXT:    popq %r14
829; CHECK-BASELINE-NEXT:    popq %r15
830; CHECK-BASELINE-NEXT:    popq %rbp
831; CHECK-BASELINE-NEXT:    retq
832;
833; CHECK-SSE1-LABEL: out_v16i8:
834; CHECK-SSE1:       # %bb.0:
835; CHECK-SSE1-NEXT:    pushq %rbp
836; CHECK-SSE1-NEXT:    pushq %r15
837; CHECK-SSE1-NEXT:    pushq %r14
838; CHECK-SSE1-NEXT:    pushq %r13
839; CHECK-SSE1-NEXT:    pushq %r12
840; CHECK-SSE1-NEXT:    pushq %rbx
841; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
842; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
843; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
844; CHECK-SSE1-NEXT:    movq %rdi, %rax
845; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
846; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
847; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
848; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
849; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
850; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
851; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
852; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
853; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
854; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
855; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
856; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
857; CHECK-SSE1-NEXT:    andb %cl, %sil
858; CHECK-SSE1-NEXT:    notb %cl
859; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
860; CHECK-SSE1-NEXT:    orb %sil, %cl
861; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
862; CHECK-SSE1-NEXT:    andb %dl, %sil
863; CHECK-SSE1-NEXT:    notb %dl
864; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
865; CHECK-SSE1-NEXT:    orb %sil, %dl
866; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
867; CHECK-SSE1-NEXT:    andb %bl, %sil
868; CHECK-SSE1-NEXT:    notb %bl
869; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
870; CHECK-SSE1-NEXT:    orb %sil, %bl
871; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
872; CHECK-SSE1-NEXT:    andb %r13b, %sil
873; CHECK-SSE1-NEXT:    notb %r13b
874; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
875; CHECK-SSE1-NEXT:    orb %sil, %r13b
876; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
877; CHECK-SSE1-NEXT:    andb %r12b, %sil
878; CHECK-SSE1-NEXT:    notb %r12b
879; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
880; CHECK-SSE1-NEXT:    orb %sil, %r12b
881; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
882; CHECK-SSE1-NEXT:    andb %r15b, %sil
883; CHECK-SSE1-NEXT:    notb %r15b
884; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
885; CHECK-SSE1-NEXT:    orb %sil, %r15b
886; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
887; CHECK-SSE1-NEXT:    andb %r14b, %sil
888; CHECK-SSE1-NEXT:    notb %r14b
889; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
890; CHECK-SSE1-NEXT:    orb %sil, %r14b
891; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
892; CHECK-SSE1-NEXT:    andb %bpl, %sil
893; CHECK-SSE1-NEXT:    notb %bpl
894; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
895; CHECK-SSE1-NEXT:    orb %sil, %bpl
896; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
897; CHECK-SSE1-NEXT:    andb %r11b, %sil
898; CHECK-SSE1-NEXT:    notb %r11b
899; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
900; CHECK-SSE1-NEXT:    orb %sil, %r11b
901; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
902; CHECK-SSE1-NEXT:    andb %r10b, %sil
903; CHECK-SSE1-NEXT:    notb %r10b
904; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
905; CHECK-SSE1-NEXT:    orb %sil, %r10b
906; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
907; CHECK-SSE1-NEXT:    andb %dil, %sil
908; CHECK-SSE1-NEXT:    notb %dil
909; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dil
910; CHECK-SSE1-NEXT:    orb %sil, %dil
911; CHECK-SSE1-NEXT:    movb %cl, 15(%rax)
912; CHECK-SSE1-NEXT:    movb %dl, 14(%rax)
913; CHECK-SSE1-NEXT:    movb %bl, 13(%rax)
914; CHECK-SSE1-NEXT:    movb %r13b, 12(%rax)
915; CHECK-SSE1-NEXT:    movb %r12b, 11(%rax)
916; CHECK-SSE1-NEXT:    movb %r15b, 10(%rax)
917; CHECK-SSE1-NEXT:    movb %r14b, 9(%rax)
918; CHECK-SSE1-NEXT:    movb %bpl, 8(%rax)
919; CHECK-SSE1-NEXT:    movb %r11b, 7(%rax)
920; CHECK-SSE1-NEXT:    movb %r10b, 6(%rax)
921; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
922; CHECK-SSE1-NEXT:    andb %cl, %r9b
923; CHECK-SSE1-NEXT:    notb %cl
924; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
925; CHECK-SSE1-NEXT:    orb %r9b, %cl
926; CHECK-SSE1-NEXT:    movb %dil, 5(%rax)
927; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
928; CHECK-SSE1-NEXT:    andb %dl, %r8b
929; CHECK-SSE1-NEXT:    notb %dl
930; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
931; CHECK-SSE1-NEXT:    orb %r8b, %dl
932; CHECK-SSE1-NEXT:    movb %cl, 4(%rax)
933; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
934; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
935; CHECK-SSE1-NEXT:    andb %cl, %sil
936; CHECK-SSE1-NEXT:    notb %cl
937; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
938; CHECK-SSE1-NEXT:    orb %sil, %cl
939; CHECK-SSE1-NEXT:    movb %dl, 3(%rax)
940; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
941; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
942; CHECK-SSE1-NEXT:    andb %dl, %sil
943; CHECK-SSE1-NEXT:    notb %dl
944; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
945; CHECK-SSE1-NEXT:    orb %sil, %dl
946; CHECK-SSE1-NEXT:    movb %cl, 2(%rax)
947; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
948; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
949; CHECK-SSE1-NEXT:    andb %cl, %sil
950; CHECK-SSE1-NEXT:    notb %cl
951; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
952; CHECK-SSE1-NEXT:    orb %sil, %cl
953; CHECK-SSE1-NEXT:    movb %dl, 1(%rax)
954; CHECK-SSE1-NEXT:    movb %cl, (%rax)
955; CHECK-SSE1-NEXT:    popq %rbx
956; CHECK-SSE1-NEXT:    popq %r12
957; CHECK-SSE1-NEXT:    popq %r13
958; CHECK-SSE1-NEXT:    popq %r14
959; CHECK-SSE1-NEXT:    popq %r15
960; CHECK-SSE1-NEXT:    popq %rbp
961; CHECK-SSE1-NEXT:    retq
962;
963; CHECK-SSE2-LABEL: out_v16i8:
964; CHECK-SSE2:       # %bb.0:
965; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
966; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
967; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
968; CHECK-SSE2-NEXT:    retq
969;
970; CHECK-XOP-LABEL: out_v16i8:
971; CHECK-XOP:       # %bb.0:
972; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
973; CHECK-XOP-NEXT:    retq
974  %mx = and <16 x i8> %x, %mask
975  %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
976  %my = and <16 x i8> %y, %notmask
977  %r = or <16 x i8> %mx, %my
978  ret <16 x i8> %r
979}
980
; Masked merge of two <8 x i16> vectors passed by value:
;   r = (x & mask) | (y & ~mask)
; The expected-output comments inside the function are autogenerated (see the
; NOTE at the top of this file); regenerate them instead of editing by hand.
; Expected lowering, as recorded below:
;   - no-SSE and SSE1-only runs: eight scalar 16-bit and/not/and/or groups,
;     with the result stored through the pointer copied from %rdi into %rax;
;   - SSE2 run: andps + andnps + orps on the xmm argument registers;
;   - XOP run: a single vpcmov.
981define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
982; CHECK-BASELINE-LABEL: out_v8i16:
983; CHECK-BASELINE:       # %bb.0:
984; CHECK-BASELINE-NEXT:    pushq %rbp
985; CHECK-BASELINE-NEXT:    pushq %r14
986; CHECK-BASELINE-NEXT:    pushq %rbx
987; CHECK-BASELINE-NEXT:    movq %rdi, %rax
988; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
989; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
990; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
991; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
992; CHECK-BASELINE-NEXT:    andw %r14w, %bx
993; CHECK-BASELINE-NEXT:    notl %r14d
994; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r14w
995; CHECK-BASELINE-NEXT:    orl %ebx, %r14d
996; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
997; CHECK-BASELINE-NEXT:    andw %r11w, %bx
998; CHECK-BASELINE-NEXT:    notl %r11d
999; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
1000; CHECK-BASELINE-NEXT:    orl %ebx, %r11d
1001; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
1002; CHECK-BASELINE-NEXT:    andw %r10w, %bx
1003; CHECK-BASELINE-NEXT:    notl %r10d
1004; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
1005; CHECK-BASELINE-NEXT:    orl %ebx, %r10d
1006; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
1007; CHECK-BASELINE-NEXT:    andl %ebx, %r9d
1008; CHECK-BASELINE-NEXT:    notl %ebx
1009; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bx
1010; CHECK-BASELINE-NEXT:    orl %r9d, %ebx
1011; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %edi
1012; CHECK-BASELINE-NEXT:    andl %edi, %r8d
1013; CHECK-BASELINE-NEXT:    notl %edi
1014; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %di
1015; CHECK-BASELINE-NEXT:    orl %r8d, %edi
1016; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebp
1017; CHECK-BASELINE-NEXT:    andl %ebp, %ecx
1018; CHECK-BASELINE-NEXT:    notl %ebp
1019; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bp
1020; CHECK-BASELINE-NEXT:    orl %ecx, %ebp
1021; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
1022; CHECK-BASELINE-NEXT:    andl %ecx, %edx
1023; CHECK-BASELINE-NEXT:    notl %ecx
1024; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
1025; CHECK-BASELINE-NEXT:    orl %edx, %ecx
1026; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %edx
1027; CHECK-BASELINE-NEXT:    andl %edx, %esi
1028; CHECK-BASELINE-NEXT:    notl %edx
1029; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %dx
1030; CHECK-BASELINE-NEXT:    orl %esi, %edx
1031; CHECK-BASELINE-NEXT:    movw %r14w, 14(%rax)
1032; CHECK-BASELINE-NEXT:    movw %r11w, 12(%rax)
1033; CHECK-BASELINE-NEXT:    movw %r10w, 10(%rax)
1034; CHECK-BASELINE-NEXT:    movw %bx, 8(%rax)
1035; CHECK-BASELINE-NEXT:    movw %di, 6(%rax)
1036; CHECK-BASELINE-NEXT:    movw %bp, 4(%rax)
1037; CHECK-BASELINE-NEXT:    movw %cx, 2(%rax)
1038; CHECK-BASELINE-NEXT:    movw %dx, (%rax)
1039; CHECK-BASELINE-NEXT:    popq %rbx
1040; CHECK-BASELINE-NEXT:    popq %r14
1041; CHECK-BASELINE-NEXT:    popq %rbp
1042; CHECK-BASELINE-NEXT:    retq
1043;
1044; CHECK-SSE1-LABEL: out_v8i16:
1045; CHECK-SSE1:       # %bb.0:
1046; CHECK-SSE1-NEXT:    pushq %rbp
1047; CHECK-SSE1-NEXT:    pushq %r14
1048; CHECK-SSE1-NEXT:    pushq %rbx
1049; CHECK-SSE1-NEXT:    movq %rdi, %rax
1050; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
1051; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
1052; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
1053; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
1054; CHECK-SSE1-NEXT:    andw %r14w, %bx
1055; CHECK-SSE1-NEXT:    notl %r14d
1056; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r14w
1057; CHECK-SSE1-NEXT:    orl %ebx, %r14d
1058; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
1059; CHECK-SSE1-NEXT:    andw %r11w, %bx
1060; CHECK-SSE1-NEXT:    notl %r11d
1061; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
1062; CHECK-SSE1-NEXT:    orl %ebx, %r11d
1063; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
1064; CHECK-SSE1-NEXT:    andw %r10w, %bx
1065; CHECK-SSE1-NEXT:    notl %r10d
1066; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
1067; CHECK-SSE1-NEXT:    orl %ebx, %r10d
1068; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
1069; CHECK-SSE1-NEXT:    andl %ebx, %r9d
1070; CHECK-SSE1-NEXT:    notl %ebx
1071; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bx
1072; CHECK-SSE1-NEXT:    orl %r9d, %ebx
1073; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %edi
1074; CHECK-SSE1-NEXT:    andl %edi, %r8d
1075; CHECK-SSE1-NEXT:    notl %edi
1076; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %di
1077; CHECK-SSE1-NEXT:    orl %r8d, %edi
1078; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebp
1079; CHECK-SSE1-NEXT:    andl %ebp, %ecx
1080; CHECK-SSE1-NEXT:    notl %ebp
1081; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bp
1082; CHECK-SSE1-NEXT:    orl %ecx, %ebp
1083; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
1084; CHECK-SSE1-NEXT:    andl %ecx, %edx
1085; CHECK-SSE1-NEXT:    notl %ecx
1086; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
1087; CHECK-SSE1-NEXT:    orl %edx, %ecx
1088; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %edx
1089; CHECK-SSE1-NEXT:    andl %edx, %esi
1090; CHECK-SSE1-NEXT:    notl %edx
1091; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %dx
1092; CHECK-SSE1-NEXT:    orl %esi, %edx
1093; CHECK-SSE1-NEXT:    movw %r14w, 14(%rax)
1094; CHECK-SSE1-NEXT:    movw %r11w, 12(%rax)
1095; CHECK-SSE1-NEXT:    movw %r10w, 10(%rax)
1096; CHECK-SSE1-NEXT:    movw %bx, 8(%rax)
1097; CHECK-SSE1-NEXT:    movw %di, 6(%rax)
1098; CHECK-SSE1-NEXT:    movw %bp, 4(%rax)
1099; CHECK-SSE1-NEXT:    movw %cx, 2(%rax)
1100; CHECK-SSE1-NEXT:    movw %dx, (%rax)
1101; CHECK-SSE1-NEXT:    popq %rbx
1102; CHECK-SSE1-NEXT:    popq %r14
1103; CHECK-SSE1-NEXT:    popq %rbp
1104; CHECK-SSE1-NEXT:    retq
1105;
1106; CHECK-SSE2-LABEL: out_v8i16:
1107; CHECK-SSE2:       # %bb.0:
1108; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
1109; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
1110; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
1111; CHECK-SSE2-NEXT:    retq
1112;
1113; CHECK-XOP-LABEL: out_v8i16:
1114; CHECK-XOP:       # %bb.0:
1115; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
1116; CHECK-XOP-NEXT:    retq
  ; Keep the bits of %x selected by %mask.
1117  %mx = and <8 x i16> %x, %mask
  ; xor with an all-ones vector is a bitwise NOT of %mask.
1118  %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; Keep the bits of %y where %mask is clear, then combine both halves.
1119  %my = and <8 x i16> %y, %notmask
1120  %r = or <8 x i16> %mx, %my
1121  ret <8 x i16> %r
1122}
1123
; Masked merge of two <4 x i32> vectors whose operands are loaded from memory:
;   r = (x & mask) | (y & ~mask)
; with x, y and mask loaded from %px, %py and %pmask (each load is align 16).
; The expected-output comments inside the function are autogenerated (see the
; NOTE at the top of this file); regenerate them instead of editing by hand.
; Expected lowering, as recorded below:
;   - no-SSE run: four scalar 32-bit and/not/and/or groups, with the result
;     stored through the pointer copied from %rdi into %rax;
;   - SSE1-only run: movaps loads, andps + andnps + orps, and a movaps store
;     of the result to (%rdi);
;   - SSE2 run: the same andps + andnps + orps sequence, leaving the result in
;     %xmm0;
;   - XOP run: two vmovdqa loads and a vpcmov with (%rsi) as a memory operand.
1124define <4 x i32> @out_v4i32(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind {
1125; CHECK-BASELINE-LABEL: out_v4i32:
1126; CHECK-BASELINE:       # %bb.0:
1127; CHECK-BASELINE-NEXT:    pushq %rbx
1128; CHECK-BASELINE-NEXT:    movq %rdi, %rax
1129; CHECK-BASELINE-NEXT:    movl (%rcx), %r8d
1130; CHECK-BASELINE-NEXT:    movl 4(%rcx), %r9d
1131; CHECK-BASELINE-NEXT:    movl 8(%rcx), %edi
1132; CHECK-BASELINE-NEXT:    movl 12(%rcx), %ecx
1133; CHECK-BASELINE-NEXT:    movl 12(%rsi), %r10d
1134; CHECK-BASELINE-NEXT:    andl %ecx, %r10d
1135; CHECK-BASELINE-NEXT:    movl 8(%rsi), %r11d
1136; CHECK-BASELINE-NEXT:    andl %edi, %r11d
1137; CHECK-BASELINE-NEXT:    movl 4(%rsi), %ebx
1138; CHECK-BASELINE-NEXT:    andl %r9d, %ebx
1139; CHECK-BASELINE-NEXT:    movl (%rsi), %esi
1140; CHECK-BASELINE-NEXT:    andl %r8d, %esi
1141; CHECK-BASELINE-NEXT:    notl %r8d
1142; CHECK-BASELINE-NEXT:    notl %r9d
1143; CHECK-BASELINE-NEXT:    notl %edi
1144; CHECK-BASELINE-NEXT:    notl %ecx
1145; CHECK-BASELINE-NEXT:    andl 12(%rdx), %ecx
1146; CHECK-BASELINE-NEXT:    orl %r10d, %ecx
1147; CHECK-BASELINE-NEXT:    andl 8(%rdx), %edi
1148; CHECK-BASELINE-NEXT:    orl %r11d, %edi
1149; CHECK-BASELINE-NEXT:    andl 4(%rdx), %r9d
1150; CHECK-BASELINE-NEXT:    orl %ebx, %r9d
1151; CHECK-BASELINE-NEXT:    andl (%rdx), %r8d
1152; CHECK-BASELINE-NEXT:    orl %esi, %r8d
1153; CHECK-BASELINE-NEXT:    movl %ecx, 12(%rax)
1154; CHECK-BASELINE-NEXT:    movl %edi, 8(%rax)
1155; CHECK-BASELINE-NEXT:    movl %r9d, 4(%rax)
1156; CHECK-BASELINE-NEXT:    movl %r8d, (%rax)
1157; CHECK-BASELINE-NEXT:    popq %rbx
1158; CHECK-BASELINE-NEXT:    retq
1159;
1160; CHECK-SSE1-LABEL: out_v4i32:
1161; CHECK-SSE1:       # %bb.0:
1162; CHECK-SSE1-NEXT:    movq %rdi, %rax
1163; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
1164; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
1165; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
1166; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
1167; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
1168; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
1169; CHECK-SSE1-NEXT:    retq
1170;
1171; CHECK-SSE2-LABEL: out_v4i32:
1172; CHECK-SSE2:       # %bb.0:
1173; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
1174; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
1175; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
1176; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
1177; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
1178; CHECK-SSE2-NEXT:    retq
1179;
1180; CHECK-XOP-LABEL: out_v4i32:
1181; CHECK-XOP:       # %bb.0:
1182; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
1183; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
1184; CHECK-XOP-NEXT:    vpcmov %xmm1, (%rsi), %xmm0, %xmm0
1185; CHECK-XOP-NEXT:    retq
  ; Load all three operands from their pointers.
1186  %x = load <4 x i32>, <4 x i32> *%px, align 16
1187  %y = load <4 x i32>, <4 x i32> *%py, align 16
1188  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  ; Keep the bits of %x selected by %mask.
1189  %mx = and <4 x i32> %x, %mask
  ; xor with an all-ones vector is a bitwise NOT of %mask.
1190  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; Keep the bits of %y where %mask is clear, then combine both halves.
1191  %my = and <4 x i32> %y, %notmask
1192  %r = or <4 x i32> %mx, %my
1193  ret <4 x i32> %r
1194}
1195
; Variant of out_v4i32 in which lane 2 of the xor constant is undef instead of
; -1, so the "not mask" is only fully specified for lanes 0, 1 and 3.
; The expected-output comments inside the function are autogenerated (see the
; NOTE at the top of this file); regenerate them instead of editing by hand.
; Expected lowering, as recorded below:
;   - no-SSE run: the element at byte offset 8 is computed only as x & mask
;     (8(%rsi) and-ed with 8(%rcx)) with no not / and against 8(%rdx); the
;     other three elements use the full and/not/and/or group;
;   - SSE1-only, SSE2 and XOP runs: the same instruction sequences as recorded
;     for out_v4i32, i.e. the undef lane does not change the vector lowering.
1196define <4 x i32> @out_v4i32_undef(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind {
1197; CHECK-BASELINE-LABEL: out_v4i32_undef:
1198; CHECK-BASELINE:       # %bb.0:
1199; CHECK-BASELINE-NEXT:    movq %rdi, %rax
1200; CHECK-BASELINE-NEXT:    movl 8(%rsi), %r8d
1201; CHECK-BASELINE-NEXT:    movl (%rcx), %r9d
1202; CHECK-BASELINE-NEXT:    movl 4(%rcx), %r10d
1203; CHECK-BASELINE-NEXT:    movl 12(%rcx), %edi
1204; CHECK-BASELINE-NEXT:    andl 8(%rcx), %r8d
1205; CHECK-BASELINE-NEXT:    movl 12(%rsi), %ecx
1206; CHECK-BASELINE-NEXT:    andl %edi, %ecx
1207; CHECK-BASELINE-NEXT:    movl 4(%rsi), %r11d
1208; CHECK-BASELINE-NEXT:    andl %r10d, %r11d
1209; CHECK-BASELINE-NEXT:    movl (%rsi), %esi
1210; CHECK-BASELINE-NEXT:    andl %r9d, %esi
1211; CHECK-BASELINE-NEXT:    notl %r9d
1212; CHECK-BASELINE-NEXT:    notl %r10d
1213; CHECK-BASELINE-NEXT:    notl %edi
1214; CHECK-BASELINE-NEXT:    andl 12(%rdx), %edi
1215; CHECK-BASELINE-NEXT:    orl %ecx, %edi
1216; CHECK-BASELINE-NEXT:    andl 4(%rdx), %r10d
1217; CHECK-BASELINE-NEXT:    orl %r11d, %r10d
1218; CHECK-BASELINE-NEXT:    andl (%rdx), %r9d
1219; CHECK-BASELINE-NEXT:    orl %esi, %r9d
1220; CHECK-BASELINE-NEXT:    movl %r8d, 8(%rax)
1221; CHECK-BASELINE-NEXT:    movl %edi, 12(%rax)
1222; CHECK-BASELINE-NEXT:    movl %r10d, 4(%rax)
1223; CHECK-BASELINE-NEXT:    movl %r9d, (%rax)
1224; CHECK-BASELINE-NEXT:    retq
1225;
1226; CHECK-SSE1-LABEL: out_v4i32_undef:
1227; CHECK-SSE1:       # %bb.0:
1228; CHECK-SSE1-NEXT:    movq %rdi, %rax
1229; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
1230; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
1231; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
1232; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
1233; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
1234; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
1235; CHECK-SSE1-NEXT:    retq
1236;
1237; CHECK-SSE2-LABEL: out_v4i32_undef:
1238; CHECK-SSE2:       # %bb.0:
1239; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
1240; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
1241; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
1242; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
1243; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
1244; CHECK-SSE2-NEXT:    retq
1245;
1246; CHECK-XOP-LABEL: out_v4i32_undef:
1247; CHECK-XOP:       # %bb.0:
1248; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
1249; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
1250; CHECK-XOP-NEXT:    vpcmov %xmm1, (%rsi), %xmm0, %xmm0
1251; CHECK-XOP-NEXT:    retq
  ; Load all three operands from their pointers.
1252  %x = load <4 x i32>, <4 x i32> *%px, align 16
1253  %y = load <4 x i32>, <4 x i32> *%py, align 16
1254  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  ; Keep the bits of %x selected by %mask.
1255  %mx = and <4 x i32> %x, %mask
  ; Lane 2 of the constant is undef; the other lanes are all-ones (bitwise NOT).
1256  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1>
  ; Keep the bits of %y selected by %notmask, then combine both halves.
1257  %my = and <4 x i32> %y, %notmask
1258  %r = or <4 x i32> %mx, %my
1259  ret <4 x i32> %r
1260}
1261
; Masked merge of two <2 x i64> vectors passed by value:
;   r = (x & mask) | (y & ~mask)
; The expected-output comments inside the function are autogenerated (see the
; NOTE at the top of this file); regenerate them instead of editing by hand.
; Expected lowering, as recorded below:
;   - no-SSE and SSE1-only runs: identical scalar sequences of two 64-bit
;     and/not/and/or groups, with the two result elements left in %rax and
;     %rdx;
;   - SSE2 run: andps + andnps + orps on the xmm argument registers;
;   - XOP run: a single vpcmov.
1262define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
1263; CHECK-BASELINE-LABEL: out_v2i64:
1264; CHECK-BASELINE:       # %bb.0:
1265; CHECK-BASELINE-NEXT:    movq %r8, %rax
1266; CHECK-BASELINE-NEXT:    andq %r9, %rsi
1267; CHECK-BASELINE-NEXT:    andq %r8, %rdi
1268; CHECK-BASELINE-NEXT:    notq %rax
1269; CHECK-BASELINE-NEXT:    notq %r9
1270; CHECK-BASELINE-NEXT:    andq %rcx, %r9
1271; CHECK-BASELINE-NEXT:    orq %rsi, %r9
1272; CHECK-BASELINE-NEXT:    andq %rdx, %rax
1273; CHECK-BASELINE-NEXT:    orq %rdi, %rax
1274; CHECK-BASELINE-NEXT:    movq %r9, %rdx
1275; CHECK-BASELINE-NEXT:    retq
1276;
1277; CHECK-SSE1-LABEL: out_v2i64:
1278; CHECK-SSE1:       # %bb.0:
1279; CHECK-SSE1-NEXT:    movq %r8, %rax
1280; CHECK-SSE1-NEXT:    andq %r9, %rsi
1281; CHECK-SSE1-NEXT:    andq %r8, %rdi
1282; CHECK-SSE1-NEXT:    notq %rax
1283; CHECK-SSE1-NEXT:    notq %r9
1284; CHECK-SSE1-NEXT:    andq %rcx, %r9
1285; CHECK-SSE1-NEXT:    orq %rsi, %r9
1286; CHECK-SSE1-NEXT:    andq %rdx, %rax
1287; CHECK-SSE1-NEXT:    orq %rdi, %rax
1288; CHECK-SSE1-NEXT:    movq %r9, %rdx
1289; CHECK-SSE1-NEXT:    retq
1290;
1291; CHECK-SSE2-LABEL: out_v2i64:
1292; CHECK-SSE2:       # %bb.0:
1293; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
1294; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
1295; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
1296; CHECK-SSE2-NEXT:    retq
1297;
1298; CHECK-XOP-LABEL: out_v2i64:
1299; CHECK-XOP:       # %bb.0:
1300; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
1301; CHECK-XOP-NEXT:    retq
  ; Keep the bits of %x selected by %mask.
1302  %mx = and <2 x i64> %x, %mask
  ; xor with an all-ones vector is a bitwise NOT of %mask.
1303  %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1>
  ; Keep the bits of %y where %mask is clear, then combine both halves.
1304  %my = and <2 x i64> %y, %notmask
1305  %r = or <2 x i64> %mx, %my
1306  ret <2 x i64> %r
1307}
1308
1309; ============================================================================ ;
1310; 256-bit vector width
1311; ============================================================================ ;
1312
1313define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) nounwind {
1314; CHECK-BASELINE-LABEL: out_v32i8:
1315; CHECK-BASELINE:       # %bb.0:
1316; CHECK-BASELINE-NEXT:    pushq %rbp
1317; CHECK-BASELINE-NEXT:    pushq %r15
1318; CHECK-BASELINE-NEXT:    pushq %r14
1319; CHECK-BASELINE-NEXT:    pushq %r13
1320; CHECK-BASELINE-NEXT:    pushq %r12
1321; CHECK-BASELINE-NEXT:    pushq %rbx
1322; CHECK-BASELINE-NEXT:    movq %rcx, %r15
1323; CHECK-BASELINE-NEXT:    movq %rsi, %r14
1324; CHECK-BASELINE-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1325; CHECK-BASELINE-NEXT:    movb 16(%rcx), %al
1326; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1327; CHECK-BASELINE-NEXT:    movb 17(%rcx), %al
1328; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1329; CHECK-BASELINE-NEXT:    movb 18(%rcx), %al
1330; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1331; CHECK-BASELINE-NEXT:    movb 19(%rcx), %al
1332; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1333; CHECK-BASELINE-NEXT:    movb 20(%rcx), %al
1334; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1335; CHECK-BASELINE-NEXT:    movb 21(%rcx), %r12b
1336; CHECK-BASELINE-NEXT:    movb 22(%rcx), %r9b
1337; CHECK-BASELINE-NEXT:    movb 23(%rcx), %r10b
1338; CHECK-BASELINE-NEXT:    movb 24(%rcx), %r11b
1339; CHECK-BASELINE-NEXT:    movb 25(%rcx), %bpl
1340; CHECK-BASELINE-NEXT:    movb 26(%rcx), %r13b
1341; CHECK-BASELINE-NEXT:    movb 27(%rcx), %r8b
1342; CHECK-BASELINE-NEXT:    movb 28(%rcx), %dil
1343; CHECK-BASELINE-NEXT:    movb 29(%rcx), %sil
1344; CHECK-BASELINE-NEXT:    movb 30(%rcx), %bl
1345; CHECK-BASELINE-NEXT:    movb 31(%rcx), %al
1346; CHECK-BASELINE-NEXT:    movb 31(%r14), %cl
1347; CHECK-BASELINE-NEXT:    andb %al, %cl
1348; CHECK-BASELINE-NEXT:    notb %al
1349; CHECK-BASELINE-NEXT:    andb 31(%rdx), %al
1350; CHECK-BASELINE-NEXT:    orb %cl, %al
1351; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1352; CHECK-BASELINE-NEXT:    movb 30(%r14), %al
1353; CHECK-BASELINE-NEXT:    andb %bl, %al
1354; CHECK-BASELINE-NEXT:    notb %bl
1355; CHECK-BASELINE-NEXT:    andb 30(%rdx), %bl
1356; CHECK-BASELINE-NEXT:    orb %al, %bl
1357; CHECK-BASELINE-NEXT:    movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1358; CHECK-BASELINE-NEXT:    movb 29(%r14), %al
1359; CHECK-BASELINE-NEXT:    andb %sil, %al
1360; CHECK-BASELINE-NEXT:    notb %sil
1361; CHECK-BASELINE-NEXT:    andb 29(%rdx), %sil
1362; CHECK-BASELINE-NEXT:    orb %al, %sil
1363; CHECK-BASELINE-NEXT:    movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1364; CHECK-BASELINE-NEXT:    movb 28(%r14), %al
1365; CHECK-BASELINE-NEXT:    andb %dil, %al
1366; CHECK-BASELINE-NEXT:    notb %dil
1367; CHECK-BASELINE-NEXT:    andb 28(%rdx), %dil
1368; CHECK-BASELINE-NEXT:    orb %al, %dil
1369; CHECK-BASELINE-NEXT:    movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1370; CHECK-BASELINE-NEXT:    movb 27(%r14), %al
1371; CHECK-BASELINE-NEXT:    andb %r8b, %al
1372; CHECK-BASELINE-NEXT:    notb %r8b
1373; CHECK-BASELINE-NEXT:    andb 27(%rdx), %r8b
1374; CHECK-BASELINE-NEXT:    orb %al, %r8b
1375; CHECK-BASELINE-NEXT:    movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1376; CHECK-BASELINE-NEXT:    movb 26(%r14), %al
1377; CHECK-BASELINE-NEXT:    andb %r13b, %al
1378; CHECK-BASELINE-NEXT:    notb %r13b
1379; CHECK-BASELINE-NEXT:    andb 26(%rdx), %r13b
1380; CHECK-BASELINE-NEXT:    orb %al, %r13b
1381; CHECK-BASELINE-NEXT:    movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1382; CHECK-BASELINE-NEXT:    movb 25(%r14), %al
1383; CHECK-BASELINE-NEXT:    andb %bpl, %al
1384; CHECK-BASELINE-NEXT:    notb %bpl
1385; CHECK-BASELINE-NEXT:    andb 25(%rdx), %bpl
1386; CHECK-BASELINE-NEXT:    orb %al, %bpl
1387; CHECK-BASELINE-NEXT:    movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1388; CHECK-BASELINE-NEXT:    movb 24(%r14), %al
1389; CHECK-BASELINE-NEXT:    andb %r11b, %al
1390; CHECK-BASELINE-NEXT:    notb %r11b
1391; CHECK-BASELINE-NEXT:    andb 24(%rdx), %r11b
1392; CHECK-BASELINE-NEXT:    orb %al, %r11b
1393; CHECK-BASELINE-NEXT:    movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1394; CHECK-BASELINE-NEXT:    movb 23(%r14), %al
1395; CHECK-BASELINE-NEXT:    andb %r10b, %al
1396; CHECK-BASELINE-NEXT:    notb %r10b
1397; CHECK-BASELINE-NEXT:    andb 23(%rdx), %r10b
1398; CHECK-BASELINE-NEXT:    orb %al, %r10b
1399; CHECK-BASELINE-NEXT:    movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1400; CHECK-BASELINE-NEXT:    movb 22(%r14), %al
1401; CHECK-BASELINE-NEXT:    andb %r9b, %al
1402; CHECK-BASELINE-NEXT:    notb %r9b
1403; CHECK-BASELINE-NEXT:    andb 22(%rdx), %r9b
1404; CHECK-BASELINE-NEXT:    orb %al, %r9b
1405; CHECK-BASELINE-NEXT:    movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1406; CHECK-BASELINE-NEXT:    movb 21(%r14), %al
1407; CHECK-BASELINE-NEXT:    andb %r12b, %al
1408; CHECK-BASELINE-NEXT:    notb %r12b
1409; CHECK-BASELINE-NEXT:    andb 21(%rdx), %r12b
1410; CHECK-BASELINE-NEXT:    orb %al, %r12b
1411; CHECK-BASELINE-NEXT:    movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1412; CHECK-BASELINE-NEXT:    movb 20(%r14), %al
1413; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1414; CHECK-BASELINE-NEXT:    andb %cl, %al
1415; CHECK-BASELINE-NEXT:    notb %cl
1416; CHECK-BASELINE-NEXT:    andb 20(%rdx), %cl
1417; CHECK-BASELINE-NEXT:    orb %al, %cl
1418; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1419; CHECK-BASELINE-NEXT:    movb 19(%r14), %al
1420; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1421; CHECK-BASELINE-NEXT:    andb %cl, %al
1422; CHECK-BASELINE-NEXT:    notb %cl
1423; CHECK-BASELINE-NEXT:    andb 19(%rdx), %cl
1424; CHECK-BASELINE-NEXT:    orb %al, %cl
1425; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1426; CHECK-BASELINE-NEXT:    movb 18(%r14), %al
1427; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1428; CHECK-BASELINE-NEXT:    andb %cl, %al
1429; CHECK-BASELINE-NEXT:    notb %cl
1430; CHECK-BASELINE-NEXT:    andb 18(%rdx), %cl
1431; CHECK-BASELINE-NEXT:    orb %al, %cl
1432; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1433; CHECK-BASELINE-NEXT:    movb 17(%r14), %al
1434; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1435; CHECK-BASELINE-NEXT:    andb %cl, %al
1436; CHECK-BASELINE-NEXT:    notb %cl
1437; CHECK-BASELINE-NEXT:    movq %rdx, %rbx
1438; CHECK-BASELINE-NEXT:    andb 17(%rdx), %cl
1439; CHECK-BASELINE-NEXT:    orb %al, %cl
1440; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1441; CHECK-BASELINE-NEXT:    movb 16(%r14), %al
1442; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1443; CHECK-BASELINE-NEXT:    andb %cl, %al
1444; CHECK-BASELINE-NEXT:    notb %cl
1445; CHECK-BASELINE-NEXT:    andb 16(%rdx), %cl
1446; CHECK-BASELINE-NEXT:    orb %al, %cl
1447; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1448; CHECK-BASELINE-NEXT:    movb 15(%r15), %cl
1449; CHECK-BASELINE-NEXT:    movb 15(%r14), %al
1450; CHECK-BASELINE-NEXT:    andb %cl, %al
1451; CHECK-BASELINE-NEXT:    notb %cl
1452; CHECK-BASELINE-NEXT:    andb 15(%rdx), %cl
1453; CHECK-BASELINE-NEXT:    orb %al, %cl
1454; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1455; CHECK-BASELINE-NEXT:    movb 14(%r15), %cl
1456; CHECK-BASELINE-NEXT:    movb 14(%r14), %al
1457; CHECK-BASELINE-NEXT:    andb %cl, %al
1458; CHECK-BASELINE-NEXT:    notb %cl
1459; CHECK-BASELINE-NEXT:    andb 14(%rdx), %cl
1460; CHECK-BASELINE-NEXT:    orb %al, %cl
1461; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1462; CHECK-BASELINE-NEXT:    movb 13(%r15), %cl
1463; CHECK-BASELINE-NEXT:    movb 13(%r14), %al
1464; CHECK-BASELINE-NEXT:    andb %cl, %al
1465; CHECK-BASELINE-NEXT:    notb %cl
1466; CHECK-BASELINE-NEXT:    andb 13(%rdx), %cl
1467; CHECK-BASELINE-NEXT:    orb %al, %cl
1468; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1469; CHECK-BASELINE-NEXT:    movb 12(%r15), %cl
1470; CHECK-BASELINE-NEXT:    movb 12(%r14), %al
1471; CHECK-BASELINE-NEXT:    andb %cl, %al
1472; CHECK-BASELINE-NEXT:    notb %cl
1473; CHECK-BASELINE-NEXT:    andb 12(%rdx), %cl
1474; CHECK-BASELINE-NEXT:    orb %al, %cl
1475; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1476; CHECK-BASELINE-NEXT:    movb 11(%r15), %r13b
1477; CHECK-BASELINE-NEXT:    movb 11(%r14), %al
1478; CHECK-BASELINE-NEXT:    andb %r13b, %al
1479; CHECK-BASELINE-NEXT:    notb %r13b
1480; CHECK-BASELINE-NEXT:    andb 11(%rdx), %r13b
1481; CHECK-BASELINE-NEXT:    orb %al, %r13b
1482; CHECK-BASELINE-NEXT:    movb 10(%r15), %r12b
1483; CHECK-BASELINE-NEXT:    movb 10(%r14), %al
1484; CHECK-BASELINE-NEXT:    andb %r12b, %al
1485; CHECK-BASELINE-NEXT:    notb %r12b
1486; CHECK-BASELINE-NEXT:    andb 10(%rdx), %r12b
1487; CHECK-BASELINE-NEXT:    orb %al, %r12b
1488; CHECK-BASELINE-NEXT:    movb 9(%r15), %bpl
1489; CHECK-BASELINE-NEXT:    movb 9(%r14), %al
1490; CHECK-BASELINE-NEXT:    andb %bpl, %al
1491; CHECK-BASELINE-NEXT:    notb %bpl
1492; CHECK-BASELINE-NEXT:    andb 9(%rdx), %bpl
1493; CHECK-BASELINE-NEXT:    orb %al, %bpl
1494; CHECK-BASELINE-NEXT:    movb 8(%r15), %r11b
1495; CHECK-BASELINE-NEXT:    movb 8(%r14), %al
1496; CHECK-BASELINE-NEXT:    andb %r11b, %al
1497; CHECK-BASELINE-NEXT:    notb %r11b
1498; CHECK-BASELINE-NEXT:    andb 8(%rdx), %r11b
1499; CHECK-BASELINE-NEXT:    orb %al, %r11b
1500; CHECK-BASELINE-NEXT:    movb 7(%r15), %r10b
1501; CHECK-BASELINE-NEXT:    movb 7(%r14), %al
1502; CHECK-BASELINE-NEXT:    andb %r10b, %al
1503; CHECK-BASELINE-NEXT:    notb %r10b
1504; CHECK-BASELINE-NEXT:    andb 7(%rdx), %r10b
1505; CHECK-BASELINE-NEXT:    orb %al, %r10b
1506; CHECK-BASELINE-NEXT:    movb 6(%r15), %r9b
1507; CHECK-BASELINE-NEXT:    movb 6(%r14), %al
1508; CHECK-BASELINE-NEXT:    andb %r9b, %al
1509; CHECK-BASELINE-NEXT:    notb %r9b
1510; CHECK-BASELINE-NEXT:    andb 6(%rdx), %r9b
1511; CHECK-BASELINE-NEXT:    orb %al, %r9b
1512; CHECK-BASELINE-NEXT:    movb 5(%r15), %r8b
1513; CHECK-BASELINE-NEXT:    movb 5(%r14), %al
1514; CHECK-BASELINE-NEXT:    andb %r8b, %al
1515; CHECK-BASELINE-NEXT:    notb %r8b
1516; CHECK-BASELINE-NEXT:    andb 5(%rdx), %r8b
1517; CHECK-BASELINE-NEXT:    orb %al, %r8b
1518; CHECK-BASELINE-NEXT:    movb 4(%r15), %dil
1519; CHECK-BASELINE-NEXT:    movb 4(%r14), %al
1520; CHECK-BASELINE-NEXT:    andb %dil, %al
1521; CHECK-BASELINE-NEXT:    notb %dil
1522; CHECK-BASELINE-NEXT:    andb 4(%rdx), %dil
1523; CHECK-BASELINE-NEXT:    orb %al, %dil
1524; CHECK-BASELINE-NEXT:    movb 3(%r15), %sil
1525; CHECK-BASELINE-NEXT:    movb 3(%r14), %al
1526; CHECK-BASELINE-NEXT:    andb %sil, %al
1527; CHECK-BASELINE-NEXT:    notb %sil
1528; CHECK-BASELINE-NEXT:    andb 3(%rdx), %sil
1529; CHECK-BASELINE-NEXT:    orb %al, %sil
1530; CHECK-BASELINE-NEXT:    movb 2(%r15), %dl
1531; CHECK-BASELINE-NEXT:    movb 2(%r14), %al
1532; CHECK-BASELINE-NEXT:    andb %dl, %al
1533; CHECK-BASELINE-NEXT:    notb %dl
1534; CHECK-BASELINE-NEXT:    andb 2(%rbx), %dl
1535; CHECK-BASELINE-NEXT:    orb %al, %dl
1536; CHECK-BASELINE-NEXT:    movb 1(%r15), %al
1537; CHECK-BASELINE-NEXT:    movb 1(%r14), %cl
1538; CHECK-BASELINE-NEXT:    andb %al, %cl
1539; CHECK-BASELINE-NEXT:    notb %al
1540; CHECK-BASELINE-NEXT:    andb 1(%rbx), %al
1541; CHECK-BASELINE-NEXT:    orb %cl, %al
1542; CHECK-BASELINE-NEXT:    movb (%r15), %r15b
1543; CHECK-BASELINE-NEXT:    movb (%r14), %r14b
1544; CHECK-BASELINE-NEXT:    andb %r15b, %r14b
1545; CHECK-BASELINE-NEXT:    notb %r15b
1546; CHECK-BASELINE-NEXT:    andb (%rbx), %r15b
1547; CHECK-BASELINE-NEXT:    orb %r14b, %r15b
1548; CHECK-BASELINE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
1549; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1550; CHECK-BASELINE-NEXT:    movb %bl, 31(%rcx)
1551; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1552; CHECK-BASELINE-NEXT:    movb %bl, 30(%rcx)
1553; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1554; CHECK-BASELINE-NEXT:    movb %bl, 29(%rcx)
1555; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1556; CHECK-BASELINE-NEXT:    movb %bl, 28(%rcx)
1557; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1558; CHECK-BASELINE-NEXT:    movb %bl, 27(%rcx)
1559; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1560; CHECK-BASELINE-NEXT:    movb %bl, 26(%rcx)
1561; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1562; CHECK-BASELINE-NEXT:    movb %bl, 25(%rcx)
1563; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1564; CHECK-BASELINE-NEXT:    movb %bl, 24(%rcx)
1565; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1566; CHECK-BASELINE-NEXT:    movb %bl, 23(%rcx)
1567; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1568; CHECK-BASELINE-NEXT:    movb %bl, 22(%rcx)
1569; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1570; CHECK-BASELINE-NEXT:    movb %bl, 21(%rcx)
1571; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1572; CHECK-BASELINE-NEXT:    movb %bl, 20(%rcx)
1573; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1574; CHECK-BASELINE-NEXT:    movb %bl, 19(%rcx)
1575; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1576; CHECK-BASELINE-NEXT:    movb %bl, 18(%rcx)
1577; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1578; CHECK-BASELINE-NEXT:    movb %bl, 17(%rcx)
1579; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1580; CHECK-BASELINE-NEXT:    movb %bl, 16(%rcx)
1581; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1582; CHECK-BASELINE-NEXT:    movb %bl, 15(%rcx)
1583; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1584; CHECK-BASELINE-NEXT:    movb %bl, 14(%rcx)
1585; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1586; CHECK-BASELINE-NEXT:    movb %bl, 13(%rcx)
1587; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1588; CHECK-BASELINE-NEXT:    movb %bl, 12(%rcx)
1589; CHECK-BASELINE-NEXT:    movb %r13b, 11(%rcx)
1590; CHECK-BASELINE-NEXT:    movb %r12b, 10(%rcx)
1591; CHECK-BASELINE-NEXT:    movb %bpl, 9(%rcx)
1592; CHECK-BASELINE-NEXT:    movb %r11b, 8(%rcx)
1593; CHECK-BASELINE-NEXT:    movb %r10b, 7(%rcx)
1594; CHECK-BASELINE-NEXT:    movb %r9b, 6(%rcx)
1595; CHECK-BASELINE-NEXT:    movb %r8b, 5(%rcx)
1596; CHECK-BASELINE-NEXT:    movb %dil, 4(%rcx)
1597; CHECK-BASELINE-NEXT:    movb %sil, 3(%rcx)
1598; CHECK-BASELINE-NEXT:    movb %dl, 2(%rcx)
1599; CHECK-BASELINE-NEXT:    movb %al, 1(%rcx)
1600; CHECK-BASELINE-NEXT:    movb %r15b, (%rcx)
1601; CHECK-BASELINE-NEXT:    movq %rcx, %rax
1602; CHECK-BASELINE-NEXT:    popq %rbx
1603; CHECK-BASELINE-NEXT:    popq %r12
1604; CHECK-BASELINE-NEXT:    popq %r13
1605; CHECK-BASELINE-NEXT:    popq %r14
1606; CHECK-BASELINE-NEXT:    popq %r15
1607; CHECK-BASELINE-NEXT:    popq %rbp
1608; CHECK-BASELINE-NEXT:    retq
1609;
1610; CHECK-SSE1-LABEL: out_v32i8:
1611; CHECK-SSE1:       # %bb.0:
1612; CHECK-SSE1-NEXT:    pushq %rbp
1613; CHECK-SSE1-NEXT:    pushq %r15
1614; CHECK-SSE1-NEXT:    pushq %r14
1615; CHECK-SSE1-NEXT:    pushq %r13
1616; CHECK-SSE1-NEXT:    pushq %r12
1617; CHECK-SSE1-NEXT:    pushq %rbx
1618; CHECK-SSE1-NEXT:    movq %rcx, %r15
1619; CHECK-SSE1-NEXT:    movq %rsi, %r14
1620; CHECK-SSE1-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1621; CHECK-SSE1-NEXT:    movb 16(%rcx), %al
1622; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1623; CHECK-SSE1-NEXT:    movb 17(%rcx), %al
1624; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1625; CHECK-SSE1-NEXT:    movb 18(%rcx), %al
1626; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1627; CHECK-SSE1-NEXT:    movb 19(%rcx), %al
1628; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1629; CHECK-SSE1-NEXT:    movb 20(%rcx), %al
1630; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1631; CHECK-SSE1-NEXT:    movb 21(%rcx), %r12b
1632; CHECK-SSE1-NEXT:    movb 22(%rcx), %r9b
1633; CHECK-SSE1-NEXT:    movb 23(%rcx), %r10b
1634; CHECK-SSE1-NEXT:    movb 24(%rcx), %r11b
1635; CHECK-SSE1-NEXT:    movb 25(%rcx), %bpl
1636; CHECK-SSE1-NEXT:    movb 26(%rcx), %r13b
1637; CHECK-SSE1-NEXT:    movb 27(%rcx), %r8b
1638; CHECK-SSE1-NEXT:    movb 28(%rcx), %dil
1639; CHECK-SSE1-NEXT:    movb 29(%rcx), %sil
1640; CHECK-SSE1-NEXT:    movb 30(%rcx), %bl
1641; CHECK-SSE1-NEXT:    movb 31(%rcx), %al
1642; CHECK-SSE1-NEXT:    movb 31(%r14), %cl
1643; CHECK-SSE1-NEXT:    andb %al, %cl
1644; CHECK-SSE1-NEXT:    notb %al
1645; CHECK-SSE1-NEXT:    andb 31(%rdx), %al
1646; CHECK-SSE1-NEXT:    orb %cl, %al
1647; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1648; CHECK-SSE1-NEXT:    movb 30(%r14), %al
1649; CHECK-SSE1-NEXT:    andb %bl, %al
1650; CHECK-SSE1-NEXT:    notb %bl
1651; CHECK-SSE1-NEXT:    andb 30(%rdx), %bl
1652; CHECK-SSE1-NEXT:    orb %al, %bl
1653; CHECK-SSE1-NEXT:    movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1654; CHECK-SSE1-NEXT:    movb 29(%r14), %al
1655; CHECK-SSE1-NEXT:    andb %sil, %al
1656; CHECK-SSE1-NEXT:    notb %sil
1657; CHECK-SSE1-NEXT:    andb 29(%rdx), %sil
1658; CHECK-SSE1-NEXT:    orb %al, %sil
1659; CHECK-SSE1-NEXT:    movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1660; CHECK-SSE1-NEXT:    movb 28(%r14), %al
1661; CHECK-SSE1-NEXT:    andb %dil, %al
1662; CHECK-SSE1-NEXT:    notb %dil
1663; CHECK-SSE1-NEXT:    andb 28(%rdx), %dil
1664; CHECK-SSE1-NEXT:    orb %al, %dil
1665; CHECK-SSE1-NEXT:    movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1666; CHECK-SSE1-NEXT:    movb 27(%r14), %al
1667; CHECK-SSE1-NEXT:    andb %r8b, %al
1668; CHECK-SSE1-NEXT:    notb %r8b
1669; CHECK-SSE1-NEXT:    andb 27(%rdx), %r8b
1670; CHECK-SSE1-NEXT:    orb %al, %r8b
1671; CHECK-SSE1-NEXT:    movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1672; CHECK-SSE1-NEXT:    movb 26(%r14), %al
1673; CHECK-SSE1-NEXT:    andb %r13b, %al
1674; CHECK-SSE1-NEXT:    notb %r13b
1675; CHECK-SSE1-NEXT:    andb 26(%rdx), %r13b
1676; CHECK-SSE1-NEXT:    orb %al, %r13b
1677; CHECK-SSE1-NEXT:    movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1678; CHECK-SSE1-NEXT:    movb 25(%r14), %al
1679; CHECK-SSE1-NEXT:    andb %bpl, %al
1680; CHECK-SSE1-NEXT:    notb %bpl
1681; CHECK-SSE1-NEXT:    andb 25(%rdx), %bpl
1682; CHECK-SSE1-NEXT:    orb %al, %bpl
1683; CHECK-SSE1-NEXT:    movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1684; CHECK-SSE1-NEXT:    movb 24(%r14), %al
1685; CHECK-SSE1-NEXT:    andb %r11b, %al
1686; CHECK-SSE1-NEXT:    notb %r11b
1687; CHECK-SSE1-NEXT:    andb 24(%rdx), %r11b
1688; CHECK-SSE1-NEXT:    orb %al, %r11b
1689; CHECK-SSE1-NEXT:    movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1690; CHECK-SSE1-NEXT:    movb 23(%r14), %al
1691; CHECK-SSE1-NEXT:    andb %r10b, %al
1692; CHECK-SSE1-NEXT:    notb %r10b
1693; CHECK-SSE1-NEXT:    andb 23(%rdx), %r10b
1694; CHECK-SSE1-NEXT:    orb %al, %r10b
1695; CHECK-SSE1-NEXT:    movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1696; CHECK-SSE1-NEXT:    movb 22(%r14), %al
1697; CHECK-SSE1-NEXT:    andb %r9b, %al
1698; CHECK-SSE1-NEXT:    notb %r9b
1699; CHECK-SSE1-NEXT:    andb 22(%rdx), %r9b
1700; CHECK-SSE1-NEXT:    orb %al, %r9b
1701; CHECK-SSE1-NEXT:    movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1702; CHECK-SSE1-NEXT:    movb 21(%r14), %al
1703; CHECK-SSE1-NEXT:    andb %r12b, %al
1704; CHECK-SSE1-NEXT:    notb %r12b
1705; CHECK-SSE1-NEXT:    andb 21(%rdx), %r12b
1706; CHECK-SSE1-NEXT:    orb %al, %r12b
1707; CHECK-SSE1-NEXT:    movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1708; CHECK-SSE1-NEXT:    movb 20(%r14), %al
1709; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1710; CHECK-SSE1-NEXT:    andb %cl, %al
1711; CHECK-SSE1-NEXT:    notb %cl
1712; CHECK-SSE1-NEXT:    andb 20(%rdx), %cl
1713; CHECK-SSE1-NEXT:    orb %al, %cl
1714; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1715; CHECK-SSE1-NEXT:    movb 19(%r14), %al
1716; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1717; CHECK-SSE1-NEXT:    andb %cl, %al
1718; CHECK-SSE1-NEXT:    notb %cl
1719; CHECK-SSE1-NEXT:    andb 19(%rdx), %cl
1720; CHECK-SSE1-NEXT:    orb %al, %cl
1721; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1722; CHECK-SSE1-NEXT:    movb 18(%r14), %al
1723; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1724; CHECK-SSE1-NEXT:    andb %cl, %al
1725; CHECK-SSE1-NEXT:    notb %cl
1726; CHECK-SSE1-NEXT:    andb 18(%rdx), %cl
1727; CHECK-SSE1-NEXT:    orb %al, %cl
1728; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1729; CHECK-SSE1-NEXT:    movb 17(%r14), %al
1730; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1731; CHECK-SSE1-NEXT:    andb %cl, %al
1732; CHECK-SSE1-NEXT:    notb %cl
1733; CHECK-SSE1-NEXT:    movq %rdx, %rbx
1734; CHECK-SSE1-NEXT:    andb 17(%rdx), %cl
1735; CHECK-SSE1-NEXT:    orb %al, %cl
1736; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1737; CHECK-SSE1-NEXT:    movb 16(%r14), %al
1738; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
1739; CHECK-SSE1-NEXT:    andb %cl, %al
1740; CHECK-SSE1-NEXT:    notb %cl
1741; CHECK-SSE1-NEXT:    andb 16(%rdx), %cl
1742; CHECK-SSE1-NEXT:    orb %al, %cl
1743; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1744; CHECK-SSE1-NEXT:    movb 15(%r15), %cl
1745; CHECK-SSE1-NEXT:    movb 15(%r14), %al
1746; CHECK-SSE1-NEXT:    andb %cl, %al
1747; CHECK-SSE1-NEXT:    notb %cl
1748; CHECK-SSE1-NEXT:    andb 15(%rdx), %cl
1749; CHECK-SSE1-NEXT:    orb %al, %cl
1750; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1751; CHECK-SSE1-NEXT:    movb 14(%r15), %cl
1752; CHECK-SSE1-NEXT:    movb 14(%r14), %al
1753; CHECK-SSE1-NEXT:    andb %cl, %al
1754; CHECK-SSE1-NEXT:    notb %cl
1755; CHECK-SSE1-NEXT:    andb 14(%rdx), %cl
1756; CHECK-SSE1-NEXT:    orb %al, %cl
1757; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1758; CHECK-SSE1-NEXT:    movb 13(%r15), %cl
1759; CHECK-SSE1-NEXT:    movb 13(%r14), %al
1760; CHECK-SSE1-NEXT:    andb %cl, %al
1761; CHECK-SSE1-NEXT:    notb %cl
1762; CHECK-SSE1-NEXT:    andb 13(%rdx), %cl
1763; CHECK-SSE1-NEXT:    orb %al, %cl
1764; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1765; CHECK-SSE1-NEXT:    movb 12(%r15), %cl
1766; CHECK-SSE1-NEXT:    movb 12(%r14), %al
1767; CHECK-SSE1-NEXT:    andb %cl, %al
1768; CHECK-SSE1-NEXT:    notb %cl
1769; CHECK-SSE1-NEXT:    andb 12(%rdx), %cl
1770; CHECK-SSE1-NEXT:    orb %al, %cl
1771; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
1772; CHECK-SSE1-NEXT:    movb 11(%r15), %r13b
1773; CHECK-SSE1-NEXT:    movb 11(%r14), %al
1774; CHECK-SSE1-NEXT:    andb %r13b, %al
1775; CHECK-SSE1-NEXT:    notb %r13b
1776; CHECK-SSE1-NEXT:    andb 11(%rdx), %r13b
1777; CHECK-SSE1-NEXT:    orb %al, %r13b
1778; CHECK-SSE1-NEXT:    movb 10(%r15), %r12b
1779; CHECK-SSE1-NEXT:    movb 10(%r14), %al
1780; CHECK-SSE1-NEXT:    andb %r12b, %al
1781; CHECK-SSE1-NEXT:    notb %r12b
1782; CHECK-SSE1-NEXT:    andb 10(%rdx), %r12b
1783; CHECK-SSE1-NEXT:    orb %al, %r12b
1784; CHECK-SSE1-NEXT:    movb 9(%r15), %bpl
1785; CHECK-SSE1-NEXT:    movb 9(%r14), %al
1786; CHECK-SSE1-NEXT:    andb %bpl, %al
1787; CHECK-SSE1-NEXT:    notb %bpl
1788; CHECK-SSE1-NEXT:    andb 9(%rdx), %bpl
1789; CHECK-SSE1-NEXT:    orb %al, %bpl
1790; CHECK-SSE1-NEXT:    movb 8(%r15), %r11b
1791; CHECK-SSE1-NEXT:    movb 8(%r14), %al
1792; CHECK-SSE1-NEXT:    andb %r11b, %al
1793; CHECK-SSE1-NEXT:    notb %r11b
1794; CHECK-SSE1-NEXT:    andb 8(%rdx), %r11b
1795; CHECK-SSE1-NEXT:    orb %al, %r11b
1796; CHECK-SSE1-NEXT:    movb 7(%r15), %r10b
1797; CHECK-SSE1-NEXT:    movb 7(%r14), %al
1798; CHECK-SSE1-NEXT:    andb %r10b, %al
1799; CHECK-SSE1-NEXT:    notb %r10b
1800; CHECK-SSE1-NEXT:    andb 7(%rdx), %r10b
1801; CHECK-SSE1-NEXT:    orb %al, %r10b
1802; CHECK-SSE1-NEXT:    movb 6(%r15), %r9b
1803; CHECK-SSE1-NEXT:    movb 6(%r14), %al
1804; CHECK-SSE1-NEXT:    andb %r9b, %al
1805; CHECK-SSE1-NEXT:    notb %r9b
1806; CHECK-SSE1-NEXT:    andb 6(%rdx), %r9b
1807; CHECK-SSE1-NEXT:    orb %al, %r9b
1808; CHECK-SSE1-NEXT:    movb 5(%r15), %r8b
1809; CHECK-SSE1-NEXT:    movb 5(%r14), %al
1810; CHECK-SSE1-NEXT:    andb %r8b, %al
1811; CHECK-SSE1-NEXT:    notb %r8b
1812; CHECK-SSE1-NEXT:    andb 5(%rdx), %r8b
1813; CHECK-SSE1-NEXT:    orb %al, %r8b
1814; CHECK-SSE1-NEXT:    movb 4(%r15), %dil
1815; CHECK-SSE1-NEXT:    movb 4(%r14), %al
1816; CHECK-SSE1-NEXT:    andb %dil, %al
1817; CHECK-SSE1-NEXT:    notb %dil
1818; CHECK-SSE1-NEXT:    andb 4(%rdx), %dil
1819; CHECK-SSE1-NEXT:    orb %al, %dil
1820; CHECK-SSE1-NEXT:    movb 3(%r15), %sil
1821; CHECK-SSE1-NEXT:    movb 3(%r14), %al
1822; CHECK-SSE1-NEXT:    andb %sil, %al
1823; CHECK-SSE1-NEXT:    notb %sil
1824; CHECK-SSE1-NEXT:    andb 3(%rdx), %sil
1825; CHECK-SSE1-NEXT:    orb %al, %sil
1826; CHECK-SSE1-NEXT:    movb 2(%r15), %dl
1827; CHECK-SSE1-NEXT:    movb 2(%r14), %al
1828; CHECK-SSE1-NEXT:    andb %dl, %al
1829; CHECK-SSE1-NEXT:    notb %dl
1830; CHECK-SSE1-NEXT:    andb 2(%rbx), %dl
1831; CHECK-SSE1-NEXT:    orb %al, %dl
1832; CHECK-SSE1-NEXT:    movb 1(%r15), %al
1833; CHECK-SSE1-NEXT:    movb 1(%r14), %cl
1834; CHECK-SSE1-NEXT:    andb %al, %cl
1835; CHECK-SSE1-NEXT:    notb %al
1836; CHECK-SSE1-NEXT:    andb 1(%rbx), %al
1837; CHECK-SSE1-NEXT:    orb %cl, %al
1838; CHECK-SSE1-NEXT:    movb (%r15), %r15b
1839; CHECK-SSE1-NEXT:    movb (%r14), %r14b
1840; CHECK-SSE1-NEXT:    andb %r15b, %r14b
1841; CHECK-SSE1-NEXT:    notb %r15b
1842; CHECK-SSE1-NEXT:    andb (%rbx), %r15b
1843; CHECK-SSE1-NEXT:    orb %r14b, %r15b
1844; CHECK-SSE1-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
1845; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1846; CHECK-SSE1-NEXT:    movb %bl, 31(%rcx)
1847; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1848; CHECK-SSE1-NEXT:    movb %bl, 30(%rcx)
1849; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1850; CHECK-SSE1-NEXT:    movb %bl, 29(%rcx)
1851; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1852; CHECK-SSE1-NEXT:    movb %bl, 28(%rcx)
1853; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1854; CHECK-SSE1-NEXT:    movb %bl, 27(%rcx)
1855; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1856; CHECK-SSE1-NEXT:    movb %bl, 26(%rcx)
1857; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1858; CHECK-SSE1-NEXT:    movb %bl, 25(%rcx)
1859; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1860; CHECK-SSE1-NEXT:    movb %bl, 24(%rcx)
1861; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1862; CHECK-SSE1-NEXT:    movb %bl, 23(%rcx)
1863; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1864; CHECK-SSE1-NEXT:    movb %bl, 22(%rcx)
1865; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1866; CHECK-SSE1-NEXT:    movb %bl, 21(%rcx)
1867; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1868; CHECK-SSE1-NEXT:    movb %bl, 20(%rcx)
1869; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1870; CHECK-SSE1-NEXT:    movb %bl, 19(%rcx)
1871; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1872; CHECK-SSE1-NEXT:    movb %bl, 18(%rcx)
1873; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1874; CHECK-SSE1-NEXT:    movb %bl, 17(%rcx)
1875; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1876; CHECK-SSE1-NEXT:    movb %bl, 16(%rcx)
1877; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1878; CHECK-SSE1-NEXT:    movb %bl, 15(%rcx)
1879; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1880; CHECK-SSE1-NEXT:    movb %bl, 14(%rcx)
1881; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1882; CHECK-SSE1-NEXT:    movb %bl, 13(%rcx)
1883; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload
1884; CHECK-SSE1-NEXT:    movb %bl, 12(%rcx)
1885; CHECK-SSE1-NEXT:    movb %r13b, 11(%rcx)
1886; CHECK-SSE1-NEXT:    movb %r12b, 10(%rcx)
1887; CHECK-SSE1-NEXT:    movb %bpl, 9(%rcx)
1888; CHECK-SSE1-NEXT:    movb %r11b, 8(%rcx)
1889; CHECK-SSE1-NEXT:    movb %r10b, 7(%rcx)
1890; CHECK-SSE1-NEXT:    movb %r9b, 6(%rcx)
1891; CHECK-SSE1-NEXT:    movb %r8b, 5(%rcx)
1892; CHECK-SSE1-NEXT:    movb %dil, 4(%rcx)
1893; CHECK-SSE1-NEXT:    movb %sil, 3(%rcx)
1894; CHECK-SSE1-NEXT:    movb %dl, 2(%rcx)
1895; CHECK-SSE1-NEXT:    movb %al, 1(%rcx)
1896; CHECK-SSE1-NEXT:    movb %r15b, (%rcx)
1897; CHECK-SSE1-NEXT:    movq %rcx, %rax
1898; CHECK-SSE1-NEXT:    popq %rbx
1899; CHECK-SSE1-NEXT:    popq %r12
1900; CHECK-SSE1-NEXT:    popq %r13
1901; CHECK-SSE1-NEXT:    popq %r14
1902; CHECK-SSE1-NEXT:    popq %r15
1903; CHECK-SSE1-NEXT:    popq %rbp
1904; CHECK-SSE1-NEXT:    retq
1905;
1906; CHECK-SSE2-LABEL: out_v32i8:
1907; CHECK-SSE2:       # %bb.0:
1908; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
1909; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
1910; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
1911; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
1912; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
1913; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
1914; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
1915; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
1916; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
1917; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
1918; CHECK-SSE2-NEXT:    retq
1919;
1920; CHECK-XOP-LABEL: out_v32i8:
1921; CHECK-XOP:       # %bb.0:
1922; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
1923; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
1924; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
1925; CHECK-XOP-NEXT:    retq
1926  %x = load <32 x i8>, <32 x i8> *%px, align 32
1927  %y = load <32 x i8>, <32 x i8> *%py, align 32
1928  %mask = load <32 x i8>, <32 x i8> *%pmask, align 32
1929  %mx = and <32 x i8> %x, %mask
1930  %notmask = xor <32 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1931  %my = and <32 x i8> %y, %notmask
1932  %r = or <32 x i8> %mx, %my
1933  ret <32 x i8> %r
1934}
1935
1936define <16 x i16> @out_v16i16(<16 x i16> *%px, <16 x i16> *%py, <16 x i16> *%pmask) nounwind {
1937; CHECK-BASELINE-LABEL: out_v16i16:
1938; CHECK-BASELINE:       # %bb.0:
1939; CHECK-BASELINE-NEXT:    pushq %rbp
1940; CHECK-BASELINE-NEXT:    pushq %r15
1941; CHECK-BASELINE-NEXT:    pushq %r14
1942; CHECK-BASELINE-NEXT:    pushq %r13
1943; CHECK-BASELINE-NEXT:    pushq %r12
1944; CHECK-BASELINE-NEXT:    pushq %rbx
1945; CHECK-BASELINE-NEXT:    movq %rcx, %r9
1946; CHECK-BASELINE-NEXT:    movq %rdx, %r10
1947; CHECK-BASELINE-NEXT:    movq %rsi, %r8
1948; CHECK-BASELINE-NEXT:    movq %rdi, %r11
1949; CHECK-BASELINE-NEXT:    movl 12(%rcx), %eax
1950; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1951; CHECK-BASELINE-NEXT:    movzwl 14(%rcx), %edx
1952; CHECK-BASELINE-NEXT:    movl 16(%rcx), %esi
1953; CHECK-BASELINE-NEXT:    movzwl 18(%rcx), %edi
1954; CHECK-BASELINE-NEXT:    movl 20(%rcx), %ecx
1955; CHECK-BASELINE-NEXT:    movzwl 22(%r9), %r15d
1956; CHECK-BASELINE-NEXT:    movl 24(%r9), %r12d
1957; CHECK-BASELINE-NEXT:    movzwl 26(%r9), %r14d
1958; CHECK-BASELINE-NEXT:    movl 28(%r9), %ebx
1959; CHECK-BASELINE-NEXT:    movzwl 30(%r9), %ebp
1960; CHECK-BASELINE-NEXT:    movzwl 30(%r8), %r13d
1961; CHECK-BASELINE-NEXT:    andw %bp, %r13w
1962; CHECK-BASELINE-NEXT:    notl %ebp
1963; CHECK-BASELINE-NEXT:    andw 30(%r10), %bp
1964; CHECK-BASELINE-NEXT:    orl %r13d, %ebp
1965; CHECK-BASELINE-NEXT:    movzwl 28(%r8), %eax
1966; CHECK-BASELINE-NEXT:    andw %bx, %ax
1967; CHECK-BASELINE-NEXT:    notl %ebx
1968; CHECK-BASELINE-NEXT:    andw 28(%r10), %bx
1969; CHECK-BASELINE-NEXT:    orl %eax, %ebx
1970; CHECK-BASELINE-NEXT:    movzwl 26(%r8), %eax
1971; CHECK-BASELINE-NEXT:    andw %r14w, %ax
1972; CHECK-BASELINE-NEXT:    notl %r14d
1973; CHECK-BASELINE-NEXT:    andw 26(%r10), %r14w
1974; CHECK-BASELINE-NEXT:    orl %eax, %r14d
1975; CHECK-BASELINE-NEXT:    movzwl 24(%r8), %eax
1976; CHECK-BASELINE-NEXT:    andw %r12w, %ax
1977; CHECK-BASELINE-NEXT:    notl %r12d
1978; CHECK-BASELINE-NEXT:    andw 24(%r10), %r12w
1979; CHECK-BASELINE-NEXT:    orl %eax, %r12d
1980; CHECK-BASELINE-NEXT:    movzwl 22(%r8), %eax
1981; CHECK-BASELINE-NEXT:    andw %r15w, %ax
1982; CHECK-BASELINE-NEXT:    notl %r15d
1983; CHECK-BASELINE-NEXT:    andw 22(%r10), %r15w
1984; CHECK-BASELINE-NEXT:    orl %eax, %r15d
1985; CHECK-BASELINE-NEXT:    movzwl 20(%r8), %eax
1986; CHECK-BASELINE-NEXT:    andw %cx, %ax
1987; CHECK-BASELINE-NEXT:    notl %ecx
1988; CHECK-BASELINE-NEXT:    andw 20(%r10), %cx
1989; CHECK-BASELINE-NEXT:    orl %eax, %ecx
1990; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1991; CHECK-BASELINE-NEXT:    movzwl 18(%r8), %eax
1992; CHECK-BASELINE-NEXT:    andw %di, %ax
1993; CHECK-BASELINE-NEXT:    notl %edi
1994; CHECK-BASELINE-NEXT:    andw 18(%r10), %di
1995; CHECK-BASELINE-NEXT:    orl %eax, %edi
1996; CHECK-BASELINE-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1997; CHECK-BASELINE-NEXT:    movzwl 16(%r8), %eax
1998; CHECK-BASELINE-NEXT:    andw %si, %ax
1999; CHECK-BASELINE-NEXT:    notl %esi
2000; CHECK-BASELINE-NEXT:    andw 16(%r10), %si
2001; CHECK-BASELINE-NEXT:    orl %eax, %esi
2002; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2003; CHECK-BASELINE-NEXT:    movzwl 14(%r8), %eax
2004; CHECK-BASELINE-NEXT:    andw %dx, %ax
2005; CHECK-BASELINE-NEXT:    notl %edx
2006; CHECK-BASELINE-NEXT:    andw 14(%r10), %dx
2007; CHECK-BASELINE-NEXT:    orl %eax, %edx
2008; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2009; CHECK-BASELINE-NEXT:    movzwl 12(%r8), %eax
2010; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2011; CHECK-BASELINE-NEXT:    andw %cx, %ax
2012; CHECK-BASELINE-NEXT:    notl %ecx
2013; CHECK-BASELINE-NEXT:    andw 12(%r10), %cx
2014; CHECK-BASELINE-NEXT:    orl %eax, %ecx
2015; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2016; CHECK-BASELINE-NEXT:    movzwl 10(%r9), %r13d
2017; CHECK-BASELINE-NEXT:    movzwl 10(%r8), %eax
2018; CHECK-BASELINE-NEXT:    andw %r13w, %ax
2019; CHECK-BASELINE-NEXT:    notl %r13d
2020; CHECK-BASELINE-NEXT:    andw 10(%r10), %r13w
2021; CHECK-BASELINE-NEXT:    orl %eax, %r13d
2022; CHECK-BASELINE-NEXT:    movl 8(%r9), %edi
2023; CHECK-BASELINE-NEXT:    movzwl 8(%r8), %eax
2024; CHECK-BASELINE-NEXT:    andw %di, %ax
2025; CHECK-BASELINE-NEXT:    notl %edi
2026; CHECK-BASELINE-NEXT:    andw 8(%r10), %di
2027; CHECK-BASELINE-NEXT:    orl %eax, %edi
2028; CHECK-BASELINE-NEXT:    movzwl 6(%r9), %esi
2029; CHECK-BASELINE-NEXT:    movzwl 6(%r8), %eax
2030; CHECK-BASELINE-NEXT:    andw %si, %ax
2031; CHECK-BASELINE-NEXT:    notl %esi
2032; CHECK-BASELINE-NEXT:    andw 6(%r10), %si
2033; CHECK-BASELINE-NEXT:    orl %eax, %esi
2034; CHECK-BASELINE-NEXT:    movl 4(%r9), %edx
2035; CHECK-BASELINE-NEXT:    movzwl 4(%r8), %eax
2036; CHECK-BASELINE-NEXT:    andw %dx, %ax
2037; CHECK-BASELINE-NEXT:    notl %edx
2038; CHECK-BASELINE-NEXT:    andw 4(%r10), %dx
2039; CHECK-BASELINE-NEXT:    orl %eax, %edx
2040; CHECK-BASELINE-NEXT:    movzwl 2(%r9), %eax
2041; CHECK-BASELINE-NEXT:    movzwl 2(%r8), %ecx
2042; CHECK-BASELINE-NEXT:    andw %ax, %cx
2043; CHECK-BASELINE-NEXT:    notl %eax
2044; CHECK-BASELINE-NEXT:    andw 2(%r10), %ax
2045; CHECK-BASELINE-NEXT:    orl %ecx, %eax
2046; CHECK-BASELINE-NEXT:    movl (%r9), %r9d
2047; CHECK-BASELINE-NEXT:    movzwl (%r8), %ecx
2048; CHECK-BASELINE-NEXT:    andw %r9w, %cx
2049; CHECK-BASELINE-NEXT:    notl %r9d
2050; CHECK-BASELINE-NEXT:    andw (%r10), %r9w
2051; CHECK-BASELINE-NEXT:    orl %ecx, %r9d
2052; CHECK-BASELINE-NEXT:    movw %bp, 30(%r11)
2053; CHECK-BASELINE-NEXT:    movw %bx, 28(%r11)
2054; CHECK-BASELINE-NEXT:    movw %r14w, 26(%r11)
2055; CHECK-BASELINE-NEXT:    movw %r12w, 24(%r11)
2056; CHECK-BASELINE-NEXT:    movw %r15w, 22(%r11)
2057; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2058; CHECK-BASELINE-NEXT:    movw %cx, 20(%r11)
2059; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2060; CHECK-BASELINE-NEXT:    movw %cx, 18(%r11)
2061; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2062; CHECK-BASELINE-NEXT:    movw %cx, 16(%r11)
2063; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2064; CHECK-BASELINE-NEXT:    movw %cx, 14(%r11)
2065; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2066; CHECK-BASELINE-NEXT:    movw %cx, 12(%r11)
2067; CHECK-BASELINE-NEXT:    movw %r13w, 10(%r11)
2068; CHECK-BASELINE-NEXT:    movw %di, 8(%r11)
2069; CHECK-BASELINE-NEXT:    movw %si, 6(%r11)
2070; CHECK-BASELINE-NEXT:    movw %dx, 4(%r11)
2071; CHECK-BASELINE-NEXT:    movw %ax, 2(%r11)
2072; CHECK-BASELINE-NEXT:    movw %r9w, (%r11)
2073; CHECK-BASELINE-NEXT:    movq %r11, %rax
2074; CHECK-BASELINE-NEXT:    popq %rbx
2075; CHECK-BASELINE-NEXT:    popq %r12
2076; CHECK-BASELINE-NEXT:    popq %r13
2077; CHECK-BASELINE-NEXT:    popq %r14
2078; CHECK-BASELINE-NEXT:    popq %r15
2079; CHECK-BASELINE-NEXT:    popq %rbp
2080; CHECK-BASELINE-NEXT:    retq
2081;
2082; CHECK-SSE1-LABEL: out_v16i16:
2083; CHECK-SSE1:       # %bb.0:
2084; CHECK-SSE1-NEXT:    pushq %rbp
2085; CHECK-SSE1-NEXT:    pushq %r15
2086; CHECK-SSE1-NEXT:    pushq %r14
2087; CHECK-SSE1-NEXT:    pushq %r13
2088; CHECK-SSE1-NEXT:    pushq %r12
2089; CHECK-SSE1-NEXT:    pushq %rbx
2090; CHECK-SSE1-NEXT:    movq %rcx, %r9
2091; CHECK-SSE1-NEXT:    movq %rdx, %r10
2092; CHECK-SSE1-NEXT:    movq %rsi, %r8
2093; CHECK-SSE1-NEXT:    movq %rdi, %r11
2094; CHECK-SSE1-NEXT:    movl 12(%rcx), %eax
2095; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2096; CHECK-SSE1-NEXT:    movzwl 14(%rcx), %edx
2097; CHECK-SSE1-NEXT:    movl 16(%rcx), %esi
2098; CHECK-SSE1-NEXT:    movzwl 18(%rcx), %edi
2099; CHECK-SSE1-NEXT:    movl 20(%rcx), %ecx
2100; CHECK-SSE1-NEXT:    movzwl 22(%r9), %r15d
2101; CHECK-SSE1-NEXT:    movl 24(%r9), %r12d
2102; CHECK-SSE1-NEXT:    movzwl 26(%r9), %r14d
2103; CHECK-SSE1-NEXT:    movl 28(%r9), %ebx
2104; CHECK-SSE1-NEXT:    movzwl 30(%r9), %ebp
2105; CHECK-SSE1-NEXT:    movzwl 30(%r8), %r13d
2106; CHECK-SSE1-NEXT:    andw %bp, %r13w
2107; CHECK-SSE1-NEXT:    notl %ebp
2108; CHECK-SSE1-NEXT:    andw 30(%r10), %bp
2109; CHECK-SSE1-NEXT:    orl %r13d, %ebp
2110; CHECK-SSE1-NEXT:    movzwl 28(%r8), %eax
2111; CHECK-SSE1-NEXT:    andw %bx, %ax
2112; CHECK-SSE1-NEXT:    notl %ebx
2113; CHECK-SSE1-NEXT:    andw 28(%r10), %bx
2114; CHECK-SSE1-NEXT:    orl %eax, %ebx
2115; CHECK-SSE1-NEXT:    movzwl 26(%r8), %eax
2116; CHECK-SSE1-NEXT:    andw %r14w, %ax
2117; CHECK-SSE1-NEXT:    notl %r14d
2118; CHECK-SSE1-NEXT:    andw 26(%r10), %r14w
2119; CHECK-SSE1-NEXT:    orl %eax, %r14d
2120; CHECK-SSE1-NEXT:    movzwl 24(%r8), %eax
2121; CHECK-SSE1-NEXT:    andw %r12w, %ax
2122; CHECK-SSE1-NEXT:    notl %r12d
2123; CHECK-SSE1-NEXT:    andw 24(%r10), %r12w
2124; CHECK-SSE1-NEXT:    orl %eax, %r12d
2125; CHECK-SSE1-NEXT:    movzwl 22(%r8), %eax
2126; CHECK-SSE1-NEXT:    andw %r15w, %ax
2127; CHECK-SSE1-NEXT:    notl %r15d
2128; CHECK-SSE1-NEXT:    andw 22(%r10), %r15w
2129; CHECK-SSE1-NEXT:    orl %eax, %r15d
2130; CHECK-SSE1-NEXT:    movzwl 20(%r8), %eax
2131; CHECK-SSE1-NEXT:    andw %cx, %ax
2132; CHECK-SSE1-NEXT:    notl %ecx
2133; CHECK-SSE1-NEXT:    andw 20(%r10), %cx
2134; CHECK-SSE1-NEXT:    orl %eax, %ecx
2135; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2136; CHECK-SSE1-NEXT:    movzwl 18(%r8), %eax
2137; CHECK-SSE1-NEXT:    andw %di, %ax
2138; CHECK-SSE1-NEXT:    notl %edi
2139; CHECK-SSE1-NEXT:    andw 18(%r10), %di
2140; CHECK-SSE1-NEXT:    orl %eax, %edi
2141; CHECK-SSE1-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2142; CHECK-SSE1-NEXT:    movzwl 16(%r8), %eax
2143; CHECK-SSE1-NEXT:    andw %si, %ax
2144; CHECK-SSE1-NEXT:    notl %esi
2145; CHECK-SSE1-NEXT:    andw 16(%r10), %si
2146; CHECK-SSE1-NEXT:    orl %eax, %esi
2147; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2148; CHECK-SSE1-NEXT:    movzwl 14(%r8), %eax
2149; CHECK-SSE1-NEXT:    andw %dx, %ax
2150; CHECK-SSE1-NEXT:    notl %edx
2151; CHECK-SSE1-NEXT:    andw 14(%r10), %dx
2152; CHECK-SSE1-NEXT:    orl %eax, %edx
2153; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2154; CHECK-SSE1-NEXT:    movzwl 12(%r8), %eax
2155; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2156; CHECK-SSE1-NEXT:    andw %cx, %ax
2157; CHECK-SSE1-NEXT:    notl %ecx
2158; CHECK-SSE1-NEXT:    andw 12(%r10), %cx
2159; CHECK-SSE1-NEXT:    orl %eax, %ecx
2160; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2161; CHECK-SSE1-NEXT:    movzwl 10(%r9), %r13d
2162; CHECK-SSE1-NEXT:    movzwl 10(%r8), %eax
2163; CHECK-SSE1-NEXT:    andw %r13w, %ax
2164; CHECK-SSE1-NEXT:    notl %r13d
2165; CHECK-SSE1-NEXT:    andw 10(%r10), %r13w
2166; CHECK-SSE1-NEXT:    orl %eax, %r13d
2167; CHECK-SSE1-NEXT:    movl 8(%r9), %edi
2168; CHECK-SSE1-NEXT:    movzwl 8(%r8), %eax
2169; CHECK-SSE1-NEXT:    andw %di, %ax
2170; CHECK-SSE1-NEXT:    notl %edi
2171; CHECK-SSE1-NEXT:    andw 8(%r10), %di
2172; CHECK-SSE1-NEXT:    orl %eax, %edi
2173; CHECK-SSE1-NEXT:    movzwl 6(%r9), %esi
2174; CHECK-SSE1-NEXT:    movzwl 6(%r8), %eax
2175; CHECK-SSE1-NEXT:    andw %si, %ax
2176; CHECK-SSE1-NEXT:    notl %esi
2177; CHECK-SSE1-NEXT:    andw 6(%r10), %si
2178; CHECK-SSE1-NEXT:    orl %eax, %esi
2179; CHECK-SSE1-NEXT:    movl 4(%r9), %edx
2180; CHECK-SSE1-NEXT:    movzwl 4(%r8), %eax
2181; CHECK-SSE1-NEXT:    andw %dx, %ax
2182; CHECK-SSE1-NEXT:    notl %edx
2183; CHECK-SSE1-NEXT:    andw 4(%r10), %dx
2184; CHECK-SSE1-NEXT:    orl %eax, %edx
2185; CHECK-SSE1-NEXT:    movzwl 2(%r9), %eax
2186; CHECK-SSE1-NEXT:    movzwl 2(%r8), %ecx
2187; CHECK-SSE1-NEXT:    andw %ax, %cx
2188; CHECK-SSE1-NEXT:    notl %eax
2189; CHECK-SSE1-NEXT:    andw 2(%r10), %ax
2190; CHECK-SSE1-NEXT:    orl %ecx, %eax
2191; CHECK-SSE1-NEXT:    movl (%r9), %r9d
2192; CHECK-SSE1-NEXT:    movzwl (%r8), %ecx
2193; CHECK-SSE1-NEXT:    andw %r9w, %cx
2194; CHECK-SSE1-NEXT:    notl %r9d
2195; CHECK-SSE1-NEXT:    andw (%r10), %r9w
2196; CHECK-SSE1-NEXT:    orl %ecx, %r9d
2197; CHECK-SSE1-NEXT:    movw %bp, 30(%r11)
2198; CHECK-SSE1-NEXT:    movw %bx, 28(%r11)
2199; CHECK-SSE1-NEXT:    movw %r14w, 26(%r11)
2200; CHECK-SSE1-NEXT:    movw %r12w, 24(%r11)
2201; CHECK-SSE1-NEXT:    movw %r15w, 22(%r11)
2202; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2203; CHECK-SSE1-NEXT:    movw %cx, 20(%r11)
2204; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2205; CHECK-SSE1-NEXT:    movw %cx, 18(%r11)
2206; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2207; CHECK-SSE1-NEXT:    movw %cx, 16(%r11)
2208; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2209; CHECK-SSE1-NEXT:    movw %cx, 14(%r11)
2210; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
2211; CHECK-SSE1-NEXT:    movw %cx, 12(%r11)
2212; CHECK-SSE1-NEXT:    movw %r13w, 10(%r11)
2213; CHECK-SSE1-NEXT:    movw %di, 8(%r11)
2214; CHECK-SSE1-NEXT:    movw %si, 6(%r11)
2215; CHECK-SSE1-NEXT:    movw %dx, 4(%r11)
2216; CHECK-SSE1-NEXT:    movw %ax, 2(%r11)
2217; CHECK-SSE1-NEXT:    movw %r9w, (%r11)
2218; CHECK-SSE1-NEXT:    movq %r11, %rax
2219; CHECK-SSE1-NEXT:    popq %rbx
2220; CHECK-SSE1-NEXT:    popq %r12
2221; CHECK-SSE1-NEXT:    popq %r13
2222; CHECK-SSE1-NEXT:    popq %r14
2223; CHECK-SSE1-NEXT:    popq %r15
2224; CHECK-SSE1-NEXT:    popq %rbp
2225; CHECK-SSE1-NEXT:    retq
2226;
2227; CHECK-SSE2-LABEL: out_v16i16:
2228; CHECK-SSE2:       # %bb.0:
2229; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
2230; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
2231; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
2232; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
2233; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
2234; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
2235; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
2236; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
2237; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
2238; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
2239; CHECK-SSE2-NEXT:    retq
2240;
2241; CHECK-XOP-LABEL: out_v16i16:
2242; CHECK-XOP:       # %bb.0:
2243; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
2244; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
2245; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
2246; CHECK-XOP-NEXT:    retq
2247  %x = load <16 x i16>, <16 x i16> *%px, align 32
2248  %y = load <16 x i16>, <16 x i16> *%py, align 32
2249  %mask = load <16 x i16>, <16 x i16> *%pmask, align 32
2250  %mx = and <16 x i16> %x, %mask
2251  %notmask = xor <16 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
2252  %my = and <16 x i16> %y, %notmask
2253  %r = or <16 x i16> %mx, %my
2254  ret <16 x i16> %r
2255}
2256
; out_v8i32: "out" form of the masked merge, r = (x & mask) | (y & ~mask), on
; <8 x i32> values loaded from %px, %py and %pmask (each load is align 32).
; Expected lowering per RUN configuration, as recorded in the assertions below:
;   - no SSE, and SSE1 only: every lane is merged with scalar
;     andl/notl/andl/orl, and the eight results are stored through %rax,
;     which holds a copy of %rdi.
;   - SSE2: the vector is handled as two 128-bit halves, each using
;     andps/andnps/orps.
;   - XOP: a single 256-bit vpcmov.
2257define <8 x i32> @out_v8i32(<8 x i32> *%px, <8 x i32> *%py, <8 x i32> *%pmask) nounwind {
2258; CHECK-BASELINE-LABEL: out_v8i32:
2259; CHECK-BASELINE:       # %bb.0:
2260; CHECK-BASELINE-NEXT:    pushq %rbp
2261; CHECK-BASELINE-NEXT:    pushq %r15
2262; CHECK-BASELINE-NEXT:    pushq %r14
2263; CHECK-BASELINE-NEXT:    pushq %rbx
2264; CHECK-BASELINE-NEXT:    movq %rdi, %rax
2265; CHECK-BASELINE-NEXT:    movl 4(%rcx), %r8d
2266; CHECK-BASELINE-NEXT:    movl 8(%rcx), %r9d
2267; CHECK-BASELINE-NEXT:    movl 12(%rcx), %r10d
2268; CHECK-BASELINE-NEXT:    movl 16(%rcx), %r11d
2269; CHECK-BASELINE-NEXT:    movl 20(%rcx), %r15d
2270; CHECK-BASELINE-NEXT:    movl 24(%rcx), %ebx
2271; CHECK-BASELINE-NEXT:    movl 28(%rcx), %ebp
2272; CHECK-BASELINE-NEXT:    movl 28(%rsi), %r14d
2273; CHECK-BASELINE-NEXT:    andl %ebp, %r14d
2274; CHECK-BASELINE-NEXT:    notl %ebp
2275; CHECK-BASELINE-NEXT:    andl 28(%rdx), %ebp
2276; CHECK-BASELINE-NEXT:    orl %r14d, %ebp
2277; CHECK-BASELINE-NEXT:    movl 24(%rsi), %edi
2278; CHECK-BASELINE-NEXT:    andl %ebx, %edi
2279; CHECK-BASELINE-NEXT:    notl %ebx
2280; CHECK-BASELINE-NEXT:    andl 24(%rdx), %ebx
2281; CHECK-BASELINE-NEXT:    orl %edi, %ebx
2282; CHECK-BASELINE-NEXT:    movl 20(%rsi), %edi
2283; CHECK-BASELINE-NEXT:    andl %r15d, %edi
2284; CHECK-BASELINE-NEXT:    notl %r15d
2285; CHECK-BASELINE-NEXT:    andl 20(%rdx), %r15d
2286; CHECK-BASELINE-NEXT:    orl %edi, %r15d
2287; CHECK-BASELINE-NEXT:    movl 16(%rsi), %edi
2288; CHECK-BASELINE-NEXT:    andl %r11d, %edi
2289; CHECK-BASELINE-NEXT:    notl %r11d
2290; CHECK-BASELINE-NEXT:    andl 16(%rdx), %r11d
2291; CHECK-BASELINE-NEXT:    orl %edi, %r11d
2292; CHECK-BASELINE-NEXT:    movl 12(%rsi), %edi
2293; CHECK-BASELINE-NEXT:    andl %r10d, %edi
2294; CHECK-BASELINE-NEXT:    notl %r10d
2295; CHECK-BASELINE-NEXT:    andl 12(%rdx), %r10d
2296; CHECK-BASELINE-NEXT:    orl %edi, %r10d
2297; CHECK-BASELINE-NEXT:    movl 8(%rsi), %edi
2298; CHECK-BASELINE-NEXT:    andl %r9d, %edi
2299; CHECK-BASELINE-NEXT:    notl %r9d
2300; CHECK-BASELINE-NEXT:    andl 8(%rdx), %r9d
2301; CHECK-BASELINE-NEXT:    orl %edi, %r9d
2302; CHECK-BASELINE-NEXT:    movl 4(%rsi), %edi
2303; CHECK-BASELINE-NEXT:    andl %r8d, %edi
2304; CHECK-BASELINE-NEXT:    notl %r8d
2305; CHECK-BASELINE-NEXT:    andl 4(%rdx), %r8d
2306; CHECK-BASELINE-NEXT:    orl %edi, %r8d
2307; CHECK-BASELINE-NEXT:    movl (%rcx), %ecx
2308; CHECK-BASELINE-NEXT:    movl (%rsi), %esi
2309; CHECK-BASELINE-NEXT:    andl %ecx, %esi
2310; CHECK-BASELINE-NEXT:    notl %ecx
2311; CHECK-BASELINE-NEXT:    andl (%rdx), %ecx
2312; CHECK-BASELINE-NEXT:    orl %esi, %ecx
2313; CHECK-BASELINE-NEXT:    movl %ebp, 28(%rax)
2314; CHECK-BASELINE-NEXT:    movl %ebx, 24(%rax)
2315; CHECK-BASELINE-NEXT:    movl %r15d, 20(%rax)
2316; CHECK-BASELINE-NEXT:    movl %r11d, 16(%rax)
2317; CHECK-BASELINE-NEXT:    movl %r10d, 12(%rax)
2318; CHECK-BASELINE-NEXT:    movl %r9d, 8(%rax)
2319; CHECK-BASELINE-NEXT:    movl %r8d, 4(%rax)
2320; CHECK-BASELINE-NEXT:    movl %ecx, (%rax)
2321; CHECK-BASELINE-NEXT:    popq %rbx
2322; CHECK-BASELINE-NEXT:    popq %r14
2323; CHECK-BASELINE-NEXT:    popq %r15
2324; CHECK-BASELINE-NEXT:    popq %rbp
2325; CHECK-BASELINE-NEXT:    retq
2326;
2327; CHECK-SSE1-LABEL: out_v8i32:
2328; CHECK-SSE1:       # %bb.0:
2329; CHECK-SSE1-NEXT:    pushq %rbp
2330; CHECK-SSE1-NEXT:    pushq %r15
2331; CHECK-SSE1-NEXT:    pushq %r14
2332; CHECK-SSE1-NEXT:    pushq %rbx
2333; CHECK-SSE1-NEXT:    movq %rdi, %rax
2334; CHECK-SSE1-NEXT:    movl 4(%rcx), %r8d
2335; CHECK-SSE1-NEXT:    movl 8(%rcx), %r9d
2336; CHECK-SSE1-NEXT:    movl 12(%rcx), %r10d
2337; CHECK-SSE1-NEXT:    movl 16(%rcx), %r11d
2338; CHECK-SSE1-NEXT:    movl 20(%rcx), %r15d
2339; CHECK-SSE1-NEXT:    movl 24(%rcx), %ebx
2340; CHECK-SSE1-NEXT:    movl 28(%rcx), %ebp
2341; CHECK-SSE1-NEXT:    movl 28(%rsi), %r14d
2342; CHECK-SSE1-NEXT:    andl %ebp, %r14d
2343; CHECK-SSE1-NEXT:    notl %ebp
2344; CHECK-SSE1-NEXT:    andl 28(%rdx), %ebp
2345; CHECK-SSE1-NEXT:    orl %r14d, %ebp
2346; CHECK-SSE1-NEXT:    movl 24(%rsi), %edi
2347; CHECK-SSE1-NEXT:    andl %ebx, %edi
2348; CHECK-SSE1-NEXT:    notl %ebx
2349; CHECK-SSE1-NEXT:    andl 24(%rdx), %ebx
2350; CHECK-SSE1-NEXT:    orl %edi, %ebx
2351; CHECK-SSE1-NEXT:    movl 20(%rsi), %edi
2352; CHECK-SSE1-NEXT:    andl %r15d, %edi
2353; CHECK-SSE1-NEXT:    notl %r15d
2354; CHECK-SSE1-NEXT:    andl 20(%rdx), %r15d
2355; CHECK-SSE1-NEXT:    orl %edi, %r15d
2356; CHECK-SSE1-NEXT:    movl 16(%rsi), %edi
2357; CHECK-SSE1-NEXT:    andl %r11d, %edi
2358; CHECK-SSE1-NEXT:    notl %r11d
2359; CHECK-SSE1-NEXT:    andl 16(%rdx), %r11d
2360; CHECK-SSE1-NEXT:    orl %edi, %r11d
2361; CHECK-SSE1-NEXT:    movl 12(%rsi), %edi
2362; CHECK-SSE1-NEXT:    andl %r10d, %edi
2363; CHECK-SSE1-NEXT:    notl %r10d
2364; CHECK-SSE1-NEXT:    andl 12(%rdx), %r10d
2365; CHECK-SSE1-NEXT:    orl %edi, %r10d
2366; CHECK-SSE1-NEXT:    movl 8(%rsi), %edi
2367; CHECK-SSE1-NEXT:    andl %r9d, %edi
2368; CHECK-SSE1-NEXT:    notl %r9d
2369; CHECK-SSE1-NEXT:    andl 8(%rdx), %r9d
2370; CHECK-SSE1-NEXT:    orl %edi, %r9d
2371; CHECK-SSE1-NEXT:    movl 4(%rsi), %edi
2372; CHECK-SSE1-NEXT:    andl %r8d, %edi
2373; CHECK-SSE1-NEXT:    notl %r8d
2374; CHECK-SSE1-NEXT:    andl 4(%rdx), %r8d
2375; CHECK-SSE1-NEXT:    orl %edi, %r8d
2376; CHECK-SSE1-NEXT:    movl (%rcx), %ecx
2377; CHECK-SSE1-NEXT:    movl (%rsi), %esi
2378; CHECK-SSE1-NEXT:    andl %ecx, %esi
2379; CHECK-SSE1-NEXT:    notl %ecx
2380; CHECK-SSE1-NEXT:    andl (%rdx), %ecx
2381; CHECK-SSE1-NEXT:    orl %esi, %ecx
2382; CHECK-SSE1-NEXT:    movl %ebp, 28(%rax)
2383; CHECK-SSE1-NEXT:    movl %ebx, 24(%rax)
2384; CHECK-SSE1-NEXT:    movl %r15d, 20(%rax)
2385; CHECK-SSE1-NEXT:    movl %r11d, 16(%rax)
2386; CHECK-SSE1-NEXT:    movl %r10d, 12(%rax)
2387; CHECK-SSE1-NEXT:    movl %r9d, 8(%rax)
2388; CHECK-SSE1-NEXT:    movl %r8d, 4(%rax)
2389; CHECK-SSE1-NEXT:    movl %ecx, (%rax)
2390; CHECK-SSE1-NEXT:    popq %rbx
2391; CHECK-SSE1-NEXT:    popq %r14
2392; CHECK-SSE1-NEXT:    popq %r15
2393; CHECK-SSE1-NEXT:    popq %rbp
2394; CHECK-SSE1-NEXT:    retq
2395;
2396; CHECK-SSE2-LABEL: out_v8i32:
2397; CHECK-SSE2:       # %bb.0:
2398; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
2399; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
2400; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
2401; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
2402; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
2403; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
2404; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
2405; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
2406; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
2407; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
2408; CHECK-SSE2-NEXT:    retq
2409;
2410; CHECK-XOP-LABEL: out_v8i32:
2411; CHECK-XOP:       # %bb.0:
2412; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
2413; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
2414; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
2415; CHECK-XOP-NEXT:    retq
; IR under test: bits set in %mask select from %x, bits clear select from %y.
2416  %x = load <8 x i32>, <8 x i32> *%px, align 32
2417  %y = load <8 x i32>, <8 x i32> *%py, align 32
2418  %mask = load <8 x i32>, <8 x i32> *%pmask, align 32
2419  %mx = and <8 x i32> %x, %mask
2420  %notmask = xor <8 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
2421  %my = and <8 x i32> %y, %notmask
2422  %r = or <8 x i32> %mx, %my
2423  ret <8 x i32> %r
2424}
2425
; out_v4i64: "out" form of the masked merge, r = (x & mask) | (y & ~mask), on
; <4 x i64> values loaded from %px, %py and %pmask (each load is align 32).
; Expected lowering per RUN configuration, as recorded in the assertions below:
;   - no SSE, and SSE1 only: scalar 64-bit code; all four "x & mask" values
;     are formed first, then the four masks are inverted with notq and
;     combined with y via andq/orq. Results are stored through %rax, which
;     holds a copy of %rdi.
;   - SSE2: the vector is handled as two 128-bit halves, each using
;     andps/andnps/orps.
;   - XOP: a single 256-bit vpcmov.
2426define <4 x i64> @out_v4i64(<4 x i64> *%px, <4 x i64> *%py, <4 x i64> *%pmask) nounwind {
2427; CHECK-BASELINE-LABEL: out_v4i64:
2428; CHECK-BASELINE:       # %bb.0:
2429; CHECK-BASELINE-NEXT:    pushq %rbx
2430; CHECK-BASELINE-NEXT:    movq %rdi, %rax
2431; CHECK-BASELINE-NEXT:    movq (%rcx), %r8
2432; CHECK-BASELINE-NEXT:    movq 8(%rcx), %r9
2433; CHECK-BASELINE-NEXT:    movq 16(%rcx), %rdi
2434; CHECK-BASELINE-NEXT:    movq 24(%rcx), %rcx
2435; CHECK-BASELINE-NEXT:    movq 24(%rsi), %r10
2436; CHECK-BASELINE-NEXT:    andq %rcx, %r10
2437; CHECK-BASELINE-NEXT:    movq 16(%rsi), %r11
2438; CHECK-BASELINE-NEXT:    andq %rdi, %r11
2439; CHECK-BASELINE-NEXT:    movq 8(%rsi), %rbx
2440; CHECK-BASELINE-NEXT:    andq %r9, %rbx
2441; CHECK-BASELINE-NEXT:    movq (%rsi), %rsi
2442; CHECK-BASELINE-NEXT:    andq %r8, %rsi
2443; CHECK-BASELINE-NEXT:    notq %r8
2444; CHECK-BASELINE-NEXT:    notq %r9
2445; CHECK-BASELINE-NEXT:    notq %rdi
2446; CHECK-BASELINE-NEXT:    notq %rcx
2447; CHECK-BASELINE-NEXT:    andq 24(%rdx), %rcx
2448; CHECK-BASELINE-NEXT:    orq %r10, %rcx
2449; CHECK-BASELINE-NEXT:    andq 16(%rdx), %rdi
2450; CHECK-BASELINE-NEXT:    orq %r11, %rdi
2451; CHECK-BASELINE-NEXT:    andq 8(%rdx), %r9
2452; CHECK-BASELINE-NEXT:    orq %rbx, %r9
2453; CHECK-BASELINE-NEXT:    andq (%rdx), %r8
2454; CHECK-BASELINE-NEXT:    orq %rsi, %r8
2455; CHECK-BASELINE-NEXT:    movq %rcx, 24(%rax)
2456; CHECK-BASELINE-NEXT:    movq %rdi, 16(%rax)
2457; CHECK-BASELINE-NEXT:    movq %r9, 8(%rax)
2458; CHECK-BASELINE-NEXT:    movq %r8, (%rax)
2459; CHECK-BASELINE-NEXT:    popq %rbx
2460; CHECK-BASELINE-NEXT:    retq
2461;
2462; CHECK-SSE1-LABEL: out_v4i64:
2463; CHECK-SSE1:       # %bb.0:
2464; CHECK-SSE1-NEXT:    pushq %rbx
2465; CHECK-SSE1-NEXT:    movq %rdi, %rax
2466; CHECK-SSE1-NEXT:    movq (%rcx), %r8
2467; CHECK-SSE1-NEXT:    movq 8(%rcx), %r9
2468; CHECK-SSE1-NEXT:    movq 16(%rcx), %rdi
2469; CHECK-SSE1-NEXT:    movq 24(%rcx), %rcx
2470; CHECK-SSE1-NEXT:    movq 24(%rsi), %r10
2471; CHECK-SSE1-NEXT:    andq %rcx, %r10
2472; CHECK-SSE1-NEXT:    movq 16(%rsi), %r11
2473; CHECK-SSE1-NEXT:    andq %rdi, %r11
2474; CHECK-SSE1-NEXT:    movq 8(%rsi), %rbx
2475; CHECK-SSE1-NEXT:    andq %r9, %rbx
2476; CHECK-SSE1-NEXT:    movq (%rsi), %rsi
2477; CHECK-SSE1-NEXT:    andq %r8, %rsi
2478; CHECK-SSE1-NEXT:    notq %r8
2479; CHECK-SSE1-NEXT:    notq %r9
2480; CHECK-SSE1-NEXT:    notq %rdi
2481; CHECK-SSE1-NEXT:    notq %rcx
2482; CHECK-SSE1-NEXT:    andq 24(%rdx), %rcx
2483; CHECK-SSE1-NEXT:    orq %r10, %rcx
2484; CHECK-SSE1-NEXT:    andq 16(%rdx), %rdi
2485; CHECK-SSE1-NEXT:    orq %r11, %rdi
2486; CHECK-SSE1-NEXT:    andq 8(%rdx), %r9
2487; CHECK-SSE1-NEXT:    orq %rbx, %r9
2488; CHECK-SSE1-NEXT:    andq (%rdx), %r8
2489; CHECK-SSE1-NEXT:    orq %rsi, %r8
2490; CHECK-SSE1-NEXT:    movq %rcx, 24(%rax)
2491; CHECK-SSE1-NEXT:    movq %rdi, 16(%rax)
2492; CHECK-SSE1-NEXT:    movq %r9, 8(%rax)
2493; CHECK-SSE1-NEXT:    movq %r8, (%rax)
2494; CHECK-SSE1-NEXT:    popq %rbx
2495; CHECK-SSE1-NEXT:    retq
2496;
2497; CHECK-SSE2-LABEL: out_v4i64:
2498; CHECK-SSE2:       # %bb.0:
2499; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
2500; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
2501; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
2502; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
2503; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
2504; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
2505; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
2506; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
2507; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
2508; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
2509; CHECK-SSE2-NEXT:    retq
2510;
2511; CHECK-XOP-LABEL: out_v4i64:
2512; CHECK-XOP:       # %bb.0:
2513; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
2514; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
2515; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
2516; CHECK-XOP-NEXT:    retq
; IR under test: bits set in %mask select from %x, bits clear select from %y.
2517  %x = load <4 x i64>, <4 x i64> *%px, align 32
2518  %y = load <4 x i64>, <4 x i64> *%py, align 32
2519  %mask = load <4 x i64>, <4 x i64> *%pmask, align 32
2520  %mx = and <4 x i64> %x, %mask
2521  %notmask = xor <4 x i64> %mask, <i64 -1, i64 -1, i64 -1, i64 -1>
2522  %my = and <4 x i64> %y, %notmask
2523  %r = or <4 x i64> %mx, %my
2524  ret <4 x i64> %r
2525}
2526
2527;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2528; Should be the same as the previous section: ((x ^ y) & mask) ^ y == (x & mask) | (y & ~mask).
2529;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2530
2531; ============================================================================ ;
2532; 8-bit vector width
2533; ============================================================================ ;
2534
; in_v1i8: "in" form of the masked merge, r = ((x ^ y) & mask) ^ y, on a
; single-element i8 vector passed by value.
; All four RUN configurations share one set of assertions: the expected code
; is a 32-bit xorl/andl/xorl sequence with the result returned in %al.
2535define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
2536; CHECK-LABEL: in_v1i8:
2537; CHECK:       # %bb.0:
2538; CHECK-NEXT:    movl %edi, %eax
2539; CHECK-NEXT:    xorl %esi, %eax
2540; CHECK-NEXT:    andl %edx, %eax
2541; CHECK-NEXT:    xorl %esi, %eax
2542; CHECK-NEXT:    # kill: def $al killed $al killed $eax
2543; CHECK-NEXT:    retq
; IR under test: xor with %y, mask the difference, then xor %y back in.
2544  %n0 = xor <1 x i8> %x, %y
2545  %n1 = and <1 x i8> %n0, %mask
2546  %r = xor <1 x i8> %n1, %y
2547  ret <1 x i8> %r
2548}
2549
2550; ============================================================================ ;
2551; 16-bit vector width
2552; ============================================================================ ;
2553
; in_v2i8: "in" form of the masked merge, r = ((x ^ y) & mask) ^ y, on
; <2 x i8> values passed by value.
; Expected lowering per RUN configuration, as recorded in the assertions below:
;   - no SSE, and SSE1 only: each lane keeps the xorl/andl/xorl shape; one
;     lane ends up in %al and the other is moved from %esi into %edx.
;   - SSE2: andps/andnps/orps on whole xmm registers.
;   - XOP: a single vpcmov.
2554define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
2555; CHECK-BASELINE-LABEL: in_v2i8:
2556; CHECK-BASELINE:       # %bb.0:
2557; CHECK-BASELINE-NEXT:    movl %edi, %eax
2558; CHECK-BASELINE-NEXT:    xorl %edx, %eax
2559; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
2560; CHECK-BASELINE-NEXT:    andl %r9d, %esi
2561; CHECK-BASELINE-NEXT:    andl %r8d, %eax
2562; CHECK-BASELINE-NEXT:    xorl %edx, %eax
2563; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
2564; CHECK-BASELINE-NEXT:    # kill: def $al killed $al killed $eax
2565; CHECK-BASELINE-NEXT:    movl %esi, %edx
2566; CHECK-BASELINE-NEXT:    retq
2567;
2568; CHECK-SSE1-LABEL: in_v2i8:
2569; CHECK-SSE1:       # %bb.0:
2570; CHECK-SSE1-NEXT:    movl %edi, %eax
2571; CHECK-SSE1-NEXT:    xorl %edx, %eax
2572; CHECK-SSE1-NEXT:    xorl %ecx, %esi
2573; CHECK-SSE1-NEXT:    andl %r9d, %esi
2574; CHECK-SSE1-NEXT:    andl %r8d, %eax
2575; CHECK-SSE1-NEXT:    xorl %edx, %eax
2576; CHECK-SSE1-NEXT:    xorl %ecx, %esi
2577; CHECK-SSE1-NEXT:    # kill: def $al killed $al killed $eax
2578; CHECK-SSE1-NEXT:    movl %esi, %edx
2579; CHECK-SSE1-NEXT:    retq
2580;
2581; CHECK-SSE2-LABEL: in_v2i8:
2582; CHECK-SSE2:       # %bb.0:
2583; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
2584; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
2585; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
2586; CHECK-SSE2-NEXT:    retq
2587;
2588; CHECK-XOP-LABEL: in_v2i8:
2589; CHECK-XOP:       # %bb.0:
2590; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
2591; CHECK-XOP-NEXT:    retq
; IR under test: xor with %y, mask the difference, then xor %y back in.
2592  %n0 = xor <2 x i8> %x, %y
2593  %n1 = and <2 x i8> %n0, %mask
2594  %r = xor <2 x i8> %n1, %y
2595  ret <2 x i8> %r
2596}
2597
; in_v1i16: "in" form of the masked merge, r = ((x ^ y) & mask) ^ y, on a
; single-element i16 vector passed by value.
; All four RUN configurations share one set of assertions: the expected code
; is a 32-bit xorl/andl/xorl sequence with the result returned in %ax.
2598define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
2599; CHECK-LABEL: in_v1i16:
2600; CHECK:       # %bb.0:
2601; CHECK-NEXT:    movl %edi, %eax
2602; CHECK-NEXT:    xorl %esi, %eax
2603; CHECK-NEXT:    andl %edx, %eax
2604; CHECK-NEXT:    xorl %esi, %eax
2605; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
2606; CHECK-NEXT:    retq
; IR under test: xor with %y, mask the difference, then xor %y back in.
2607  %n0 = xor <1 x i16> %x, %y
2608  %n1 = and <1 x i16> %n0, %mask
2609  %r = xor <1 x i16> %n1, %y
2610  ret <1 x i16> %r
2611}
2612
2613; ============================================================================ ;
2614; 32-bit vector width
2615; ============================================================================ ;
2616
; in_v4i8: "in" form of the masked merge, r = ((x ^ y) & mask) ^ y, on
; <4 x i8> values passed by value.
; Expected lowering per RUN configuration, as recorded in the assertions below:
;   - no SSE, and SSE1 only: each lane keeps the xor/and/xor shape using byte
;     operations, with several operands read from the stack; the four result
;     bytes are stored through %rax, which holds a copy of %rdi.
;   - SSE2: andps/andnps/orps on whole xmm registers.
;   - XOP: a single vpcmov.
2617define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
2618; CHECK-BASELINE-LABEL: in_v4i8:
2619; CHECK-BASELINE:       # %bb.0:
2620; CHECK-BASELINE-NEXT:    movq %rdi, %rax
2621; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
2622; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
2623; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
2624; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
2625; CHECK-BASELINE-NEXT:    xorb %r11b, %dl
2626; CHECK-BASELINE-NEXT:    xorb %r10b, %cl
2627; CHECK-BASELINE-NEXT:    xorb %dil, %r8b
2628; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
2629; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
2630; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
2631; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
2632; CHECK-BASELINE-NEXT:    xorb %r9b, %sil
2633; CHECK-BASELINE-NEXT:    xorb %r11b, %dl
2634; CHECK-BASELINE-NEXT:    xorb %r10b, %cl
2635; CHECK-BASELINE-NEXT:    xorb %dil, %r8b
2636; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rax)
2637; CHECK-BASELINE-NEXT:    movb %cl, 2(%rax)
2638; CHECK-BASELINE-NEXT:    movb %dl, 1(%rax)
2639; CHECK-BASELINE-NEXT:    movb %sil, (%rax)
2640; CHECK-BASELINE-NEXT:    retq
2641;
2642; CHECK-SSE1-LABEL: in_v4i8:
2643; CHECK-SSE1:       # %bb.0:
2644; CHECK-SSE1-NEXT:    movq %rdi, %rax
2645; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
2646; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
2647; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
2648; CHECK-SSE1-NEXT:    xorl %r9d, %esi
2649; CHECK-SSE1-NEXT:    xorb %r11b, %dl
2650; CHECK-SSE1-NEXT:    xorb %r10b, %cl
2651; CHECK-SSE1-NEXT:    xorb %dil, %r8b
2652; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
2653; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
2654; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
2655; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
2656; CHECK-SSE1-NEXT:    xorb %r9b, %sil
2657; CHECK-SSE1-NEXT:    xorb %r11b, %dl
2658; CHECK-SSE1-NEXT:    xorb %r10b, %cl
2659; CHECK-SSE1-NEXT:    xorb %dil, %r8b
2660; CHECK-SSE1-NEXT:    movb %r8b, 3(%rax)
2661; CHECK-SSE1-NEXT:    movb %cl, 2(%rax)
2662; CHECK-SSE1-NEXT:    movb %dl, 1(%rax)
2663; CHECK-SSE1-NEXT:    movb %sil, (%rax)
2664; CHECK-SSE1-NEXT:    retq
2665;
2666; CHECK-SSE2-LABEL: in_v4i8:
2667; CHECK-SSE2:       # %bb.0:
2668; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
2669; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
2670; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
2671; CHECK-SSE2-NEXT:    retq
2672;
2673; CHECK-XOP-LABEL: in_v4i8:
2674; CHECK-XOP:       # %bb.0:
2675; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
2676; CHECK-XOP-NEXT:    retq
; IR under test: xor with %y, mask the difference, then xor %y back in.
2677  %n0 = xor <4 x i8> %x, %y
2678  %n1 = and <4 x i8> %n0, %mask
2679  %r = xor <4 x i8> %n1, %y
2680  ret <4 x i8> %r
2681}
2682
; in_v2i16: "in" form of the masked merge, r = ((x ^ y) & mask) ^ y, on
; <2 x i16> values passed by value.
; Expected lowering per RUN configuration, as recorded in the assertions below:
;   - no SSE, and SSE1 only: each lane keeps the xorl/andl/xorl shape; one
;     lane ends up in %ax and the other is moved from %esi into %edx.
;   - SSE2: andps/andnps/orps on whole xmm registers.
;   - XOP: a single vpcmov.
2683define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
2684; CHECK-BASELINE-LABEL: in_v2i16:
2685; CHECK-BASELINE:       # %bb.0:
2686; CHECK-BASELINE-NEXT:    movl %edi, %eax
2687; CHECK-BASELINE-NEXT:    xorl %edx, %eax
2688; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
2689; CHECK-BASELINE-NEXT:    andl %r9d, %esi
2690; CHECK-BASELINE-NEXT:    andl %r8d, %eax
2691; CHECK-BASELINE-NEXT:    xorl %edx, %eax
2692; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
2693; CHECK-BASELINE-NEXT:    # kill: def $ax killed $ax killed $eax
2694; CHECK-BASELINE-NEXT:    movl %esi, %edx
2695; CHECK-BASELINE-NEXT:    retq
2696;
2697; CHECK-SSE1-LABEL: in_v2i16:
2698; CHECK-SSE1:       # %bb.0:
2699; CHECK-SSE1-NEXT:    movl %edi, %eax
2700; CHECK-SSE1-NEXT:    xorl %edx, %eax
2701; CHECK-SSE1-NEXT:    xorl %ecx, %esi
2702; CHECK-SSE1-NEXT:    andl %r9d, %esi
2703; CHECK-SSE1-NEXT:    andl %r8d, %eax
2704; CHECK-SSE1-NEXT:    xorl %edx, %eax
2705; CHECK-SSE1-NEXT:    xorl %ecx, %esi
2706; CHECK-SSE1-NEXT:    # kill: def $ax killed $ax killed $eax
2707; CHECK-SSE1-NEXT:    movl %esi, %edx
2708; CHECK-SSE1-NEXT:    retq
2709;
2710; CHECK-SSE2-LABEL: in_v2i16:
2711; CHECK-SSE2:       # %bb.0:
2712; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
2713; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
2714; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
2715; CHECK-SSE2-NEXT:    retq
2716;
2717; CHECK-XOP-LABEL: in_v2i16:
2718; CHECK-XOP:       # %bb.0:
2719; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
2720; CHECK-XOP-NEXT:    retq
; IR under test: xor with %y, mask the difference, then xor %y back in.
2721  %n0 = xor <2 x i16> %x, %y
2722  %n1 = and <2 x i16> %n0, %mask
2723  %r = xor <2 x i16> %n1, %y
2724  ret <2 x i16> %r
2725}
2726
; in_v1i32: "in" form of the masked merge, r = ((x ^ y) & mask) ^ y, on a
; single-element i32 vector passed by value.
; All four RUN configurations share one set of assertions: the expected code
; is a 32-bit xorl/andl/xorl sequence with the result returned in %eax.
2727define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
2728; CHECK-LABEL: in_v1i32:
2729; CHECK:       # %bb.0:
2730; CHECK-NEXT:    movl %edi, %eax
2731; CHECK-NEXT:    xorl %esi, %eax
2732; CHECK-NEXT:    andl %edx, %eax
2733; CHECK-NEXT:    xorl %esi, %eax
2734; CHECK-NEXT:    retq
; IR under test: xor with %y, mask the difference, then xor %y back in.
2735  %n0 = xor <1 x i32> %x, %y
2736  %n1 = and <1 x i32> %n0, %mask
2737  %r = xor <1 x i32> %n1, %y
2738  ret <1 x i32> %r
2739}
2740
2741; ============================================================================ ;
2742; 64-bit vector width
2743; ============================================================================ ;
2744
; in_v8i8: "in" form of the masked merge, r = ((x ^ y) & mask) ^ y, on
; <8 x i8> values passed by value.
; Expected lowering per RUN configuration, as recorded in the assertions below:
;   - no SSE, and SSE1 only: each lane keeps the xorb/andb/xorb shape, with
;     many operands read from the stack and six registers pushed/popped
;     around the body; the eight result bytes are stored through %rdi, which
;     is then copied to %rax.
;   - SSE2: andps/andnps/orps on whole xmm registers.
;   - XOP: a single vpcmov.
2745define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
2746; CHECK-BASELINE-LABEL: in_v8i8:
2747; CHECK-BASELINE:       # %bb.0:
2748; CHECK-BASELINE-NEXT:    pushq %rbp
2749; CHECK-BASELINE-NEXT:    pushq %r15
2750; CHECK-BASELINE-NEXT:    pushq %r14
2751; CHECK-BASELINE-NEXT:    pushq %r13
2752; CHECK-BASELINE-NEXT:    pushq %r12
2753; CHECK-BASELINE-NEXT:    pushq %rbx
2754; CHECK-BASELINE-NEXT:    movl %ecx, %r10d
2755; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
2756; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
2757; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
2758; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
2759; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
2760; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
2761; CHECK-BASELINE-NEXT:    xorb %r13b, %sil
2762; CHECK-BASELINE-NEXT:    xorb %r12b, %dl
2763; CHECK-BASELINE-NEXT:    xorb %r15b, %r10b
2764; CHECK-BASELINE-NEXT:    xorb %r14b, %r8b
2765; CHECK-BASELINE-NEXT:    xorb %bpl, %r9b
2766; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
2767; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %bl
2768; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
2769; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %cl
2770; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
2771; CHECK-BASELINE-NEXT:    xorb %r11b, %al
2772; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
2773; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
2774; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
2775; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
2776; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
2777; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
2778; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
2779; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
2780; CHECK-BASELINE-NEXT:    xorb %r13b, %sil
2781; CHECK-BASELINE-NEXT:    xorb %r12b, %dl
2782; CHECK-BASELINE-NEXT:    xorb %r15b, %r10b
2783; CHECK-BASELINE-NEXT:    xorb %r14b, %r8b
2784; CHECK-BASELINE-NEXT:    xorb %bpl, %r9b
2785; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %bl
2786; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %cl
2787; CHECK-BASELINE-NEXT:    xorb %r11b, %al
2788; CHECK-BASELINE-NEXT:    movb %al, 7(%rdi)
2789; CHECK-BASELINE-NEXT:    movb %cl, 6(%rdi)
2790; CHECK-BASELINE-NEXT:    movb %bl, 5(%rdi)
2791; CHECK-BASELINE-NEXT:    movb %r9b, 4(%rdi)
2792; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rdi)
2793; CHECK-BASELINE-NEXT:    movb %r10b, 2(%rdi)
2794; CHECK-BASELINE-NEXT:    movb %dl, 1(%rdi)
2795; CHECK-BASELINE-NEXT:    movb %sil, (%rdi)
2796; CHECK-BASELINE-NEXT:    movq %rdi, %rax
2797; CHECK-BASELINE-NEXT:    popq %rbx
2798; CHECK-BASELINE-NEXT:    popq %r12
2799; CHECK-BASELINE-NEXT:    popq %r13
2800; CHECK-BASELINE-NEXT:    popq %r14
2801; CHECK-BASELINE-NEXT:    popq %r15
2802; CHECK-BASELINE-NEXT:    popq %rbp
2803; CHECK-BASELINE-NEXT:    retq
2804;
2805; CHECK-SSE1-LABEL: in_v8i8:
2806; CHECK-SSE1:       # %bb.0:
2807; CHECK-SSE1-NEXT:    pushq %rbp
2808; CHECK-SSE1-NEXT:    pushq %r15
2809; CHECK-SSE1-NEXT:    pushq %r14
2810; CHECK-SSE1-NEXT:    pushq %r13
2811; CHECK-SSE1-NEXT:    pushq %r12
2812; CHECK-SSE1-NEXT:    pushq %rbx
2813; CHECK-SSE1-NEXT:    movl %ecx, %r10d
2814; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
2815; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
2816; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
2817; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
2818; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
2819; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
2820; CHECK-SSE1-NEXT:    xorb %r13b, %sil
2821; CHECK-SSE1-NEXT:    xorb %r12b, %dl
2822; CHECK-SSE1-NEXT:    xorb %r15b, %r10b
2823; CHECK-SSE1-NEXT:    xorb %r14b, %r8b
2824; CHECK-SSE1-NEXT:    xorb %bpl, %r9b
2825; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
2826; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %bl
2827; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
2828; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %cl
2829; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
2830; CHECK-SSE1-NEXT:    xorb %r11b, %al
2831; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
2832; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
2833; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
2834; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
2835; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
2836; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
2837; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
2838; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
2839; CHECK-SSE1-NEXT:    xorb %r13b, %sil
2840; CHECK-SSE1-NEXT:    xorb %r12b, %dl
2841; CHECK-SSE1-NEXT:    xorb %r15b, %r10b
2842; CHECK-SSE1-NEXT:    xorb %r14b, %r8b
2843; CHECK-SSE1-NEXT:    xorb %bpl, %r9b
2844; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %bl
2845; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %cl
2846; CHECK-SSE1-NEXT:    xorb %r11b, %al
2847; CHECK-SSE1-NEXT:    movb %al, 7(%rdi)
2848; CHECK-SSE1-NEXT:    movb %cl, 6(%rdi)
2849; CHECK-SSE1-NEXT:    movb %bl, 5(%rdi)
2850; CHECK-SSE1-NEXT:    movb %r9b, 4(%rdi)
2851; CHECK-SSE1-NEXT:    movb %r8b, 3(%rdi)
2852; CHECK-SSE1-NEXT:    movb %r10b, 2(%rdi)
2853; CHECK-SSE1-NEXT:    movb %dl, 1(%rdi)
2854; CHECK-SSE1-NEXT:    movb %sil, (%rdi)
2855; CHECK-SSE1-NEXT:    movq %rdi, %rax
2856; CHECK-SSE1-NEXT:    popq %rbx
2857; CHECK-SSE1-NEXT:    popq %r12
2858; CHECK-SSE1-NEXT:    popq %r13
2859; CHECK-SSE1-NEXT:    popq %r14
2860; CHECK-SSE1-NEXT:    popq %r15
2861; CHECK-SSE1-NEXT:    popq %rbp
2862; CHECK-SSE1-NEXT:    retq
2863;
2864; CHECK-SSE2-LABEL: in_v8i8:
2865; CHECK-SSE2:       # %bb.0:
2866; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
2867; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
2868; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
2869; CHECK-SSE2-NEXT:    retq
2870;
2871; CHECK-XOP-LABEL: in_v8i8:
2872; CHECK-XOP:       # %bb.0:
2873; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
2874; CHECK-XOP-NEXT:    retq
; IR under test: xor with %y, mask the difference, then xor %y back in.
2875  %n0 = xor <8 x i8> %x, %y
2876  %n1 = and <8 x i8> %n0, %mask
2877  %r = xor <8 x i8> %n1, %y
2878  ret <8 x i8> %r
2879}
2880
; in_v4i16: masked merge written in the "in" (xor/and/xor) form on <4 x i16>:
;   r = ((x ^ y) & mask) ^ y
; Per bit this yields x where the mask bit is 1 and y where it is 0.
; Expected lowering, as recorded by the autogenerated assertions below:
;   * no-SSE and SSE1-only configurations: the pattern stays as four scalar
;     xor/and/xor chains; the four 16-bit results are stored through the
;     pointer received in %rdi, which is also returned in %rax.
;   * SSE2 configuration: folded into andps / andnps / orps on xmm registers.
;   * XOP configuration: folded into a single vpcmov.
2881define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
2882; CHECK-BASELINE-LABEL: in_v4i16:
2883; CHECK-BASELINE:       # %bb.0:
2884; CHECK-BASELINE-NEXT:    movq %rdi, %rax
2885; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
2886; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
2887; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %edi
2888; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
2889; CHECK-BASELINE-NEXT:    xorl %edi, %edx
2890; CHECK-BASELINE-NEXT:    xorl %r11d, %ecx
2891; CHECK-BASELINE-NEXT:    xorl %r10d, %r8d
2892; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
2893; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
2894; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %dx
2895; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %si
2896; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
2897; CHECK-BASELINE-NEXT:    xorl %edi, %edx
2898; CHECK-BASELINE-NEXT:    xorl %r11d, %ecx
2899; CHECK-BASELINE-NEXT:    xorl %r10d, %r8d
2900; CHECK-BASELINE-NEXT:    movw %r8w, 6(%rax)
2901; CHECK-BASELINE-NEXT:    movw %cx, 4(%rax)
2902; CHECK-BASELINE-NEXT:    movw %dx, 2(%rax)
2903; CHECK-BASELINE-NEXT:    movw %si, (%rax)
2904; CHECK-BASELINE-NEXT:    retq
2905;
2906; CHECK-SSE1-LABEL: in_v4i16:
2907; CHECK-SSE1:       # %bb.0:
2908; CHECK-SSE1-NEXT:    movq %rdi, %rax
2909; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
2910; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
2911; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %edi
2912; CHECK-SSE1-NEXT:    xorl %r9d, %esi
2913; CHECK-SSE1-NEXT:    xorl %edi, %edx
2914; CHECK-SSE1-NEXT:    xorl %r11d, %ecx
2915; CHECK-SSE1-NEXT:    xorl %r10d, %r8d
2916; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
2917; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
2918; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %dx
2919; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %si
2920; CHECK-SSE1-NEXT:    xorl %r9d, %esi
2921; CHECK-SSE1-NEXT:    xorl %edi, %edx
2922; CHECK-SSE1-NEXT:    xorl %r11d, %ecx
2923; CHECK-SSE1-NEXT:    xorl %r10d, %r8d
2924; CHECK-SSE1-NEXT:    movw %r8w, 6(%rax)
2925; CHECK-SSE1-NEXT:    movw %cx, 4(%rax)
2926; CHECK-SSE1-NEXT:    movw %dx, 2(%rax)
2927; CHECK-SSE1-NEXT:    movw %si, (%rax)
2928; CHECK-SSE1-NEXT:    retq
2929;
2930; CHECK-SSE2-LABEL: in_v4i16:
2931; CHECK-SSE2:       # %bb.0:
2932; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
2933; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
2934; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
2935; CHECK-SSE2-NEXT:    retq
2936;
2937; CHECK-XOP-LABEL: in_v4i16:
2938; CHECK-XOP:       # %bb.0:
2939; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
2940; CHECK-XOP-NEXT:    retq
2941  %n0 = xor <4 x i16> %x, %y
2942  %n1 = and <4 x i16> %n0, %mask
2943  %r = xor <4 x i16> %n1, %y
2944  ret <4 x i16> %r
2945}
2946
; in_v2i32: masked merge written in the "in" (xor/and/xor) form on <2 x i32>:
;   r = ((x ^ y) & mask) ^ y
; Per bit this yields x where the mask bit is 1 and y where it is 0.
; Expected lowering, as recorded by the autogenerated assertions below:
;   * no-SSE and SSE1-only configurations: two scalar 32-bit xor/and/xor
;     chains, with the two result elements returned in %eax and %edx.
;   * SSE2 configuration: folded into andps / andnps / orps on xmm registers.
;   * XOP configuration: folded into a single vpcmov.
2947define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
2948; CHECK-BASELINE-LABEL: in_v2i32:
2949; CHECK-BASELINE:       # %bb.0:
2950; CHECK-BASELINE-NEXT:    movl %edi, %eax
2951; CHECK-BASELINE-NEXT:    xorl %edx, %eax
2952; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
2953; CHECK-BASELINE-NEXT:    andl %r9d, %esi
2954; CHECK-BASELINE-NEXT:    andl %r8d, %eax
2955; CHECK-BASELINE-NEXT:    xorl %edx, %eax
2956; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
2957; CHECK-BASELINE-NEXT:    movl %esi, %edx
2958; CHECK-BASELINE-NEXT:    retq
2959;
2960; CHECK-SSE1-LABEL: in_v2i32:
2961; CHECK-SSE1:       # %bb.0:
2962; CHECK-SSE1-NEXT:    movl %edi, %eax
2963; CHECK-SSE1-NEXT:    xorl %edx, %eax
2964; CHECK-SSE1-NEXT:    xorl %ecx, %esi
2965; CHECK-SSE1-NEXT:    andl %r9d, %esi
2966; CHECK-SSE1-NEXT:    andl %r8d, %eax
2967; CHECK-SSE1-NEXT:    xorl %edx, %eax
2968; CHECK-SSE1-NEXT:    xorl %ecx, %esi
2969; CHECK-SSE1-NEXT:    movl %esi, %edx
2970; CHECK-SSE1-NEXT:    retq
2971;
2972; CHECK-SSE2-LABEL: in_v2i32:
2973; CHECK-SSE2:       # %bb.0:
2974; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
2975; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
2976; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
2977; CHECK-SSE2-NEXT:    retq
2978;
2979; CHECK-XOP-LABEL: in_v2i32:
2980; CHECK-XOP:       # %bb.0:
2981; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
2982; CHECK-XOP-NEXT:    retq
2983  %n0 = xor <2 x i32> %x, %y
2984  %n1 = and <2 x i32> %n0, %mask
2985  %r = xor <2 x i32> %n1, %y
2986  ret <2 x i32> %r
2987}
2988
; in_v1i64: masked merge written in the "in" (xor/and/xor) form on <1 x i64>:
;   r = ((x ^ y) & mask) ^ y
; Per bit this yields x where the mask bit is 1 and y where it is 0.
; The assertions below use the prefix shared by every configuration, i.e. the
; same scalar 64-bit xorq / andq / xorq sequence is expected from all of them,
; with the result returned in %rax.
2989define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
2990; CHECK-LABEL: in_v1i64:
2991; CHECK:       # %bb.0:
2992; CHECK-NEXT:    movq %rdi, %rax
2993; CHECK-NEXT:    xorq %rsi, %rax
2994; CHECK-NEXT:    andq %rdx, %rax
2995; CHECK-NEXT:    xorq %rsi, %rax
2996; CHECK-NEXT:    retq
2997  %n0 = xor <1 x i64> %x, %y
2998  %n1 = and <1 x i64> %n0, %mask
2999  %r = xor <1 x i64> %n1, %y
3000  ret <1 x i64> %r
3001}
3002
3003; ============================================================================ ;
3004; 128-bit vector width
3005; ============================================================================ ;
3006
; in_v16i8: masked merge written in the "in" (xor/and/xor) form on <16 x i8>:
;   r = ((x ^ y) & mask) ^ y
; Per bit this yields x where the mask bit is 1 and y where it is 0.
; Expected lowering, as recorded by the autogenerated assertions below:
;   * no-SSE and SSE1-only configurations: fully scalarized into sixteen
;     byte-wide xor/and/xor chains. The sequence saves six callee-saved
;     registers, spills three incoming argument registers (%ecx, %edx, %esi)
;     and reloads them later, writes the sixteen result bytes through the
;     pointer received in %rdi (kept in %rdx), and returns that pointer in %rax.
;   * SSE2 configuration: folded into andps / andnps / orps on xmm registers.
;   * XOP configuration: folded into a single vpcmov.
3007define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
3008; CHECK-BASELINE-LABEL: in_v16i8:
3009; CHECK-BASELINE:       # %bb.0:
3010; CHECK-BASELINE-NEXT:    pushq %rbp
3011; CHECK-BASELINE-NEXT:    pushq %r15
3012; CHECK-BASELINE-NEXT:    pushq %r14
3013; CHECK-BASELINE-NEXT:    pushq %r13
3014; CHECK-BASELINE-NEXT:    pushq %r12
3015; CHECK-BASELINE-NEXT:    pushq %rbx
3016; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
3017; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
3018; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
3019; CHECK-BASELINE-NEXT:    movq %rdi, %rdx
3020; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
3021; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
3022; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
3023; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
3024; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
3025; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
3026; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
3027; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
3028; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
3029; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
3030; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
3031; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
3032; CHECK-BASELINE-NEXT:    xorb %dil, %r9b
3033; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
3034; CHECK-BASELINE-NEXT:    xorb %dil, %r9b
3035; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
3036; CHECK-BASELINE-NEXT:    xorb %r10b, %dil
3037; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dil
3038; CHECK-BASELINE-NEXT:    xorb %r10b, %dil
3039; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
3040; CHECK-BASELINE-NEXT:    xorb %r11b, %r10b
3041; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
3042; CHECK-BASELINE-NEXT:    xorb %r11b, %r10b
3043; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
3044; CHECK-BASELINE-NEXT:    xorb %bl, %r11b
3045; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
3046; CHECK-BASELINE-NEXT:    xorb %bl, %r11b
3047; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
3048; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
3049; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
3050; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
3051; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
3052; CHECK-BASELINE-NEXT:    xorb %r12b, %r13b
3053; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
3054; CHECK-BASELINE-NEXT:    xorb %r12b, %r13b
3055; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
3056; CHECK-BASELINE-NEXT:    xorb %r15b, %r12b
3057; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
3058; CHECK-BASELINE-NEXT:    xorb %r15b, %r12b
3059; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
3060; CHECK-BASELINE-NEXT:    xorb %r14b, %r15b
3061; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
3062; CHECK-BASELINE-NEXT:    xorb %r14b, %r15b
3063; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
3064; CHECK-BASELINE-NEXT:    xorb %bpl, %r14b
3065; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
3066; CHECK-BASELINE-NEXT:    xorb %bpl, %r14b
3067; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
3068; CHECK-BASELINE-NEXT:    xorb %al, %bpl
3069; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
3070; CHECK-BASELINE-NEXT:    xorb %al, %bpl
3071; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
3072; CHECK-BASELINE-NEXT:    xorb %cl, %al
3073; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
3074; CHECK-BASELINE-NEXT:    xorb %cl, %al
3075; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
3076; CHECK-BASELINE-NEXT:    xorb %sil, %cl
3077; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
3078; CHECK-BASELINE-NEXT:    xorb %sil, %cl
3079; CHECK-BASELINE-NEXT:    movb %cl, 15(%rdx)
3080; CHECK-BASELINE-NEXT:    movb %al, 14(%rdx)
3081; CHECK-BASELINE-NEXT:    movb %bpl, 13(%rdx)
3082; CHECK-BASELINE-NEXT:    movb %r14b, 12(%rdx)
3083; CHECK-BASELINE-NEXT:    movb %r15b, 11(%rdx)
3084; CHECK-BASELINE-NEXT:    movb %r12b, 10(%rdx)
3085; CHECK-BASELINE-NEXT:    movb %r13b, 9(%rdx)
3086; CHECK-BASELINE-NEXT:    movb %bl, 8(%rdx)
3087; CHECK-BASELINE-NEXT:    movb %r11b, 7(%rdx)
3088; CHECK-BASELINE-NEXT:    movb %r10b, 6(%rdx)
3089; CHECK-BASELINE-NEXT:    movb %dil, 5(%rdx)
3090; CHECK-BASELINE-NEXT:    movb %r9b, 4(%rdx)
3091; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
3092; CHECK-BASELINE-NEXT:    xorb %al, %r8b
3093; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
3094; CHECK-BASELINE-NEXT:    xorb %al, %r8b
3095; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rdx)
3096; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
3097; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
3098; CHECK-BASELINE-NEXT:    xorb %al, %cl
3099; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
3100; CHECK-BASELINE-NEXT:    xorb %al, %cl
3101; CHECK-BASELINE-NEXT:    movb %cl, 2(%rdx)
3102; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
3103; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
3104; CHECK-BASELINE-NEXT:    xorb %al, %cl
3105; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
3106; CHECK-BASELINE-NEXT:    xorb %al, %cl
3107; CHECK-BASELINE-NEXT:    movb %cl, 1(%rdx)
3108; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
3109; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
3110; CHECK-BASELINE-NEXT:    xorb %al, %cl
3111; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
3112; CHECK-BASELINE-NEXT:    xorb %al, %cl
3113; CHECK-BASELINE-NEXT:    movb %cl, (%rdx)
3114; CHECK-BASELINE-NEXT:    movq %rdx, %rax
3115; CHECK-BASELINE-NEXT:    popq %rbx
3116; CHECK-BASELINE-NEXT:    popq %r12
3117; CHECK-BASELINE-NEXT:    popq %r13
3118; CHECK-BASELINE-NEXT:    popq %r14
3119; CHECK-BASELINE-NEXT:    popq %r15
3120; CHECK-BASELINE-NEXT:    popq %rbp
3121; CHECK-BASELINE-NEXT:    retq
3122;
3123; CHECK-SSE1-LABEL: in_v16i8:
3124; CHECK-SSE1:       # %bb.0:
3125; CHECK-SSE1-NEXT:    pushq %rbp
3126; CHECK-SSE1-NEXT:    pushq %r15
3127; CHECK-SSE1-NEXT:    pushq %r14
3128; CHECK-SSE1-NEXT:    pushq %r13
3129; CHECK-SSE1-NEXT:    pushq %r12
3130; CHECK-SSE1-NEXT:    pushq %rbx
3131; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
3132; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
3133; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
3134; CHECK-SSE1-NEXT:    movq %rdi, %rdx
3135; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
3136; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
3137; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
3138; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
3139; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
3140; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
3141; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
3142; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
3143; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
3144; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
3145; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
3146; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
3147; CHECK-SSE1-NEXT:    xorb %dil, %r9b
3148; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
3149; CHECK-SSE1-NEXT:    xorb %dil, %r9b
3150; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
3151; CHECK-SSE1-NEXT:    xorb %r10b, %dil
3152; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dil
3153; CHECK-SSE1-NEXT:    xorb %r10b, %dil
3154; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
3155; CHECK-SSE1-NEXT:    xorb %r11b, %r10b
3156; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
3157; CHECK-SSE1-NEXT:    xorb %r11b, %r10b
3158; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
3159; CHECK-SSE1-NEXT:    xorb %bl, %r11b
3160; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
3161; CHECK-SSE1-NEXT:    xorb %bl, %r11b
3162; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
3163; CHECK-SSE1-NEXT:    xorb %r13b, %bl
3164; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
3165; CHECK-SSE1-NEXT:    xorb %r13b, %bl
3166; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
3167; CHECK-SSE1-NEXT:    xorb %r12b, %r13b
3168; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
3169; CHECK-SSE1-NEXT:    xorb %r12b, %r13b
3170; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
3171; CHECK-SSE1-NEXT:    xorb %r15b, %r12b
3172; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
3173; CHECK-SSE1-NEXT:    xorb %r15b, %r12b
3174; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
3175; CHECK-SSE1-NEXT:    xorb %r14b, %r15b
3176; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
3177; CHECK-SSE1-NEXT:    xorb %r14b, %r15b
3178; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
3179; CHECK-SSE1-NEXT:    xorb %bpl, %r14b
3180; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
3181; CHECK-SSE1-NEXT:    xorb %bpl, %r14b
3182; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
3183; CHECK-SSE1-NEXT:    xorb %al, %bpl
3184; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
3185; CHECK-SSE1-NEXT:    xorb %al, %bpl
3186; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
3187; CHECK-SSE1-NEXT:    xorb %cl, %al
3188; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
3189; CHECK-SSE1-NEXT:    xorb %cl, %al
3190; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
3191; CHECK-SSE1-NEXT:    xorb %sil, %cl
3192; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
3193; CHECK-SSE1-NEXT:    xorb %sil, %cl
3194; CHECK-SSE1-NEXT:    movb %cl, 15(%rdx)
3195; CHECK-SSE1-NEXT:    movb %al, 14(%rdx)
3196; CHECK-SSE1-NEXT:    movb %bpl, 13(%rdx)
3197; CHECK-SSE1-NEXT:    movb %r14b, 12(%rdx)
3198; CHECK-SSE1-NEXT:    movb %r15b, 11(%rdx)
3199; CHECK-SSE1-NEXT:    movb %r12b, 10(%rdx)
3200; CHECK-SSE1-NEXT:    movb %r13b, 9(%rdx)
3201; CHECK-SSE1-NEXT:    movb %bl, 8(%rdx)
3202; CHECK-SSE1-NEXT:    movb %r11b, 7(%rdx)
3203; CHECK-SSE1-NEXT:    movb %r10b, 6(%rdx)
3204; CHECK-SSE1-NEXT:    movb %dil, 5(%rdx)
3205; CHECK-SSE1-NEXT:    movb %r9b, 4(%rdx)
3206; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
3207; CHECK-SSE1-NEXT:    xorb %al, %r8b
3208; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
3209; CHECK-SSE1-NEXT:    xorb %al, %r8b
3210; CHECK-SSE1-NEXT:    movb %r8b, 3(%rdx)
3211; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
3212; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
3213; CHECK-SSE1-NEXT:    xorb %al, %cl
3214; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
3215; CHECK-SSE1-NEXT:    xorb %al, %cl
3216; CHECK-SSE1-NEXT:    movb %cl, 2(%rdx)
3217; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
3218; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
3219; CHECK-SSE1-NEXT:    xorb %al, %cl
3220; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
3221; CHECK-SSE1-NEXT:    xorb %al, %cl
3222; CHECK-SSE1-NEXT:    movb %cl, 1(%rdx)
3223; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
3224; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
3225; CHECK-SSE1-NEXT:    xorb %al, %cl
3226; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
3227; CHECK-SSE1-NEXT:    xorb %al, %cl
3228; CHECK-SSE1-NEXT:    movb %cl, (%rdx)
3229; CHECK-SSE1-NEXT:    movq %rdx, %rax
3230; CHECK-SSE1-NEXT:    popq %rbx
3231; CHECK-SSE1-NEXT:    popq %r12
3232; CHECK-SSE1-NEXT:    popq %r13
3233; CHECK-SSE1-NEXT:    popq %r14
3234; CHECK-SSE1-NEXT:    popq %r15
3235; CHECK-SSE1-NEXT:    popq %rbp
3236; CHECK-SSE1-NEXT:    retq
3237;
3238; CHECK-SSE2-LABEL: in_v16i8:
3239; CHECK-SSE2:       # %bb.0:
3240; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
3241; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
3242; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
3243; CHECK-SSE2-NEXT:    retq
3244;
3245; CHECK-XOP-LABEL: in_v16i8:
3246; CHECK-XOP:       # %bb.0:
3247; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
3248; CHECK-XOP-NEXT:    retq
3249  %n0 = xor <16 x i8> %x, %y
3250  %n1 = and <16 x i8> %n0, %mask
3251  %r = xor <16 x i8> %n1, %y
3252  ret <16 x i8> %r
3253}
3254
; in_v8i16: masked merge written in the "in" (xor/and/xor) form on <8 x i16>:
;   r = ((x ^ y) & mask) ^ y
; Per bit this yields x where the mask bit is 1 and y where it is 0.
; Expected lowering, as recorded by the autogenerated assertions below:
;   * no-SSE and SSE1-only configurations: scalarized into eight 16-bit
;     xor/and/xor chains. The sequence saves %rbp and %rbx, writes the eight
;     results through the pointer received in %rdi, and returns it in %rax.
;   * SSE2 configuration: folded into andps / andnps / orps on xmm registers.
;   * XOP configuration: folded into a single vpcmov.
3255define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
3256; CHECK-BASELINE-LABEL: in_v8i16:
3257; CHECK-BASELINE:       # %bb.0:
3258; CHECK-BASELINE-NEXT:    pushq %rbp
3259; CHECK-BASELINE-NEXT:    pushq %rbx
3260; CHECK-BASELINE-NEXT:    movq %rdi, %rax
3261; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
3262; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
3263; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %edi
3264; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3265; CHECK-BASELINE-NEXT:    xorl %ebx, %esi
3266; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %si
3267; CHECK-BASELINE-NEXT:    xorl %ebx, %esi
3268; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3269; CHECK-BASELINE-NEXT:    xorl %ebx, %edx
3270; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %dx
3271; CHECK-BASELINE-NEXT:    xorl %ebx, %edx
3272; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3273; CHECK-BASELINE-NEXT:    xorl %ebx, %ecx
3274; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
3275; CHECK-BASELINE-NEXT:    xorl %ebx, %ecx
3276; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3277; CHECK-BASELINE-NEXT:    xorl %ebx, %r8d
3278; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
3279; CHECK-BASELINE-NEXT:    xorl %ebx, %r8d
3280; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3281; CHECK-BASELINE-NEXT:    xorl %ebx, %r9d
3282; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r9w
3283; CHECK-BASELINE-NEXT:    xorl %ebx, %r9d
3284; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebp
3285; CHECK-BASELINE-NEXT:    xorw %di, %bp
3286; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bp
3287; CHECK-BASELINE-NEXT:    xorl %edi, %ebp
3288; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %edi
3289; CHECK-BASELINE-NEXT:    xorw %r11w, %di
3290; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %di
3291; CHECK-BASELINE-NEXT:    xorl %r11d, %edi
3292; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
3293; CHECK-BASELINE-NEXT:    xorw %r10w, %bx
3294; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bx
3295; CHECK-BASELINE-NEXT:    xorl %r10d, %ebx
3296; CHECK-BASELINE-NEXT:    movw %bx, 14(%rax)
3297; CHECK-BASELINE-NEXT:    movw %di, 12(%rax)
3298; CHECK-BASELINE-NEXT:    movw %bp, 10(%rax)
3299; CHECK-BASELINE-NEXT:    movw %r9w, 8(%rax)
3300; CHECK-BASELINE-NEXT:    movw %r8w, 6(%rax)
3301; CHECK-BASELINE-NEXT:    movw %cx, 4(%rax)
3302; CHECK-BASELINE-NEXT:    movw %dx, 2(%rax)
3303; CHECK-BASELINE-NEXT:    movw %si, (%rax)
3304; CHECK-BASELINE-NEXT:    popq %rbx
3305; CHECK-BASELINE-NEXT:    popq %rbp
3306; CHECK-BASELINE-NEXT:    retq
3307;
3308; CHECK-SSE1-LABEL: in_v8i16:
3309; CHECK-SSE1:       # %bb.0:
3310; CHECK-SSE1-NEXT:    pushq %rbp
3311; CHECK-SSE1-NEXT:    pushq %rbx
3312; CHECK-SSE1-NEXT:    movq %rdi, %rax
3313; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
3314; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
3315; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %edi
3316; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3317; CHECK-SSE1-NEXT:    xorl %ebx, %esi
3318; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %si
3319; CHECK-SSE1-NEXT:    xorl %ebx, %esi
3320; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3321; CHECK-SSE1-NEXT:    xorl %ebx, %edx
3322; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %dx
3323; CHECK-SSE1-NEXT:    xorl %ebx, %edx
3324; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3325; CHECK-SSE1-NEXT:    xorl %ebx, %ecx
3326; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
3327; CHECK-SSE1-NEXT:    xorl %ebx, %ecx
3328; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3329; CHECK-SSE1-NEXT:    xorl %ebx, %r8d
3330; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
3331; CHECK-SSE1-NEXT:    xorl %ebx, %r8d
3332; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
3333; CHECK-SSE1-NEXT:    xorl %ebx, %r9d
3334; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r9w
3335; CHECK-SSE1-NEXT:    xorl %ebx, %r9d
3336; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebp
3337; CHECK-SSE1-NEXT:    xorw %di, %bp
3338; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bp
3339; CHECK-SSE1-NEXT:    xorl %edi, %ebp
3340; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %edi
3341; CHECK-SSE1-NEXT:    xorw %r11w, %di
3342; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %di
3343; CHECK-SSE1-NEXT:    xorl %r11d, %edi
3344; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
3345; CHECK-SSE1-NEXT:    xorw %r10w, %bx
3346; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bx
3347; CHECK-SSE1-NEXT:    xorl %r10d, %ebx
3348; CHECK-SSE1-NEXT:    movw %bx, 14(%rax)
3349; CHECK-SSE1-NEXT:    movw %di, 12(%rax)
3350; CHECK-SSE1-NEXT:    movw %bp, 10(%rax)
3351; CHECK-SSE1-NEXT:    movw %r9w, 8(%rax)
3352; CHECK-SSE1-NEXT:    movw %r8w, 6(%rax)
3353; CHECK-SSE1-NEXT:    movw %cx, 4(%rax)
3354; CHECK-SSE1-NEXT:    movw %dx, 2(%rax)
3355; CHECK-SSE1-NEXT:    movw %si, (%rax)
3356; CHECK-SSE1-NEXT:    popq %rbx
3357; CHECK-SSE1-NEXT:    popq %rbp
3358; CHECK-SSE1-NEXT:    retq
3359;
3360; CHECK-SSE2-LABEL: in_v8i16:
3361; CHECK-SSE2:       # %bb.0:
3362; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
3363; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
3364; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
3365; CHECK-SSE2-NEXT:    retq
3366;
3367; CHECK-XOP-LABEL: in_v8i16:
3368; CHECK-XOP:       # %bb.0:
3369; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
3370; CHECK-XOP-NEXT:    retq
3371  %n0 = xor <8 x i16> %x, %y
3372  %n1 = and <8 x i16> %n0, %mask
3373  %r = xor <8 x i16> %n1, %y
3374  ret <8 x i16> %r
3375}
3376
; in_v4i32: masked merge written in the "in" (xor/and/xor) form on <4 x i32>:
;   r = ((x ^ y) & mask) ^ y
; Per bit this yields x where the mask bit is 1 and y where it is 0.
; Unlike the other 128-bit tests in this group, the three operands are passed
; by pointer and loaded with 16-byte alignment inside the function.
; Expected lowering, as recorded by the autogenerated assertions below:
;   * no-SSE configuration: four scalar 32-bit xor/and/xor chains operating on
;     memory, with the results stored through the pointer received in %rdi.
;   * SSE1-only configuration: andps / andnps / orps with memory operands; the
;     result is stored with movaps through %rdi, which is returned in %rax.
;   * SSE2 configuration: the same three logic ops, result returned in %xmm0.
;   * XOP configuration: a single vpcmov taking y directly from memory.
3377define <4 x i32> @in_v4i32(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind {
3378; CHECK-BASELINE-LABEL: in_v4i32:
3379; CHECK-BASELINE:       # %bb.0:
3380; CHECK-BASELINE-NEXT:    pushq %rbx
3381; CHECK-BASELINE-NEXT:    movq %rdi, %rax
3382; CHECK-BASELINE-NEXT:    movl 12(%rdx), %r8d
3383; CHECK-BASELINE-NEXT:    movl 8(%rdx), %r9d
3384; CHECK-BASELINE-NEXT:    movl (%rdx), %r11d
3385; CHECK-BASELINE-NEXT:    movl 4(%rdx), %r10d
3386; CHECK-BASELINE-NEXT:    movl (%rsi), %edx
3387; CHECK-BASELINE-NEXT:    xorl %r11d, %edx
3388; CHECK-BASELINE-NEXT:    movl 4(%rsi), %edi
3389; CHECK-BASELINE-NEXT:    xorl %r10d, %edi
3390; CHECK-BASELINE-NEXT:    movl 8(%rsi), %ebx
3391; CHECK-BASELINE-NEXT:    xorl %r9d, %ebx
3392; CHECK-BASELINE-NEXT:    movl 12(%rsi), %esi
3393; CHECK-BASELINE-NEXT:    xorl %r8d, %esi
3394; CHECK-BASELINE-NEXT:    andl 12(%rcx), %esi
3395; CHECK-BASELINE-NEXT:    andl 8(%rcx), %ebx
3396; CHECK-BASELINE-NEXT:    andl 4(%rcx), %edi
3397; CHECK-BASELINE-NEXT:    andl (%rcx), %edx
3398; CHECK-BASELINE-NEXT:    xorl %r11d, %edx
3399; CHECK-BASELINE-NEXT:    xorl %r10d, %edi
3400; CHECK-BASELINE-NEXT:    xorl %r9d, %ebx
3401; CHECK-BASELINE-NEXT:    xorl %r8d, %esi
3402; CHECK-BASELINE-NEXT:    movl %esi, 12(%rax)
3403; CHECK-BASELINE-NEXT:    movl %ebx, 8(%rax)
3404; CHECK-BASELINE-NEXT:    movl %edi, 4(%rax)
3405; CHECK-BASELINE-NEXT:    movl %edx, (%rax)
3406; CHECK-BASELINE-NEXT:    popq %rbx
3407; CHECK-BASELINE-NEXT:    retq
3408;
3409; CHECK-SSE1-LABEL: in_v4i32:
3410; CHECK-SSE1:       # %bb.0:
3411; CHECK-SSE1-NEXT:    movq %rdi, %rax
3412; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
3413; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
3414; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
3415; CHECK-SSE1-NEXT:    andps (%rsi), %xmm0
3416; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
3417; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
3418; CHECK-SSE1-NEXT:    retq
3419;
3420; CHECK-SSE2-LABEL: in_v4i32:
3421; CHECK-SSE2:       # %bb.0:
3422; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
3423; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
3424; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm1
3425; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
3426; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
3427; CHECK-SSE2-NEXT:    retq
3428;
3429; CHECK-XOP-LABEL: in_v4i32:
3430; CHECK-XOP:       # %bb.0:
3431; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
3432; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
3433; CHECK-XOP-NEXT:    vpcmov %xmm1, (%rsi), %xmm0, %xmm0
3434; CHECK-XOP-NEXT:    retq
3435  %x = load <4 x i32>, <4 x i32> *%px, align 16
3436  %y = load <4 x i32>, <4 x i32> *%py, align 16
3437  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
3438  %n0 = xor <4 x i32> %x, %y
3439  %n1 = and <4 x i32> %n0, %mask
3440  %r = xor <4 x i32> %n1, %y
3441  ret <4 x i32> %r
3442}
3443
; in_v2i64: masked merge written in the "in" (xor/and/xor) form on <2 x i64>:
;   r = ((x ^ y) & mask) ^ y
; Per bit this yields x where the mask bit is 1 and y where it is 0.
; Expected lowering, as recorded by the autogenerated assertions below:
;   * no-SSE and SSE1-only configurations: two scalar 64-bit xor/and/xor
;     chains, with the two result elements returned in %rax and %rdx.
;   * SSE2 configuration: folded into andps / andnps / orps on xmm registers.
;   * XOP configuration: folded into a single vpcmov.
3444define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
3445; CHECK-BASELINE-LABEL: in_v2i64:
3446; CHECK-BASELINE:       # %bb.0:
3447; CHECK-BASELINE-NEXT:    movq %rdi, %rax
3448; CHECK-BASELINE-NEXT:    xorq %rdx, %rax
3449; CHECK-BASELINE-NEXT:    xorq %rcx, %rsi
3450; CHECK-BASELINE-NEXT:    andq %r9, %rsi
3451; CHECK-BASELINE-NEXT:    andq %r8, %rax
3452; CHECK-BASELINE-NEXT:    xorq %rdx, %rax
3453; CHECK-BASELINE-NEXT:    xorq %rcx, %rsi
3454; CHECK-BASELINE-NEXT:    movq %rsi, %rdx
3455; CHECK-BASELINE-NEXT:    retq
3456;
3457; CHECK-SSE1-LABEL: in_v2i64:
3458; CHECK-SSE1:       # %bb.0:
3459; CHECK-SSE1-NEXT:    movq %rdi, %rax
3460; CHECK-SSE1-NEXT:    xorq %rdx, %rax
3461; CHECK-SSE1-NEXT:    xorq %rcx, %rsi
3462; CHECK-SSE1-NEXT:    andq %r9, %rsi
3463; CHECK-SSE1-NEXT:    andq %r8, %rax
3464; CHECK-SSE1-NEXT:    xorq %rdx, %rax
3465; CHECK-SSE1-NEXT:    xorq %rcx, %rsi
3466; CHECK-SSE1-NEXT:    movq %rsi, %rdx
3467; CHECK-SSE1-NEXT:    retq
3468;
3469; CHECK-SSE2-LABEL: in_v2i64:
3470; CHECK-SSE2:       # %bb.0:
3471; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
3472; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
3473; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
3474; CHECK-SSE2-NEXT:    retq
3475;
3476; CHECK-XOP-LABEL: in_v2i64:
3477; CHECK-XOP:       # %bb.0:
3478; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
3479; CHECK-XOP-NEXT:    retq
3480  %n0 = xor <2 x i64> %x, %y
3481  %n1 = and <2 x i64> %n0, %mask
3482  %r = xor <2 x i64> %n1, %y
3483  ret <2 x i64> %r
3484}
3485
3486; ============================================================================ ;
3487; 256-bit vector width
3488; ============================================================================ ;
3489
3490define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) nounwind {
3491; CHECK-BASELINE-LABEL: in_v32i8:
3492; CHECK-BASELINE:       # %bb.0:
3493; CHECK-BASELINE-NEXT:    pushq %rbp
3494; CHECK-BASELINE-NEXT:    pushq %r15
3495; CHECK-BASELINE-NEXT:    pushq %r14
3496; CHECK-BASELINE-NEXT:    pushq %r13
3497; CHECK-BASELINE-NEXT:    pushq %r12
3498; CHECK-BASELINE-NEXT:    pushq %rbx
3499; CHECK-BASELINE-NEXT:    movq %rdx, %r13
3500; CHECK-BASELINE-NEXT:    movq %rsi, %rbx
3501; CHECK-BASELINE-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3502; CHECK-BASELINE-NEXT:    movb 15(%rdx), %r12b
3503; CHECK-BASELINE-NEXT:    movb 14(%rdx), %al
3504; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3505; CHECK-BASELINE-NEXT:    movb 13(%rdx), %al
3506; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3507; CHECK-BASELINE-NEXT:    movb 12(%rdx), %al
3508; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3509; CHECK-BASELINE-NEXT:    movb 11(%rdx), %al
3510; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3511; CHECK-BASELINE-NEXT:    movb 10(%rdx), %al
3512; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3513; CHECK-BASELINE-NEXT:    movb 9(%rdx), %r9b
3514; CHECK-BASELINE-NEXT:    movb 8(%rdx), %r10b
3515; CHECK-BASELINE-NEXT:    movb 7(%rdx), %r11b
3516; CHECK-BASELINE-NEXT:    movb 6(%rdx), %r8b
3517; CHECK-BASELINE-NEXT:    movb 5(%rdx), %bpl
3518; CHECK-BASELINE-NEXT:    movb 4(%rdx), %sil
3519; CHECK-BASELINE-NEXT:    movb 3(%rdx), %dil
3520; CHECK-BASELINE-NEXT:    movb 2(%rdx), %r14b
3521; CHECK-BASELINE-NEXT:    movb (%rdx), %al
3522; CHECK-BASELINE-NEXT:    movb 1(%rdx), %r15b
3523; CHECK-BASELINE-NEXT:    movb (%rbx), %dl
3524; CHECK-BASELINE-NEXT:    xorb %al, %dl
3525; CHECK-BASELINE-NEXT:    andb (%rcx), %dl
3526; CHECK-BASELINE-NEXT:    xorb %al, %dl
3527; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3528; CHECK-BASELINE-NEXT:    movb 1(%rbx), %al
3529; CHECK-BASELINE-NEXT:    xorb %r15b, %al
3530; CHECK-BASELINE-NEXT:    andb 1(%rcx), %al
3531; CHECK-BASELINE-NEXT:    xorb %r15b, %al
3532; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3533; CHECK-BASELINE-NEXT:    movb 2(%rbx), %al
3534; CHECK-BASELINE-NEXT:    xorb %r14b, %al
3535; CHECK-BASELINE-NEXT:    andb 2(%rcx), %al
3536; CHECK-BASELINE-NEXT:    xorb %r14b, %al
3537; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3538; CHECK-BASELINE-NEXT:    movb 3(%rbx), %al
3539; CHECK-BASELINE-NEXT:    xorb %dil, %al
3540; CHECK-BASELINE-NEXT:    andb 3(%rcx), %al
3541; CHECK-BASELINE-NEXT:    xorb %dil, %al
3542; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3543; CHECK-BASELINE-NEXT:    movb 4(%rbx), %al
3544; CHECK-BASELINE-NEXT:    xorb %sil, %al
3545; CHECK-BASELINE-NEXT:    andb 4(%rcx), %al
3546; CHECK-BASELINE-NEXT:    xorb %sil, %al
3547; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3548; CHECK-BASELINE-NEXT:    movb 5(%rbx), %al
3549; CHECK-BASELINE-NEXT:    xorb %bpl, %al
3550; CHECK-BASELINE-NEXT:    andb 5(%rcx), %al
3551; CHECK-BASELINE-NEXT:    xorb %bpl, %al
3552; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3553; CHECK-BASELINE-NEXT:    movb 6(%rbx), %al
3554; CHECK-BASELINE-NEXT:    xorb %r8b, %al
3555; CHECK-BASELINE-NEXT:    andb 6(%rcx), %al
3556; CHECK-BASELINE-NEXT:    xorb %r8b, %al
3557; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3558; CHECK-BASELINE-NEXT:    movb 7(%rbx), %al
3559; CHECK-BASELINE-NEXT:    xorb %r11b, %al
3560; CHECK-BASELINE-NEXT:    andb 7(%rcx), %al
3561; CHECK-BASELINE-NEXT:    xorb %r11b, %al
3562; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3563; CHECK-BASELINE-NEXT:    movb 8(%rbx), %al
3564; CHECK-BASELINE-NEXT:    xorb %r10b, %al
3565; CHECK-BASELINE-NEXT:    andb 8(%rcx), %al
3566; CHECK-BASELINE-NEXT:    xorb %r10b, %al
3567; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3568; CHECK-BASELINE-NEXT:    movb 9(%rbx), %al
3569; CHECK-BASELINE-NEXT:    xorb %r9b, %al
3570; CHECK-BASELINE-NEXT:    andb 9(%rcx), %al
3571; CHECK-BASELINE-NEXT:    xorb %r9b, %al
3572; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3573; CHECK-BASELINE-NEXT:    movb 10(%rbx), %dl
3574; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3575; CHECK-BASELINE-NEXT:    xorb %al, %dl
3576; CHECK-BASELINE-NEXT:    andb 10(%rcx), %dl
3577; CHECK-BASELINE-NEXT:    xorb %al, %dl
3578; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3579; CHECK-BASELINE-NEXT:    movb 11(%rbx), %dl
3580; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3581; CHECK-BASELINE-NEXT:    xorb %al, %dl
3582; CHECK-BASELINE-NEXT:    andb 11(%rcx), %dl
3583; CHECK-BASELINE-NEXT:    xorb %al, %dl
3584; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3585; CHECK-BASELINE-NEXT:    movb 12(%rbx), %dl
3586; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3587; CHECK-BASELINE-NEXT:    xorb %al, %dl
3588; CHECK-BASELINE-NEXT:    andb 12(%rcx), %dl
3589; CHECK-BASELINE-NEXT:    xorb %al, %dl
3590; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3591; CHECK-BASELINE-NEXT:    movb 13(%rbx), %dl
3592; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3593; CHECK-BASELINE-NEXT:    xorb %al, %dl
3594; CHECK-BASELINE-NEXT:    andb 13(%rcx), %dl
3595; CHECK-BASELINE-NEXT:    xorb %al, %dl
3596; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3597; CHECK-BASELINE-NEXT:    movb 14(%rbx), %dl
3598; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3599; CHECK-BASELINE-NEXT:    xorb %al, %dl
3600; CHECK-BASELINE-NEXT:    andb 14(%rcx), %dl
3601; CHECK-BASELINE-NEXT:    xorb %al, %dl
3602; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3603; CHECK-BASELINE-NEXT:    movb 15(%rbx), %al
3604; CHECK-BASELINE-NEXT:    xorb %r12b, %al
3605; CHECK-BASELINE-NEXT:    andb 15(%rcx), %al
3606; CHECK-BASELINE-NEXT:    xorb %r12b, %al
3607; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3608; CHECK-BASELINE-NEXT:    movb 16(%r13), %al
3609; CHECK-BASELINE-NEXT:    movb 16(%rbx), %dl
3610; CHECK-BASELINE-NEXT:    xorb %al, %dl
3611; CHECK-BASELINE-NEXT:    andb 16(%rcx), %dl
3612; CHECK-BASELINE-NEXT:    xorb %al, %dl
3613; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3614; CHECK-BASELINE-NEXT:    movb 17(%r13), %al
3615; CHECK-BASELINE-NEXT:    movb 17(%rbx), %dl
3616; CHECK-BASELINE-NEXT:    xorb %al, %dl
3617; CHECK-BASELINE-NEXT:    andb 17(%rcx), %dl
3618; CHECK-BASELINE-NEXT:    xorb %al, %dl
3619; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3620; CHECK-BASELINE-NEXT:    movb 18(%r13), %al
3621; CHECK-BASELINE-NEXT:    movb 18(%rbx), %dl
3622; CHECK-BASELINE-NEXT:    xorb %al, %dl
3623; CHECK-BASELINE-NEXT:    andb 18(%rcx), %dl
3624; CHECK-BASELINE-NEXT:    xorb %al, %dl
3625; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3626; CHECK-BASELINE-NEXT:    movb 19(%r13), %al
3627; CHECK-BASELINE-NEXT:    movb 19(%rbx), %r12b
3628; CHECK-BASELINE-NEXT:    xorb %al, %r12b
3629; CHECK-BASELINE-NEXT:    andb 19(%rcx), %r12b
3630; CHECK-BASELINE-NEXT:    xorb %al, %r12b
3631; CHECK-BASELINE-NEXT:    movb 20(%r13), %al
3632; CHECK-BASELINE-NEXT:    movb 20(%rbx), %r15b
3633; CHECK-BASELINE-NEXT:    xorb %al, %r15b
3634; CHECK-BASELINE-NEXT:    andb 20(%rcx), %r15b
3635; CHECK-BASELINE-NEXT:    movq %rcx, %rsi
3636; CHECK-BASELINE-NEXT:    xorb %al, %r15b
3637; CHECK-BASELINE-NEXT:    movb 21(%r13), %al
3638; CHECK-BASELINE-NEXT:    movb 21(%rbx), %r14b
3639; CHECK-BASELINE-NEXT:    xorb %al, %r14b
3640; CHECK-BASELINE-NEXT:    andb 21(%rcx), %r14b
3641; CHECK-BASELINE-NEXT:    xorb %al, %r14b
3642; CHECK-BASELINE-NEXT:    movb 22(%r13), %al
3643; CHECK-BASELINE-NEXT:    movb 22(%rbx), %bpl
3644; CHECK-BASELINE-NEXT:    xorb %al, %bpl
3645; CHECK-BASELINE-NEXT:    andb 22(%rcx), %bpl
3646; CHECK-BASELINE-NEXT:    xorb %al, %bpl
3647; CHECK-BASELINE-NEXT:    movb 23(%r13), %al
3648; CHECK-BASELINE-NEXT:    movb 23(%rbx), %r11b
3649; CHECK-BASELINE-NEXT:    xorb %al, %r11b
3650; CHECK-BASELINE-NEXT:    andb 23(%rcx), %r11b
3651; CHECK-BASELINE-NEXT:    xorb %al, %r11b
3652; CHECK-BASELINE-NEXT:    movb 24(%r13), %al
3653; CHECK-BASELINE-NEXT:    movb 24(%rbx), %r10b
3654; CHECK-BASELINE-NEXT:    xorb %al, %r10b
3655; CHECK-BASELINE-NEXT:    andb 24(%rcx), %r10b
3656; CHECK-BASELINE-NEXT:    xorb %al, %r10b
3657; CHECK-BASELINE-NEXT:    movb 25(%r13), %al
3658; CHECK-BASELINE-NEXT:    movb 25(%rbx), %r9b
3659; CHECK-BASELINE-NEXT:    xorb %al, %r9b
3660; CHECK-BASELINE-NEXT:    andb 25(%rcx), %r9b
3661; CHECK-BASELINE-NEXT:    xorb %al, %r9b
3662; CHECK-BASELINE-NEXT:    movb 26(%r13), %al
3663; CHECK-BASELINE-NEXT:    movb 26(%rbx), %r8b
3664; CHECK-BASELINE-NEXT:    xorb %al, %r8b
3665; CHECK-BASELINE-NEXT:    andb 26(%rcx), %r8b
3666; CHECK-BASELINE-NEXT:    xorb %al, %r8b
3667; CHECK-BASELINE-NEXT:    movb 27(%r13), %al
3668; CHECK-BASELINE-NEXT:    movb 27(%rbx), %dil
3669; CHECK-BASELINE-NEXT:    xorb %al, %dil
3670; CHECK-BASELINE-NEXT:    andb 27(%rcx), %dil
3671; CHECK-BASELINE-NEXT:    xorb %al, %dil
3672; CHECK-BASELINE-NEXT:    movb 28(%r13), %al
3673; CHECK-BASELINE-NEXT:    movb 28(%rbx), %dl
3674; CHECK-BASELINE-NEXT:    xorb %al, %dl
3675; CHECK-BASELINE-NEXT:    andb 28(%rcx), %dl
3676; CHECK-BASELINE-NEXT:    xorb %al, %dl
3677; CHECK-BASELINE-NEXT:    movb 29(%r13), %al
3678; CHECK-BASELINE-NEXT:    movb 29(%rbx), %cl
3679; CHECK-BASELINE-NEXT:    xorb %al, %cl
3680; CHECK-BASELINE-NEXT:    andb 29(%rsi), %cl
3681; CHECK-BASELINE-NEXT:    xorb %al, %cl
3682; CHECK-BASELINE-NEXT:    movb 30(%r13), %al
3683; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3684; CHECK-BASELINE-NEXT:    movb 30(%rbx), %al
3685; CHECK-BASELINE-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
3686; CHECK-BASELINE-NEXT:    andb 30(%rsi), %al
3687; CHECK-BASELINE-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
3688; CHECK-BASELINE-NEXT:    movb 31(%r13), %r13b
3689; CHECK-BASELINE-NEXT:    movb 31(%rbx), %bl
3690; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
3691; CHECK-BASELINE-NEXT:    andb 31(%rsi), %bl
3692; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
3693; CHECK-BASELINE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
3694; CHECK-BASELINE-NEXT:    movb %bl, 31(%r13)
3695; CHECK-BASELINE-NEXT:    movb %al, 30(%r13)
3696; CHECK-BASELINE-NEXT:    movb %cl, 29(%r13)
3697; CHECK-BASELINE-NEXT:    movb %dl, 28(%r13)
3698; CHECK-BASELINE-NEXT:    movb %dil, 27(%r13)
3699; CHECK-BASELINE-NEXT:    movb %r8b, 26(%r13)
3700; CHECK-BASELINE-NEXT:    movb %r9b, 25(%r13)
3701; CHECK-BASELINE-NEXT:    movb %r10b, 24(%r13)
3702; CHECK-BASELINE-NEXT:    movb %r11b, 23(%r13)
3703; CHECK-BASELINE-NEXT:    movb %bpl, 22(%r13)
3704; CHECK-BASELINE-NEXT:    movb %r14b, 21(%r13)
3705; CHECK-BASELINE-NEXT:    movb %r15b, 20(%r13)
3706; CHECK-BASELINE-NEXT:    movb %r12b, 19(%r13)
3707; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3708; CHECK-BASELINE-NEXT:    movb %al, 18(%r13)
3709; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3710; CHECK-BASELINE-NEXT:    movb %al, 17(%r13)
3711; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3712; CHECK-BASELINE-NEXT:    movb %al, 16(%r13)
3713; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3714; CHECK-BASELINE-NEXT:    movb %al, 15(%r13)
3715; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3716; CHECK-BASELINE-NEXT:    movb %al, 14(%r13)
3717; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3718; CHECK-BASELINE-NEXT:    movb %al, 13(%r13)
3719; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3720; CHECK-BASELINE-NEXT:    movb %al, 12(%r13)
3721; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3722; CHECK-BASELINE-NEXT:    movb %al, 11(%r13)
3723; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3724; CHECK-BASELINE-NEXT:    movb %al, 10(%r13)
3725; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3726; CHECK-BASELINE-NEXT:    movb %al, 9(%r13)
3727; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3728; CHECK-BASELINE-NEXT:    movb %al, 8(%r13)
3729; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3730; CHECK-BASELINE-NEXT:    movb %al, 7(%r13)
3731; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3732; CHECK-BASELINE-NEXT:    movb %al, 6(%r13)
3733; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3734; CHECK-BASELINE-NEXT:    movb %al, 5(%r13)
3735; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3736; CHECK-BASELINE-NEXT:    movb %al, 4(%r13)
3737; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3738; CHECK-BASELINE-NEXT:    movb %al, 3(%r13)
3739; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3740; CHECK-BASELINE-NEXT:    movb %al, 2(%r13)
3741; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3742; CHECK-BASELINE-NEXT:    movb %al, 1(%r13)
3743; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3744; CHECK-BASELINE-NEXT:    movb %al, (%r13)
3745; CHECK-BASELINE-NEXT:    movq %r13, %rax
3746; CHECK-BASELINE-NEXT:    popq %rbx
3747; CHECK-BASELINE-NEXT:    popq %r12
3748; CHECK-BASELINE-NEXT:    popq %r13
3749; CHECK-BASELINE-NEXT:    popq %r14
3750; CHECK-BASELINE-NEXT:    popq %r15
3751; CHECK-BASELINE-NEXT:    popq %rbp
3752; CHECK-BASELINE-NEXT:    retq
3753;
3754; CHECK-SSE1-LABEL: in_v32i8:
3755; CHECK-SSE1:       # %bb.0:
3756; CHECK-SSE1-NEXT:    pushq %rbp
3757; CHECK-SSE1-NEXT:    pushq %r15
3758; CHECK-SSE1-NEXT:    pushq %r14
3759; CHECK-SSE1-NEXT:    pushq %r13
3760; CHECK-SSE1-NEXT:    pushq %r12
3761; CHECK-SSE1-NEXT:    pushq %rbx
3762; CHECK-SSE1-NEXT:    movq %rdx, %r13
3763; CHECK-SSE1-NEXT:    movq %rsi, %rbx
3764; CHECK-SSE1-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3765; CHECK-SSE1-NEXT:    movb 15(%rdx), %r12b
3766; CHECK-SSE1-NEXT:    movb 14(%rdx), %al
3767; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3768; CHECK-SSE1-NEXT:    movb 13(%rdx), %al
3769; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3770; CHECK-SSE1-NEXT:    movb 12(%rdx), %al
3771; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3772; CHECK-SSE1-NEXT:    movb 11(%rdx), %al
3773; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3774; CHECK-SSE1-NEXT:    movb 10(%rdx), %al
3775; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3776; CHECK-SSE1-NEXT:    movb 9(%rdx), %r9b
3777; CHECK-SSE1-NEXT:    movb 8(%rdx), %r10b
3778; CHECK-SSE1-NEXT:    movb 7(%rdx), %r11b
3779; CHECK-SSE1-NEXT:    movb 6(%rdx), %r8b
3780; CHECK-SSE1-NEXT:    movb 5(%rdx), %bpl
3781; CHECK-SSE1-NEXT:    movb 4(%rdx), %sil
3782; CHECK-SSE1-NEXT:    movb 3(%rdx), %dil
3783; CHECK-SSE1-NEXT:    movb 2(%rdx), %r14b
3784; CHECK-SSE1-NEXT:    movb (%rdx), %al
3785; CHECK-SSE1-NEXT:    movb 1(%rdx), %r15b
3786; CHECK-SSE1-NEXT:    movb (%rbx), %dl
3787; CHECK-SSE1-NEXT:    xorb %al, %dl
3788; CHECK-SSE1-NEXT:    andb (%rcx), %dl
3789; CHECK-SSE1-NEXT:    xorb %al, %dl
3790; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3791; CHECK-SSE1-NEXT:    movb 1(%rbx), %al
3792; CHECK-SSE1-NEXT:    xorb %r15b, %al
3793; CHECK-SSE1-NEXT:    andb 1(%rcx), %al
3794; CHECK-SSE1-NEXT:    xorb %r15b, %al
3795; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3796; CHECK-SSE1-NEXT:    movb 2(%rbx), %al
3797; CHECK-SSE1-NEXT:    xorb %r14b, %al
3798; CHECK-SSE1-NEXT:    andb 2(%rcx), %al
3799; CHECK-SSE1-NEXT:    xorb %r14b, %al
3800; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3801; CHECK-SSE1-NEXT:    movb 3(%rbx), %al
3802; CHECK-SSE1-NEXT:    xorb %dil, %al
3803; CHECK-SSE1-NEXT:    andb 3(%rcx), %al
3804; CHECK-SSE1-NEXT:    xorb %dil, %al
3805; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3806; CHECK-SSE1-NEXT:    movb 4(%rbx), %al
3807; CHECK-SSE1-NEXT:    xorb %sil, %al
3808; CHECK-SSE1-NEXT:    andb 4(%rcx), %al
3809; CHECK-SSE1-NEXT:    xorb %sil, %al
3810; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3811; CHECK-SSE1-NEXT:    movb 5(%rbx), %al
3812; CHECK-SSE1-NEXT:    xorb %bpl, %al
3813; CHECK-SSE1-NEXT:    andb 5(%rcx), %al
3814; CHECK-SSE1-NEXT:    xorb %bpl, %al
3815; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3816; CHECK-SSE1-NEXT:    movb 6(%rbx), %al
3817; CHECK-SSE1-NEXT:    xorb %r8b, %al
3818; CHECK-SSE1-NEXT:    andb 6(%rcx), %al
3819; CHECK-SSE1-NEXT:    xorb %r8b, %al
3820; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3821; CHECK-SSE1-NEXT:    movb 7(%rbx), %al
3822; CHECK-SSE1-NEXT:    xorb %r11b, %al
3823; CHECK-SSE1-NEXT:    andb 7(%rcx), %al
3824; CHECK-SSE1-NEXT:    xorb %r11b, %al
3825; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3826; CHECK-SSE1-NEXT:    movb 8(%rbx), %al
3827; CHECK-SSE1-NEXT:    xorb %r10b, %al
3828; CHECK-SSE1-NEXT:    andb 8(%rcx), %al
3829; CHECK-SSE1-NEXT:    xorb %r10b, %al
3830; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3831; CHECK-SSE1-NEXT:    movb 9(%rbx), %al
3832; CHECK-SSE1-NEXT:    xorb %r9b, %al
3833; CHECK-SSE1-NEXT:    andb 9(%rcx), %al
3834; CHECK-SSE1-NEXT:    xorb %r9b, %al
3835; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3836; CHECK-SSE1-NEXT:    movb 10(%rbx), %dl
3837; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3838; CHECK-SSE1-NEXT:    xorb %al, %dl
3839; CHECK-SSE1-NEXT:    andb 10(%rcx), %dl
3840; CHECK-SSE1-NEXT:    xorb %al, %dl
3841; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3842; CHECK-SSE1-NEXT:    movb 11(%rbx), %dl
3843; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3844; CHECK-SSE1-NEXT:    xorb %al, %dl
3845; CHECK-SSE1-NEXT:    andb 11(%rcx), %dl
3846; CHECK-SSE1-NEXT:    xorb %al, %dl
3847; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3848; CHECK-SSE1-NEXT:    movb 12(%rbx), %dl
3849; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3850; CHECK-SSE1-NEXT:    xorb %al, %dl
3851; CHECK-SSE1-NEXT:    andb 12(%rcx), %dl
3852; CHECK-SSE1-NEXT:    xorb %al, %dl
3853; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3854; CHECK-SSE1-NEXT:    movb 13(%rbx), %dl
3855; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3856; CHECK-SSE1-NEXT:    xorb %al, %dl
3857; CHECK-SSE1-NEXT:    andb 13(%rcx), %dl
3858; CHECK-SSE1-NEXT:    xorb %al, %dl
3859; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3860; CHECK-SSE1-NEXT:    movb 14(%rbx), %dl
3861; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3862; CHECK-SSE1-NEXT:    xorb %al, %dl
3863; CHECK-SSE1-NEXT:    andb 14(%rcx), %dl
3864; CHECK-SSE1-NEXT:    xorb %al, %dl
3865; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3866; CHECK-SSE1-NEXT:    movb 15(%rbx), %al
3867; CHECK-SSE1-NEXT:    xorb %r12b, %al
3868; CHECK-SSE1-NEXT:    andb 15(%rcx), %al
3869; CHECK-SSE1-NEXT:    xorb %r12b, %al
3870; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3871; CHECK-SSE1-NEXT:    movb 16(%r13), %al
3872; CHECK-SSE1-NEXT:    movb 16(%rbx), %dl
3873; CHECK-SSE1-NEXT:    xorb %al, %dl
3874; CHECK-SSE1-NEXT:    andb 16(%rcx), %dl
3875; CHECK-SSE1-NEXT:    xorb %al, %dl
3876; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3877; CHECK-SSE1-NEXT:    movb 17(%r13), %al
3878; CHECK-SSE1-NEXT:    movb 17(%rbx), %dl
3879; CHECK-SSE1-NEXT:    xorb %al, %dl
3880; CHECK-SSE1-NEXT:    andb 17(%rcx), %dl
3881; CHECK-SSE1-NEXT:    xorb %al, %dl
3882; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3883; CHECK-SSE1-NEXT:    movb 18(%r13), %al
3884; CHECK-SSE1-NEXT:    movb 18(%rbx), %dl
3885; CHECK-SSE1-NEXT:    xorb %al, %dl
3886; CHECK-SSE1-NEXT:    andb 18(%rcx), %dl
3887; CHECK-SSE1-NEXT:    xorb %al, %dl
3888; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3889; CHECK-SSE1-NEXT:    movb 19(%r13), %al
3890; CHECK-SSE1-NEXT:    movb 19(%rbx), %r12b
3891; CHECK-SSE1-NEXT:    xorb %al, %r12b
3892; CHECK-SSE1-NEXT:    andb 19(%rcx), %r12b
3893; CHECK-SSE1-NEXT:    xorb %al, %r12b
3894; CHECK-SSE1-NEXT:    movb 20(%r13), %al
3895; CHECK-SSE1-NEXT:    movb 20(%rbx), %r15b
3896; CHECK-SSE1-NEXT:    xorb %al, %r15b
3897; CHECK-SSE1-NEXT:    andb 20(%rcx), %r15b
3898; CHECK-SSE1-NEXT:    movq %rcx, %rsi
3899; CHECK-SSE1-NEXT:    xorb %al, %r15b
3900; CHECK-SSE1-NEXT:    movb 21(%r13), %al
3901; CHECK-SSE1-NEXT:    movb 21(%rbx), %r14b
3902; CHECK-SSE1-NEXT:    xorb %al, %r14b
3903; CHECK-SSE1-NEXT:    andb 21(%rcx), %r14b
3904; CHECK-SSE1-NEXT:    xorb %al, %r14b
3905; CHECK-SSE1-NEXT:    movb 22(%r13), %al
3906; CHECK-SSE1-NEXT:    movb 22(%rbx), %bpl
3907; CHECK-SSE1-NEXT:    xorb %al, %bpl
3908; CHECK-SSE1-NEXT:    andb 22(%rcx), %bpl
3909; CHECK-SSE1-NEXT:    xorb %al, %bpl
3910; CHECK-SSE1-NEXT:    movb 23(%r13), %al
3911; CHECK-SSE1-NEXT:    movb 23(%rbx), %r11b
3912; CHECK-SSE1-NEXT:    xorb %al, %r11b
3913; CHECK-SSE1-NEXT:    andb 23(%rcx), %r11b
3914; CHECK-SSE1-NEXT:    xorb %al, %r11b
3915; CHECK-SSE1-NEXT:    movb 24(%r13), %al
3916; CHECK-SSE1-NEXT:    movb 24(%rbx), %r10b
3917; CHECK-SSE1-NEXT:    xorb %al, %r10b
3918; CHECK-SSE1-NEXT:    andb 24(%rcx), %r10b
3919; CHECK-SSE1-NEXT:    xorb %al, %r10b
3920; CHECK-SSE1-NEXT:    movb 25(%r13), %al
3921; CHECK-SSE1-NEXT:    movb 25(%rbx), %r9b
3922; CHECK-SSE1-NEXT:    xorb %al, %r9b
3923; CHECK-SSE1-NEXT:    andb 25(%rcx), %r9b
3924; CHECK-SSE1-NEXT:    xorb %al, %r9b
3925; CHECK-SSE1-NEXT:    movb 26(%r13), %al
3926; CHECK-SSE1-NEXT:    movb 26(%rbx), %r8b
3927; CHECK-SSE1-NEXT:    xorb %al, %r8b
3928; CHECK-SSE1-NEXT:    andb 26(%rcx), %r8b
3929; CHECK-SSE1-NEXT:    xorb %al, %r8b
3930; CHECK-SSE1-NEXT:    movb 27(%r13), %al
3931; CHECK-SSE1-NEXT:    movb 27(%rbx), %dil
3932; CHECK-SSE1-NEXT:    xorb %al, %dil
3933; CHECK-SSE1-NEXT:    andb 27(%rcx), %dil
3934; CHECK-SSE1-NEXT:    xorb %al, %dil
3935; CHECK-SSE1-NEXT:    movb 28(%r13), %al
3936; CHECK-SSE1-NEXT:    movb 28(%rbx), %dl
3937; CHECK-SSE1-NEXT:    xorb %al, %dl
3938; CHECK-SSE1-NEXT:    andb 28(%rcx), %dl
3939; CHECK-SSE1-NEXT:    xorb %al, %dl
3940; CHECK-SSE1-NEXT:    movb 29(%r13), %al
3941; CHECK-SSE1-NEXT:    movb 29(%rbx), %cl
3942; CHECK-SSE1-NEXT:    xorb %al, %cl
3943; CHECK-SSE1-NEXT:    andb 29(%rsi), %cl
3944; CHECK-SSE1-NEXT:    xorb %al, %cl
3945; CHECK-SSE1-NEXT:    movb 30(%r13), %al
3946; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
3947; CHECK-SSE1-NEXT:    movb 30(%rbx), %al
3948; CHECK-SSE1-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
3949; CHECK-SSE1-NEXT:    andb 30(%rsi), %al
3950; CHECK-SSE1-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
3951; CHECK-SSE1-NEXT:    movb 31(%r13), %r13b
3952; CHECK-SSE1-NEXT:    movb 31(%rbx), %bl
3953; CHECK-SSE1-NEXT:    xorb %r13b, %bl
3954; CHECK-SSE1-NEXT:    andb 31(%rsi), %bl
3955; CHECK-SSE1-NEXT:    xorb %r13b, %bl
3956; CHECK-SSE1-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
3957; CHECK-SSE1-NEXT:    movb %bl, 31(%r13)
3958; CHECK-SSE1-NEXT:    movb %al, 30(%r13)
3959; CHECK-SSE1-NEXT:    movb %cl, 29(%r13)
3960; CHECK-SSE1-NEXT:    movb %dl, 28(%r13)
3961; CHECK-SSE1-NEXT:    movb %dil, 27(%r13)
3962; CHECK-SSE1-NEXT:    movb %r8b, 26(%r13)
3963; CHECK-SSE1-NEXT:    movb %r9b, 25(%r13)
3964; CHECK-SSE1-NEXT:    movb %r10b, 24(%r13)
3965; CHECK-SSE1-NEXT:    movb %r11b, 23(%r13)
3966; CHECK-SSE1-NEXT:    movb %bpl, 22(%r13)
3967; CHECK-SSE1-NEXT:    movb %r14b, 21(%r13)
3968; CHECK-SSE1-NEXT:    movb %r15b, 20(%r13)
3969; CHECK-SSE1-NEXT:    movb %r12b, 19(%r13)
3970; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3971; CHECK-SSE1-NEXT:    movb %al, 18(%r13)
3972; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3973; CHECK-SSE1-NEXT:    movb %al, 17(%r13)
3974; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3975; CHECK-SSE1-NEXT:    movb %al, 16(%r13)
3976; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3977; CHECK-SSE1-NEXT:    movb %al, 15(%r13)
3978; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3979; CHECK-SSE1-NEXT:    movb %al, 14(%r13)
3980; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3981; CHECK-SSE1-NEXT:    movb %al, 13(%r13)
3982; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3983; CHECK-SSE1-NEXT:    movb %al, 12(%r13)
3984; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3985; CHECK-SSE1-NEXT:    movb %al, 11(%r13)
3986; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3987; CHECK-SSE1-NEXT:    movb %al, 10(%r13)
3988; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3989; CHECK-SSE1-NEXT:    movb %al, 9(%r13)
3990; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3991; CHECK-SSE1-NEXT:    movb %al, 8(%r13)
3992; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3993; CHECK-SSE1-NEXT:    movb %al, 7(%r13)
3994; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3995; CHECK-SSE1-NEXT:    movb %al, 6(%r13)
3996; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3997; CHECK-SSE1-NEXT:    movb %al, 5(%r13)
3998; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
3999; CHECK-SSE1-NEXT:    movb %al, 4(%r13)
4000; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
4001; CHECK-SSE1-NEXT:    movb %al, 3(%r13)
4002; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
4003; CHECK-SSE1-NEXT:    movb %al, 2(%r13)
4004; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
4005; CHECK-SSE1-NEXT:    movb %al, 1(%r13)
4006; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
4007; CHECK-SSE1-NEXT:    movb %al, (%r13)
4008; CHECK-SSE1-NEXT:    movq %r13, %rax
4009; CHECK-SSE1-NEXT:    popq %rbx
4010; CHECK-SSE1-NEXT:    popq %r12
4011; CHECK-SSE1-NEXT:    popq %r13
4012; CHECK-SSE1-NEXT:    popq %r14
4013; CHECK-SSE1-NEXT:    popq %r15
4014; CHECK-SSE1-NEXT:    popq %rbp
4015; CHECK-SSE1-NEXT:    retq
4016;
4017; CHECK-SSE2-LABEL: in_v32i8:
4018; CHECK-SSE2:       # %bb.0:
4019; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
4020; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
4021; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm2
4022; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm2
4023; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
4024; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
4025; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm2
4026; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm2
4027; CHECK-SSE2-NEXT:    andps 16(%rdi), %xmm1
4028; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
4029; CHECK-SSE2-NEXT:    retq
4030;
4031; CHECK-XOP-LABEL: in_v32i8:
4032; CHECK-XOP:       # %bb.0:
4033; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
4034; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
4035; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
4036; CHECK-XOP-NEXT:    retq
4037  %x = load <32 x i8>, <32 x i8> *%px, align 32
4038  %y = load <32 x i8>, <32 x i8> *%py, align 32
4039  %mask = load <32 x i8>, <32 x i8> *%pmask, align 32
4040  %n0 = xor <32 x i8> %x, %y
4041  %n1 = and <32 x i8> %n0, %mask
4042  %r = xor <32 x i8> %n1, %y
4043  ret <32 x i8> %r
4044}
4045
4046define <16 x i16> @in_v16i16(<16 x i16> *%px, <16 x i16> *%py, <16 x i16> *%pmask) nounwind {
4047; CHECK-BASELINE-LABEL: in_v16i16:
4048; CHECK-BASELINE:       # %bb.0:
4049; CHECK-BASELINE-NEXT:    pushq %rbp
4050; CHECK-BASELINE-NEXT:    pushq %r15
4051; CHECK-BASELINE-NEXT:    pushq %r14
4052; CHECK-BASELINE-NEXT:    pushq %r13
4053; CHECK-BASELINE-NEXT:    pushq %r12
4054; CHECK-BASELINE-NEXT:    pushq %rbx
4055; CHECK-BASELINE-NEXT:    movq %rcx, %r9
4056; CHECK-BASELINE-NEXT:    movq %rdi, %r10
4057; CHECK-BASELINE-NEXT:    movzwl 30(%rdx), %edi
4058; CHECK-BASELINE-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4059; CHECK-BASELINE-NEXT:    movl 28(%rdx), %edi
4060; CHECK-BASELINE-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4061; CHECK-BASELINE-NEXT:    movzwl 26(%rdx), %edi
4062; CHECK-BASELINE-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4063; CHECK-BASELINE-NEXT:    movl 24(%rdx), %eax
4064; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4065; CHECK-BASELINE-NEXT:    movzwl 22(%rdx), %eax
4066; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4067; CHECK-BASELINE-NEXT:    movl 20(%rdx), %r11d
4068; CHECK-BASELINE-NEXT:    movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4069; CHECK-BASELINE-NEXT:    movzwl 18(%rdx), %r14d
4070; CHECK-BASELINE-NEXT:    movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4071; CHECK-BASELINE-NEXT:    movl 16(%rdx), %r15d
4072; CHECK-BASELINE-NEXT:    movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4073; CHECK-BASELINE-NEXT:    movzwl 14(%rdx), %r12d
4074; CHECK-BASELINE-NEXT:    movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4075; CHECK-BASELINE-NEXT:    movl 12(%rdx), %r13d
4076; CHECK-BASELINE-NEXT:    movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4077; CHECK-BASELINE-NEXT:    movzwl 10(%rdx), %r8d
4078; CHECK-BASELINE-NEXT:    movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4079; CHECK-BASELINE-NEXT:    movl 8(%rdx), %ebx
4080; CHECK-BASELINE-NEXT:    movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4081; CHECK-BASELINE-NEXT:    movzwl 6(%rdx), %ebp
4082; CHECK-BASELINE-NEXT:    movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4083; CHECK-BASELINE-NEXT:    movl (%rdx), %ecx
4084; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4085; CHECK-BASELINE-NEXT:    movl 4(%rdx), %edi
4086; CHECK-BASELINE-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4087; CHECK-BASELINE-NEXT:    movzwl 2(%rdx), %eax
4088; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4089; CHECK-BASELINE-NEXT:    movzwl (%rsi), %edx
4090; CHECK-BASELINE-NEXT:    xorw %cx, %dx
4091; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4092; CHECK-BASELINE-NEXT:    movzwl 2(%rsi), %ecx
4093; CHECK-BASELINE-NEXT:    xorw %ax, %cx
4094; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4095; CHECK-BASELINE-NEXT:    movzwl 4(%rsi), %eax
4096; CHECK-BASELINE-NEXT:    xorw %di, %ax
4097; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4098; CHECK-BASELINE-NEXT:    movzwl 6(%rsi), %edx
4099; CHECK-BASELINE-NEXT:    xorw %bp, %dx
4100; CHECK-BASELINE-NEXT:    movl %edx, %eax
4101; CHECK-BASELINE-NEXT:    movzwl 8(%rsi), %ecx
4102; CHECK-BASELINE-NEXT:    xorw %bx, %cx
4103; CHECK-BASELINE-NEXT:    movzwl 10(%rsi), %edx
4104; CHECK-BASELINE-NEXT:    xorw %r8w, %dx
4105; CHECK-BASELINE-NEXT:    movl %edx, %r8d
4106; CHECK-BASELINE-NEXT:    movzwl 12(%rsi), %edx
4107; CHECK-BASELINE-NEXT:    xorw %r13w, %dx
4108; CHECK-BASELINE-NEXT:    movzwl 14(%rsi), %r13d
4109; CHECK-BASELINE-NEXT:    xorw %r12w, %r13w
4110; CHECK-BASELINE-NEXT:    movzwl 16(%rsi), %r12d
4111; CHECK-BASELINE-NEXT:    xorw %r15w, %r12w
4112; CHECK-BASELINE-NEXT:    movzwl 18(%rsi), %r15d
4113; CHECK-BASELINE-NEXT:    xorw %r14w, %r15w
4114; CHECK-BASELINE-NEXT:    movzwl 20(%rsi), %r14d
4115; CHECK-BASELINE-NEXT:    xorw %r11w, %r14w
4116; CHECK-BASELINE-NEXT:    movzwl 22(%rsi), %ebp
4117; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload
4118; CHECK-BASELINE-NEXT:    movzwl 24(%rsi), %ebx
4119; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload
4120; CHECK-BASELINE-NEXT:    movzwl 26(%rsi), %r11d
4121; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload
4122; CHECK-BASELINE-NEXT:    movzwl 28(%rsi), %edi
4123; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload
4124; CHECK-BASELINE-NEXT:    movzwl 30(%rsi), %esi
4125; CHECK-BASELINE-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload
4126; CHECK-BASELINE-NEXT:    andw 30(%r9), %si
4127; CHECK-BASELINE-NEXT:    andw 28(%r9), %di
4128; CHECK-BASELINE-NEXT:    andw 26(%r9), %r11w
4129; CHECK-BASELINE-NEXT:    andw 24(%r9), %bx
4130; CHECK-BASELINE-NEXT:    andw 22(%r9), %bp
4131; CHECK-BASELINE-NEXT:    andw 20(%r9), %r14w
4132; CHECK-BASELINE-NEXT:    andw 18(%r9), %r15w
4133; CHECK-BASELINE-NEXT:    andw 16(%r9), %r12w
4134; CHECK-BASELINE-NEXT:    andw 14(%r9), %r13w
4135; CHECK-BASELINE-NEXT:    andw 12(%r9), %dx
4136; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4137; CHECK-BASELINE-NEXT:    andw 10(%r9), %r8w
4138; CHECK-BASELINE-NEXT:    movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4139; CHECK-BASELINE-NEXT:    movl %ecx, %edx
4140; CHECK-BASELINE-NEXT:    andw 8(%r9), %dx
4141; CHECK-BASELINE-NEXT:    andw 6(%r9), %ax
4142; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4143; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload
4144; CHECK-BASELINE-NEXT:    andw 4(%r9), %r8w
4145; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
4146; CHECK-BASELINE-NEXT:    andw 2(%r9), %ax
4147; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
4148; CHECK-BASELINE-NEXT:    andw (%r9), %cx
4149; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload
4150; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4151; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
4152; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4153; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
4154; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload
4155; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload
4156; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
4157; CHECK-BASELINE-NEXT:    movl %edx, %ecx
4158; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
4159; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
4160; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
4161; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
4162; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload
4163; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload
4164; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload
4165; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload
4166; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
4167; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
4168; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
4169; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload
4170; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload
4171; CHECK-BASELINE-NEXT:    movw %si, 30(%r10)
4172; CHECK-BASELINE-NEXT:    movw %di, 28(%r10)
4173; CHECK-BASELINE-NEXT:    movw %r11w, 26(%r10)
4174; CHECK-BASELINE-NEXT:    movw %bx, 24(%r10)
4175; CHECK-BASELINE-NEXT:    movw %bp, 22(%r10)
4176; CHECK-BASELINE-NEXT:    movw %r14w, 20(%r10)
4177; CHECK-BASELINE-NEXT:    movw %r15w, 18(%r10)
4178; CHECK-BASELINE-NEXT:    movw %r12w, 16(%r10)
4179; CHECK-BASELINE-NEXT:    movw %r13w, 14(%r10)
4180; CHECK-BASELINE-NEXT:    movw %ax, 12(%r10)
4181; CHECK-BASELINE-NEXT:    movw %dx, 10(%r10)
4182; CHECK-BASELINE-NEXT:    movw %cx, 8(%r10)
4183; CHECK-BASELINE-NEXT:    movw %r9w, 6(%r10)
4184; CHECK-BASELINE-NEXT:    movw %r8w, 4(%r10)
4185; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
4186; CHECK-BASELINE-NEXT:    movw %ax, 2(%r10)
4187; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
4188; CHECK-BASELINE-NEXT:    movw %ax, (%r10)
4189; CHECK-BASELINE-NEXT:    movq %r10, %rax
4190; CHECK-BASELINE-NEXT:    popq %rbx
4191; CHECK-BASELINE-NEXT:    popq %r12
4192; CHECK-BASELINE-NEXT:    popq %r13
4193; CHECK-BASELINE-NEXT:    popq %r14
4194; CHECK-BASELINE-NEXT:    popq %r15
4195; CHECK-BASELINE-NEXT:    popq %rbp
4196; CHECK-BASELINE-NEXT:    retq
4197;
4198; CHECK-SSE1-LABEL: in_v16i16:
4199; CHECK-SSE1:       # %bb.0:
4200; CHECK-SSE1-NEXT:    pushq %rbp
4201; CHECK-SSE1-NEXT:    pushq %r15
4202; CHECK-SSE1-NEXT:    pushq %r14
4203; CHECK-SSE1-NEXT:    pushq %r13
4204; CHECK-SSE1-NEXT:    pushq %r12
4205; CHECK-SSE1-NEXT:    pushq %rbx
4206; CHECK-SSE1-NEXT:    movq %rcx, %r9
4207; CHECK-SSE1-NEXT:    movq %rdi, %r10
4208; CHECK-SSE1-NEXT:    movzwl 30(%rdx), %edi
4209; CHECK-SSE1-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4210; CHECK-SSE1-NEXT:    movl 28(%rdx), %edi
4211; CHECK-SSE1-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4212; CHECK-SSE1-NEXT:    movzwl 26(%rdx), %edi
4213; CHECK-SSE1-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4214; CHECK-SSE1-NEXT:    movl 24(%rdx), %eax
4215; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4216; CHECK-SSE1-NEXT:    movzwl 22(%rdx), %eax
4217; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4218; CHECK-SSE1-NEXT:    movl 20(%rdx), %r11d
4219; CHECK-SSE1-NEXT:    movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4220; CHECK-SSE1-NEXT:    movzwl 18(%rdx), %r14d
4221; CHECK-SSE1-NEXT:    movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4222; CHECK-SSE1-NEXT:    movl 16(%rdx), %r15d
4223; CHECK-SSE1-NEXT:    movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4224; CHECK-SSE1-NEXT:    movzwl 14(%rdx), %r12d
4225; CHECK-SSE1-NEXT:    movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4226; CHECK-SSE1-NEXT:    movl 12(%rdx), %r13d
4227; CHECK-SSE1-NEXT:    movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4228; CHECK-SSE1-NEXT:    movzwl 10(%rdx), %r8d
4229; CHECK-SSE1-NEXT:    movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4230; CHECK-SSE1-NEXT:    movl 8(%rdx), %ebx
4231; CHECK-SSE1-NEXT:    movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4232; CHECK-SSE1-NEXT:    movzwl 6(%rdx), %ebp
4233; CHECK-SSE1-NEXT:    movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4234; CHECK-SSE1-NEXT:    movl (%rdx), %ecx
4235; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4236; CHECK-SSE1-NEXT:    movl 4(%rdx), %edi
4237; CHECK-SSE1-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4238; CHECK-SSE1-NEXT:    movzwl 2(%rdx), %eax
4239; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4240; CHECK-SSE1-NEXT:    movzwl (%rsi), %edx
4241; CHECK-SSE1-NEXT:    xorw %cx, %dx
4242; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4243; CHECK-SSE1-NEXT:    movzwl 2(%rsi), %ecx
4244; CHECK-SSE1-NEXT:    xorw %ax, %cx
4245; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4246; CHECK-SSE1-NEXT:    movzwl 4(%rsi), %eax
4247; CHECK-SSE1-NEXT:    xorw %di, %ax
4248; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4249; CHECK-SSE1-NEXT:    movzwl 6(%rsi), %edx
4250; CHECK-SSE1-NEXT:    xorw %bp, %dx
4251; CHECK-SSE1-NEXT:    movl %edx, %eax
4252; CHECK-SSE1-NEXT:    movzwl 8(%rsi), %ecx
4253; CHECK-SSE1-NEXT:    xorw %bx, %cx
4254; CHECK-SSE1-NEXT:    movzwl 10(%rsi), %edx
4255; CHECK-SSE1-NEXT:    xorw %r8w, %dx
4256; CHECK-SSE1-NEXT:    movl %edx, %r8d
4257; CHECK-SSE1-NEXT:    movzwl 12(%rsi), %edx
4258; CHECK-SSE1-NEXT:    xorw %r13w, %dx
4259; CHECK-SSE1-NEXT:    movzwl 14(%rsi), %r13d
4260; CHECK-SSE1-NEXT:    xorw %r12w, %r13w
4261; CHECK-SSE1-NEXT:    movzwl 16(%rsi), %r12d
4262; CHECK-SSE1-NEXT:    xorw %r15w, %r12w
4263; CHECK-SSE1-NEXT:    movzwl 18(%rsi), %r15d
4264; CHECK-SSE1-NEXT:    xorw %r14w, %r15w
4265; CHECK-SSE1-NEXT:    movzwl 20(%rsi), %r14d
4266; CHECK-SSE1-NEXT:    xorw %r11w, %r14w
4267; CHECK-SSE1-NEXT:    movzwl 22(%rsi), %ebp
4268; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload
4269; CHECK-SSE1-NEXT:    movzwl 24(%rsi), %ebx
4270; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload
4271; CHECK-SSE1-NEXT:    movzwl 26(%rsi), %r11d
4272; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload
4273; CHECK-SSE1-NEXT:    movzwl 28(%rsi), %edi
4274; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload
4275; CHECK-SSE1-NEXT:    movzwl 30(%rsi), %esi
4276; CHECK-SSE1-NEXT:    xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload
4277; CHECK-SSE1-NEXT:    andw 30(%r9), %si
4278; CHECK-SSE1-NEXT:    andw 28(%r9), %di
4279; CHECK-SSE1-NEXT:    andw 26(%r9), %r11w
4280; CHECK-SSE1-NEXT:    andw 24(%r9), %bx
4281; CHECK-SSE1-NEXT:    andw 22(%r9), %bp
4282; CHECK-SSE1-NEXT:    andw 20(%r9), %r14w
4283; CHECK-SSE1-NEXT:    andw 18(%r9), %r15w
4284; CHECK-SSE1-NEXT:    andw 16(%r9), %r12w
4285; CHECK-SSE1-NEXT:    andw 14(%r9), %r13w
4286; CHECK-SSE1-NEXT:    andw 12(%r9), %dx
4287; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4288; CHECK-SSE1-NEXT:    andw 10(%r9), %r8w
4289; CHECK-SSE1-NEXT:    movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4290; CHECK-SSE1-NEXT:    movl %ecx, %edx
4291; CHECK-SSE1-NEXT:    andw 8(%r9), %dx
4292; CHECK-SSE1-NEXT:    andw 6(%r9), %ax
4293; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4294; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload
4295; CHECK-SSE1-NEXT:    andw 4(%r9), %r8w
4296; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
4297; CHECK-SSE1-NEXT:    andw 2(%r9), %ax
4298; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
4299; CHECK-SSE1-NEXT:    andw (%r9), %cx
4300; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload
4301; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4302; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
4303; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4304; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
4305; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload
4306; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload
4307; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
4308; CHECK-SSE1-NEXT:    movl %edx, %ecx
4309; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
4310; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
4311; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
4312; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
4313; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload
4314; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload
4315; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload
4316; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload
4317; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
4318; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
4319; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
4320; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload
4321; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload
4322; CHECK-SSE1-NEXT:    movw %si, 30(%r10)
4323; CHECK-SSE1-NEXT:    movw %di, 28(%r10)
4324; CHECK-SSE1-NEXT:    movw %r11w, 26(%r10)
4325; CHECK-SSE1-NEXT:    movw %bx, 24(%r10)
4326; CHECK-SSE1-NEXT:    movw %bp, 22(%r10)
4327; CHECK-SSE1-NEXT:    movw %r14w, 20(%r10)
4328; CHECK-SSE1-NEXT:    movw %r15w, 18(%r10)
4329; CHECK-SSE1-NEXT:    movw %r12w, 16(%r10)
4330; CHECK-SSE1-NEXT:    movw %r13w, 14(%r10)
4331; CHECK-SSE1-NEXT:    movw %ax, 12(%r10)
4332; CHECK-SSE1-NEXT:    movw %dx, 10(%r10)
4333; CHECK-SSE1-NEXT:    movw %cx, 8(%r10)
4334; CHECK-SSE1-NEXT:    movw %r9w, 6(%r10)
4335; CHECK-SSE1-NEXT:    movw %r8w, 4(%r10)
4336; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
4337; CHECK-SSE1-NEXT:    movw %ax, 2(%r10)
4338; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
4339; CHECK-SSE1-NEXT:    movw %ax, (%r10)
4340; CHECK-SSE1-NEXT:    movq %r10, %rax
4341; CHECK-SSE1-NEXT:    popq %rbx
4342; CHECK-SSE1-NEXT:    popq %r12
4343; CHECK-SSE1-NEXT:    popq %r13
4344; CHECK-SSE1-NEXT:    popq %r14
4345; CHECK-SSE1-NEXT:    popq %r15
4346; CHECK-SSE1-NEXT:    popq %rbp
4347; CHECK-SSE1-NEXT:    retq
4348;
4349; CHECK-SSE2-LABEL: in_v16i16:
4350; CHECK-SSE2:       # %bb.0:
4351; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
4352; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
4353; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm2
4354; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm2
4355; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
4356; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
4357; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm2
4358; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm2
4359; CHECK-SSE2-NEXT:    andps 16(%rdi), %xmm1
4360; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
4361; CHECK-SSE2-NEXT:    retq
4362;
4363; CHECK-XOP-LABEL: in_v16i16:
4364; CHECK-XOP:       # %bb.0:
4365; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
4366; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
4367; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
4368; CHECK-XOP-NEXT:    retq
4369  %x = load <16 x i16>, <16 x i16> *%px, align 32
4370  %y = load <16 x i16>, <16 x i16> *%py, align 32
4371  %mask = load <16 x i16>, <16 x i16> *%pmask, align 32
4372  %n0 = xor <16 x i16> %x, %y
4373  %n1 = and <16 x i16> %n0, %mask
4374  %r = xor <16 x i16> %n1, %y
4375  ret <16 x i16> %r
4376}
4377
; in_v8i32: masked merge of two <8 x i32> vectors written in the xor/and/xor
; form. %x, %y and %mask are loaded through %px, %py and %pmask (each load is
; align 32) and the returned value is ((x ^ y) & mask) ^ y, i.e. every result
; bit comes from %x where the mask bit is set and from %y where it is clear.
; The CHECK lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
4378define <8 x i32> @in_v8i32(<8 x i32> *%px, <8 x i32> *%py, <8 x i32> *%pmask) nounwind {
4379; CHECK-BASELINE-LABEL: in_v8i32:
4380; CHECK-BASELINE:       # %bb.0:
4381; CHECK-BASELINE-NEXT:    pushq %rbp
4382; CHECK-BASELINE-NEXT:    pushq %r15
4383; CHECK-BASELINE-NEXT:    pushq %r14
4384; CHECK-BASELINE-NEXT:    pushq %r13
4385; CHECK-BASELINE-NEXT:    pushq %r12
4386; CHECK-BASELINE-NEXT:    pushq %rbx
4387; CHECK-BASELINE-NEXT:    movl 28(%rdx), %r15d
4388; CHECK-BASELINE-NEXT:    movl 24(%rdx), %r14d
4389; CHECK-BASELINE-NEXT:    movl 20(%rdx), %r10d
4390; CHECK-BASELINE-NEXT:    movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4391; CHECK-BASELINE-NEXT:    movl 16(%rdx), %eax
4392; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4393; CHECK-BASELINE-NEXT:    movl 12(%rdx), %ebp
4394; CHECK-BASELINE-NEXT:    movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4395; CHECK-BASELINE-NEXT:    movl 8(%rdx), %ebx
4396; CHECK-BASELINE-NEXT:    movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4397; CHECK-BASELINE-NEXT:    movl (%rdx), %r12d
4398; CHECK-BASELINE-NEXT:    movl 4(%rdx), %r13d
4399; CHECK-BASELINE-NEXT:    movl (%rsi), %r11d
4400; CHECK-BASELINE-NEXT:    xorl %r12d, %r11d
4401; CHECK-BASELINE-NEXT:    movl 4(%rsi), %r9d
4402; CHECK-BASELINE-NEXT:    xorl %r13d, %r9d
4403; CHECK-BASELINE-NEXT:    movl 8(%rsi), %r8d
4404; CHECK-BASELINE-NEXT:    xorl %ebx, %r8d
4405; CHECK-BASELINE-NEXT:    movl 12(%rsi), %ebx
4406; CHECK-BASELINE-NEXT:    xorl %ebp, %ebx
4407; CHECK-BASELINE-NEXT:    movl 16(%rsi), %ebp
4408; CHECK-BASELINE-NEXT:    xorl %eax, %ebp
4409; CHECK-BASELINE-NEXT:    movl 20(%rsi), %edx
4410; CHECK-BASELINE-NEXT:    xorl %r10d, %edx
4411; CHECK-BASELINE-NEXT:    movl 24(%rsi), %eax
4412; CHECK-BASELINE-NEXT:    xorl %r14d, %eax
4413; CHECK-BASELINE-NEXT:    movl 28(%rsi), %esi
4414; CHECK-BASELINE-NEXT:    xorl %r15d, %esi
4415; CHECK-BASELINE-NEXT:    andl 28(%rcx), %esi
4416; CHECK-BASELINE-NEXT:    andl 24(%rcx), %eax
4417; CHECK-BASELINE-NEXT:    andl 20(%rcx), %edx
4418; CHECK-BASELINE-NEXT:    andl 16(%rcx), %ebp
4419; CHECK-BASELINE-NEXT:    andl 12(%rcx), %ebx
4420; CHECK-BASELINE-NEXT:    andl 8(%rcx), %r8d
4421; CHECK-BASELINE-NEXT:    andl 4(%rcx), %r9d
4422; CHECK-BASELINE-NEXT:    andl (%rcx), %r11d
4423; CHECK-BASELINE-NEXT:    xorl %r12d, %r11d
4424; CHECK-BASELINE-NEXT:    xorl %r13d, %r9d
4425; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
4426; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
4427; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
4428; CHECK-BASELINE-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
4429; CHECK-BASELINE-NEXT:    xorl %r14d, %eax
4430; CHECK-BASELINE-NEXT:    xorl %r15d, %esi
4431; CHECK-BASELINE-NEXT:    movl %esi, 28(%rdi)
4432; CHECK-BASELINE-NEXT:    movl %eax, 24(%rdi)
4433; CHECK-BASELINE-NEXT:    movl %edx, 20(%rdi)
4434; CHECK-BASELINE-NEXT:    movl %ebp, 16(%rdi)
4435; CHECK-BASELINE-NEXT:    movl %ebx, 12(%rdi)
4436; CHECK-BASELINE-NEXT:    movl %r8d, 8(%rdi)
4437; CHECK-BASELINE-NEXT:    movl %r9d, 4(%rdi)
4438; CHECK-BASELINE-NEXT:    movl %r11d, (%rdi)
4439; CHECK-BASELINE-NEXT:    movq %rdi, %rax
4440; CHECK-BASELINE-NEXT:    popq %rbx
4441; CHECK-BASELINE-NEXT:    popq %r12
4442; CHECK-BASELINE-NEXT:    popq %r13
4443; CHECK-BASELINE-NEXT:    popq %r14
4444; CHECK-BASELINE-NEXT:    popq %r15
4445; CHECK-BASELINE-NEXT:    popq %rbp
4446; CHECK-BASELINE-NEXT:    retq
4447;
4448; CHECK-SSE1-LABEL: in_v8i32:
4449; CHECK-SSE1:       # %bb.0:
4450; CHECK-SSE1-NEXT:    pushq %rbp
4451; CHECK-SSE1-NEXT:    pushq %r15
4452; CHECK-SSE1-NEXT:    pushq %r14
4453; CHECK-SSE1-NEXT:    pushq %r13
4454; CHECK-SSE1-NEXT:    pushq %r12
4455; CHECK-SSE1-NEXT:    pushq %rbx
4456; CHECK-SSE1-NEXT:    movl 28(%rdx), %r15d
4457; CHECK-SSE1-NEXT:    movl 24(%rdx), %r14d
4458; CHECK-SSE1-NEXT:    movl 20(%rdx), %r10d
4459; CHECK-SSE1-NEXT:    movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4460; CHECK-SSE1-NEXT:    movl 16(%rdx), %eax
4461; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4462; CHECK-SSE1-NEXT:    movl 12(%rdx), %ebp
4463; CHECK-SSE1-NEXT:    movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4464; CHECK-SSE1-NEXT:    movl 8(%rdx), %ebx
4465; CHECK-SSE1-NEXT:    movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
4466; CHECK-SSE1-NEXT:    movl (%rdx), %r12d
4467; CHECK-SSE1-NEXT:    movl 4(%rdx), %r13d
4468; CHECK-SSE1-NEXT:    movl (%rsi), %r11d
4469; CHECK-SSE1-NEXT:    xorl %r12d, %r11d
4470; CHECK-SSE1-NEXT:    movl 4(%rsi), %r9d
4471; CHECK-SSE1-NEXT:    xorl %r13d, %r9d
4472; CHECK-SSE1-NEXT:    movl 8(%rsi), %r8d
4473; CHECK-SSE1-NEXT:    xorl %ebx, %r8d
4474; CHECK-SSE1-NEXT:    movl 12(%rsi), %ebx
4475; CHECK-SSE1-NEXT:    xorl %ebp, %ebx
4476; CHECK-SSE1-NEXT:    movl 16(%rsi), %ebp
4477; CHECK-SSE1-NEXT:    xorl %eax, %ebp
4478; CHECK-SSE1-NEXT:    movl 20(%rsi), %edx
4479; CHECK-SSE1-NEXT:    xorl %r10d, %edx
4480; CHECK-SSE1-NEXT:    movl 24(%rsi), %eax
4481; CHECK-SSE1-NEXT:    xorl %r14d, %eax
4482; CHECK-SSE1-NEXT:    movl 28(%rsi), %esi
4483; CHECK-SSE1-NEXT:    xorl %r15d, %esi
4484; CHECK-SSE1-NEXT:    andl 28(%rcx), %esi
4485; CHECK-SSE1-NEXT:    andl 24(%rcx), %eax
4486; CHECK-SSE1-NEXT:    andl 20(%rcx), %edx
4487; CHECK-SSE1-NEXT:    andl 16(%rcx), %ebp
4488; CHECK-SSE1-NEXT:    andl 12(%rcx), %ebx
4489; CHECK-SSE1-NEXT:    andl 8(%rcx), %r8d
4490; CHECK-SSE1-NEXT:    andl 4(%rcx), %r9d
4491; CHECK-SSE1-NEXT:    andl (%rcx), %r11d
4492; CHECK-SSE1-NEXT:    xorl %r12d, %r11d
4493; CHECK-SSE1-NEXT:    xorl %r13d, %r9d
4494; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
4495; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
4496; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
4497; CHECK-SSE1-NEXT:    xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
4498; CHECK-SSE1-NEXT:    xorl %r14d, %eax
4499; CHECK-SSE1-NEXT:    xorl %r15d, %esi
4500; CHECK-SSE1-NEXT:    movl %esi, 28(%rdi)
4501; CHECK-SSE1-NEXT:    movl %eax, 24(%rdi)
4502; CHECK-SSE1-NEXT:    movl %edx, 20(%rdi)
4503; CHECK-SSE1-NEXT:    movl %ebp, 16(%rdi)
4504; CHECK-SSE1-NEXT:    movl %ebx, 12(%rdi)
4505; CHECK-SSE1-NEXT:    movl %r8d, 8(%rdi)
4506; CHECK-SSE1-NEXT:    movl %r9d, 4(%rdi)
4507; CHECK-SSE1-NEXT:    movl %r11d, (%rdi)
4508; CHECK-SSE1-NEXT:    movq %rdi, %rax
4509; CHECK-SSE1-NEXT:    popq %rbx
4510; CHECK-SSE1-NEXT:    popq %r12
4511; CHECK-SSE1-NEXT:    popq %r13
4512; CHECK-SSE1-NEXT:    popq %r14
4513; CHECK-SSE1-NEXT:    popq %r15
4514; CHECK-SSE1-NEXT:    popq %rbp
4515; CHECK-SSE1-NEXT:    retq
4516;
4517; CHECK-SSE2-LABEL: in_v8i32:
4518; CHECK-SSE2:       # %bb.0:
4519; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
4520; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
4521; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm2
4522; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm2
4523; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
4524; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
4525; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm2
4526; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm2
4527; CHECK-SSE2-NEXT:    andps 16(%rdi), %xmm1
4528; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
4529; CHECK-SSE2-NEXT:    retq
4530;
4531; CHECK-XOP-LABEL: in_v8i32:
4532; CHECK-XOP:       # %bb.0:
4533; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
4534; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
4535; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
4536; CHECK-XOP-NEXT:    retq
4537  %x = load <8 x i32>, <8 x i32> *%px, align 32
4538  %y = load <8 x i32>, <8 x i32> *%py, align 32
4539  %mask = load <8 x i32>, <8 x i32> *%pmask, align 32
4540  %n0 = xor <8 x i32> %x, %y ; bits in which %x and %y differ
4541  %n1 = and <8 x i32> %n0, %mask ; keep the differing bits only where the mask is set
4542  %r = xor <8 x i32> %n1, %y ; flip those bits in %y, yielding %x's bits under the mask
4543  ret <8 x i32> %r
4544}
4545
; in_v4i64: masked merge of two <4 x i64> vectors written in the xor/and/xor
; form. %x, %y and %mask are loaded through %px, %py and %pmask (each load is
; align 32) and the returned value is ((x ^ y) & mask) ^ y, i.e. every result
; bit comes from %x where the mask bit is set and from %y where it is clear.
; The CHECK lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
4546define <4 x i64> @in_v4i64(<4 x i64> *%px, <4 x i64> *%py, <4 x i64> *%pmask) nounwind {
4547; CHECK-BASELINE-LABEL: in_v4i64:
4548; CHECK-BASELINE:       # %bb.0:
4549; CHECK-BASELINE-NEXT:    pushq %rbx
4550; CHECK-BASELINE-NEXT:    movq %rdi, %rax
4551; CHECK-BASELINE-NEXT:    movq 24(%rdx), %r8
4552; CHECK-BASELINE-NEXT:    movq 16(%rdx), %r9
4553; CHECK-BASELINE-NEXT:    movq (%rdx), %r11
4554; CHECK-BASELINE-NEXT:    movq 8(%rdx), %r10
4555; CHECK-BASELINE-NEXT:    movq (%rsi), %rdx
4556; CHECK-BASELINE-NEXT:    xorq %r11, %rdx
4557; CHECK-BASELINE-NEXT:    movq 8(%rsi), %rdi
4558; CHECK-BASELINE-NEXT:    xorq %r10, %rdi
4559; CHECK-BASELINE-NEXT:    movq 16(%rsi), %rbx
4560; CHECK-BASELINE-NEXT:    xorq %r9, %rbx
4561; CHECK-BASELINE-NEXT:    movq 24(%rsi), %rsi
4562; CHECK-BASELINE-NEXT:    xorq %r8, %rsi
4563; CHECK-BASELINE-NEXT:    andq 24(%rcx), %rsi
4564; CHECK-BASELINE-NEXT:    andq 16(%rcx), %rbx
4565; CHECK-BASELINE-NEXT:    andq 8(%rcx), %rdi
4566; CHECK-BASELINE-NEXT:    andq (%rcx), %rdx
4567; CHECK-BASELINE-NEXT:    xorq %r11, %rdx
4568; CHECK-BASELINE-NEXT:    xorq %r10, %rdi
4569; CHECK-BASELINE-NEXT:    xorq %r9, %rbx
4570; CHECK-BASELINE-NEXT:    xorq %r8, %rsi
4571; CHECK-BASELINE-NEXT:    movq %rsi, 24(%rax)
4572; CHECK-BASELINE-NEXT:    movq %rbx, 16(%rax)
4573; CHECK-BASELINE-NEXT:    movq %rdi, 8(%rax)
4574; CHECK-BASELINE-NEXT:    movq %rdx, (%rax)
4575; CHECK-BASELINE-NEXT:    popq %rbx
4576; CHECK-BASELINE-NEXT:    retq
4577;
4578; CHECK-SSE1-LABEL: in_v4i64:
4579; CHECK-SSE1:       # %bb.0:
4580; CHECK-SSE1-NEXT:    pushq %rbx
4581; CHECK-SSE1-NEXT:    movq %rdi, %rax
4582; CHECK-SSE1-NEXT:    movq 24(%rdx), %r8
4583; CHECK-SSE1-NEXT:    movq 16(%rdx), %r9
4584; CHECK-SSE1-NEXT:    movq (%rdx), %r11
4585; CHECK-SSE1-NEXT:    movq 8(%rdx), %r10
4586; CHECK-SSE1-NEXT:    movq (%rsi), %rdx
4587; CHECK-SSE1-NEXT:    xorq %r11, %rdx
4588; CHECK-SSE1-NEXT:    movq 8(%rsi), %rdi
4589; CHECK-SSE1-NEXT:    xorq %r10, %rdi
4590; CHECK-SSE1-NEXT:    movq 16(%rsi), %rbx
4591; CHECK-SSE1-NEXT:    xorq %r9, %rbx
4592; CHECK-SSE1-NEXT:    movq 24(%rsi), %rsi
4593; CHECK-SSE1-NEXT:    xorq %r8, %rsi
4594; CHECK-SSE1-NEXT:    andq 24(%rcx), %rsi
4595; CHECK-SSE1-NEXT:    andq 16(%rcx), %rbx
4596; CHECK-SSE1-NEXT:    andq 8(%rcx), %rdi
4597; CHECK-SSE1-NEXT:    andq (%rcx), %rdx
4598; CHECK-SSE1-NEXT:    xorq %r11, %rdx
4599; CHECK-SSE1-NEXT:    xorq %r10, %rdi
4600; CHECK-SSE1-NEXT:    xorq %r9, %rbx
4601; CHECK-SSE1-NEXT:    xorq %r8, %rsi
4602; CHECK-SSE1-NEXT:    movq %rsi, 24(%rax)
4603; CHECK-SSE1-NEXT:    movq %rbx, 16(%rax)
4604; CHECK-SSE1-NEXT:    movq %rdi, 8(%rax)
4605; CHECK-SSE1-NEXT:    movq %rdx, (%rax)
4606; CHECK-SSE1-NEXT:    popq %rbx
4607; CHECK-SSE1-NEXT:    retq
4608;
4609; CHECK-SSE2-LABEL: in_v4i64:
4610; CHECK-SSE2:       # %bb.0:
4611; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
4612; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
4613; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm2
4614; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm2
4615; CHECK-SSE2-NEXT:    andps (%rdi), %xmm0
4616; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
4617; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm2
4618; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm2
4619; CHECK-SSE2-NEXT:    andps 16(%rdi), %xmm1
4620; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
4621; CHECK-SSE2-NEXT:    retq
4622;
4623; CHECK-XOP-LABEL: in_v4i64:
4624; CHECK-XOP:       # %bb.0:
4625; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
4626; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
4627; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
4628; CHECK-XOP-NEXT:    retq
4629  %x = load <4 x i64>, <4 x i64> *%px, align 32
4630  %y = load <4 x i64>, <4 x i64> *%py, align 32
4631  %mask = load <4 x i64>, <4 x i64> *%pmask, align 32
4632  %n0 = xor <4 x i64> %x, %y ; bits in which %x and %y differ
4633  %n1 = and <4 x i64> %n0, %mask ; keep the differing bits only where the mask is set
4634  %r = xor <4 x i64> %n1, %y ; flip those bits in %y, yielding %x's bits under the mask
4635  ret <4 x i64> %r
4636}
4637