• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2     | FileCheck %s --check-prefix=X86-SSE2
3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2   | FileCheck %s --check-prefix=X86-SSE42
4; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx      | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
5; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2     | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2   | FileCheck %s --check-prefix=X64-SSE2
7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
8; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx    | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
9; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2   | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
10; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512
11
12;
13; 128-bit Vectors
14;
15
; Unsigned-max reduction of a <2 x i64>: result = umax(a0[0], a0[1]).
; The CHECK lines below are autogenerated (see NOTE at top of file) and show
; the per-target lowering: SSE2 has no 64-bit unsigned compare, so it XORs in
; the sign bit and emulates the compare with pcmpgtd/pcmpeqd; SSE4.2/AVX use
; a sign-bias + pcmpgtq + blendvpd; AVX512 lowers straight to vpmaxuq.
16define i64 @test_reduce_v2i64(<2 x i64> %a0) {
17; X86-SSE2-LABEL: test_reduce_v2i64:
18; X86-SSE2:       ## %bb.0:
19; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
20; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
21; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
22; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
23; X86-SSE2-NEXT:    pxor %xmm1, %xmm2
24; X86-SSE2-NEXT:    movdqa %xmm3, %xmm4
25; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
26; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
27; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
28; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
29; X86-SSE2-NEXT:    pand %xmm5, %xmm2
30; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
31; X86-SSE2-NEXT:    por %xmm2, %xmm3
32; X86-SSE2-NEXT:    pand %xmm3, %xmm0
33; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
34; X86-SSE2-NEXT:    por %xmm0, %xmm3
35; X86-SSE2-NEXT:    movd %xmm3, %eax
36; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
37; X86-SSE2-NEXT:    movd %xmm0, %edx
38; X86-SSE2-NEXT:    retl
39;
40; X86-SSE42-LABEL: test_reduce_v2i64:
41; X86-SSE42:       ## %bb.0:
42; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
43; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
44; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
45; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
46; X86-SSE42-NEXT:    pxor %xmm2, %xmm3
47; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
48; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
49; X86-SSE42-NEXT:    movd %xmm2, %eax
50; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
51; X86-SSE42-NEXT:    retl
52;
53; X86-AVX1-LABEL: test_reduce_v2i64:
54; X86-AVX1:       ## %bb.0:
55; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
56; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
57; X86-AVX1-NEXT:    ## xmm2 = mem[0,0]
58; X86-AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm3
59; X86-AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm2
60; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
61; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
62; X86-AVX1-NEXT:    vmovd %xmm0, %eax
63; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
64; X86-AVX1-NEXT:    retl
65;
66; X86-AVX2-LABEL: test_reduce_v2i64:
67; X86-AVX2:       ## %bb.0:
68; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
69; X86-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
70; X86-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
71; X86-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
72; X86-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
73; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
74; X86-AVX2-NEXT:    vmovd %xmm0, %eax
75; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
76; X86-AVX2-NEXT:    retl
77;
78; X64-SSE2-LABEL: test_reduce_v2i64:
79; X64-SSE2:       ## %bb.0:
80; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
81; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
82; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
83; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
84; X64-SSE2-NEXT:    pxor %xmm1, %xmm2
85; X64-SSE2-NEXT:    movdqa %xmm3, %xmm4
86; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
87; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
88; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
89; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
90; X64-SSE2-NEXT:    pand %xmm5, %xmm2
91; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
92; X64-SSE2-NEXT:    por %xmm2, %xmm3
93; X64-SSE2-NEXT:    pand %xmm3, %xmm0
94; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
95; X64-SSE2-NEXT:    por %xmm0, %xmm3
96; X64-SSE2-NEXT:    movq %xmm3, %rax
97; X64-SSE2-NEXT:    retq
98;
99; X64-SSE42-LABEL: test_reduce_v2i64:
100; X64-SSE42:       ## %bb.0:
101; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
102; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
103; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
104; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
105; X64-SSE42-NEXT:    pxor %xmm2, %xmm3
106; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
107; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
108; X64-SSE42-NEXT:    movq %xmm2, %rax
109; X64-SSE42-NEXT:    retq
110;
111; X64-AVX1-LABEL: test_reduce_v2i64:
112; X64-AVX1:       ## %bb.0:
113; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
114; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
115; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
116; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
117; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
118; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
119; X64-AVX1-NEXT:    vmovq %xmm0, %rax
120; X64-AVX1-NEXT:    retq
121;
122; X64-AVX2-LABEL: test_reduce_v2i64:
123; X64-AVX2:       ## %bb.0:
124; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
125; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
126; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
127; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
128; X64-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
129; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
130; X64-AVX2-NEXT:    vmovq %xmm0, %rax
131; X64-AVX2-NEXT:    retq
132;
133; X64-AVX512-LABEL: test_reduce_v2i64:
134; X64-AVX512:       ## %bb.0:
135; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
136; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
137; X64-AVX512-NEXT:    vmovq %xmm0, %rax
138; X64-AVX512-NEXT:    retq
139  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> ; move the high lane into lane 0
140  %2 = icmp ugt <2 x i64> %a0, %1
141  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1 ; elementwise umax
142  %4 = extractelement <2 x i64> %3, i32 0 ; lane 0 now holds the reduction
143  ret i64 %4
144}
145
; Unsigned-max reduction of a <4 x i32> via log2(4) = 2 shuffle+umax steps.
; SSE2 lacks pmaxud, so each step is a sign-bit-biased pcmpgtd plus a
; pand/pandn/por select; SSE4.2 and AVX lower directly to pmaxud/vpmaxud.
146define i32 @test_reduce_v4i32(<4 x i32> %a0) {
147; X86-SSE2-LABEL: test_reduce_v4i32:
148; X86-SSE2:       ## %bb.0:
149; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
150; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
151; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
152; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
153; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
154; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
155; X86-SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
156; X86-SSE2-NEXT:    pand %xmm3, %xmm0
157; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
158; X86-SSE2-NEXT:    por %xmm0, %xmm3
159; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
160; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
161; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
162; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
163; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
164; X86-SSE2-NEXT:    pand %xmm1, %xmm3
165; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
166; X86-SSE2-NEXT:    por %xmm3, %xmm1
167; X86-SSE2-NEXT:    movd %xmm1, %eax
168; X86-SSE2-NEXT:    retl
169;
170; X86-SSE42-LABEL: test_reduce_v4i32:
171; X86-SSE42:       ## %bb.0:
172; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
173; X86-SSE42-NEXT:    pmaxud %xmm0, %xmm1
174; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
175; X86-SSE42-NEXT:    pmaxud %xmm1, %xmm0
176; X86-SSE42-NEXT:    movd %xmm0, %eax
177; X86-SSE42-NEXT:    retl
178;
179; X86-AVX-LABEL: test_reduce_v4i32:
180; X86-AVX:       ## %bb.0:
181; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
182; X86-AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
183; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
184; X86-AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
185; X86-AVX-NEXT:    vmovd %xmm0, %eax
186; X86-AVX-NEXT:    retl
187;
188; X64-SSE2-LABEL: test_reduce_v4i32:
189; X64-SSE2:       ## %bb.0:
190; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
191; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
192; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
193; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
194; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
195; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
196; X64-SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
197; X64-SSE2-NEXT:    pand %xmm3, %xmm0
198; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
199; X64-SSE2-NEXT:    por %xmm0, %xmm3
200; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
201; X64-SSE2-NEXT:    movdqa %xmm3, %xmm1
202; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
203; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
204; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
205; X64-SSE2-NEXT:    pand %xmm1, %xmm3
206; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
207; X64-SSE2-NEXT:    por %xmm3, %xmm1
208; X64-SSE2-NEXT:    movd %xmm1, %eax
209; X64-SSE2-NEXT:    retq
210;
211; X64-SSE42-LABEL: test_reduce_v4i32:
212; X64-SSE42:       ## %bb.0:
213; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
214; X64-SSE42-NEXT:    pmaxud %xmm0, %xmm1
215; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
216; X64-SSE42-NEXT:    pmaxud %xmm1, %xmm0
217; X64-SSE42-NEXT:    movd %xmm0, %eax
218; X64-SSE42-NEXT:    retq
219;
220; X64-AVX-LABEL: test_reduce_v4i32:
221; X64-AVX:       ## %bb.0:
222; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
223; X64-AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
224; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
225; X64-AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
226; X64-AVX-NEXT:    vmovd %xmm0, %eax
227; X64-AVX-NEXT:    retq
228  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> ; fold high pair onto low pair
229  %2 = icmp ugt <4 x i32> %a0, %1
230  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1 ; elementwise umax
231  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; fold lane 1 onto lane 0
232  %5 = icmp ugt <4 x i32> %3, %4
233  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
234  %7 = extractelement <4 x i32> %6, i32 0
235  ret i32 %7
236}
237
; Unsigned-max reduction of an <8 x i16> (3 shuffle+umax steps in the IR).
; SSE2 computes umax(a,b) as psubusw(b,a)+a (saturating-subtract trick);
; SSE4.2/AVX invert the input and use phminposuw, since
; umax(x) == ~umin(~x), then re-invert the scalar with notl.
238define i16 @test_reduce_v8i16(<8 x i16> %a0) {
239; X86-SSE2-LABEL: test_reduce_v8i16:
240; X86-SSE2:       ## %bb.0:
241; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
242; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
243; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
244; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
245; X86-SSE2-NEXT:    psubusw %xmm1, %xmm0
246; X86-SSE2-NEXT:    paddw %xmm1, %xmm0
247; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
248; X86-SSE2-NEXT:    psrld $16, %xmm1
249; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
250; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
251; X86-SSE2-NEXT:    movd %xmm1, %eax
252; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
253; X86-SSE2-NEXT:    retl
254;
255; X86-SSE42-LABEL: test_reduce_v8i16:
256; X86-SSE42:       ## %bb.0:
257; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
258; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
259; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
260; X86-SSE42-NEXT:    movd %xmm0, %eax
261; X86-SSE42-NEXT:    notl %eax
262; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
263; X86-SSE42-NEXT:    retl
264;
265; X86-AVX-LABEL: test_reduce_v8i16:
266; X86-AVX:       ## %bb.0:
267; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
268; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
269; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
270; X86-AVX-NEXT:    vmovd %xmm0, %eax
271; X86-AVX-NEXT:    notl %eax
272; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
273; X86-AVX-NEXT:    retl
274;
275; X64-SSE2-LABEL: test_reduce_v8i16:
276; X64-SSE2:       ## %bb.0:
277; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
278; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
279; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
280; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
281; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
282; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
283; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
284; X64-SSE2-NEXT:    psrld $16, %xmm1
285; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
286; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
287; X64-SSE2-NEXT:    movd %xmm1, %eax
288; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
289; X64-SSE2-NEXT:    retq
290;
291; X64-SSE42-LABEL: test_reduce_v8i16:
292; X64-SSE42:       ## %bb.0:
293; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
294; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
295; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
296; X64-SSE42-NEXT:    movd %xmm0, %eax
297; X64-SSE42-NEXT:    notl %eax
298; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
299; X64-SSE42-NEXT:    retq
300;
301; X64-AVX1-LABEL: test_reduce_v8i16:
302; X64-AVX1:       ## %bb.0:
303; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
304; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
305; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
306; X64-AVX1-NEXT:    vmovd %xmm0, %eax
307; X64-AVX1-NEXT:    notl %eax
308; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
309; X64-AVX1-NEXT:    retq
310;
311; X64-AVX2-LABEL: test_reduce_v8i16:
312; X64-AVX2:       ## %bb.0:
313; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
314; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
315; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
316; X64-AVX2-NEXT:    vmovd %xmm0, %eax
317; X64-AVX2-NEXT:    notl %eax
318; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
319; X64-AVX2-NEXT:    retq
320;
321; X64-AVX512-LABEL: test_reduce_v8i16:
322; X64-AVX512:       ## %bb.0:
323; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
324; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
325; X64-AVX512-NEXT:    vmovd %xmm0, %eax
326; X64-AVX512-NEXT:    notl %eax
327; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
328; X64-AVX512-NEXT:    retq
329  %1  = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> ; fold high 4 lanes onto low 4
330  %2  = icmp ugt <8 x i16> %a0, %1
331  %3  = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1 ; elementwise umax
332  %4  = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
333  %5  = icmp ugt <8 x i16> %3, %4
334  %6  = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
335  %7  = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
336  %8  = icmp ugt <8 x i16> %6, %7
337  %9  = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
338  %10 = extractelement <8 x i16> %9, i32 0 ; lane 0 holds the reduction
339  ret i16 %10
341
; Unsigned-max reduction of a <16 x i8> (4 shuffle+umax steps in the IR).
; SSE2 already has a byte umax (pmaxub) so it reduces with shuffles/shifts;
; SSE4.2/AVX invert the input (umax(x) == ~umin(~x)), fold bytes to words
; with psrlw+pminub, finish with phminposuw, and re-invert with notb.
342define i8 @test_reduce_v16i8(<16 x i8> %a0) {
343; X86-SSE2-LABEL: test_reduce_v16i8:
344; X86-SSE2:       ## %bb.0:
345; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
346; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
347; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
348; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
349; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
350; X86-SSE2-NEXT:    psrld $16, %xmm1
351; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
352; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
353; X86-SSE2-NEXT:    psrlw $8, %xmm0
354; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
355; X86-SSE2-NEXT:    movd %xmm0, %eax
356; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
357; X86-SSE2-NEXT:    retl
358;
359; X86-SSE42-LABEL: test_reduce_v16i8:
360; X86-SSE42:       ## %bb.0:
361; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
362; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
363; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
364; X86-SSE42-NEXT:    psrlw $8, %xmm0
365; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
366; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
367; X86-SSE42-NEXT:    movd %xmm0, %eax
368; X86-SSE42-NEXT:    notb %al
369; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
370; X86-SSE42-NEXT:    retl
371;
372; X86-AVX-LABEL: test_reduce_v16i8:
373; X86-AVX:       ## %bb.0:
374; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
375; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
376; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
377; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
378; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
379; X86-AVX-NEXT:    vmovd %xmm0, %eax
380; X86-AVX-NEXT:    notb %al
381; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
382; X86-AVX-NEXT:    retl
383;
384; X64-SSE2-LABEL: test_reduce_v16i8:
385; X64-SSE2:       ## %bb.0:
386; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
387; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
388; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
389; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
390; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
391; X64-SSE2-NEXT:    psrld $16, %xmm1
392; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
393; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
394; X64-SSE2-NEXT:    psrlw $8, %xmm0
395; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
396; X64-SSE2-NEXT:    movd %xmm0, %eax
397; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
398; X64-SSE2-NEXT:    retq
399;
400; X64-SSE42-LABEL: test_reduce_v16i8:
401; X64-SSE42:       ## %bb.0:
402; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
403; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
404; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
405; X64-SSE42-NEXT:    psrlw $8, %xmm0
406; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
407; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
408; X64-SSE42-NEXT:    movd %xmm0, %eax
409; X64-SSE42-NEXT:    notb %al
410; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
411; X64-SSE42-NEXT:    retq
412;
413; X64-AVX1-LABEL: test_reduce_v16i8:
414; X64-AVX1:       ## %bb.0:
415; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
416; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
417; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
418; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
419; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
420; X64-AVX1-NEXT:    vmovd %xmm0, %eax
421; X64-AVX1-NEXT:    notb %al
422; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
423; X64-AVX1-NEXT:    retq
424;
425; X64-AVX2-LABEL: test_reduce_v16i8:
426; X64-AVX2:       ## %bb.0:
427; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
428; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
429; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
430; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
431; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
432; X64-AVX2-NEXT:    vmovd %xmm0, %eax
433; X64-AVX2-NEXT:    notb %al
434; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
435; X64-AVX2-NEXT:    retq
436;
437; X64-AVX512-LABEL: test_reduce_v16i8:
438; X64-AVX512:       ## %bb.0:
439; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
440; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
441; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
442; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
443; X64-AVX512-NEXT:    vmovd %xmm0, %eax
444; X64-AVX512-NEXT:    notb %al
445; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
446; X64-AVX512-NEXT:    retq
447  %1  = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; fold high 8 lanes onto low 8
448  %2  = icmp ugt <16 x i8> %a0, %1
449  %3  = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1 ; elementwise umax
450  %4  = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
451  %5  = icmp ugt <16 x i8> %3, %4
452  %6  = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
453  %7  = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
454  %8  = icmp ugt <16 x i8> %6, %7
455  %9  = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
456  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
457  %11 = icmp ugt <16 x i8> %9, %10
458  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
459  %13 = extractelement <16 x i8> %12, i32 0 ; lane 0 holds the reduction
460  ret i8 %13
461}
462
463;
464; 256-bit Vectors
465;
466
; Unsigned-max reduction of a <4 x i64>: fold the high <2 x i64> half onto
; the low half, then reduce the remaining pair. AVX targets first extract
; the upper 128-bit lane; SSE2 again emulates the 64-bit unsigned compare
; with sign-bit XOR + pcmpgtd/pcmpeqd; AVX512 uses vpmaxuq twice.
467define i64 @test_reduce_v4i64(<4 x i64> %a0) {
468; X86-SSE2-LABEL: test_reduce_v4i64:
469; X86-SSE2:       ## %bb.0:
470; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
471; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
472; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
473; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
474; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
475; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
476; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
477; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
478; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
479; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
480; X86-SSE2-NEXT:    pand %xmm6, %xmm3
481; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
482; X86-SSE2-NEXT:    por %xmm3, %xmm4
483; X86-SSE2-NEXT:    pand %xmm4, %xmm0
484; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
485; X86-SSE2-NEXT:    por %xmm0, %xmm4
486; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
487; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
488; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
489; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
490; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
491; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
492; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
493; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
494; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
495; X86-SSE2-NEXT:    pand %xmm5, %xmm1
496; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
497; X86-SSE2-NEXT:    por %xmm1, %xmm2
498; X86-SSE2-NEXT:    pand %xmm2, %xmm4
499; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
500; X86-SSE2-NEXT:    por %xmm4, %xmm2
501; X86-SSE2-NEXT:    movd %xmm2, %eax
502; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
503; X86-SSE2-NEXT:    movd %xmm0, %edx
504; X86-SSE2-NEXT:    retl
505;
506; X86-SSE42-LABEL: test_reduce_v4i64:
507; X86-SSE42:       ## %bb.0:
508; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
509; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
510; X86-SSE42-NEXT:    movdqa %xmm1, %xmm4
511; X86-SSE42-NEXT:    pxor %xmm3, %xmm4
512; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
513; X86-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
514; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
515; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
516; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
517; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
518; X86-SSE42-NEXT:    pxor %xmm2, %xmm3
519; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
520; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
521; X86-SSE42-NEXT:    movd %xmm2, %eax
522; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
523; X86-SSE42-NEXT:    retl
524;
525; X86-AVX1-LABEL: test_reduce_v4i64:
526; X86-AVX1:       ## %bb.0:
527; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
528; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
529; X86-AVX1-NEXT:    ## xmm2 = mem[0,0]
530; X86-AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm3
531; X86-AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm4
532; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
533; X86-AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
534; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
535; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
536; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
537; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
538; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
539; X86-AVX1-NEXT:    vmovd %xmm0, %eax
540; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
541; X86-AVX1-NEXT:    vzeroupper
542; X86-AVX1-NEXT:    retl
543;
544; X86-AVX2-LABEL: test_reduce_v4i64:
545; X86-AVX2:       ## %bb.0:
546; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
547; X86-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
548; X86-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
549; X86-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm4
550; X86-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
551; X86-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
552; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
553; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
554; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
555; X86-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
556; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
557; X86-AVX2-NEXT:    vmovd %xmm0, %eax
558; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
559; X86-AVX2-NEXT:    vzeroupper
560; X86-AVX2-NEXT:    retl
561;
562; X64-SSE2-LABEL: test_reduce_v4i64:
563; X64-SSE2:       ## %bb.0:
564; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
565; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
566; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
567; X64-SSE2-NEXT:    movdqa %xmm0, %xmm4
568; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
569; X64-SSE2-NEXT:    movdqa %xmm4, %xmm5
570; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
571; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
572; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
573; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
574; X64-SSE2-NEXT:    pand %xmm6, %xmm3
575; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
576; X64-SSE2-NEXT:    por %xmm3, %xmm4
577; X64-SSE2-NEXT:    pand %xmm4, %xmm0
578; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
579; X64-SSE2-NEXT:    por %xmm0, %xmm4
580; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
581; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
582; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
583; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
584; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
585; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
586; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
587; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
588; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
589; X64-SSE2-NEXT:    pand %xmm5, %xmm1
590; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
591; X64-SSE2-NEXT:    por %xmm1, %xmm2
592; X64-SSE2-NEXT:    pand %xmm2, %xmm4
593; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
594; X64-SSE2-NEXT:    por %xmm4, %xmm2
595; X64-SSE2-NEXT:    movq %xmm2, %rax
596; X64-SSE2-NEXT:    retq
597;
598; X64-SSE42-LABEL: test_reduce_v4i64:
599; X64-SSE42:       ## %bb.0:
600; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
601; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
602; X64-SSE42-NEXT:    movdqa %xmm1, %xmm4
603; X64-SSE42-NEXT:    pxor %xmm3, %xmm4
604; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
605; X64-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
606; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
607; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
608; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
609; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
610; X64-SSE42-NEXT:    pxor %xmm2, %xmm3
611; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
612; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
613; X64-SSE42-NEXT:    movq %xmm2, %rax
614; X64-SSE42-NEXT:    retq
615;
616; X64-AVX1-LABEL: test_reduce_v4i64:
617; X64-AVX1:       ## %bb.0:
618; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
619; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
620; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
621; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm4
622; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
623; X64-AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
624; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
625; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
626; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
627; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
628; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
629; X64-AVX1-NEXT:    vmovq %xmm0, %rax
630; X64-AVX1-NEXT:    vzeroupper
631; X64-AVX1-NEXT:    retq
632;
633; X64-AVX2-LABEL: test_reduce_v4i64:
634; X64-AVX2:       ## %bb.0:
635; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
636; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
637; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
638; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm4
639; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
640; X64-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
641; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
642; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
643; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
644; X64-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
645; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
646; X64-AVX2-NEXT:    vmovq %xmm0, %rax
647; X64-AVX2-NEXT:    vzeroupper
648; X64-AVX2-NEXT:    retq
649;
650; X64-AVX512-LABEL: test_reduce_v4i64:
651; X64-AVX512:       ## %bb.0:
652; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
653; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
654; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
655; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
656; X64-AVX512-NEXT:    vmovq %xmm0, %rax
657; X64-AVX512-NEXT:    vzeroupper
658; X64-AVX512-NEXT:    retq
659  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> ; fold high pair onto low pair
660  %2 = icmp ugt <4 x i64> %a0, %1
661  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1 ; elementwise umax
662  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; fold lane 1 onto lane 0
663  %5 = icmp ugt <4 x i64> %3, %4
664  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
665  %7 = extractelement <4 x i64> %6, i32 0
666  ret i64 %7
667}
668
; Unsigned-max reduction of <8 x i32> to a scalar i32. SSE2 has no unsigned
; dword max, so the checks below emulate icmp ugt by pxor-ing both operands
; with 0x80000000 (sign-bias) before the signed pcmpgtd, then blending with
; pand/pandn/por; SSE4.1+ targets use pmaxud directly on shuffled halves.
669define i32 @test_reduce_v8i32(<8 x i32> %a0) {
670; X86-SSE2-LABEL: test_reduce_v8i32:
671; X86-SSE2:       ## %bb.0:
672; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
673; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
674; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
675; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
676; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
677; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
678; X86-SSE2-NEXT:    pand %xmm4, %xmm0
679; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
680; X86-SSE2-NEXT:    por %xmm0, %xmm4
681; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
682; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
683; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
684; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
685; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
686; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
687; X86-SSE2-NEXT:    pand %xmm1, %xmm4
688; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
689; X86-SSE2-NEXT:    por %xmm4, %xmm1
690; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
691; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
692; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
693; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
694; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
695; X86-SSE2-NEXT:    pand %xmm3, %xmm1
696; X86-SSE2-NEXT:    pandn %xmm0, %xmm3
697; X86-SSE2-NEXT:    por %xmm1, %xmm3
698; X86-SSE2-NEXT:    movd %xmm3, %eax
699; X86-SSE2-NEXT:    retl
700;
701; X86-SSE42-LABEL: test_reduce_v8i32:
702; X86-SSE42:       ## %bb.0:
703; X86-SSE42-NEXT:    pmaxud %xmm1, %xmm0
704; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
705; X86-SSE42-NEXT:    pmaxud %xmm0, %xmm1
706; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
707; X86-SSE42-NEXT:    pmaxud %xmm1, %xmm0
708; X86-SSE42-NEXT:    movd %xmm0, %eax
709; X86-SSE42-NEXT:    retl
710;
711; X86-AVX1-LABEL: test_reduce_v8i32:
712; X86-AVX1:       ## %bb.0:
713; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
714; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
715; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
716; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
717; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
718; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
719; X86-AVX1-NEXT:    vmovd %xmm0, %eax
720; X86-AVX1-NEXT:    vzeroupper
721; X86-AVX1-NEXT:    retl
722;
723; X86-AVX2-LABEL: test_reduce_v8i32:
724; X86-AVX2:       ## %bb.0:
725; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
726; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
727; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
728; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
729; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
730; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
731; X86-AVX2-NEXT:    vmovd %xmm0, %eax
732; X86-AVX2-NEXT:    vzeroupper
733; X86-AVX2-NEXT:    retl
734;
735; X64-SSE2-LABEL: test_reduce_v8i32:
736; X64-SSE2:       ## %bb.0:
737; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
738; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
739; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
740; X64-SSE2-NEXT:    movdqa %xmm0, %xmm4
741; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
742; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
743; X64-SSE2-NEXT:    pand %xmm4, %xmm0
744; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
745; X64-SSE2-NEXT:    por %xmm0, %xmm4
746; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
747; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
748; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
749; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
750; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
751; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
752; X64-SSE2-NEXT:    pand %xmm1, %xmm4
753; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
754; X64-SSE2-NEXT:    por %xmm4, %xmm1
755; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
756; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
757; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
758; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
759; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
760; X64-SSE2-NEXT:    pand %xmm3, %xmm1
761; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
762; X64-SSE2-NEXT:    por %xmm1, %xmm3
763; X64-SSE2-NEXT:    movd %xmm3, %eax
764; X64-SSE2-NEXT:    retq
765;
766; X64-SSE42-LABEL: test_reduce_v8i32:
767; X64-SSE42:       ## %bb.0:
768; X64-SSE42-NEXT:    pmaxud %xmm1, %xmm0
769; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
770; X64-SSE42-NEXT:    pmaxud %xmm0, %xmm1
771; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
772; X64-SSE42-NEXT:    pmaxud %xmm1, %xmm0
773; X64-SSE42-NEXT:    movd %xmm0, %eax
774; X64-SSE42-NEXT:    retq
775;
776; X64-AVX1-LABEL: test_reduce_v8i32:
777; X64-AVX1:       ## %bb.0:
778; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
779; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
780; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
781; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
782; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
783; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
784; X64-AVX1-NEXT:    vmovd %xmm0, %eax
785; X64-AVX1-NEXT:    vzeroupper
786; X64-AVX1-NEXT:    retq
787;
788; X64-AVX2-LABEL: test_reduce_v8i32:
789; X64-AVX2:       ## %bb.0:
790; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
791; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
792; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
793; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
794; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
795; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
796; X64-AVX2-NEXT:    vmovd %xmm0, %eax
797; X64-AVX2-NEXT:    vzeroupper
798; X64-AVX2-NEXT:    retq
799;
800; X64-AVX512-LABEL: test_reduce_v8i32:
801; X64-AVX512:       ## %bb.0:
802; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
803; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
804; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
805; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
806; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
807; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
808; X64-AVX512-NEXT:    vmovd %xmm0, %eax
809; X64-AVX512-NEXT:    vzeroupper
810; X64-AVX512-NEXT:    retq
  ; Reference IR: halve the vector with shuffle + icmp ugt + select three
  ; times (4|2|1 live lanes), then extract lane 0 as the i32 result.
811  %1  = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
812  %2  = icmp ugt <8 x i32> %a0, %1
813  %3  = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
814  %4  = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
815  %5  = icmp ugt <8 x i32> %3, %4
816  %6  = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
817  %7  = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
818  %8  = icmp ugt <8 x i32> %6, %7
819  %9  = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
820  %10 = extractelement <8 x i32> %9, i32 0
821  ret i32 %10
822}
823
; Unsigned-max reduction of <16 x i16> to a scalar i16. Notable lowerings in
; the checks below: SSE2 has no pmaxuw, so umax(a,b) is built from saturating
; arithmetic (psubusw then paddw, i.e. a + usat(b - a)); SSE4.1+ inverts the
; vector and uses phminposuw (umax(x) == ~umin(~x)), undoing the NOT with a
; scalar notl. AVX512 folds the vector NOT into vpternlogq $15.
824define i16 @test_reduce_v16i16(<16 x i16> %a0) {
825; X86-SSE2-LABEL: test_reduce_v16i16:
826; X86-SSE2:       ## %bb.0:
827; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
828; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
829; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
830; X86-SSE2-NEXT:    psubusw %xmm1, %xmm0
831; X86-SSE2-NEXT:    paddw %xmm1, %xmm0
832; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
833; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
834; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
835; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
836; X86-SSE2-NEXT:    psrld $16, %xmm0
837; X86-SSE2-NEXT:    psubusw %xmm1, %xmm0
838; X86-SSE2-NEXT:    paddw %xmm1, %xmm0
839; X86-SSE2-NEXT:    movd %xmm0, %eax
840; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
841; X86-SSE2-NEXT:    retl
842;
843; X86-SSE42-LABEL: test_reduce_v16i16:
844; X86-SSE42:       ## %bb.0:
845; X86-SSE42-NEXT:    pmaxuw %xmm1, %xmm0
846; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
847; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
848; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
849; X86-SSE42-NEXT:    movd %xmm0, %eax
850; X86-SSE42-NEXT:    notl %eax
851; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
852; X86-SSE42-NEXT:    retl
853;
854; X86-AVX1-LABEL: test_reduce_v16i16:
855; X86-AVX1:       ## %bb.0:
856; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
857; X86-AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
858; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
859; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
860; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
861; X86-AVX1-NEXT:    vmovd %xmm0, %eax
862; X86-AVX1-NEXT:    notl %eax
863; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
864; X86-AVX1-NEXT:    vzeroupper
865; X86-AVX1-NEXT:    retl
866;
867; X86-AVX2-LABEL: test_reduce_v16i16:
868; X86-AVX2:       ## %bb.0:
869; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
870; X86-AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
871; X86-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
872; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
873; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
874; X86-AVX2-NEXT:    vmovd %xmm0, %eax
875; X86-AVX2-NEXT:    notl %eax
876; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
877; X86-AVX2-NEXT:    vzeroupper
878; X86-AVX2-NEXT:    retl
879;
880; X64-SSE2-LABEL: test_reduce_v16i16:
881; X64-SSE2:       ## %bb.0:
882; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
883; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
884; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
885; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
886; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
887; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
888; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
889; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
890; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
891; X64-SSE2-NEXT:    psrld $16, %xmm0
892; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
893; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
894; X64-SSE2-NEXT:    movd %xmm0, %eax
895; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
896; X64-SSE2-NEXT:    retq
897;
898; X64-SSE42-LABEL: test_reduce_v16i16:
899; X64-SSE42:       ## %bb.0:
900; X64-SSE42-NEXT:    pmaxuw %xmm1, %xmm0
901; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
902; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
903; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
904; X64-SSE42-NEXT:    movd %xmm0, %eax
905; X64-SSE42-NEXT:    notl %eax
906; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
907; X64-SSE42-NEXT:    retq
908;
909; X64-AVX1-LABEL: test_reduce_v16i16:
910; X64-AVX1:       ## %bb.0:
911; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
912; X64-AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
913; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
914; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
915; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
916; X64-AVX1-NEXT:    vmovd %xmm0, %eax
917; X64-AVX1-NEXT:    notl %eax
918; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
919; X64-AVX1-NEXT:    vzeroupper
920; X64-AVX1-NEXT:    retq
921;
922; X64-AVX2-LABEL: test_reduce_v16i16:
923; X64-AVX2:       ## %bb.0:
924; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
925; X64-AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
926; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
927; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
928; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
929; X64-AVX2-NEXT:    vmovd %xmm0, %eax
930; X64-AVX2-NEXT:    notl %eax
931; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
932; X64-AVX2-NEXT:    vzeroupper
933; X64-AVX2-NEXT:    retq
934;
935; X64-AVX512-LABEL: test_reduce_v16i16:
936; X64-AVX512:       ## %bb.0:
937; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
938; X64-AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
939; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
940; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
941; X64-AVX512-NEXT:    vmovd %xmm0, %eax
942; X64-AVX512-NEXT:    notl %eax
943; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
944; X64-AVX512-NEXT:    vzeroupper
945; X64-AVX512-NEXT:    retq
  ; Reference IR: halve the vector with shuffle + icmp ugt + select four
  ; times (8|4|2|1 live lanes), then extract lane 0 as the i16 result.
946  %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
947  %2  = icmp ugt <16 x i16> %a0, %1
948  %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
949  %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
950  %5  = icmp ugt <16 x i16> %3, %4
951  %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
952  %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
953  %8  = icmp ugt <16 x i16> %6, %7
954  %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
955  %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
956  %11 = icmp ugt <16 x i16> %9, %10
957  %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
958  %13 = extractelement <16 x i16> %12, i32 0
959  ret i16 %13
960}
961
; Unsigned-max reduction of <32 x i8> to a scalar i8. SSE2 can use pmaxub
; directly on shuffled/shifted halves; SSE4.1+ inverts the vector, folds the
; byte pairs with psrlw $8 + pminub so phminposuw (a word-min instruction)
; can finish the reduction, then flips the scalar back with notb. AVX512
; replaces the pcmpeqd/pxor NOT with vpternlogq $15.
962define i8 @test_reduce_v32i8(<32 x i8> %a0) {
963; X86-SSE2-LABEL: test_reduce_v32i8:
964; X86-SSE2:       ## %bb.0:
965; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
966; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
967; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
968; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
969; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
970; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
971; X86-SSE2-NEXT:    psrld $16, %xmm1
972; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
973; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
974; X86-SSE2-NEXT:    psrlw $8, %xmm0
975; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
976; X86-SSE2-NEXT:    movd %xmm0, %eax
977; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
978; X86-SSE2-NEXT:    retl
979;
980; X86-SSE42-LABEL: test_reduce_v32i8:
981; X86-SSE42:       ## %bb.0:
982; X86-SSE42-NEXT:    pmaxub %xmm1, %xmm0
983; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
984; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
985; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
986; X86-SSE42-NEXT:    psrlw $8, %xmm0
987; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
988; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
989; X86-SSE42-NEXT:    movd %xmm0, %eax
990; X86-SSE42-NEXT:    notb %al
991; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
992; X86-SSE42-NEXT:    retl
993;
994; X86-AVX1-LABEL: test_reduce_v32i8:
995; X86-AVX1:       ## %bb.0:
996; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
997; X86-AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
998; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
999; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1000; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
1001; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1002; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
1003; X86-AVX1-NEXT:    vmovd %xmm0, %eax
1004; X86-AVX1-NEXT:    notb %al
1005; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
1006; X86-AVX1-NEXT:    vzeroupper
1007; X86-AVX1-NEXT:    retl
1008;
1009; X86-AVX2-LABEL: test_reduce_v32i8:
1010; X86-AVX2:       ## %bb.0:
1011; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1012; X86-AVX2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
1013; X86-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1014; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1015; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
1016; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1017; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
1018; X86-AVX2-NEXT:    vmovd %xmm0, %eax
1019; X86-AVX2-NEXT:    notb %al
1020; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
1021; X86-AVX2-NEXT:    vzeroupper
1022; X86-AVX2-NEXT:    retl
1023;
1024; X64-SSE2-LABEL: test_reduce_v32i8:
1025; X64-SSE2:       ## %bb.0:
1026; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
1027; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1028; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
1029; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1030; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
1031; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
1032; X64-SSE2-NEXT:    psrld $16, %xmm1
1033; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
1034; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
1035; X64-SSE2-NEXT:    psrlw $8, %xmm0
1036; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
1037; X64-SSE2-NEXT:    movd %xmm0, %eax
1038; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
1039; X64-SSE2-NEXT:    retq
1040;
1041; X64-SSE42-LABEL: test_reduce_v32i8:
1042; X64-SSE42:       ## %bb.0:
1043; X64-SSE42-NEXT:    pmaxub %xmm1, %xmm0
1044; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
1045; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
1046; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
1047; X64-SSE42-NEXT:    psrlw $8, %xmm0
1048; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
1049; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
1050; X64-SSE42-NEXT:    movd %xmm0, %eax
1051; X64-SSE42-NEXT:    notb %al
1052; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
1053; X64-SSE42-NEXT:    retq
1054;
1055; X64-AVX1-LABEL: test_reduce_v32i8:
1056; X64-AVX1:       ## %bb.0:
1057; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1058; X64-AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
1059; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1060; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1061; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
1062; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1063; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
1064; X64-AVX1-NEXT:    vmovd %xmm0, %eax
1065; X64-AVX1-NEXT:    notb %al
1066; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
1067; X64-AVX1-NEXT:    vzeroupper
1068; X64-AVX1-NEXT:    retq
1069;
1070; X64-AVX2-LABEL: test_reduce_v32i8:
1071; X64-AVX2:       ## %bb.0:
1072; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1073; X64-AVX2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
1074; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1075; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
1076; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
1077; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1078; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
1079; X64-AVX2-NEXT:    vmovd %xmm0, %eax
1080; X64-AVX2-NEXT:    notb %al
1081; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
1082; X64-AVX2-NEXT:    vzeroupper
1083; X64-AVX2-NEXT:    retq
1084;
1085; X64-AVX512-LABEL: test_reduce_v32i8:
1086; X64-AVX512:       ## %bb.0:
1087; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
1088; X64-AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
1089; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
1090; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
1091; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1092; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
1093; X64-AVX512-NEXT:    vmovd %xmm0, %eax
1094; X64-AVX512-NEXT:    notb %al
1095; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
1096; X64-AVX512-NEXT:    vzeroupper
1097; X64-AVX512-NEXT:    retq
  ; Reference IR: halve the vector with shuffle + icmp ugt + select five
  ; times (16|8|4|2|1 live lanes), then extract lane 0 as the i8 result.
1098  %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1099  %2  = icmp ugt <32 x i8> %a0, %1
1100  %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
1101  %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1102  %5  = icmp ugt <32 x i8> %3, %4
1103  %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
1104  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1105  %8  = icmp ugt <32 x i8> %6, %7
1106  %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
1107  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1108  %11 = icmp ugt <32 x i8> %9, %10
1109  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
1110  %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1111  %14 = icmp ugt <32 x i8> %12, %13
1112  %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
1113  %16 = extractelement <32 x i8> %15, i32 0
1114  ret i8 %16
1115}
1116
1117;
1118; 512-bit Vectors
1119;
1120
1121define i64 @test_reduce_v8i64(<8 x i64> %a0) {
1122; X86-SSE2-LABEL: test_reduce_v8i64:
1123; X86-SSE2:       ## %bb.0:
1124; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1125; X86-SSE2-NEXT:    movdqa %xmm2, %xmm5
1126; X86-SSE2-NEXT:    pxor %xmm4, %xmm5
1127; X86-SSE2-NEXT:    movdqa %xmm0, %xmm6
1128; X86-SSE2-NEXT:    pxor %xmm4, %xmm6
1129; X86-SSE2-NEXT:    movdqa %xmm6, %xmm7
1130; X86-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
1131; X86-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
1132; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
1133; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1134; X86-SSE2-NEXT:    pand %xmm5, %xmm6
1135; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1136; X86-SSE2-NEXT:    por %xmm6, %xmm5
1137; X86-SSE2-NEXT:    pand %xmm5, %xmm0
1138; X86-SSE2-NEXT:    pandn %xmm2, %xmm5
1139; X86-SSE2-NEXT:    por %xmm0, %xmm5
1140; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
1141; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
1142; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
1143; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
1144; X86-SSE2-NEXT:    movdqa %xmm2, %xmm6
1145; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
1146; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
1147; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1148; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1149; X86-SSE2-NEXT:    pand %xmm0, %xmm2
1150; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1151; X86-SSE2-NEXT:    por %xmm2, %xmm0
1152; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1153; X86-SSE2-NEXT:    pandn %xmm3, %xmm0
1154; X86-SSE2-NEXT:    por %xmm1, %xmm0
1155; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1156; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
1157; X86-SSE2-NEXT:    movdqa %xmm5, %xmm2
1158; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
1159; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
1160; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
1161; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
1162; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2]
1163; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1164; X86-SSE2-NEXT:    pand %xmm1, %xmm2
1165; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
1166; X86-SSE2-NEXT:    por %xmm2, %xmm1
1167; X86-SSE2-NEXT:    pand %xmm1, %xmm5
1168; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
1169; X86-SSE2-NEXT:    por %xmm5, %xmm1
1170; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1171; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
1172; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
1173; X86-SSE2-NEXT:    pxor %xmm0, %xmm4
1174; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
1175; X86-SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
1176; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
1177; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1178; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1179; X86-SSE2-NEXT:    pand %xmm2, %xmm4
1180; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1181; X86-SSE2-NEXT:    por %xmm4, %xmm2
1182; X86-SSE2-NEXT:    pand %xmm2, %xmm1
1183; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
1184; X86-SSE2-NEXT:    por %xmm1, %xmm2
1185; X86-SSE2-NEXT:    movd %xmm2, %eax
1186; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
1187; X86-SSE2-NEXT:    movd %xmm0, %edx
1188; X86-SSE2-NEXT:    retl
1189;
1190; X86-SSE42-LABEL: test_reduce_v8i64:
1191; X86-SSE42:       ## %bb.0:
1192; X86-SSE42-NEXT:    movdqa %xmm0, %xmm4
1193; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [0,2147483648,0,2147483648]
1194; X86-SSE42-NEXT:    movdqa %xmm2, %xmm6
1195; X86-SSE42-NEXT:    pxor %xmm5, %xmm6
1196; X86-SSE42-NEXT:    pxor %xmm5, %xmm0
1197; X86-SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
1198; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1199; X86-SSE42-NEXT:    movdqa %xmm3, %xmm4
1200; X86-SSE42-NEXT:    pxor %xmm5, %xmm4
1201; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
1202; X86-SSE42-NEXT:    pxor %xmm5, %xmm0
1203; X86-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
1204; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
1205; X86-SSE42-NEXT:    movapd %xmm3, %xmm1
1206; X86-SSE42-NEXT:    xorpd %xmm5, %xmm1
1207; X86-SSE42-NEXT:    movapd %xmm2, %xmm0
1208; X86-SSE42-NEXT:    xorpd %xmm5, %xmm0
1209; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
1210; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
1211; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
1212; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
1213; X86-SSE42-NEXT:    pxor %xmm5, %xmm0
1214; X86-SSE42-NEXT:    pxor %xmm1, %xmm5
1215; X86-SSE42-NEXT:    pcmpgtq %xmm5, %xmm0
1216; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
1217; X86-SSE42-NEXT:    movd %xmm1, %eax
1218; X86-SSE42-NEXT:    pextrd $1, %xmm1, %edx
1219; X86-SSE42-NEXT:    retl
1220;
1221; X86-AVX1-LABEL: test_reduce_v8i64:
1222; X86-AVX1:       ## %bb.0:
1223; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1224; X86-AVX1-NEXT:    ## xmm2 = mem[0,0]
1225; X86-AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm3
1226; X86-AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm4
1227; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
1228; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
1229; X86-AVX1-NEXT:    vxorps %xmm2, %xmm4, %xmm5
1230; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
1231; X86-AVX1-NEXT:    vxorps %xmm2, %xmm6, %xmm7
1232; X86-AVX1-NEXT:    vpcmpgtq %xmm5, %xmm7, %xmm5
1233; X86-AVX1-NEXT:    vblendvpd %xmm5, %xmm6, %xmm4, %xmm4
1234; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm4, %xmm5
1235; X86-AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1236; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm1
1237; X86-AVX1-NEXT:    vpcmpgtq %xmm5, %xmm1, %xmm1
1238; X86-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm4, %xmm0
1239; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1240; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
1241; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
1242; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
1243; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1244; X86-AVX1-NEXT:    vmovd %xmm0, %eax
1245; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
1246; X86-AVX1-NEXT:    vzeroupper
1247; X86-AVX1-NEXT:    retl
1248;
1249; X86-AVX2-LABEL: test_reduce_v8i64:
1250; X86-AVX2:       ## %bb.0:
1251; X86-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1252; X86-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
1253; X86-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm4
1254; X86-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
1255; X86-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
1256; X86-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
1257; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm3
1258; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm4
1259; X86-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
1260; X86-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1261; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1262; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
1263; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
1264; X86-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
1265; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1266; X86-AVX2-NEXT:    vmovd %xmm0, %eax
1267; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
1268; X86-AVX2-NEXT:    vzeroupper
1269; X86-AVX2-NEXT:    retl
1270;
1271; X64-SSE2-LABEL: test_reduce_v8i64:
1272; X64-SSE2:       ## %bb.0:
1273; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
1274; X64-SSE2-NEXT:    movdqa %xmm2, %xmm5
1275; X64-SSE2-NEXT:    pxor %xmm4, %xmm5
1276; X64-SSE2-NEXT:    movdqa %xmm0, %xmm6
1277; X64-SSE2-NEXT:    pxor %xmm4, %xmm6
1278; X64-SSE2-NEXT:    movdqa %xmm6, %xmm7
1279; X64-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
1280; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1281; X64-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
1282; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1283; X64-SSE2-NEXT:    pand %xmm8, %xmm6
1284; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1285; X64-SSE2-NEXT:    por %xmm6, %xmm5
1286; X64-SSE2-NEXT:    pand %xmm5, %xmm0
1287; X64-SSE2-NEXT:    pandn %xmm2, %xmm5
1288; X64-SSE2-NEXT:    por %xmm0, %xmm5
1289; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
1290; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
1291; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
1292; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
1293; X64-SSE2-NEXT:    movdqa %xmm2, %xmm6
1294; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
1295; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1296; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
1297; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1298; X64-SSE2-NEXT:    pand %xmm7, %xmm0
1299; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
1300; X64-SSE2-NEXT:    por %xmm0, %xmm2
1301; X64-SSE2-NEXT:    pand %xmm2, %xmm1
1302; X64-SSE2-NEXT:    pandn %xmm3, %xmm2
1303; X64-SSE2-NEXT:    por %xmm1, %xmm2
1304; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
1305; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
1306; X64-SSE2-NEXT:    movdqa %xmm5, %xmm1
1307; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
1308; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
1309; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
1310; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
1311; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1312; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1313; X64-SSE2-NEXT:    pand %xmm6, %xmm0
1314; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
1315; X64-SSE2-NEXT:    por %xmm0, %xmm1
1316; X64-SSE2-NEXT:    pand %xmm1, %xmm5
1317; X64-SSE2-NEXT:    pandn %xmm2, %xmm1
1318; X64-SSE2-NEXT:    por %xmm5, %xmm1
1319; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1320; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
1321; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
1322; X64-SSE2-NEXT:    pxor %xmm0, %xmm4
1323; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
1324; X64-SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
1325; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1326; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
1327; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1328; X64-SSE2-NEXT:    pand %xmm5, %xmm2
1329; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1330; X64-SSE2-NEXT:    por %xmm2, %xmm3
1331; X64-SSE2-NEXT:    pand %xmm3, %xmm1
1332; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
1333; X64-SSE2-NEXT:    por %xmm1, %xmm3
1334; X64-SSE2-NEXT:    movq %xmm3, %rax
1335; X64-SSE2-NEXT:    retq
1336;
1337; X64-SSE42-LABEL: test_reduce_v8i64:
1338; X64-SSE42:       ## %bb.0:
1339; X64-SSE42-NEXT:    movdqa %xmm0, %xmm4
1340; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
1341; X64-SSE42-NEXT:    movdqa %xmm2, %xmm6
1342; X64-SSE42-NEXT:    pxor %xmm5, %xmm6
1343; X64-SSE42-NEXT:    pxor %xmm5, %xmm0
1344; X64-SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
1345; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1346; X64-SSE42-NEXT:    movdqa %xmm3, %xmm4
1347; X64-SSE42-NEXT:    pxor %xmm5, %xmm4
1348; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
1349; X64-SSE42-NEXT:    pxor %xmm5, %xmm0
1350; X64-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
1351; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
1352; X64-SSE42-NEXT:    movapd %xmm3, %xmm1
1353; X64-SSE42-NEXT:    xorpd %xmm5, %xmm1
1354; X64-SSE42-NEXT:    movapd %xmm2, %xmm0
1355; X64-SSE42-NEXT:    xorpd %xmm5, %xmm0
1356; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
1357; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
1358; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
1359; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
1360; X64-SSE42-NEXT:    pxor %xmm5, %xmm0
1361; X64-SSE42-NEXT:    pxor %xmm1, %xmm5
1362; X64-SSE42-NEXT:    pcmpgtq %xmm5, %xmm0
1363; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
1364; X64-SSE42-NEXT:    movq %xmm1, %rax
1365; X64-SSE42-NEXT:    retq
1366;
1367; X64-AVX1-LABEL: test_reduce_v8i64:
1368; X64-AVX1:       ## %bb.0:
1369; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1370; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
1371; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm4
1372; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
1373; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
1374; X64-AVX1-NEXT:    vpxor %xmm2, %xmm4, %xmm5
1375; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
1376; X64-AVX1-NEXT:    vpxor %xmm2, %xmm6, %xmm7
1377; X64-AVX1-NEXT:    vpcmpgtq %xmm5, %xmm7, %xmm5
1378; X64-AVX1-NEXT:    vblendvpd %xmm5, %xmm6, %xmm4, %xmm4
1379; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm4, %xmm5
1380; X64-AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1381; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm1
1382; X64-AVX1-NEXT:    vpcmpgtq %xmm5, %xmm1, %xmm1
1383; X64-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm4, %xmm0
1384; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1385; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
1386; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
1387; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
1388; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1389; X64-AVX1-NEXT:    vmovq %xmm0, %rax
1390; X64-AVX1-NEXT:    vzeroupper
1391; X64-AVX1-NEXT:    retq
1392;
1393; X64-AVX2-LABEL: test_reduce_v8i64:
1394; X64-AVX2:       ## %bb.0:
1395; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1396; X64-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
1397; X64-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm4
1398; X64-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
1399; X64-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
1400; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
1401; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm3
1402; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm4
1403; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
1404; X64-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1405; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1406; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
1407; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
1408; X64-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
1409; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1410; X64-AVX2-NEXT:    vmovq %xmm0, %rax
1411; X64-AVX2-NEXT:    vzeroupper
1412; X64-AVX2-NEXT:    retq
1413;
1414; X64-AVX512-LABEL: test_reduce_v8i64:
1415; X64-AVX512:       ## %bb.0:
1416; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1417; X64-AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
1418; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
1419; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
1420; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1421; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
1422; X64-AVX512-NEXT:    vmovq %xmm0, %rax
1423; X64-AVX512-NEXT:    vzeroupper
1424; X64-AVX512-NEXT:    retq
1425  %1  = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1426  %2  = icmp ugt <8 x i64> %a0, %1
1427  %3  = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
1428  %4  = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1429  %5  = icmp ugt <8 x i64> %3, %4
1430  %6  = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
1431  %7  = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1432  %8  = icmp ugt <8 x i64> %6, %7
1433  %9  = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
1434  %10 = extractelement <8 x i64> %9, i32 0
1435  ret i64 %10
1436}
1437
define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; Unsigned-max reduction of a <16 x i32> down to a single i32.
; The IR at the bottom is a log2 shuffle/compare/select tree
; (8 -> 4 -> 2 -> 1 live lanes).  The CHECK blocks below were
; autogenerated by utils/update_llc_test_checks.py (see the NOTE at the
; top of the file) -- regenerate them with that script rather than
; editing by hand.  Pre-SSE4.1 targets have no pmaxud, so the SSE2
; paths flip the sign bit (pxor with 2147483648) and emulate unsigned
; max via signed pcmpgtd plus a pand/pandn/por select.
; X86-SSE2-LABEL: test_reduce_v16i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm6
; X86-SSE2-NEXT:    pxor %xmm4, %xmm6
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm5
; X86-SSE2-NEXT:    pxor %xmm4, %xmm5
; X86-SSE2-NEXT:    pcmpgtd %xmm6, %xmm5
; X86-SSE2-NEXT:    pand %xmm5, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm5
; X86-SSE2-NEXT:    por %xmm0, %xmm5
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm5, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm5
; X86-SSE2-NEXT:    pandn %xmm2, %xmm1
; X86-SSE2-NEXT:    por %xmm5, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxud %xmm3, %xmm1
; X86-SSE42-NEXT:    pmaxud %xmm2, %xmm1
; X86-SSE42-NEXT:    pmaxud %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE42-NEXT:    pmaxud %xmm1, %xmm0
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE42-NEXT:    pmaxud %xmm0, %xmm1
; X86-SSE42-NEXT:    movd %xmm1, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v16i32:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpmaxud %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v16i32:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm6
; X64-SSE2-NEXT:    pxor %xmm4, %xmm6
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm5
; X64-SSE2-NEXT:    pxor %xmm4, %xmm5
; X64-SSE2-NEXT:    pcmpgtd %xmm6, %xmm5
; X64-SSE2-NEXT:    pand %xmm5, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm5
; X64-SSE2-NEXT:    por %xmm0, %xmm5
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm5, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm5
; X64-SSE2-NEXT:    pandn %xmm2, %xmm1
; X64-SSE2-NEXT:    por %xmm5, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxud %xmm3, %xmm1
; X64-SSE42-NEXT:    pmaxud %xmm2, %xmm1
; X64-SSE42-NEXT:    pmaxud %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE42-NEXT:    pmaxud %xmm1, %xmm0
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE42-NEXT:    pmaxud %xmm0, %xmm1
; X64-SSE42-NEXT:    movd %xmm1, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i32:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpmaxud %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v16i32:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v16i32:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  ; Round 1: fold lanes 8-15 into lanes 0-7.
  %1  = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ugt <16 x i32> %a0, %1
  %3  = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
  ; Round 2: fold lanes 4-7 into lanes 0-3.
  %4  = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ugt <16 x i32> %3, %4
  %6  = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
  ; Round 3: fold lanes 2-3 into lanes 0-1.
  %7  = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ugt <16 x i32> %6, %7
  %9  = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
  ; Round 4: fold lane 1 into lane 0.
  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <16 x i32> %9, %10
  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
  ; Lane 0 now holds the unsigned maximum of all 16 elements.
  %13 = extractelement <16 x i32> %12, i32 0
  ret i32 %13
}
1641
define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; Unsigned-max reduction of a <32 x i16> down to a single i16, written
; as a log2 shuffle/compare/select tree (16 -> 8 -> 4 -> 2 -> 1 lanes).
; SSE2 has no pmaxuw, so that path synthesizes umax(a,b) with
; saturating subtraction: paddw(psubusw(b, a), a).  The SSE42/AVX paths
; compute the max via phminposuw on the bitwise NOT of the vector
; (max(x) == ~min(~x)) and re-invert the scalar with notl; AVX512 forms
; the NOT with vpternlogq $15.  The CHECK blocks were autogenerated by
; utils/update_llc_test_checks.py -- regenerate, don't hand-edit.
; X86-SSE2-LABEL: test_reduce_v32i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm2
; X86-SSE2-NEXT:    paddw %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm3
; X86-SSE2-NEXT:    paddw %xmm1, %xmm3
; X86-SSE2-NEXT:    psubusw %xmm2, %xmm3
; X86-SSE2-NEXT:    paddw %xmm2, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
; X86-SSE2-NEXT:    psubusw %xmm3, %xmm0
; X86-SSE2-NEXT:    paddw %xmm3, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X86-SSE2-NEXT:    paddw %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxuw %xmm3, %xmm1
; X86-SSE42-NEXT:    pmaxuw %xmm2, %xmm1
; X86-SSE42-NEXT:    pmaxuw %xmm0, %xmm1
; X86-SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notl %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    notl %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    notl %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm2
; X64-SSE2-NEXT:    paddw %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm3
; X64-SSE2-NEXT:    paddw %xmm1, %xmm3
; X64-SSE2-NEXT:    psubusw %xmm2, %xmm3
; X64-SSE2-NEXT:    paddw %xmm2, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
; X64-SSE2-NEXT:    psubusw %xmm3, %xmm0
; X64-SSE2-NEXT:    paddw %xmm3, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxuw %xmm3, %xmm1
; X64-SSE42-NEXT:    pmaxuw %xmm2, %xmm1
; X64-SSE42-NEXT:    pmaxuw %xmm0, %xmm1
; X64-SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notl %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notl %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notl %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notl %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  ; Round 1: fold lanes 16-31 into lanes 0-15.
  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ugt <32 x i16> %a0, %1
  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  ; Round 2: fold lanes 8-15 into lanes 0-7.
  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ugt <32 x i16> %3, %4
  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  ; Round 3: fold lanes 4-7 into lanes 0-3.
  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ugt <32 x i16> %6, %7
  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  ; Round 4: fold lanes 2-3 into lanes 0-1.
  %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <32 x i16> %9, %10
  %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
  ; Round 5: fold lane 1 into lane 0.
  %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ugt <32 x i16> %12, %13
  %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
  ; Lane 0 now holds the unsigned maximum of all 32 elements.
  %16 = extractelement <32 x i16> %15, i32 0
  ret i16 %16
}
1804
define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; Unsigned-max reduction of a <64 x i8> down to a single i8, written as
; a log2 shuffle/compare/select tree (32 -> 16 -> 8 -> 4 -> 2 -> 1
; lanes).  pmaxub exists since SSE2, so every path reduces with
; pmaxub/vpmaxub; the SSE42/AVX paths then finish via phminposuw on the
; inverted vector (max(x) == ~min(~x)).  phminposuw operates on words,
; so a psrlw $8 / pminub step folds the odd bytes into the even bytes
; first, and notb re-inverts the scalar result.  The CHECK blocks were
; autogenerated by utils/update_llc_test_checks.py -- regenerate, don't
; hand-edit.
; X86-SSE2-LABEL: test_reduce_v64i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pmaxub %xmm3, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm2, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $8, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxub %xmm3, %xmm1
; X86-SSE42-NEXT:    pmaxub %xmm2, %xmm1
; X86-SSE42-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notb %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v64i8:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpmaxub %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    notb %al
; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v64i8:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    notb %al
; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pmaxub %xmm3, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm2, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrlw $8, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxub %xmm3, %xmm1
; X64-SSE42-NEXT:    pmaxub %xmm2, %xmm1
; X64-SSE42-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notb %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v64i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpmaxub %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notb %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v64i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notb %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notb %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  ; Round 1: fold lanes 32-63 into lanes 0-31.
  %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ugt <64 x i8> %a0, %1
  %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  ; Round 2: fold lanes 16-31 into lanes 0-15.
  %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ugt <64 x i8> %3, %4
  %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  ; Round 3: fold lanes 8-15 into lanes 0-7.
  %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ugt <64 x i8> %6, %7
  %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  ; Round 4: fold lanes 4-7 into lanes 0-3.
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  ; Round 5: fold lanes 2-3 into lanes 0-1.
  %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ugt <64 x i8> %12, %13
  %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
  ; Round 6: fold lane 1 into lane 0.
  %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %17 = icmp ugt <64 x i8> %15, %16
  %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
  ; Lane 0 now holds the unsigned maximum of all 64 elements.
  %19 = extractelement <64 x i8> %18, i32 0
  ret i8 %19
}
1980
1981;
1982; Partial Vector Reductions
1983;
1984
; Partial reduction: unsigned-max over only the low 8 of the 16 x i16 lanes,
; written as a log2 shuffle tree of icmp ugt + select (umax) ending in an
; extract of lane 0. Per the checks below: SSE2 lowers i16 umax with the
; psubusw+paddw saturating idiom (a + usat(b-a) == umax(a,b)); SSE4.2/AVX
; invert the input and use a single phminposuw, since min(~x) == ~max(x).
define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X86-SSE2-NEXT:    paddw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notl %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notl %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notl %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i16_v8i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notl %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v16i16_v8i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notl %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v16i16_v8i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notl %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; Shuffle tree: fold lanes 4-7 into 0-3, then 2-3 into 0-1, then 1 into 0.
  %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ugt <16 x i16> %a0, %1
  %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ugt <16 x i16> %3, %4
  %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ugt <16 x i16> %6, %7
  %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  %10 = extractelement <16 x i16> %9, i32 0
  ret i16 %10
}
2092
; Same partial umax reduction, but starting from a <32 x i16> source: still
; only lanes 0-7 feed the tree, so the checks show identical single-xmm
; codegen (SSE2 psubusw+paddw umax idiom; SSE4.2+ invert + phminposuw) and
; the upper 256/384 bits of the argument are never touched.
define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X86-SSE2-NEXT:    paddw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notl %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notl %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notl %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notl %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notl %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notl %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; Shuffle tree over the low 8 lanes only: 4-7 -> 0-3, 2-3 -> 0-1, 1 -> 0.
  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ugt <32 x i16> %a0, %1
  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ugt <32 x i16> %3, %4
  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ugt <32 x i16> %6, %7
  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = extractelement <32 x i16> %9, i32 0
  ret i16 %10
}
2200
; Partial reduction: unsigned-max over only the low 16 of the 32 x i8 lanes,
; as a log2 tree of icmp ugt + select ending in an extract of lane 0.
; Per the checks: SSE2 has pmaxub, so it reduces with shuffles + pmaxub;
; SSE4.2/AVX invert the input, fold odd bytes into even words (psrlw $8 +
; pminub), then finish with one phminposuw (min(~x) == ~max(x)) and notb.
define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $8, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notb %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notb %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $8, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notb %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notb %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notb %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notb %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; Shuffle tree: lanes 8-15 -> 0-7, 4-7 -> 0-3, 2-3 -> 0-1, 1 -> 0.
  %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ugt <32 x i8> %a0, %1
  %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ugt <32 x i8> %3, %4
  %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ugt <32 x i8> %6, %7
  %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}
2325
; Same partial i8 umax reduction, but starting from a <64 x i8> source:
; still only lanes 0-15 participate, so the checks show identical single-xmm
; codegen (SSE2 pmaxub tree; SSE4.2+ invert + psrlw/pminub + phminposuw)
; and the upper portions of the argument are never touched.
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $8, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notb %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notb %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $8, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notb %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notb %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notb %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notb %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; Shuffle tree over the low 16 lanes only: 8-15 -> 0-7, 4-7 -> 0-3, 2-3 -> 0-1, 1 -> 0.
  %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ugt <64 x i8> %a0, %1
  %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ugt <64 x i8> %3, %4
  %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ugt <64 x i8> %6, %7
  %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}
2450