; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2     | FileCheck %s --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2   | FileCheck %s --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx      | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2     | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2   | FileCheck %s --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx    | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2   | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512

;
; 128-bit Vectors
;

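; Each test reduces a vector to its unsigned-minimum element through a
; shufflevector + icmp ult + select chain. SSE2 has no unsigned integer
; compares (and no 64-bit element compare at all), so the i64 umin below is
; synthesized by flipping the sign bits with pxor and combining
; pcmpgtd/pcmpeqd results; SSE4.2 adds pcmpgtq, and the AVX512VL run lowers
; straight to vpminuq.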
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
; X86-SSE2-NEXT:    por %xmm0, %xmm3
; X86-SSE2-NEXT:    movd %xmm3, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm3
; X86-SSE42-NEXT:    pxor %xmm0, %xmm3
; X86-SSE42-NEXT:    pxor %xmm2, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT:    ## xmm2 = mem[0,0]
; X86-AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v2i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movq %xmm3, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm3
; X64-SSE42-NEXT:    pxor %xmm0, %xmm3
; X64-SSE42-NEXT:    pxor %xmm2, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp ult <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}

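; pminud is SSE4.1, so only the SSE2 runs need the sign-flip
; compare-and-blend sequence here; the newer targets reduce with two
; shuffle + umin steps.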
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm4, %xmm2
; X64-SSE2-NEXT:    movd %xmm2, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ult <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}

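; phminposuw (SSE4.1) computes the horizontal unsigned-minimum word in a
; single instruction, leaving the result in element 0. SSE2 instead builds
; each umin step from the identity umin(a,b) = a - usat(a - b), using
; psubusw + psubw.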
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    retq
  %1  = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <8 x i16> %a0, %1
  %3  = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4  = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <8 x i16> %3, %4
  %6  = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7  = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <8 x i16> %6, %7
  %9  = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}

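; pminub is native SSE2, so the byte reduction stays simple there. On SSE4.1+
; the two bytes within each word are first folded with psrlw $8 + pminub
; (which also zeroes the high bytes), letting phminposuw finish the byte
; reduction as a word reduction.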
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    retq
  %1  = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <16 x i8> %a0, %1
  %3  = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4  = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <16 x i8> %3, %4
  %6  = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7  = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <16 x i8> %6, %7
  %9  = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}

;
; 256-bit Vectors
;

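; AVX1 has no 256-bit integer operations, so the AVX1 runs split the ymm
; input with vextractf128 and reduce on 128-bit halves; the AVX2/AVX512 runs
; extract the upper half with vextracti128 instead.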
define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm6, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE42-NEXT:    pxor %xmm3, %xmm4
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
; X86-SSE42-NEXT:    pxor %xmm2, %xmm3
; X86-SSE42-NEXT:    pcmpgtq %xmm0, %xmm3
; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT:    ## xmm1 = mem[0,0]
; X86-AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vxorps %xmm1, %xmm3, %xmm4
; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vxorpd %xmm1, %xmm0, %xmm3
; X86-AVX1-NEXT:    vxorpd %xmm1, %xmm2, %xmm1
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm4
; X86-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm6, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm4, %xmm2
; X64-SSE2-NEXT:    movq %xmm2, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm4
; X64-SSE42-NEXT:    pxor %xmm3, %xmm4
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
; X64-SSE42-NEXT:    pxor %xmm2, %xmm3
; X64-SSE42-NEXT:    pcmpgtq %xmm0, %xmm3
; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm3, %xmm4
; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vxorpd %xmm1, %xmm0, %xmm3
; X64-AVX1-NEXT:    vxorpd %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; X64-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm4
; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ult <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}

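; The 256-bit i32 reduction folds the two 128-bit halves first, then proceeds
; exactly like test_reduce_v4i32.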
define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm3
; X86-SSE2-NEXT:    por %xmm4, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm3
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm3, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v8i32:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
; X64-SSE2-NEXT:    por %xmm4, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm3
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm3, %xmm2
; X64-SSE2-NEXT:    movd %xmm2, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v8i32:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <8 x i32> %a0, %1
  %3  = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  %4  = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <8 x i32> %3, %4
  %6  = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  %7  = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <8 x i32> %6, %7
  %9  = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  %10 = extractelement <8 x i32> %9, i32 0
  ret i32 %10
}

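; As for v8i16, phminposuw finishes the word reduction once the two 128-bit
; halves have been folded with pminuw (SSE4.1); SSE2 again falls back to the
; psubusw-based umin at every step.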
define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminuw %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminuw %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <16 x i16> %a0, %1
  %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <16 x i16> %3, %4
  %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <16 x i16> %6, %7
  %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i16> %9, %10
  %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
  %13 = extractelement <16 x i16> %12, i32 0
  ret i16 %13
}

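; Same shape as test_reduce_v16i8 once the two 128-bit halves have been
; folded with pminub.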
define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v32i8:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <32 x i8> %a0, %1
  %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <32 x i8> %3, %4
  %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <32 x i8> %6, %7
  %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <32 x i8> %12, %13
  %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
  %16 = extractelement <32 x i8> %15, i32 0
  ret i8 %16
}

;
; 512-bit Vectors
;

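; The <8 x i64> input spans four xmm registers on the SSE targets, which
; reduce it pairwise (xmm1/xmm3, then xmm0/xmm2) before combining the halves;
; the AVX2 run folds ymm0 against ymm1 first.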
define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm5
; X86-SSE2-NEXT:    pxor %xmm4, %xmm5
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X86-SSE2-NEXT:    pxor %xmm4, %xmm6
; X86-SSE2-NEXT:    movdqa %xmm6, %xmm7
; X86-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; X86-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm6
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm6, %xmm5
; X86-SSE2-NEXT:    pand %xmm5, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm5
; X86-SSE2-NEXT:    por %xmm1, %xmm5
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm3, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm1
; X86-SSE2-NEXT:    por %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm5, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm0
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    pandn %xmm5, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm1, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm5
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm6
; X86-SSE42-NEXT:    pxor %xmm4, %xmm6
; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE42-NEXT:    pxor %xmm4, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; X86-SSE42-NEXT:    movdqa %xmm5, %xmm1
; X86-SSE42-NEXT:    pxor %xmm4, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE42-NEXT:    pxor %xmm4, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm5, %xmm2
; X86-SSE42-NEXT:    movapd %xmm2, %xmm1
; X86-SSE42-NEXT:    xorpd %xmm4, %xmm1
; X86-SSE42-NEXT:    movapd %xmm3, %xmm0
; X86-SSE42-NEXT:    xorpd %xmm4, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE42-NEXT:    pxor %xmm4, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm4
; X86-SSE42-NEXT:    pcmpgtq %xmm0, %xmm4
; X86-SSE42-NEXT:    movdqa %xmm4, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
; X86-SSE42-NEXT:    movd %xmm1, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm1, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v8i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT:    ## xmm3 = mem[0,0]
; X86-AVX1-NEXT:    vxorps %xmm3, %xmm2, %xmm4
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; X86-AVX1-NEXT:    vxorps %xmm3, %xmm5, %xmm6
; X86-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
; X86-AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm6
; X86-AVX1-NEXT:    vxorps %xmm3, %xmm1, %xmm7
; X86-AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
; X86-AVX1-NEXT:    vblendvpd %xmm6, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm1
; X86-AVX1-NEXT:    vblendvpd %xmm4, %xmm2, %xmm5, %xmm2
; X86-AVX1-NEXT:    vxorpd %xmm3, %xmm2, %xmm4
; X86-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm1
; X86-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm2
; X86-AVX1-NEXT:    vxorpd %xmm3, %xmm1, %xmm3
; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v8i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; X86-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm4
; X86-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm4
; X86-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm5
; X64-SSE2-NEXT:    pxor %xmm4, %xmm5
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X64-SSE2-NEXT:    pxor %xmm4, %xmm6
; X64-SSE2-NEXT:    movdqa %xmm6, %xmm7
; X64-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm8, %xmm6
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm6, %xmm5
; X64-SSE2-NEXT:    pand %xmm5, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm5
; X64-SSE2-NEXT:    por %xmm1, %xmm5
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm7, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm1, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm5, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm6, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm3
; X64-SSE2-NEXT:    pandn %xmm5, %xmm1
; X64-SSE2-NEXT:    por %xmm3, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm0, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
; X64-SSE2-NEXT:    por %xmm1, %xmm3
; X64-SSE2-NEXT:    movq %xmm3, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm5
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm6
; X64-SSE42-NEXT:    pxor %xmm4, %xmm6
; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE42-NEXT:    pxor %xmm4, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; X64-SSE42-NEXT:    movdqa %xmm5, %xmm1
; X64-SSE42-NEXT:    pxor %xmm4, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE42-NEXT:    pxor %xmm4, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm5, %xmm2
; X64-SSE42-NEXT:    movapd %xmm2, %xmm1
; X64-SSE42-NEXT:    xorpd %xmm4, %xmm1
; X64-SSE42-NEXT:    movapd %xmm3, %xmm0
; X64-SSE42-NEXT:    xorpd %xmm4, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE42-NEXT:    pxor %xmm4, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm4
; X64-SSE42-NEXT:    pcmpgtq %xmm0, %xmm4
; X64-SSE42-NEXT:    movdqa %xmm4, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
; X64-SSE42-NEXT:    movq %xmm1, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v8i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm4
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; X64-AVX1-NEXT:    vpxor %xmm3, %xmm5, %xmm6
; X64-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
; X64-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm6
; X64-AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm7
1295; X64-AVX1-NEXT:    vpcmpgtq %xmm6, %xmm7, %xmm6
1296; X64-AVX1-NEXT:    vblendvpd %xmm6, %xmm0, %xmm1, %xmm0
1297; X64-AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm1
1298; X64-AVX1-NEXT:    vblendvpd %xmm4, %xmm2, %xmm5, %xmm2
1299; X64-AVX1-NEXT:    vxorpd %xmm3, %xmm2, %xmm4
1300; X64-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm1
1301; X64-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
1302; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1303; X64-AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm2
1304; X64-AVX1-NEXT:    vxorpd %xmm3, %xmm1, %xmm3
1305; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
1306; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1307; X64-AVX1-NEXT:    vmovq %xmm0, %rax
1308; X64-AVX1-NEXT:    vzeroupper
1309; X64-AVX1-NEXT:    retq
1310;
1311; X64-AVX2-LABEL: test_reduce_v8i64:
1312; X64-AVX2:       ## %bb.0:
1313; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1314; X64-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
1315; X64-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm4
1316; X64-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
1317; X64-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
1318; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
1319; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
1320; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm4
1321; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
1322; X64-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1323; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1324; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
1325; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
1326; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
1327; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1328; X64-AVX2-NEXT:    vmovq %xmm0, %rax
1329; X64-AVX2-NEXT:    vzeroupper
1330; X64-AVX2-NEXT:    retq
1331;
1332; X64-AVX512-LABEL: test_reduce_v8i64:
1333; X64-AVX512:       ## %bb.0:
1334; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1335; X64-AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
1336; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
1337; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
1338; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1339; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
1340; X64-AVX512-NEXT:    vmovq %xmm0, %rax
1341; X64-AVX512-NEXT:    vzeroupper
1342; X64-AVX512-NEXT:    retq
1343  %1  = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1344  %2  = icmp ult <8 x i64> %a0, %1
1345  %3  = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
1346  %4  = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1347  %5  = icmp ult <8 x i64> %3, %4
1348  %6  = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
1349  %7  = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1350  %8  = icmp ult <8 x i64> %6, %7
1351  %9  = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
1352  %10 = extractelement <8 x i64> %9, i32 0
1353  ret i64 %10
1354}
1355
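; Pre-AVX512 targets have no unsigned vector compare, so the v8i64 lowerings
; above bias both operands by the sign bit and reuse the signed compare:
;   a <u b  <=>  (a ^ 0x8000000000000000) <s (b ^ 0x8000000000000000)
; Each reduction step is then, in essence,
;   umin(a, b) = select((b ^ SIGN) >s (a ^ SIGN), a, b)
; using pxor + pcmpgtq (SSE4.2/AVX) or a pxor + pcmpgtd/pcmpeqd dword splice
; (SSE2), followed by blendvpd or a pand/pandn/por select. The same sign-bit
; trick, with a 0x80000000 bias and plain pcmpgtd, reappears in the SSE2
; lowering of the v16i32 test below.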
define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm5
; X86-SSE2-NEXT:    pxor %xmm4, %xmm5
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X86-SSE2-NEXT:    pxor %xmm4, %xmm6
; X86-SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
; X86-SSE2-NEXT:    pand %xmm6, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm6
; X86-SSE2-NEXT:    por %xmm1, %xmm6
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm3
; X86-SSE2-NEXT:    por %xmm0, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm6, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm3
; X86-SSE2-NEXT:    pandn %xmm6, %xmm1
; X86-SSE2-NEXT:    por %xmm3, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm3
; X86-SSE2-NEXT:    por %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm3
; X86-SSE2-NEXT:    pandn %xmm0, %xmm4
; X86-SSE2-NEXT:    por %xmm3, %xmm4
; X86-SSE2-NEXT:    movd %xmm4, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminud %xmm3, %xmm1
; X86-SSE42-NEXT:    pminud %xmm2, %xmm1
; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
; X86-SSE42-NEXT:    movd %xmm1, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v16i32:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminud %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v16i32:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm5
; X64-SSE2-NEXT:    pxor %xmm4, %xmm5
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X64-SSE2-NEXT:    pxor %xmm4, %xmm6
; X64-SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
; X64-SSE2-NEXT:    pand %xmm6, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm6
; X64-SSE2-NEXT:    por %xmm1, %xmm6
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm6, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm3
; X64-SSE2-NEXT:    pandn %xmm6, %xmm1
; X64-SSE2-NEXT:    por %xmm3, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
; X64-SSE2-NEXT:    por %xmm1, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm3
; X64-SSE2-NEXT:    pandn %xmm0, %xmm4
; X64-SSE2-NEXT:    por %xmm3, %xmm4
; X64-SSE2-NEXT:    movd %xmm4, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminud %xmm3, %xmm1
; X64-SSE42-NEXT:    pminud %xmm2, %xmm1
; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
; X64-SSE42-NEXT:    movd %xmm1, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i32:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminud %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v16i32:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v16i32:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <16 x i32> %a0, %1
  %3  = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
  %4  = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <16 x i32> %3, %4
  %6  = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
  %7  = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <16 x i32> %6, %7
  %9  = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i32> %9, %10
  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
  %13 = extractelement <16 x i32> %12, i32 0
  ret i32 %13
}

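; SSE2 also lacks pminuw, so the v32i16 lowering below synthesizes the
; unsigned min from saturating subtraction: psubusw yields max(x - y, 0), and
;   x - max(x - y, 0) == umin(x, y)
; so each psubusw/psubw pair is one min step. On SSE4.1+ the scalar tail is a
; single phminposuw, which produces the horizontal unsigned minimum of all
; eight word lanes directly.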
define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    psubusw %xmm3, %xmm4
; X86-SSE2-NEXT:    psubw %xmm4, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    psubusw %xmm2, %xmm3
; X86-SSE2-NEXT:    psubw %xmm3, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminuw %xmm3, %xmm1
; X86-SSE42-NEXT:    pminuw %xmm2, %xmm1
; X86-SSE42-NEXT:    pminuw %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminuw %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    psubusw %xmm3, %xmm4
; X64-SSE2-NEXT:    psubw %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    psubusw %xmm2, %xmm3
; X64-SSE2-NEXT:    psubw %xmm3, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminuw %xmm3, %xmm1
; X64-SSE42-NEXT:    pminuw %xmm2, %xmm1
; X64-SSE42-NEXT:    pminuw %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminuw %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <32 x i16> %a0, %1
  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <32 x i16> %3, %4
  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <32 x i16> %6, %7
  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i16> %9, %10
  %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
  %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <32 x i16> %12, %13
  %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
  %16 = extractelement <32 x i16> %15, i32 0
  ret i16 %16
}

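; pminub has existed since SSE2, so v64i8 needs no compare emulation. For the
; scalar tail, SSE4.1+ targets fold the odd bytes onto the even ones with
; psrlw $8 + pminub; every word lane then holds a zero-extended byte minimum,
; so the word-min instruction phminposuw yields the overall byte minimum.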
define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pminub %xmm3, %xmm1
; X86-SSE2-NEXT:    pminub %xmm2, %xmm1
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $8, %xmm1
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminub %xmm3, %xmm1
; X86-SSE42-NEXT:    pminub %xmm2, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $8, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v64i8:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminub %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v64i8:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pminub %xmm3, %xmm1
; X64-SSE2-NEXT:    pminub %xmm2, %xmm1
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrlw $8, %xmm1
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminub %xmm3, %xmm1
; X64-SSE42-NEXT:    pminub %xmm2, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $8, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v64i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminub %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v64i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <64 x i8> %a0, %1
  %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <64 x i8> %3, %4
  %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <64 x i8> %6, %7
  %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <64 x i8> %12, %13
  %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
  %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %17 = icmp ult <64 x i8> %15, %16
  %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
  %19 = extractelement <64 x i8> %18, i32 0
  ret i8 %19
}

;
; Partial Vector Reductions
;

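; These tests reduce only the low subvector of a wider input, so the upper
; 128/256-bit lanes are dead. Good codegen stays entirely in %xmm0 with no
; cross-lane extracts; on SSE4.1+ the whole reduction is a bare phminposuw
; (plus vzeroupper on AVX targets, since the argument arrives in a ymm/zmm
; register).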
define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <16 x i16> %a0, %1
  %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <16 x i16> %3, %4
  %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <16 x i16> %6, %7
  %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  %10 = extractelement <16 x i16> %9, i32 0
  ret i16 %10
}

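; Identical lowering to test_reduce_v16i16_v8i16 above: widening the source
; vector changes nothing, since only the low xmm register is reduced.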
define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X86-SSE2-NEXT:    psubw %xmm2, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm2
; X64-SSE2-NEXT:    psubw %xmm2, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <32 x i16> %a0, %1
  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <32 x i16> %3, %4
  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <32 x i16> %6, %7
  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = extractelement <32 x i16> %9, i32 0
  ret i16 %10
}

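; The byte-sized partial reductions reuse the tail of the full v64i8 case on
; the low 16 bytes only: psrlw $8 + pminub pairs bytes into zero-extended
; word minima, and phminposuw finishes without touching the upper lanes.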
define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <32 x i8> %a0, %1
  %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <32 x i8> %3, %4
  %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <32 x i8> %6, %7
  %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}

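; Again identical to test_reduce_v32i8_v16i8 above; the extra source width is
; irrelevant to the low-16-byte reduction.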
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <64 x i8> %a0, %1
  %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <64 x i8> %3, %4
  %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <64 x i8> %6, %7
  %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}
