; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL

;
; vXi64
;

; Signed-max reduction of <2 x i64> to a scalar via llvm.experimental.vector.reduce.smax.
define i64 @test_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: test_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    por %xmm0, %xmm3
; SSE2-NEXT:    movq %xmm3, %rax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    pxor %xmm0, %xmm3
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm3, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm5, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movq %xmm2, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: test_v2i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: test_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %a0)
  ret i64 %1
}

; Signed-max reduction of <4 x i64> to a scalar via llvm.experimental.vector.reduce.smax.
define i64 @test_v4i64(<4 x i64> %a0) {
; SSE2-LABEL: test_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    pxor %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm4
; SSE2-NEXT:    por %xmm0, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm4, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm4
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm4, %xmm2
; SSE2-NEXT:    movq %xmm2, %rax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm0
; SSE41-NEXT:    movdqa %xmm2, %xmm4
; SSE41-NEXT:    pxor %xmm3, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm5
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE41-NEXT:    pand %xmm6, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE41-NEXT:    pand %xmm5, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movq %xmm2, %rax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: test_v4i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: test_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512VL-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> %a0)
  ret i64 %1
}

; Signed-max reduction of <8 x i64> to a scalar via llvm.experimental.vector.reduce.smax.
define i64 @test_v8i64(<8 x i64> %a0) {
; SSE2-LABEL: test_v8i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm2, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa %xmm0, %xmm6
; SSE2-NEXT:    pxor %xmm4, %xmm6
; SSE2-NEXT:    movdqa %xmm6, %xmm7
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT:    pand %xmm8, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE2-NEXT:    por %xmm6, %xmm5
; SSE2-NEXT:    pand %xmm5, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm5
; SSE2-NEXT:    por %xmm0, %xmm5
; SSE2-NEXT:    movdqa %xmm3, %xmm0
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm7, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm5, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm5
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm0, %xmm4
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm3
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    movq %xmm3, %rax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm4, %xmm6
; SSE41-NEXT:    pxor %xmm5, %xmm6
; SSE41-NEXT:    movdqa %xmm6, %xmm7
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm7
; SSE41-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE41-NEXT:    pand %xmm8, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm1, %xmm4
; SSE41-NEXT:    pxor %xmm5, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm6
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE41-NEXT:    pand %xmm7, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE41-NEXT:    movapd %xmm3, %xmm0
; SSE41-NEXT:    xorpd %xmm5, %xmm0
; SSE41-NEXT:    movapd %xmm2, %xmm1
; SSE41-NEXT:    xorpd %xmm5, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm6, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm5
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm5, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE41-NEXT:    pand %xmm4, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
; SSE41-NEXT:    movq %xmm1, %rax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> %a0)
  ret i64 %1
}

; Signed-max reduction of <16 x i64> to a scalar via llvm.experimental.vector.reduce.smax.
define i64 @test_v16i64(<16 x i64> %a0) {
; SSE2-LABEL: test_v16i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
; SSE2-NEXT:    movdqa %xmm5, %xmm9
; SSE2-NEXT:    pxor %xmm8, %xmm9
; SSE2-NEXT:    movdqa %xmm1, %xmm10
; SSE2-NEXT:    pxor %xmm8, %xmm10
; SSE2-NEXT:    movdqa %xmm10, %xmm11
; SSE2-NEXT:    pcmpgtd %xmm9, %xmm11
; SSE2-NEXT:    pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm9, %xmm10
; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
; SSE2-NEXT:    pand %xmm12, %xmm10
; SSE2-NEXT:    pshufd {{.*#+}} xmm9 = xmm11[1,1,3,3]
; SSE2-NEXT:    por %xmm10, %xmm9
; SSE2-NEXT:    pand %xmm9, %xmm1
; SSE2-NEXT:    pandn %xmm5, %xmm9
; SSE2-NEXT:    por %xmm1, %xmm9
; SSE2-NEXT:    movdqa %xmm7, %xmm1
; SSE2-NEXT:    pxor %xmm8, %xmm1
; SSE2-NEXT:    movdqa %xmm3, %xmm5
; SSE2-NEXT:    pxor %xmm8, %xmm5
; SSE2-NEXT:    movdqa %xmm5, %xmm10
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm10
; SSE2-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT:    pand %xmm11, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm10[1,1,3,3]
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm7, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    movdqa %xmm4, %xmm3
; SSE2-NEXT:    pxor %xmm8, %xmm3
; SSE2-NEXT:    movdqa %xmm0, %xmm5
; SSE2-NEXT:    pxor %xmm8, %xmm5
; SSE2-NEXT:    movdqa %xmm5, %xmm7
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT:    pand %xmm10, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
; SSE2-NEXT:    por %xmm5, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm4, %xmm3
; SSE2-NEXT:    por %xmm0, %xmm3
; SSE2-NEXT:    movdqa %xmm6, %xmm0
; SSE2-NEXT:    pxor %xmm8, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pxor %xmm8, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm7, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pandn %xmm6, %xmm4
; SSE2-NEXT:    por %xmm2, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm0
; SSE2-NEXT:    pxor %xmm8, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    pxor %xmm8, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm8, %xmm2
; SSE2-NEXT:    movdqa %xmm9, %xmm3
; SSE2-NEXT:    pxor %xmm8, %xmm3
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm9
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    por %xmm9, %xmm3
; SSE2-NEXT:    movdqa %xmm3, %xmm1
; SSE2-NEXT:    pxor %xmm8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm8, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm8, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm8
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm8, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm8
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm8[1,1,3,3]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm3
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    movq %xmm3, %rax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm8
; SSE41-NEXT:    movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
; SSE41-NEXT:    movdqa %xmm5, %xmm10
; SSE41-NEXT:    pxor %xmm9, %xmm10
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm9, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm11
; SSE41-NEXT:    pcmpgtd %xmm10, %xmm11
; SSE41-NEXT:    pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm10, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm10 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm12, %xmm10
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm11[1,1,3,3]
; SSE41-NEXT:    por %xmm10, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm5
; SSE41-NEXT:    movdqa %xmm7, %xmm0
; SSE41-NEXT:    pxor %xmm9, %xmm0
; SSE41-NEXT:    movdqa %xmm3, %xmm1
; SSE41-NEXT:    pxor %xmm9, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm10
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm10
; SSE41-NEXT:    pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm11, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm7
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    pxor %xmm9, %xmm0
; SSE41-NEXT:    movdqa %xmm8, %xmm1
; SSE41-NEXT:    pxor %xmm9, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm10 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm10, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm8, %xmm4
; SSE41-NEXT:    movdqa %xmm6, %xmm0
; SSE41-NEXT:    pxor %xmm9, %xmm0
; SSE41-NEXT:    movdqa %xmm2, %xmm1
; SSE41-NEXT:    pxor %xmm9, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm8, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm6
; SSE41-NEXT:    movapd %xmm6, %xmm0
; SSE41-NEXT:    xorpd %xmm9, %xmm0
; SSE41-NEXT:    movapd %xmm4, %xmm1
; SSE41-NEXT:    xorpd %xmm9, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm6
; SSE41-NEXT:    movapd %xmm7, %xmm0
; SSE41-NEXT:    xorpd %xmm9, %xmm0
; SSE41-NEXT:    movapd %xmm5, %xmm1
; SSE41-NEXT:    xorpd %xmm9, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm5, %xmm7
; SSE41-NEXT:    movapd %xmm7, %xmm0
; SSE41-NEXT:    xorpd %xmm9, %xmm0
; SSE41-NEXT:    movapd %xmm6, %xmm1
; SSE41-NEXT:    xorpd %xmm9, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm6, %xmm7
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm7[2,3,0,1]
; SSE41-NEXT:    movdqa %xmm7, %xmm0
; SSE41-NEXT:    pxor %xmm9, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm9
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm9, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm9
; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm9[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm4
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm4, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm7, %xmm1
; SSE41-NEXT:    movq %xmm1, %rax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v16i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm5, %ymm4
; AVX1-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm1, %ymm3, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm4
; AVX2-NEXT:    vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm3
; AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> %a0)
  ret i64 %1
}

;
; vXi32
;

; Signed-max reduction of <4 x i32> to a scalar via llvm.experimental.vector.reduce.smax.
define i32 @test_v4i32(<4 x i32> %a0) {
; SSE2-LABEL: test_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %a0)
  ret i32 %1
}

; Signed-max reduction of <8 x i32> to a scalar via llvm.experimental.vector.reduce.smax.
define i32 @test_v8i32(<8 x i32> %a0) {
; SSE2-LABEL: test_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movd %xmm2, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> %a0)
  ret i32 %1
}

define i32 @test_v16i32(<16 x i32> %a0) {
; Signed-max reduction of <16 x i32> (two YMM / four XMM inputs) to i32.
; Assertions are autogenerated by utils/update_llc_test_checks.py; regenerate
; instead of hand-editing. Note the AVX2/AVX512 sequences keep using
; full-width registers even after the extract narrows the live data — this
; mirrors llc's current output, not a hand-written choice.
; SSE2-LABEL: test_v16i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxsd %xmm3, %xmm1
; SSE41-NEXT:    pmaxsd %xmm2, %xmm0
; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %a0)
  ret i32 %1
}
903
define i32 @test_v32i32(<32 x i32> %a0) {
; Signed-max reduction of <32 x i32> (eight XMM / four YMM / two ZMM inputs)
; to i32: pairwise max across the input registers, then the in-register
; shuffle+max reduction. Assertions are autogenerated by
; utils/update_llc_test_checks.py — regenerate rather than editing by hand.
; SSE2-LABEL: test_v32i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm2, %xmm8
; SSE2-NEXT:    pcmpgtd %xmm6, %xmm8
; SSE2-NEXT:    pand %xmm8, %xmm2
; SSE2-NEXT:    pandn %xmm6, %xmm8
; SSE2-NEXT:    por %xmm2, %xmm8
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm3, %xmm0
; SSE2-NEXT:    pcmpgtd %xmm7, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    pandn %xmm7, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pandn %xmm5, %xmm3
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm3, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    pcmpgtd %xmm8, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pandn %xmm8, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movd %xmm2, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxsd %xmm6, %xmm2
; SSE41-NEXT:    pmaxsd %xmm4, %xmm0
; SSE41-NEXT:    pmaxsd %xmm2, %xmm0
; SSE41-NEXT:    pmaxsd %xmm7, %xmm3
; SSE41-NEXT:    pmaxsd %xmm5, %xmm1
; SSE41-NEXT:    pmaxsd %xmm3, %xmm1
; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
; SSE41-NEXT:    movd %xmm1, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmaxsd %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm5
; AVX1-NEXT:    vpmaxsd %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpmaxsd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsd %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxsd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpmaxsd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> %a0)
  ret i32 %1
}
1026
1027;
1028; vXi16
1029;
1030
define i16 @test_v8i16(<8 x i16> %a0) {
; Signed-max reduction of <8 x i16> to i16. On SSE4.1+ the smax reduction is
; lowered with the phminposuw trick: xor each lane with 0x7FFF (maps signed
; max onto unsigned min), take the horizontal unsigned minimum, then xor back.
; Assertions are autogenerated by utils/update_llc_test_checks.py.
; SSE2-LABEL: test_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vphminposuw %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0)
  ret i16 %1
}
1077
define i16 @test_v16i16(<16 x i16> %a0) {
; Signed-max reduction of <16 x i16> to i16: the two halves are combined with
; pmaxsw/vpmaxsw, then the 128-bit tail is reduced (phminposuw trick on
; SSE4.1+, shuffle+pmaxsw ladder on SSE2). Assertions autogenerated by
; utils/update_llc_test_checks.py — regenerate rather than hand-editing.
; SSE2-LABEL: test_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %a0)
  ret i16 %1
}
1145
define i16 @test_v32i16(<32 x i16> %a0) {
; Signed-max reduction of <32 x i16> to i16: pairwise pmaxsw/vpmaxsw across
; the input registers down to one XMM, then the phminposuw xor-trick on
; SSE4.1+ (shuffle+pmaxsw ladder on SSE2). Assertions autogenerated by
; utils/update_llc_test_checks.py.
; SSE2-LABEL: test_v32i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxsw %xmm3, %xmm1
; SSE41-NEXT:    pmaxsw %xmm2, %xmm0
; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> %a0)
  ret i16 %1
}
1223
define i16 @test_v64i16(<64 x i16> %a0) {
; Signed-max reduction of <64 x i16> (eight XMM / four YMM / two ZMM inputs)
; to i16. Wide halves are folded together with pmaxsw/vpmaxsw first; the last
; 128 bits use the phminposuw xor-trick on SSE4.1+. Assertions autogenerated
; by utils/update_llc_test_checks.py — regenerate, don't hand-edit.
; SSE2-LABEL: test_v64i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pmaxsw %xmm6, %xmm2
; SSE2-NEXT:    pmaxsw %xmm4, %xmm0
; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
; SSE2-NEXT:    pmaxsw %xmm7, %xmm3
; SSE2-NEXT:    pmaxsw %xmm5, %xmm1
; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v64i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxsw %xmm7, %xmm3
; SSE41-NEXT:    pmaxsw %xmm5, %xmm1
; SSE41-NEXT:    pmaxsw %xmm3, %xmm1
; SSE41-NEXT:    pmaxsw %xmm6, %xmm2
; SSE41-NEXT:    pmaxsw %xmm4, %xmm0
; SSE41-NEXT:    pmaxsw %xmm2, %xmm0
; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v64i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpmaxsw %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
; AVX1-NEXT:    vpmaxsw %xmm5, %xmm6, %xmm5
; AVX1-NEXT:    vpmaxsw %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpmaxsw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxsw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpmaxsw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> %a0)
  ret i16 %1
}
1318
1319;
1320; vXi8
1321;
1322
define i8 @test_v16i8(<16 x i8> %a0) {
; Signed-max reduction of <16 x i8> to i8. SSE4.1+ lowers it by flipping the
; sign with a 127-splat xor, folding the odd bytes in with pminub, then using
; phminposuw (a word-wise horizontal unsigned min) and xoring back. SSE2 has
; no byte smax, so it uses a pcmpgtb/pand/pandn/por select ladder.
; Assertions autogenerated by utils/update_llc_test_checks.py.
; SSE2-LABEL: test_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vphminposuw %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0)
  ret i8 %1
}
1395
define i8 @test_v32i8(<32 x i8> %a0) {
; Signed-max reduction of <32 x i8> to i8: halves combined with
; pmaxsb/vpmaxsb where available, then the xor-127 / pminub / phminposuw
; trick; SSE2 (no pmaxsb) uses the compare-and-select ladder throughout.
; Assertions autogenerated by utils/update_llc_test_checks.py.
; SSE2-LABEL: test_v32i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movd %xmm2, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> %a0)
  ret i8 %1
}
1495
define i8 @test_v64i8(<64 x i8> %a0) {
; Signed-max reduction of <64 x i8> (four XMM / two YMM / one ZMM) to i8:
; pairwise pmaxsb/vpmaxsb (or compare-and-select on SSE2) down to one XMM,
; then the xor-127 / pminub / phminposuw horizontal-min trick. Assertions
; autogenerated by utils/update_llc_test_checks.py — regenerate, don't
; hand-edit.
; SSE2-LABEL: test_v64i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtb %xmm3, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v64i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxsb %xmm3, %xmm1
; SSE41-NEXT:    pmaxsb %xmm2, %xmm0
; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> %a0)
  ret i8 %1
}
1613
; Signed-max reduction of a <128 x i8> vector (eight 128-bit registers' worth of
; input) down to a single i8. The autogenerated lines below pin the exact
; instruction sequence llc emits for each feature level; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand. Strategy per
; target: SSE2 lowers smax via pcmpgtb/pand/pandn/por select trees, pairwise
; combining the 8 inputs, then halving within one register; SSE4.1+ use pmaxsb
; directly and finish with the bias-to-unsigned phminposuw trick (xor with 127);
; AVX/AVX2/AVX-512 do the same after narrowing ymm/zmm halves with extracts.
define i8 @test_v128i8(<128 x i8> %a0) {
; SSE2-LABEL: test_v128i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm2, %xmm8
; SSE2-NEXT:    pcmpgtb %xmm6, %xmm8
; SSE2-NEXT:    pand %xmm8, %xmm2
; SSE2-NEXT:    pandn %xmm6, %xmm8
; SSE2-NEXT:    por %xmm2, %xmm8
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm3, %xmm0
; SSE2-NEXT:    pcmpgtb %xmm7, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    pandn %xmm7, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pcmpgtb %xmm5, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pandn %xmm5, %xmm3
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm3, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    pcmpgtb %xmm8, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pandn %xmm8, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm0, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movd %xmm2, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v128i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxsb %xmm7, %xmm3
; SSE41-NEXT:    pmaxsb %xmm5, %xmm1
; SSE41-NEXT:    pmaxsb %xmm3, %xmm1
; SSE41-NEXT:    pmaxsb %xmm6, %xmm2
; SSE41-NEXT:    pmaxsb %xmm4, %xmm0
; SSE41-NEXT:    pmaxsb %xmm2, %xmm0
; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v128i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpmaxsb %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
; AVX1-NEXT:    vpmaxsb %xmm5, %xmm6, %xmm5
; AVX1-NEXT:    vpmaxsb %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpmaxsb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v128i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxsb %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpmaxsb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v128i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
; Minimal driver body: reduce and return, so the checks see only the lowering.
  %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> %a0)
  ret i8 %1
}
1764
; Declarations of the experimental signed-max reduction intrinsics exercised by
; the test functions above, grouped by element type (i64, i32, i16, i8) with
; one overload per vector width tested.
declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>)
declare i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>)
declare i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>)

declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>)

declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>)

declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
declare i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>)
declare i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>)
declare i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>)
1784