• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
3
; test1: select %x where %x <= %y (ordered) on <16 x float>, else %y.
; The KNL run expects the compare to write mask register k1 and the select
; to become a k1-masked vmovaps followed by a register copy.
4define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
5; KNL-LABEL: test1:
6; KNL:       ## BB#0:
7; KNL-NEXT:    vcmpleps %zmm1, %zmm0, %k1
8; KNL-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
9; KNL-NEXT:    vmovaps %zmm1, %zmm0
10; KNL-NEXT:    retq
11  %mask = fcmp ole <16 x float> %x, %y
12  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
13  ret <16 x float> %max
14}
15
; test2: <8 x double> counterpart of test1 (fcmp ole + select).
; The KNL run expects vcmplepd into k1 and a k1-masked vmovapd.
16define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
17; KNL-LABEL: test2:
18; KNL:       ## BB#0:
19; KNL-NEXT:    vcmplepd %zmm1, %zmm0, %k1
20; KNL-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
21; KNL-NEXT:    vmovaps %zmm1, %zmm0
22; KNL-NEXT:    retq
23  %mask = fcmp ole <8 x double> %x, %y
24  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
25  ret <8 x double> %max
26}
27
; test3: integer equality compare of %x against a vector loaded from %yp,
; selecting %x or %x1. The KNL run expects the load to be folded into
; vpcmpeqd as a (%rdi) memory operand.
28define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
29; KNL-LABEL: test3:
30; KNL:       ## BB#0:
31; KNL-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1
32; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
33; KNL-NEXT:    vmovaps %zmm1, %zmm0
34; KNL-NEXT:    retq
35  %y = load <16 x i32>, <16 x i32>* %yp, align 4
36  %mask = icmp eq <16 x i32> %x, %y
37  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
38  ret <16 x i32> %max
39}
40
; test4_unsigned: unsigned >= compare of %x and %y on <16 x i32>; the select
; picks %x1 where true and %y otherwise. The KNL run expects vpcmpnltud.
41define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
42; KNL-LABEL: test4_unsigned:
43; KNL:       ## BB#0:
44; KNL-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1
45; KNL-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
46; KNL-NEXT:    vmovaps %zmm1, %zmm0
47; KNL-NEXT:    retq
48  %mask = icmp uge <16 x i32> %x, %y
49  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
50  ret <16 x i32> %max
51}
52
; test5: equality compare and select on <8 x i64>.
; The KNL run expects vpcmpeqq into k1 and a k1-masked vmovdqa64.
53define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
54; KNL-LABEL: test5:
55; KNL:       ## BB#0:
56; KNL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
57; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
58; KNL-NEXT:    vmovaps %zmm1, %zmm0
59; KNL-NEXT:    retq
60  %mask = icmp eq <8 x i64> %x, %y
61  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
62  ret <8 x i64> %max
63}
64
; test6_unsigned: unsigned > compare of %x and %y on <8 x i64>; the select
; picks %x1 where true and %y otherwise. The KNL run expects vpcmpnleuq.
65define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
66; KNL-LABEL: test6_unsigned:
67; KNL:       ## BB#0:
68; KNL-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1
69; KNL-NEXT:    vmovdqa64 %zmm2, %zmm1 {%k1}
70; KNL-NEXT:    vmovaps %zmm1, %zmm0
71; KNL-NEXT:    retq
72  %mask = icmp ugt <8 x i64> %x, %y
73  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
74  ret <8 x i64> %max
75}
76
; test7: select %a where %a < 0.0 (ordered) on <4 x float>, else %b.
; The KNL run expects a vector-register compare result fed to vblendvps;
; the SKX run expects the compare to write k1 and a k1-masked vmovaps.
; The SKX checks here are plain (non -NEXT) matches.
77define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
78; KNL-LABEL: test7:
79; KNL:       ## BB#0:
80; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
81; KNL-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
82; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
83; KNL-NEXT:    retq
84; SKX-LABEL: test7:
85; SKX:       ## BB#0:
86; SKX:    vxorps   %xmm2, %xmm2, %xmm2
87; SKX:    vcmpltps %xmm2, %xmm0, %k1
88; SKX:    vmovaps  %xmm0, %xmm1 {%k1}
89; SKX:    vmovaps  %zmm1, %zmm0
90; SKX:    retq
91
92  %mask = fcmp olt <4 x float> %a, zeroinitializer
93  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
94  ret <4 x float>%c
95}
96
; test8: <2 x double> counterpart of test7 (fcmp olt against zero + select).
; The KNL run expects vcmpltpd into a vector register plus vblendvpd;
; the SKX run expects vcmpltpd into k1 and a k1-masked vmovapd.
97define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
98; KNL-LABEL: test8:
99; KNL:       ## BB#0:
100; KNL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
101; KNL-NEXT:    vcmpltpd %xmm2, %xmm0, %xmm2
102; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
103; KNL-NEXT:    retq
104; SKX-LABEL: test8:
105; SKX:       ## BB#0:
106; SKX: vxorpd  %xmm2, %xmm2, %xmm2
107; SKX: vcmpltpd    %xmm2, %xmm0, %k1
108; SKX: vmovapd %xmm0, %xmm1 {%k1}
109; SKX: vmovaps %zmm1, %zmm0
110; SKX: retq
111  %mask = fcmp olt <2 x double> %a, zeroinitializer
112  %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
113  ret <2 x double>%c
114}
115
; test9: equality compare and select on <8 x i32>.
; The expected KNL output uses zmm registers (vpcmpeqd + k1-masked vpblendmd)
; even though the IR type is only 8 x i32.
116define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
117; KNL-LABEL: test9:
118; KNL:       ## BB#0:
119; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
120; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
121; KNL-NEXT:    retq
122  %mask = icmp eq <8 x i32> %x, %y
123  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
124  ret <8 x i32> %max
125}
126
; test10: ordered-equal compare and select on <8 x float>.
; The KNL run expects zmm-register vcmpeqps + k1-masked vblendmps;
; the SKX run expects a ymm-register vcmpeqps into k1 and a masked vmovaps.
127define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
128; KNL-LABEL: test10:
129; KNL:       ## BB#0:
130; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
131; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
132; KNL-NEXT:    retq
133; SKX-LABEL: test10:
134; SKX:       ## BB#0:
135; SKX: vcmpeqps    %ymm1, %ymm0, %k1
136; SKX: vmovaps %ymm0, %ymm1 {%k1}
137; SKX: vmovaps %zmm1, %zmm0
138; SKX: retq
139
140  %mask = fcmp oeq <8 x float> %x, %y
141  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
142  ret <8 x float> %max
143}
144
; test11_unsigned: unsigned > compare whose select returns the larger of the
; two compared operands on <8 x i32>. The KNL run expects the whole pattern
; to collapse to a single vpmaxud on ymm registers.
145define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
146; KNL-LABEL: test11_unsigned:
147; KNL:       ## BB#0:
148; KNL-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
149; KNL-NEXT:    retq
150  %mask = icmp ugt <8 x i32> %x, %y
151  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
152  ret <8 x i32> %max
153}
154
; test12: equality compare of <16 x i64> operands, with the <16 x i1> result
; bitcast to i16. The KNL run expects two vpcmpeqq compares whose mask
; results are combined with kunpckbw and moved to eax with kmovw.
155define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
156; KNL-LABEL: test12:
157; KNL:       ## BB#0:
158; KNL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
159; KNL-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
160; KNL-NEXT:    kunpckbw %k0, %k1, %k0
161; KNL-NEXT:    kmovw %k0, %eax
162; KNL-NEXT:    retq
163  %res = icmp eq <16 x i64> %a, %b
164  %res1 = bitcast <16 x i1> %res to i16
165  ret i16 %res1
166}
167
; test12_v32i32: equality compare of <32 x i32> operands, with the <32 x i1>
; result bitcast to i32. Only the SKX run is checked: two vpcmpeqd compares
; combined with kunpckwd and moved to eax with kmovd.
168define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
169; SKX-LABEL: test12_v32i32:
170; SKX:       ## BB#0:
171; SKX-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
172; SKX-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
173; SKX-NEXT:    kunpckwd %k0, %k1, %k0
174; SKX-NEXT:    kmovd %k0, %eax
175; SKX-NEXT:    retq
176  %res = icmp eq <32 x i32> %a, %b
177  %res1 = bitcast <32 x i1> %res to i32
178  ret i32 %res1
179}
180
; test12_v64i16: equality compare of <64 x i16> operands, with the <64 x i1>
; result bitcast to i64. Only the SKX run is checked: two vpcmpeqw compares
; combined with kunpckdq and moved to rax with kmovq.
181define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
182; SKX-LABEL: test12_v64i16:
183; SKX:       ## BB#0:
184; SKX-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0
185; SKX-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
186; SKX-NEXT:    kunpckdq %k0, %k1, %k0
187; SKX-NEXT:    kmovq %k0, %rax
188; SKX-NEXT:    retq
189  %res = icmp eq <64 x i16> %a, %b
190  %res1 = bitcast <64 x i1> %res to i64
191  ret i64 %res1
192}
193
; test13: zero-extend the <16 x i1> result of an ordered-equal float compare
; to <16 x i32>. The KNL run expects the compare to write k1 and a
; zero-masking vpbroadcastd from a rip-relative constant.
; NOTE(review): the opening brace sits on its own line after the check
; lines here, unlike the other functions in this file.
194define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
195; KNL-LABEL: test13:
196; KNL:       ## BB#0:
197; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
198; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
199; KNL-NEXT:    retq
200{
201  %cmpvector_i = fcmp oeq <16 x float> %a, %b
202  %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
203  ret <16 x i32> %conv
204}
205
; test14: computes %sub_r = %a - %b and returns %sub_r in lanes where
; %sub_r > %a (signed), zero elsewhere. The IR expresses this through a
; sext of the compare, a compare of that against zero, and a select.
; The KNL run expects vpsubd, vpcmpgtd, two knotw and a zero-masking vmovdqu32.
206define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
207; KNL-LABEL: test14:
208; KNL:       ## BB#0:
209; KNL-NEXT:    vpsubd %zmm1, %zmm0, %zmm1
210; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
211; KNL-NEXT:    knotw %k0, %k0
212; KNL-NEXT:    knotw %k0, %k1
213; KNL-NEXT:    vmovdqu32 %zmm1, %zmm0 {%k1} {z}
214; KNL-NEXT:    retq
215  %sub_r = sub <16 x i32> %a, %b
216  %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
217  %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
218  %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
219  %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
220  ret <16 x i32>%res
221}
222
; test15: <8 x i64> counterpart of test14: returns %a - %b in lanes where
; the difference is greater than %a (signed), zero elsewhere.
; The KNL run expects vpsubq, vpcmpgtq, two knotw and a zero-masking vmovdqu64.
223define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
224; KNL-LABEL: test15:
225; KNL:       ## BB#0:
226; KNL-NEXT:    vpsubq %zmm1, %zmm0, %zmm1
227; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
228; KNL-NEXT:    knotw %k0, %k0
229; KNL-NEXT:    knotw %k0, %k1
230; KNL-NEXT:    vmovdqu64 %zmm1, %zmm0 {%k1} {z}
231; KNL-NEXT:    retq
232  %sub_r = sub <8 x i64> %a, %b
233  %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
234  %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
235  %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
236  %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
237  ret <8 x i64>%res
238}
239
; test16: signed >= compare of %x and %y on <16 x i32>; the select picks %x1
; where true and %y otherwise. The KNL run expects the sge to be emitted as
; vpcmpled with the operands written as %zmm0, %zmm1.
240define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
241; KNL-LABEL: test16:
242; KNL:       ## BB#0:
243; KNL-NEXT:    vpcmpled %zmm0, %zmm1, %k1
244; KNL-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
245; KNL-NEXT:    vmovaps %zmm1, %zmm0
246; KNL-NEXT:    retq
247  %mask = icmp sge <16 x i32> %x, %y
248  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
249  ret <16 x i32> %max
250}
251
; test17: signed > compare of %x against a vector loaded from %y.ptr,
; selecting %x or %x1. The KNL run expects the load folded into vpcmpgtd.
252define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
253; KNL-LABEL: test17:
254; KNL:       ## BB#0:
255; KNL-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1
256; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
257; KNL-NEXT:    vmovaps %zmm1, %zmm0
258; KNL-NEXT:    retq
259  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
260  %mask = icmp sgt <16 x i32> %x, %y
261  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
262  ret <16 x i32> %max
263}
264
; test18: signed <= compare of %x against a vector loaded from %y.ptr,
; selecting %x or %x1. The KNL run expects the load folded into vpcmpled.
265define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
266; KNL-LABEL: test18:
267; KNL:       ## BB#0:
268; KNL-NEXT:    vpcmpled (%rdi), %zmm0, %k1
269; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
270; KNL-NEXT:    vmovaps %zmm1, %zmm0
271; KNL-NEXT:    retq
272  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
273  %mask = icmp sle <16 x i32> %x, %y
274  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
275  ret <16 x i32> %max
276}
277
; test19: unsigned <= compare of %x against a vector loaded from %y.ptr,
; selecting %x or %x1. The KNL run expects the load folded into vpcmpleud.
278define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
279; KNL-LABEL: test19:
280; KNL:       ## BB#0:
281; KNL-NEXT:    vpcmpleud (%rdi), %zmm0, %k1
282; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
283; KNL-NEXT:    vmovaps %zmm1, %zmm0
284; KNL-NEXT:    retq
285  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
286  %mask = icmp ule <16 x i32> %x, %y
287  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
288  ret <16 x i32> %max
289}
290
; test20: two equality compares combined by a select against zeroinitializer
; (true only where both compares are true); the combined mask then selects
; %x or %y. The KNL run expects the second vpcmpeqd to be predicated on the
; k1 result of the first.
291define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
292; KNL-LABEL: test20:
293; KNL:       ## BB#0:
294; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
295; KNL-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
296; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
297; KNL-NEXT:    vmovaps %zmm1, %zmm0
298; KNL-NEXT:    retq
299  %mask1 = icmp eq <16 x i32> %x1, %y1
300  %mask0 = icmp eq <16 x i32> %x, %y
301  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
302  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
303  ret <16 x i32> %max
304}
305
; test21: combines a signed <= compare of %x,%y with a signed >= compare of
; %x1,%y1 (true only where both hold) and uses the result to select %x or
; %x1. The KNL run expects two vpcmpleq, the second predicated on k1.
306define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
307; KNL-LABEL: test21:
308; KNL:       ## BB#0:
309; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
310; KNL-NEXT:    vpcmpleq %zmm2, %zmm3, %k1 {%k1}
311; KNL-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k1}
312; KNL-NEXT:    vmovaps %zmm2, %zmm0
313; KNL-NEXT:    retq
314  %mask1 = icmp sge <8 x i64> %x1, %y1
315  %mask0 = icmp sle <8 x i64> %x, %y
316  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
317  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
318  ret <8 x i64> %max
319}
320
; test22: combines a signed > compare of %x1,%y1 with a signed > compare of
; %x against a vector loaded from %y.ptr, then selects %x or %x1. The KNL
; run expects the second vpcmpgtq to take the (%rdi) memory operand and be
; predicated on k1.
321define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
322; KNL-LABEL: test22:
323; KNL:       ## BB#0:
324; KNL-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
325; KNL-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
326; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
327; KNL-NEXT:    vmovaps %zmm1, %zmm0
328; KNL-NEXT:    retq
329  %mask1 = icmp sgt <8 x i64> %x1, %y1
330  %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
331  %mask0 = icmp sgt <8 x i64> %x, %y
332  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
333  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
334  ret <8 x i64> %max
335}
336
; test23: combines a signed >= compare of %x1,%y1 with an unsigned <= compare
; of %x against a vector loaded from %y.ptr, then selects %x or %x1. The KNL
; run expects vpcmpled followed by a k1-predicated vpcmpleud from memory.
337define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
338; KNL-LABEL: test23:
339; KNL:       ## BB#0:
340; KNL-NEXT:    vpcmpled %zmm1, %zmm2, %k1
341; KNL-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1}
342; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
343; KNL-NEXT:    vmovaps %zmm1, %zmm0
344; KNL-NEXT:    retq
345  %mask1 = icmp sge <16 x i32> %x1, %y1
346  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
347  %mask0 = icmp ule <16 x i32> %x, %y
348  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
349  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
350  ret <16 x i32> %max
351}
352
; test24: loads one i64 from %yb.ptr, splats it to <8 x i64> with
; insertelement + shufflevector, and compares %x for equality against it.
; The KNL run expects the splat folded into vpcmpeqq as a {1to8} broadcast
; memory operand.
353define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
354; KNL-LABEL: test24:
355; KNL:       ## BB#0:
356; KNL-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1
357; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
358; KNL-NEXT:    vmovaps %zmm1, %zmm0
359; KNL-NEXT:    retq
360  %yb = load i64, i64* %yb.ptr, align 4
361  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
362  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
363  %mask = icmp eq <8 x i64> %x, %y
364  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
365  ret <8 x i64> %max
366}
367
; test25: splats an i32 loaded from %yb.ptr to <16 x i32> and compares %x
; against it with signed <=. The KNL run expects vpcmpled with a {1to16}
; broadcast memory operand.
368define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
369; KNL-LABEL: test25:
370; KNL:       ## BB#0:
371; KNL-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1
372; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
373; KNL-NEXT:    vmovaps %zmm1, %zmm0
374; KNL-NEXT:    retq
375  %yb = load i32, i32* %yb.ptr, align 4
376  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
377  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
378  %mask = icmp sle <16 x i32> %x, %y
379  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
380  ret <16 x i32> %max
381}
382
; test26: combines a signed >= compare of %x1,%y1 with a signed > compare of
; %x against a splat of the i32 loaded from %yb.ptr, then selects %x or %x1.
; The KNL run expects vpcmpled followed by a k1-predicated vpcmpgtd with a
; {1to16} broadcast memory operand.
383define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
384; KNL-LABEL: test26:
385; KNL:       ## BB#0:
386; KNL-NEXT:    vpcmpled %zmm1, %zmm2, %k1
387; KNL-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
388; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
389; KNL-NEXT:    vmovaps %zmm1, %zmm0
390; KNL-NEXT:    retq
391  %mask1 = icmp sge <16 x i32> %x1, %y1
392  %yb = load i32, i32* %yb.ptr, align 4
393  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
394  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
395  %mask0 = icmp sgt <16 x i32> %x, %y
396  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
397  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
398  ret <16 x i32> %max
399}
400
; test27: combines a signed >= compare of %x1,%y1 with a signed <= compare of
; %x against a splat of the i64 loaded from %yb.ptr, then selects %x or %x1.
; The KNL run expects two vpcmpleq, the second k1-predicated with a {1to8}
; broadcast memory operand.
401define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
402; KNL-LABEL: test27:
403; KNL:       ## BB#0:
404; KNL-NEXT:    vpcmpleq        %zmm1, %zmm2, %k1
405; KNL-NEXT:    vpcmpleq        (%rdi){1to8}, %zmm0, %k1 {%k1}
406; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
407; KNL-NEXT:    vmovaps %zmm1, %zmm0
408; KNL-NEXT:    retq
409  %mask1 = icmp sge <8 x i64> %x1, %y1
410  %yb = load i64, i64* %yb.ptr, align 4
411  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
412  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
413  %mask0 = icmp sle <8 x i64> %x, %y
414  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
415  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
416  ret <8 x i64> %max
417}
418
; test28: compares two signed greater-than masks over <8 x i64> for
; element-wise equality and sign-extends the result to <8 x i32>.
; The KNL run is expected to contain two vpcmpgtq compares and a kxnorw.
; The label check carries a trailing colon, like the other label checks in
; this file, so it anchors on the label definition rather than on the first
; occurrence of the bare function name.
419; KNL-LABEL: test28:
420; KNL: vpcmpgtq
421; KNL: vpcmpgtq
422; KNL: kxnorw
423define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
424  %x_gt_y = icmp sgt <8 x i64> %x, %y
425  %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
426  %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
427  %resse = sext <8 x i1>%res to <8 x i32>
428  ret <8 x i32> %resse
429}
430
; test29: compares two signed greater-than masks over <16 x i32> for
; element-wise inequality and sign-extends the result to <16 x i8>.
; The KNL run is expected to contain two vpcmpgtd compares and a kxorw.
; The label check carries a trailing colon, like the other label checks in
; this file, so it anchors on the label definition rather than on the first
; occurrence of the bare function name.
431; KNL-LABEL: test29:
432; KNL: vpcmpgtd
433; KNL: vpcmpgtd
434; KNL: kxorw
435define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
436  %x_gt_y = icmp sgt <16 x i32> %x, %y
437  %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
438  %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
439  %resse = sext <16 x i1>%res to <16 x i8>
440  ret <16 x i8> %resse
441}
442
; test30: ordered-equal compare and select on <4 x double>. Only the SKX run
; is checked: vcmpeqpd on ymm registers into k1, then a k1-masked vmovapd.
443define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
444; SKX-LABEL: test30:
445; SKX: vcmpeqpd   %ymm1, %ymm0, %k1
446; SKX: vmovapd    %ymm0, %ymm1 {%k1}
447
448  %mask = fcmp oeq <4 x double> %x, %y
449  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
450  ret <4 x double> %max
451}
452
; test31: ordered less-than compare of %x against a <2 x double> loaded from
; %yp, selecting %x or %x1. The SKX run expects the load folded into
; vcmpltpd on xmm registers.
453define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
454; SKX-LABEL: test31:
455; SKX: vcmpltpd        (%rdi), %xmm0, %k1
456; SKX: vmovapd %xmm0, %xmm1 {%k1}
457
458  %y = load <2 x double>, <2 x double>* %yp, align 4
459  %mask = fcmp olt <2 x double> %x, %y
460  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
461  ret <2 x double> %max
462}
463
; test32: <4 x double> variant where the IR compare is written as
; ogt with the loaded value as the first operand (%y > %x). The SKX run
; expects this to be emitted as vcmpltpd with the (%rdi) memory operand.
464define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
465; SKX-LABEL: test32:
466; SKX: vcmpltpd        (%rdi), %ymm0, %k1
467; SKX: vmovapd %ymm0, %ymm1 {%k1}
468
469  %y = load <4 x double>, <4 x double>* %yp, align 4
470  %mask = fcmp ogt <4 x double> %y, %x
471  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
472  ret <4 x double> %max
473}
474
; test33: ordered less-than compare of %x against an <8 x double> loaded from
; %yp, selecting %x or %x1. The SKX run expects the load folded into
; vcmpltpd on zmm registers.
475define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
476; SKX-LABEL: test33:
477; SKX: vcmpltpd        (%rdi), %zmm0, %k1
478; SKX: vmovapd %zmm0, %zmm1 {%k1}
479  %y = load <8 x double>, <8 x double>* %yp, align 4
480  %mask = fcmp olt <8 x double> %x, %y
481  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
482  ret <8 x double> %max
483}
484
; test34: ordered less-than compare of %x against a <4 x float> loaded from
; %yp, selecting %x or %x1. The SKX run expects the load folded into
; vcmpltps on xmm registers.
485define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
486; SKX-LABEL: test34:
487; SKX: vcmpltps        (%rdi), %xmm0, %k1
488; SKX: vmovaps %xmm0, %xmm1 {%k1}
489  %y = load <4 x float>, <4 x float>* %yp, align 4
490  %mask = fcmp olt <4 x float> %x, %y
491  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
492  ret <4 x float> %max
493}
494
; test35: <8 x float> variant where the IR compare is written as ogt with
; the loaded value as the first operand (%y > %x). The SKX run expects this
; to be emitted as vcmpltps with the (%rdi) memory operand.
495define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
496; SKX-LABEL: test35:
497; SKX: vcmpltps        (%rdi), %ymm0, %k1
498; SKX: vmovaps %ymm0, %ymm1 {%k1}
499
500  %y = load <8 x float>, <8 x float>* %yp, align 4
501  %mask = fcmp ogt <8 x float> %y, %x
502  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
503  ret <8 x float> %max
504}
505
; test36: ordered less-than compare of %x against a <16 x float> loaded from
; %yp, selecting %x or %x1. The SKX run expects the load folded into
; vcmpltps on zmm registers.
506define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
507; SKX-LABEL: test36:
508; SKX: vcmpltps        (%rdi), %zmm0, %k1
509; SKX: vmovaps %zmm0, %zmm1 {%k1}
510  %y = load <16 x float>, <16 x float>* %yp, align 4
511  %mask = fcmp olt <16 x float> %x, %y
512  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
513  ret <16 x float> %max
514}
515
; test37: splats a double loaded from %ptr to <8 x double> and tests
; splat > %x (ordered), selecting %x or %x1. The SKX run expects vcmpltpd
; with a {1to8} broadcast memory operand.
516define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
517; SKX-LABEL: test37:
518; SKX: vcmpltpd  (%rdi){1to8}, %zmm0, %k1
519; SKX: vmovapd %zmm0, %zmm1 {%k1}
520
521  %a = load double, double* %ptr
522  %v = insertelement <8 x double> undef, double %a, i32 0
523  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
524
525  %mask = fcmp ogt <8 x double> %shuffle, %x
526  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
527  ret <8 x double> %max
528}
529
; test38: <4 x double> counterpart of test37 (scalar load splat, ogt compare,
; select). The SKX run expects vcmpltpd with a {1to4} broadcast memory
; operand on ymm registers.
530define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
531; SKX-LABEL: test38:
532; SKX: vcmpltpd  (%rdi){1to4}, %ymm0, %k1
533; SKX: vmovapd %ymm0, %ymm1 {%k1}
534
535  %a = load double, double* %ptr
536  %v = insertelement <4 x double> undef, double %a, i32 0
537  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
538
539  %mask = fcmp ogt <4 x double> %shuffle, %x
540  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
541  ret <4 x double> %max
542}
543
; test39: <2 x double> counterpart of test37; the splat shuffle mask is
; spelled out as <i32 0, i32 0>. The SKX run expects vcmpltpd with a {1to2}
; broadcast memory operand on xmm registers.
544define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
545; SKX-LABEL: test39:
546; SKX: vcmpltpd  (%rdi){1to2}, %xmm0, %k1
547; SKX: vmovapd %xmm0, %xmm1 {%k1}
548
549  %a = load double, double* %ptr
550  %v = insertelement <2 x double> undef, double %a, i32 0
551  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
552
553  %mask = fcmp ogt <2 x double> %shuffle, %x
554  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
555  ret <2 x double> %max
556}
557
558
; test40: splats a float loaded from %ptr to <16 x float> (explicit all-zero
; shuffle mask) and tests splat > %x (ordered), selecting %x or %x1. The SKX
; run expects vcmpltps with a {1to16} broadcast memory operand.
559define <16  x float> @test40(<16  x float> %x, <16  x float> %x1, float* %ptr) nounwind {
560; SKX-LABEL: test40:
561; SKX: vcmpltps  (%rdi){1to16}, %zmm0, %k1
562; SKX: vmovaps %zmm0, %zmm1 {%k1}
563
564  %a = load float, float* %ptr
565  %v = insertelement <16  x float> undef, float %a, i32 0
566  %shuffle = shufflevector <16  x float> %v, <16  x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
567
568  %mask = fcmp ogt <16  x float> %shuffle, %x
569  %max = select <16 x i1> %mask, <16  x float> %x, <16  x float> %x1
570  ret <16  x float> %max
571}
572
; test41: <8 x float> counterpart of test40. The SKX run expects vcmpltps
; with a {1to8} broadcast memory operand on ymm registers.
573define <8  x float> @test41(<8  x float> %x, <8  x float> %x1, float* %ptr) nounwind {
574; SKX-LABEL: test41:
575; SKX: vcmpltps  (%rdi){1to8}, %ymm0, %k1
576; SKX: vmovaps %ymm0, %ymm1 {%k1}
577
578  %a = load float, float* %ptr
579  %v = insertelement <8  x float> undef, float %a, i32 0
580  %shuffle = shufflevector <8  x float> %v, <8  x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
581
582  %mask = fcmp ogt <8  x float> %shuffle, %x
583  %max = select <8 x i1> %mask, <8  x float> %x, <8  x float> %x1
584  ret <8  x float> %max
585}
586
; test42: <4 x float> counterpart of test40. The SKX run expects vcmpltps
; with a {1to4} broadcast memory operand on xmm registers.
587define <4  x float> @test42(<4  x float> %x, <4  x float> %x1, float* %ptr) nounwind {
588; SKX-LABEL: test42:
589; SKX: vcmpltps  (%rdi){1to4}, %xmm0, %k1
590; SKX: vmovaps %xmm0, %xmm1 {%k1}
591
592  %a = load float, float* %ptr
593  %v = insertelement <4  x float> undef, float %a, i32 0
594  %shuffle = shufflevector <4  x float> %v, <4  x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
595
596  %mask = fcmp ogt <4  x float> %shuffle, %x
597  %max = select <4 x i1> %mask, <4  x float> %x, <4  x float> %x1
598  ret <4  x float> %max
599}
600
; test43: like test37, but the compare result is ANDed with an incoming
; <8 x i1> argument %mask_in before the select. The SKX run expects
; vpmovw2m to move %mask_in from xmm2 into k1 and the broadcast vcmpltpd to
; be predicated on that k1.
601define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
602; SKX-LABEL: test43:
603; SKX: vpmovw2m  %xmm2, %k1
604; SKX: vcmpltpd  (%rdi){1to8}, %zmm0, %k1 {%k1}
605; SKX: vmovapd %zmm0, %zmm1 {%k1}
606
607  %a = load double, double* %ptr
608  %v = insertelement <8 x double> undef, double %a, i32 0
609  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
610
611  %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
612  %mask = and <8 x i1> %mask_cmp, %mask_in
613  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
614  ret <8 x double> %max
615}
616