; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

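; These predicates are symmetric: swapping the fcmp operands does not change
; the result, so the load of %a0 can stay folded as the memory operand of the
; compare, as in the checks below:
;   cmpeqps (%rdi), %xmm0
; A swapped olt/ole would need the predicate to become ogt/oge, which the SSE
; cmpps immediates (eq/lt/le/unord/neq/nlt/nle/ord) do not encode, so those
; cases load into a register and compare the other way round instead:
;   movaps (%rdi), %xmm1
;   cmpltps %xmm0, %xmm1
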
define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq:
; SSE:       # BB#0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne:
; SSE:       # BB#0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord:
; SSE:       # BB#0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno:
; SSE:       # BB#0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm1, %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpeqps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vorps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ueq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_one:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpneqps %xmm0, %xmm2
; SSE-NEXT:    cmpordps %xmm1, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpneqps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpordps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp one <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpltps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_le:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpleps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    cmpeqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    cmpneqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    cmpordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    cmpunordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpeqps %xmm0, %xmm4
; SSE-NEXT:    cmpunordps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpeqps %xmm1, %xmm2
; SSE-NEXT:    cmpunordps %xmm3, %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpeqps %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpunordps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ueq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_one_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpneqps %xmm0, %xmm4
; SSE-NEXT:    cmpordps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpneqps %xmm1, %xmm2
; SSE-NEXT:    cmpordps %xmm3, %xmm1
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpneqps %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpordps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp one <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_le_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpleps %xmm0, %xmm2
; SSE-NEXT:    cmpleps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

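; The same constraint applies to the double-precision predicates below. Note
; also that under SSE the 256-bit tests are legalized as two 128-bit cmppd
; operations on %xmm0/%xmm1, while AVX handles them with a single ymm compare.
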
define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq:
; SSE:       # BB#0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne:
; SSE:       # BB#0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord:
; SSE:       # BB#0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vorpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ueq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_one:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpneqpd %xmm0, %xmm2
; SSE-NEXT:    cmpordpd %xmm1, %xmm0
; SSE-NEXT:    andpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpneqpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp one <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno:
; SSE:       # BB#0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmpltpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_le:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmplepd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    cmpeqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    cmpneqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    cmpordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    cmpunordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ueq_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpeqpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpeqpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm3, %xmm1
; SSE-NEXT:    orpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpeqpd %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vorpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ueq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_one_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_one_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpneqpd %xmm0, %xmm4
; SSE-NEXT:    cmpordpd %xmm2, %xmm0
; SSE-NEXT:    andpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpneqpd %xmm1, %xmm2
; SSE-NEXT:    cmpordpd %xmm3, %xmm1
; SSE-NEXT:    andpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpneqpd %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vandpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp one <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_le_ymm:
; SSE:       # BB#0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmplepd %xmm0, %xmm2
; SSE-NEXT:    cmplepd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le_ymm:
; AVX:       # BB#0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}