; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl -disable-peephole | FileCheck %s --check-prefix=AVX512

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;
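; These predicates are symmetric (fcmp oeq %x, %y gives the same result as
; fcmp oeq %y, %x), so the backend may swap the operands and keep the load
; folded as the compare's memory operand. Asymmetric predicates such as
; olt/ole would have to become ogt/oge after commuting, which plain SSE
; cmpps cannot encode, so those cases below expect a separate load followed
; by a register compare with swapped operands instead.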

define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_eq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ne:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ord:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_uno:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm1, %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ueq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ueq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_one:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpneqps %xmm0, %xmm2
; SSE-NEXT:    cmpordps %xmm1, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_one:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp one <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpltps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_lt:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_le:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpleps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_le:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    cmpeqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_eq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    cmpneqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ne_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    cmpordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ord_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    cmpunordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_uno_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpeqps %xmm0, %xmm4
; SSE-NEXT:    cmpunordps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpeqps %xmm1, %xmm2
; SSE-NEXT:    cmpunordps %xmm3, %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ueq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ueq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_one_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpneqps %xmm0, %xmm4
; SSE-NEXT:    cmpordps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpneqps %xmm1, %xmm2
; SSE-NEXT:    cmpordps %xmm3, %xmm1
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_one_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp one <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_lt_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_le_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpleps %xmm0, %xmm2
; SSE-NEXT:    cmpleps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_le_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;
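; As with the float cases above, only the symmetric predicates (oeq, une,
; ord, uno) are expected to keep the load folded; lt/le compile to a
; separate load plus a register compare with swapped operands.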

define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_eq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ne:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ord:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ueq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ueq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_one:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpneqpd %xmm0, %xmm2
; SSE-NEXT:    cmpordpd %xmm1, %xmm0
; SSE-NEXT:    andpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_one:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp one <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_uno:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmpltpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_lt:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_le:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmplepd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_le:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    cmpeqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_eq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    cmpneqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ne_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    cmpordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ord_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    cmpunordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_uno_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ueq_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpeqpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpeqpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm3, %xmm1
; SSE-NEXT:    orpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ueq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ueq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_one_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_one_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpneqpd %xmm0, %xmm4
; SSE-NEXT:    cmpordpd %xmm2, %xmm0
; SSE-NEXT:    andpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpneqpd %xmm1, %xmm2
; SSE-NEXT:    cmpordpd %xmm3, %xmm1
; SSE-NEXT:    andpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_one_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp one <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_lt_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_le_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmplepd %xmm0, %xmm2
; SSE-NEXT:    cmplepd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_le_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}