; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,X64

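; These tests check that a load feeding the first source operand of a
; commutable XOP instruction is folded into a memory operand by commuting
; the operands. For VPCOM/VPCOMU the comparison immediate has to be swapped
; as well: lt (0) <-> gt (2) and le (1) <-> ge (3), while eq (4), neq (5),
; false (6) and true (7) are symmetric and stay unchanged.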
define <16 x i8> @commute_fold_vpcomb(<16 x i8>* %a0, <16 x i8> %a1) {
; X86-LABEL: commute_fold_vpcomb:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcomgtb (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomb:
; X64:       # %bb.0:
; X64-NEXT:    vpcomgtb (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %1, <16 x i8> %a1, i8 0) ; vpcomltb
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <4 x i32> @commute_fold_vpcomd(<4 x i32>* %a0, <4 x i32> %a1) {
; X86-LABEL: commute_fold_vpcomd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcomged (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomd:
; X64:       # %bb.0:
; X64-NEXT:    vpcomged (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %1, <4 x i32> %a1, i8 1) ; vpcomled
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @commute_fold_vpcomq(<2 x i64>* %a0, <2 x i64> %a1) {
; X86-LABEL: commute_fold_vpcomq:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcomltq (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomq:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %1, <2 x i64> %a1, i8 2) ; vpcomgtq
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone

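; The unsigned comparisons commute the same way. Since eq/neq do not depend
; on signedness, the backend is free to emit the signed mnemonics for the
; unsigned intrinsics, which is why vpcomud/vpcomuq below are checked as
; vpcomeqd/vpcomneqq.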
define <16 x i8> @commute_fold_vpcomub(<16 x i8>* %a0, <16 x i8> %a1) {
; X86-LABEL: commute_fold_vpcomub:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcomleub (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomub:
; X64:       # %bb.0:
; X64-NEXT:    vpcomleub (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %1, <16 x i8> %a1, i8 3) ; vpcomgeub
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
; X86-LABEL: commute_fold_vpcomud:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcomeqd (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomud:
; X64:       # %bb.0:
; X64-NEXT:    vpcomeqd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
; X86-LABEL: commute_fold_vpcomuq:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcomneqq (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomuq:
; X64:       # %bb.0:
; X64-NEXT:    vpcomneqq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone

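; The false (6) and true (7) predicates have constant results, so the
; comparison and the load are folded away entirely: all-zeros via vxorps
; and all-ones via vpcmpeqd.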
define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
; CHECK-LABEL: commute_fold_vpcomuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
; CHECK-LABEL: commute_fold_vpcomw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone

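; The VPMACS*/VPMACSS* multiply-accumulate instructions compute
; (src1 * src2) + src3 (the SS forms saturate). Only the two multiplicands
; commute, so the load of %a0 is folded by moving it into the second source
; position; the accumulator operand stays put.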
define <4 x i32> @commute_fold_vpmacsdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X86-LABEL: commute_fold_vpmacsdd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacsdd %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsdd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @commute_fold_vpmacsdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X86-LABEL: commute_fold_vpmacsdqh:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacsdqh %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsdqh:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdqh %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @commute_fold_vpmacsdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X86-LABEL: commute_fold_vpmacsdql:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacsdql %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsdql:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdql %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <4 x i32> @commute_fold_vpmacssdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X86-LABEL: commute_fold_vpmacssdd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacssdd %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssdd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @commute_fold_vpmacssdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X86-LABEL: commute_fold_vpmacssdqh:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacssdqh %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssdqh:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdqh %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @commute_fold_vpmacssdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X86-LABEL: commute_fold_vpmacssdql:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacssdql %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssdql:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdql %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <4 x i32> @commute_fold_vpmacsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X86-LABEL: commute_fold_vpmacsswd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacsswd %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <8 x i16> @commute_fold_vpmacssww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
; X86-LABEL: commute_fold_vpmacssww:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacssww %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssww:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssww %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @commute_fold_vpmacswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X86-LABEL: commute_fold_vpmacswd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacswd %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <8 x i16> @commute_fold_vpmacsww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
; X86-LABEL: commute_fold_vpmacsww:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmacsww %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsww:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsww %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

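; VPMADCSWD/VPMADCSSWD (multiply, add and accumulate word to doubleword)
; commute their two word multiplicands in the same way.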
define <4 x i32> @commute_fold_vpmadcsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X86-LABEL: commute_fold_vpmadcsswd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmadcsswd %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmadcsswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmadcsswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <4 x i32> @commute_fold_vpmadcswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X86-LABEL: commute_fold_vpmadcswd:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmadcswd %xmm1, (%eax), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmadcswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone