; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

; fold (add x, 0) -> x
define <4 x i32> @combine_vec_add_to_zero(<4 x i32> %a) {
; SSE-LABEL: combine_vec_add_to_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_to_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = add <4 x i32> %a, zeroinitializer
  ret <4 x i32> %1
}
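
; The add of zeroinitializer folds away entirely, so both prefixes expect an
; empty body: %xmm0 arrives holding %a and is returned untouched by retq.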

; fold ((c1-A)+c2) -> (c1+c2)-A
define <4 x i32> @combine_vec_add_constant_sub(<4 x i32> %a) {
; SSE-LABEL: combine_vec_add_constant_sub:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,2,4,6]
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_constant_sub:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2,4,6]
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %a
  %2 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %1
  ret <4 x i32> %2
}
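
; Here c1 and c2 fold together: <0,1,2,3> + <0,1,2,3> = <0,2,4,6>, which is
; exactly the [0,2,4,6] immediate loaded by movdqa/vmovdqa above, leaving a
; single vector subtract of %a from that constant.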

; fold ((0-A) + B) -> B-A
define <4 x i32> @combine_vec_add_neg0(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_neg0:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_neg0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %a
  %2 = add <4 x i32> %1, %b
  ret <4 x i32> %2
}

; fold (A + (0-B)) -> A-B
define <4 x i32> @combine_vec_add_neg1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_neg1:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_neg1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %b
  %2 = add <4 x i32> %a, %1
  ret <4 x i32> %2
}

; fold (A+(B-A)) -> B
define <4 x i32> @combine_vec_add_sub0(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_sub0:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub0:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %a
  %2 = add <4 x i32> %a, %1
  ret <4 x i32> %2
}

; fold ((B-A)+A) -> B
define <4 x i32> @combine_vec_add_sub1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_sub1:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub1:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %a
  %2 = add <4 x i32> %1, %a
  ret <4 x i32> %2
}
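
; In both combine_vec_add_sub0 and combine_vec_add_sub1 the %a terms cancel
; (A + (B - A) = B), so all that remains is copying %b from %xmm1 into the
; return register %xmm0.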

; fold (A+(B-(A+C))) to (B-C)
define <4 x i32> @combine_vec_add_sub_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_add0:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_add0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %a, %c
  %2 = sub <4 x i32> %b, %1
  %3 = add <4 x i32> %a, %2
  ret <4 x i32> %3
}
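
; Expanding the expression: A + (B - (A + C)) = B - C, so the whole chain
; collapses to one psubd. The _add1 variant below commutes the inner add,
; _add2 simplifies to B + C (one paddd), and _add3 to B - C again.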

; fold (A+(B-(C+A))) to (B-C)
define <4 x i32> @combine_vec_add_sub_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_add1:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_add1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %c, %a
  %2 = sub <4 x i32> %b, %1
  %3 = add <4 x i32> %a, %2
  ret <4 x i32> %3
}

; fold (A+((B-A)+C)) to (B+C)
define <4 x i32> @combine_vec_add_sub_add2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_add2:
; SSE:       # %bb.0:
; SSE-NEXT:    paddd %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_add2:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %a
  %2 = add <4 x i32> %1, %c
  %3 = add <4 x i32> %a, %2
  ret <4 x i32> %3
}

; fold (A+((B-A)-C)) to (B-C)
define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_add3:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_add3:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %a
  %2 = sub <4 x i32> %1, %c
  %3 = add <4 x i32> %a, %2
  ret <4 x i32> %3
}

; fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
; SSE-LABEL: combine_vec_add_sub_sub:
; SSE:       # %bb.0:
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    paddd %xmm2, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_sub:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %a, %b
  %2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
  %3 = add <4 x i32> %1, %2
  ret <4 x i32> %3
}
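
; In this test C is the constant <0,1,2,3>, so (A-B)+(C-D) is rewritten as
; (A+C)-(B+D): the rip-relative paddd forms %a + <0,1,2,3>, the second paddd
; forms %b + %d, and a single psubd produces the result.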

; fold (a+b) -> (a|b) iff a and b share no bits.
define <4 x i32> @combine_vec_add_uniquebits(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_uniquebits:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_uniquebits:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = and <4 x i32> %a, <i32 61680, i32 61680, i32 61680, i32 61680>
  %2 = and <4 x i32> %b, <i32 3855, i32 3855, i32 3855, i32 3855>
  %3 = add <4 x i32> %1, %2
  ret <4 x i32> %3
}
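
; 61680 is 0xF0F0 and 3855 is 0x0F0F, so the two masked values have no set
; bits in common; the add is therefore equivalent to a bitwise or, which is
; what both prefixes check for (orps/vorps).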

; fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
define <4 x i32> @combine_vec_add_shl_neg0(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_add_shl_neg0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $5, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_shl_neg0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $5, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %y
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  %3 = add <4 x i32> %x, %2
  ret <4 x i32> %3
}
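
; Left shift distributes over negation: (0 - y) << 5 == 0 - (y << 5), so
; x + ((0 - y) << 5) becomes x - (y << 5), i.e. one pslld plus one psubd.
; combine_vec_add_shl_neg1 below exercises the commuted form of the same fold.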

; fold (add shl(0 - y, n), x) -> sub(x, shl(y, n))
define <4 x i32> @combine_vec_add_shl_neg1(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_add_shl_neg1:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $5, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_shl_neg1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $5, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %y
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  %3 = add <4 x i32> %2, %x
  ret <4 x i32> %3
}

; (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
; and similar xforms where the inner op is either ~0 or 0.
define <4 x i32> @combine_vec_add_and_compare(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
; SSE-LABEL: combine_vec_add_and_compare:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_and_compare:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp eq <4 x i32> %a1, %a2
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = and <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  %4 = add <4 x i32> %a0, %3
  ret <4 x i32> %4
}
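
; pcmpeqd yields all-ones (-1) or zero per lane, so the and with 1 produces
; a lane that is 1 exactly when the compare mask is -1. Adding that to %a0 is
; the same as subtracting the mask itself, hence the single psubd with no
; separate masking instruction.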

; add (sext i1), X -> sub X, (zext i1)
define <4 x i32> @combine_vec_add_sext(<4 x i1> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_add_sext:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sext:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sext <4 x i1> %a0 to <4 x i32>
  %2 = add <4 x i32> %1, %a1
  ret <4 x i32> %2
}
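
; The <4 x i1> argument is passed with only the low bit of each i32 lane
; defined, so pslld $31 / psrad $31 materializes the sign extension in the
; register; the autogenerated checks capture that sext-plus-paddd lowering.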

; add (sext i1), X -> sub X, (zext i1)
define <4 x i32> @combine_vec_add_sextinreg(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_add_sextinreg:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sextinreg:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
  %2 = ashr <4 x i32> %1, <i32 31, i32 31, i32 31, i32 31>
  %3 = add <4 x i32> %2, %a1
  ret <4 x i32> %3
}