; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s

; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))

; Canonicalize the sequence shl/zext/lshr so that the zero-extend is
; performed as the last instruction of the sequence.
; This helps DAGCombiner to identify and then fold the sequence
; of shifts into a single AND.
; This transformation is profitable if the shift amounts are the same
; and if there is only one use of the zext.

; i8 -> i16: lshr by 4, zero-extend, then shl by 4. The zext has a single use
; and both shift amounts are equal, so the sequence should fold into one AND.
define i16 @fun1(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i16
  %shl = shl i16 %ext, 4
  ret i16 %shl
}

; Anchor on the emitted label itself: the bare "@fun1" text is also a prefix
; of the later fun10..fun12 functions. No shift may appear on either side of
; the AND.
; CHECK-LABEL: fun1:
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i8 -> i32: lshr by 4, zero-extend, then shl by 4. The zext has a single use
; and both shift amounts are equal, so the sequence should fold into one AND.
define i32 @fun2(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i32
  %shl = shl i32 %ext, 4
  ret i32 %shl
}

; Anchor on the emitted label; no shift may appear on either side of the AND.
; CHECK-LABEL: fun2:
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i16 -> i32: lshr by 4, zero-extend, then shl by 4. The zext has a single use
; and both shift amounts are equal, so the sequence should fold into one AND.
define i32 @fun3(i16 zeroext %v) {
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i32
  %shl = shl i32 %ext, 4
  ret i32 %shl
}

; Anchor on the emitted label; no shift may appear on either side of the AND.
; CHECK-LABEL: fun3:
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i8 -> i64: lshr by 4, zero-extend, then shl by 4. The zext has a single use
; and both shift amounts are equal, so the sequence should fold into one AND.
define i64 @fun4(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Anchor on the emitted label; no shift may appear on either side of the AND.
; CHECK-LABEL: fun4:
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i16 -> i64: lshr by 4, zero-extend, then shl by 4. The zext has a single use
; and both shift amounts are equal, so the sequence should fold into one AND.
define i64 @fun5(i16 zeroext %v) {
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Anchor on the emitted label; no shift may appear on either side of the AND.
; CHECK-LABEL: fun5:
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i32 -> i64: lshr by 4, zero-extend, then shl by 4. The zext has a single use
; and both shift amounts are equal, so the sequence should fold into one AND.
define i64 @fun6(i32 zeroext %v) {
entry:
  %shr = lshr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Anchor on the emitted label; no shift may appear on either side of the AND.
; CHECK-LABEL: fun6:
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; Don't fold the pattern if the right shift is arithmetic (ashr) rather than
; logical (lshr).

; Negative test, i8 -> i64: the right shift is ashr, not lshr, so the fold
; must not fire and both the arithmetic right shift and the left shift must
; still be emitted.
define i64 @fun7(i8 zeroext %v) {
entry:
  %shr = ashr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Anchor on the emitted label rather than on the "@fun7" asm comment.
; CHECK-LABEL: fun7:
; CHECK: sar
; CHECK: shl
; CHECK: ret

; Negative test, i16 -> i64: the right shift is ashr, not lshr, so the fold
; must not fire and both the arithmetic right shift and the left shift must
; still be emitted.
define i64 @fun8(i16 zeroext %v) {
entry:
  %shr = ashr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Anchor on the emitted label rather than on the "@fun8" asm comment.
; CHECK-LABEL: fun8:
; CHECK: sar
; CHECK: shl
; CHECK: ret

; Negative test, i32 -> i64: the right shift is ashr, not lshr, so the fold
; must not fire and both the arithmetic right shift and the left shift must
; still be emitted.
define i64 @fun9(i32 zeroext %v) {
entry:
  %shr = ashr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Anchor on the emitted label rather than on the "@fun9" asm comment.
; CHECK-LABEL: fun9:
; CHECK: sar
; CHECK: shl
; CHECK: ret

; Don't fold the pattern if the zext that feeds the shift left has more
; than one use.

; Negative test, i8 -> i64: %ext feeds both the shl and the add, so the zext
; has two uses and the fold must not fire; both shifts must still be emitted.
define i64 @fun10(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  %add = add i64 %shl, %ext
  ret i64 %add
}

; Anchor on the emitted label rather than on the "@fun10" asm comment.
; CHECK-LABEL: fun10:
; CHECK: shr
; CHECK: shl
; CHECK: ret

; Negative test, i16 -> i64: %ext feeds both the shl and the add, so the zext
; has two uses and the fold must not fire; both shifts must still be emitted.
define i64 @fun11(i16 zeroext %v) {
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  %add = add i64 %shl, %ext
  ret i64 %add
}

; Anchor on the emitted label rather than on the "@fun11" asm comment.
; CHECK-LABEL: fun11:
; CHECK: shr
; CHECK: shl
; CHECK: ret

; Negative test, i32 -> i64: %ext feeds both the shl and the add, so the zext
; has two uses and the fold must not fire; both shifts must still be emitted.
define i64 @fun12(i32 zeroext %v) {
entry:
  %shr = lshr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  %add = add i64 %shl, %ext
  ret i64 %add
}

; Anchor on the emitted label rather than on the "@fun12" asm comment.
; CHECK-LABEL: fun12:
; CHECK: shr
; CHECK: shl
; CHECK: ret

; PR17380
; Make sure that the combined DAGs are legal if we run the DAGCombiner after
; legalization has taken place. The add instruction is redundant and gives the
; zext one extra use, which prevents the transformation from firing before the
; DAGs are legalized and optimized.
; Once the add is removed, the zext has a single use and the DAGs are
; therefore canonicalized. After legalization, we need to make sure that the
; value type of the shift count is legal.
; Also verify that we correctly fold the shl-shr sequence into an
; AND with a bitmask.

; PR17380 regression: lshr by 2, zext i32 -> i64, shl by 2, then a tail call
; to @f with the result. The shifts are expected to fold into a single AND
; that is immediately followed by the tail-call jump.
define void @g(i32 %a) {
  %b = lshr i32 %a, 2
  %c = zext i32 %b to i64
  ; Intentionally dead: its only purpose is to give %c a second use until the
  ; add itself is removed, which delays the fold to a later combine run.
  %d = add i64 %c, 1
  %e = shl i64 %c, 2
  tail call void @f(i64 %e)
  ret void
}

; Anchor on the emitted label rather than on the "@g" asm comment.
; CHECK-LABEL: g:
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NEXT: jmp

; External callee used only as the tail-call target in @g.
declare void @f(i64)