; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s

; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))

; Canonicalize the sequence shl/zext/lshr so that the zero-extend is
; performed as the last instruction of the sequence.
; This will help DAGCombiner to identify and then fold the sequence
; of shifts into a single AND.
; This transformation is profitable if the shift amounts are the same
; and if there is only one use of the zext.

; i8 -> i16: lshr by 4, zext, shl by 4 (single use of the zext).
; The assertions expect the whole chain to be emitted as one 'andl $-16'
; (mask clearing the low four bits) with no shift instructions.
define i16 @fun1(i8 zeroext %v) {
; CHECK-LABEL: fun1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i16
  %shl = shl i16 %ext, 4
  ret i16 %shl
}

; i8 -> i32: lshr by 4, zext, shl by 4 (single use of the zext).
; The assertions expect a single 'andl $-16' and no shift instructions.
define i32 @fun2(i8 zeroext %v) {
; CHECK-LABEL: fun2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i32
  %shl = shl i32 %ext, 4
  ret i32 %shl
}

; i16 -> i32: lshr by 4, zext, shl by 4 (single use of the zext).
; The assertions expect a single 'andl $-16' and no shift instructions.
define i32 @fun3(i16 zeroext %v) {
; CHECK-LABEL: fun3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i32
  %shl = shl i32 %ext, 4
  ret i32 %shl
}

; i8 -> i64: lshr by 4, zext, shl by 4 (single use of the zext).
; The assertions expect a single 32-bit 'andl $-16' and no shift instructions.
define i64 @fun4(i8 zeroext %v) {
; CHECK-LABEL: fun4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; i16 -> i64: lshr by 4, zext, shl by 4 (single use of the zext).
; The assertions expect a single 32-bit 'andl $-16' and no shift instructions.
define i64 @fun5(i16 zeroext %v) {
; CHECK-LABEL: fun5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; i32 -> i64: lshr by 4, zext, shl by 4 (single use of the zext).
; The assertions expect a single 32-bit 'andl $-16' and no shift instructions.
define i64 @fun6(i32 zeroext %v) {
; CHECK-LABEL: fun6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Don't fold the pattern if we use arithmetic shifts.

; Negative test, i8 -> i64: the right shift is an ashr, not an lshr.
; The assertions expect the shifts to survive as 'sarb' and 'shlq' with a
; zero-extending move in between; no AND mask is produced.
define i64 @fun7(i8 zeroext %v) {
; CHECK-LABEL: fun7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sarb $4, %dil
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    retq
entry:
  %shr = ashr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Negative test, i16 -> i64: the right shift is an ashr, not an lshr.
; The assertions expect a sign-extending load of the argument followed by
; explicit 'shrl' and 'shlq' shifts; no AND mask is produced.
define i64 @fun8(i16 zeroext %v) {
; CHECK-LABEL: fun8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movswl %di, %eax
; CHECK-NEXT:    shrl $4, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    retq
entry:
  %shr = ashr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Negative test, i32 -> i64: the right shift is an ashr, not an lshr.
; The assertions expect the shifts to survive as 'sarl' and 'shlq';
; no AND mask is produced.
define i64 @fun9(i32 zeroext %v) {
; CHECK-LABEL: fun9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    sarl $4, %eax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    retq
entry:
  %shr = ashr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; Don't fold the pattern if the zext that feeds the shift left has more
; than one use.

; Negative test, i8 -> i64: %ext is used by both the shl and the add.
; The assertions expect the 'shrb' and 'shlq' shifts to be kept. The IR add
; is expected as 'orq' -- presumably because %ext has at most four
; significant bits, so %shl and %ext share no set bits.
define i64 @fun10(i8 zeroext %v) {
; CHECK-LABEL: fun10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    shrb $4, %dil
; CHECK-NEXT:    movzbl %dil, %ecx
; CHECK-NEXT:    movq %rcx, %rax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    orq %rcx, %rax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  %add = add i64 %shl, %ext
  ret i64 %add
}

; Negative test, i16 -> i64: %ext is used by both the shl and the add.
; The assertions expect the 'shrl' and 'shlq' shifts to be kept and the two
; values to be combined with 'addq'; no AND mask is produced.
define i64 @fun11(i16 zeroext %v) {
; CHECK-LABEL: fun11:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    shrl $4, %edi
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    addq %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  %add = add i64 %shl, %ext
  ret i64 %add
}

; Negative test, i32 -> i64: %ext is used by both the shl and the add.
; The assertions expect the 'shrl' and 'shlq' shifts to be kept and the two
; values to be combined with 'addq'; no AND mask is produced.
define i64 @fun12(i32 zeroext %v) {
; CHECK-LABEL: fun12:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    shrl $4, %edi
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    addq %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %shr = lshr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  %add = add i64 %shl, %ext
  ret i64 %add
}

; PR17380
; Make sure that the combined dags are legal if we run the DAGCombiner after
; Legalization took place. The add instruction is redundant and increases the
; number of uses of the zext by one. This prevents the transformation from
; firing before dags are legalized and optimized.
; Once the add is removed, the number of uses becomes one and therefore the
; dags are canonicalized. After Legalization, we need to make sure that the
; valuetype for the shift count is legal.
; Verify also that we correctly fold the lshr/shl sequence into an
; AND with bitmask.

; %d is deliberately dead: per the PR17380 note, its extra use of %c is what
; keeps the combine from firing before legalization. The assertions expect
; the lshr/zext/shl chain (shift amount 2) to become 'andl $-4', followed by
; a tail call to f.
define void @g(i32 %a) {
; CHECK-LABEL: g:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    andl $-4, %edi
; CHECK-NEXT:    jmp f # TAILCALL
  %b = lshr i32 %a, 2
  %c = zext i32 %b to i64
  %d = add i64 %c, 1
  %e = shl i64 %c, 2
  tail call void @f(i64 %e)
  ret void
}

; External callee for the tail call in @g.
declare dso_local void @f(i64)