; RUN: opt -S -codegenprepare %s -o - | FileCheck %s
; RUN: opt -S -codegenprepare -addr-sink-using-gep=1 %s -o - | FileCheck -check-prefix=CHECK-GEP %s
; This file tests the different cases that are involved when codegen prepare
; tries to get sign extension out of the way of addressing mode.
; These tests require an actual target, as addressing mode decisions depend
; on the target.

target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"


; Check that we correctly promote both operands of the promotable add.
; The sign-extended sum is converted straight to a pointer and loaded from,
; so the add is the whole address computation.
; CHECK-LABEL: @twoArgsPromotion
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i32 %arg1 to i64
; CHECK: [[ARG2SEXT:%[a-zA-Z_0-9-]+]] = sext i32 %arg2 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], [[ARG2SEXT]]
; CHECK: inttoptr i64 [[PROMOTED]] to i8*
; CHECK: ret
define i8 @twoArgsPromotion(i32 %arg1, i32 %arg2) {
  %add = add nsw i32 %arg1, %arg2
  %sextadd = sext i32 %add to i64
  %base = inttoptr i64 %sextadd to i8*
  %res = load i8* %base
  ret i8 %res
}

; Check that we do not promote both operands of the promotable add when
; the instruction will not be folded into the addressing mode.
; Otherwise, we would increase the number of instructions executed.
; (This is a heuristic of course, because the new sext could have been
; merged with something else.)
; Here the sign-extended sum is an index added to a separate %base pointer.
; CHECK-LABEL: @twoArgsNoPromotion
; CHECK: add nsw i32 %arg1, %arg2
; CHECK: ret
define i8 @twoArgsNoPromotion(i32 %arg1, i32 %arg2, i8* %base) {
  %add = add nsw i32 %arg1, %arg2
  %sextadd = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we do not promote when the related instruction does not have
; the nsw flag.
; CHECK-LABEL: @noPromotion
; CHECK-NOT: add i64
; CHECK: ret
define i8 @noPromotion(i32 %arg1, i32 %arg2, i8* %base) {
  ; Plain add (no nsw): the checks require that no i64 add is produced.
  %add = add i32 %arg1, %arg2
  %sextadd = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we correctly promote constant arguments.
; Only %arg1 needs a sext; the constant operand stays an immediate in the
; promoted i64 add.
; CHECK-LABEL: @oneArgPromotion
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i32 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotion(i32 %arg1, i8* %base) {
  %add = add nsw i32 %arg1, 1
  %sextadd = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we do not promote truncate when we cannot determine the
; bits that are dropped.
; The add is still promoted to i64, but the trunc of %arg1 stays in place.
; CHECK-LABEL: @oneArgPromotionBlockTrunc1
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 %arg1 to i8
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionBlockTrunc1(i32 %arg1, i8* %base) {
  %trunc = trunc i32 %arg1 to i8
  %add = add nsw i8 %trunc, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we do not promote truncate when we cannot determine all the
; bits that are dropped.
; The truncated value comes from a sext of an i16; the original sext/trunc
; pair is expected to remain in the output.
; CHECK-LABEL: @oneArgPromotionBlockTrunc2
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i16 %arg1 to i32
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 [[ARG1SEXT]] to i8
; CHECK: [[ARG1SEXT64:%[a-zA-Z_0-9-]+]] = sext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT64]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionBlockTrunc2(i16 %arg1, i8* %base) {
  %sextarg1 = sext i16 %arg1 to i32
  %trunc = trunc i32 %sextarg1 to i8
  %add = add nsw i8 %trunc, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we are able to promote truncate when we know all the bits
; that are dropped.
; The truncated value is a sext of an i1; the expected output extends %arg1
; directly to i64 with no trunc left.
; CHECK-LABEL: @oneArgPromotionPassTruncKeepSExt
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i1 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionPassTruncKeepSExt(i1 %arg1, i8* %base) {
  %sextarg1 = sext i1 %arg1 to i32
  %trunc = trunc i32 %sextarg1 to i8
  %add = add nsw i8 %trunc, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; On X86, truncates are free. Check that we are able to promote the add
; to be used as addressing mode and that we insert a truncate for the other
; use.
; CHECK-LABEL: @oneArgPromotionTruncInsert
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i64 [[PROMOTED]] to i8
; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
; CHECK: add i8 [[LOAD]], [[TRUNC]]
; CHECK: ret
define i8 @oneArgPromotionTruncInsert(i8 %arg1, i8* %base) {
  %add = add nsw i8 %arg1, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ; Second (i8) use of %add: this is the use served by the inserted trunc.
  %finalres = add i8 %res, %add
  ret i8 %finalres
}

; Cannot sext from a larger type than the promoted type.
; %arg1 is i128, wider than the i64 the add is promoted to, so the trunc to
; i8 is expected to stay and the sext is taken from its result.
; CHECK-LABEL: @oneArgPromotionLargerType
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i128 %arg1 to i8
; CHECK: [[ARG1SEXT64:%[a-zA-Z_0-9-]+]] = sext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT64]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionLargerType(i128 %arg1, i8* %base) {
  %trunc = trunc i128 %arg1 to i8
  %add = add nsw i8 %trunc, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  %finalres = add i8 %res, %add
  ret i8 %finalres
}

; Use same inserted trunc
; On X86, truncates are free. Check that we are able to promote the add
; to be used as addressing mode and that we insert a truncate for
; *all* the other uses.
; CHECK-LABEL: @oneArgPromotionTruncInsertSeveralUse
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i64 [[PROMOTED]] to i8
; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
; CHECK: [[ADDRES:%[a-zA-Z_0-9-]+]] = add i8 [[LOAD]], [[TRUNC]]
; CHECK: add i8 [[ADDRES]], [[TRUNC]]
; CHECK: ret
define i8 @oneArgPromotionTruncInsertSeveralUse(i8 %arg1, i8* %base) {
  %add = add nsw i8 %arg1, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ; Two further i8 uses of %add: both must be fed by the one inserted trunc.
  %almostfinalres = add i8 %res, %add
  %finalres = add i8 %almostfinalres, %add
  ret i8 %finalres
}

; Check that the promoted instruction is used for all uses of the original
; sign extension.
; CHECK-LABEL: @oneArgPromotionSExtSeveralUse
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
; CHECK: [[ADDRES:%[a-zA-Z_0-9-]+]] = zext i8 [[LOAD]] to i64
; CHECK: add i64 [[ADDRES]], [[PROMOTED]]
; CHECK: ret
define i64 @oneArgPromotionSExtSeveralUse(i8 %arg1, i8* %base) {
  %add = add nsw i8 %arg1, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  %almostfinalres = zext i8 %res to i64
  ; Second use of %sextadd (besides the GEP index).
  %finalres = add i64 %almostfinalres, %sextadd
  ret i64 %finalres
}

; Check all types of rollback mechanism.
; For this test, the sign extension stays in place.
; However, the matching process goes as far as promoting both operands
; of the first promotable add.
; At this point the rollback mechanism kicks in and restores the states
; until the addressing mode matcher is able to match something: in that
; case promote nothing.
; Along the way, the promotion mechanism involves:
; - Mutating the type of %promotableadd1 and %promotableadd2.
; - Creating a sext for %arg1 and %arg2.
; - Creating a trunc for a use of %promotableadd1.
; - Replacing a bunch of uses.
; - Setting the operands of the promoted instruction with the promoted values.
; - Moving instructions around (mainly sext when promoting instructions).
; Each type of those promotions has to be undone at least once during this
; specific test.
; CHECK-LABEL: @twoArgsPromotionNest
; CHECK: [[ORIG:%[a-zA-Z_0-9-]+]] = add nsw i32 %arg1, %arg2
; CHECK: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ORIG]], [[ORIG]]
; CHECK: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; CHECK: getelementptr inbounds i8* %base, i64 [[SEXT]]
; CHECK: ret
define i8 @twoArgsPromotionNest(i32 %arg1, i32 %arg2, i8* %base) {
  %promotableadd1 = add nsw i32 %arg1, %arg2
  ; %promotableadd1 is used as both operands here.
  %promotableadd2 = add nsw i32 %promotableadd1, %promotableadd1
  %sextadd = sext i32 %promotableadd2 to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Test the InstructionRemover undo, which was the only one not
; kicked in the previous test.
; The matcher first promotes the add, removes the trunc and promotes
; the sext of arg1.
; Then, the matcher cannot use an addressing mode r + r + r, thus it
; rolls back.
; The expected output is therefore the input sequence, unchanged.
; CHECK-LABEL: @twoArgsNoPromotionRemove
; CHECK: [[SEXTARG1:%[a-zA-Z_0-9-]+]] = sext i1 %arg1 to i32
; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 [[SEXTARG1]] to i8
; CHECK: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[TRUNC]], %arg2
; CHECK: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i64
; CHECK: getelementptr inbounds i8* %base, i64 [[SEXT]]
; CHECK: ret
define i8 @twoArgsNoPromotionRemove(i1 %arg1, i8 %arg2, i8* %base) {
  %sextarg1 = sext i1 %arg1 to i32
  %trunc = trunc i32 %sextarg1 to i8
  %add = add nsw i8 %trunc, %arg2
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Ensure that when the profitability check kicks in, the IR is not modified
; while IgnoreProfitability is on.
; The profitability check happens when a candidate instruction has several uses.
; The matcher will create a new matcher for each use and check if the
; instruction is in the list of the matched instructions of this new matcher.
; All changes made by the new matchers must be dropped before pursuing
; otherwise the state of the original matcher will be wrong.
;
; Without the profitability check, when checking for the second use of
; arrayidx, the matcher promotes everything all the way to %arg1, %arg2.
; Check that we did not promote anything in the final matching.
;
; <rdar://problem/16020230>
; CHECK-LABEL: @checkProfitability
; CHECK-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
; CHECK-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg2 to i64
; CHECK: [[SHL:%[a-zA-Z_0-9-]+]] = shl nsw i32 %arg1, 1
; CHECK: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SHL]], %arg2
; CHECK: [[SEXTADD:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; BB then
; CHECK: [[BASE1:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTADD]], 48
; CHECK: [[ADDR1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE1]] to i32*
; CHECK: load i32* [[ADDR1]]
; BB else
; CHECK: [[BASE2:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTADD]], 48
; CHECK: [[ADDR2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE2]] to i32*
; CHECK: load i32* [[ADDR2]]
; CHECK: ret
; CHECK-GEP-LABEL: @checkProfitability
; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg2 to i64
; CHECK-GEP: [[SHL:%[a-zA-Z_0-9-]+]] = shl nsw i32 %arg1, 1
; CHECK-GEP: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SHL]], %arg2
; CHECK-GEP: [[SEXTADD:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; BB then
; CHECK-GEP: [[BASE1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
; CHECK-GEP: [[BCC1:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE1]] to i8*
; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC1]], i64 48
; CHECK-GEP: [[ADDR1:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL1]] to i32*
; CHECK-GEP: load i32* [[ADDR1]]
; BB else
; CHECK-GEP: [[BASE2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
; CHECK-GEP: [[BCC2:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE2]] to i8*
; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC2]], i64 48
; CHECK-GEP: [[ADDR2:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL2]] to i32*
; CHECK-GEP: load i32* [[ADDR2]]
; CHECK-GEP: ret
define i32 @checkProfitability(i32 %arg1, i32 %arg2, i1 %test) {
  %shl = shl nsw i32 %arg1, 1
  %add1 = add nsw i32 %shl, %arg2
  %sextidx1 = sext i32 %add1 to i64
  %tmpptr = inttoptr i64 %sextidx1 to i32*
  ; Index 12 over i32 elements is the 48-byte offset the checks look for.
  %arrayidx1 = getelementptr i32* %tmpptr, i64 12
  br i1 %test, label %then, label %else
then:
  ; First use of %arrayidx1.
  %res1 = load i32* %arrayidx1
  br label %end
else:
  ; Second use of %arrayidx1, in a different block.
  %res2 = load i32* %arrayidx1
  br label %end
end:
  %tmp = phi i32 [%res1, %then], [%res2, %else]
  ; Extra i32 use of %add1 outside the address computation.
  %res = add i32 %tmp, %add1
  %addr = inttoptr i32 %res to i32*
  %final = load i32* %addr
  ret i32 %final
}
