; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE

; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
;
; OPTALL-LABEL: @foo
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  ; The zext lives in a different block than the load it extends.
  %s = zext i8 %t to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; Make sure the operation is not promoted when the promotion pass is disabled.
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  ; The add sits between the load and the zext; its other operand is a constant.
  %add = add nuw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload when an operation with only one
; argument to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteOneArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  ; The add sits between the load and the sext; its other operand is a constant.
  %add = add nsw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload when an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
; 1. One for %b.
; 2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
;
; OPTALL-LABEL: @promoteTwoArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  ; Both operands of the add would need an extension if the zext is hoisted.
  %add = add nuw i8 %t, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload when an operation with two
; arguments to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteTwoArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  ; Both operands of the add would need an extension if the sext is hoisted.
  %add = add nsw i8 %t, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; OPTALL-LABEL: @promoteThreeArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
;
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: add nuw i8
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
  %t = load i8, i8* %p
  ; Two chained adds: hoisting the zext would require extending %b and %c too.
  %tmp = add nuw i8 %t, %b
  %add = add nuw i8 %tmp, %c
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions.
; OPTALL-LABEL: @promoteMergeExtArgZExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  ; First extension (i8 -> i16); the second one (i16 -> i32) is in %true.
  %ext = zext i8 %t to i16
  %add = add nuw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload after promoting and merging
; two extensions.
; Version with sext.
; OPTALL-LABEL: @promoteMergeExtArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  ; The inner extension is a zext (i8 -> i16); the outer one in %true is a sext.
  %ext = zext i8 %t to i16
  %add = add nsw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
;    a. This creates one sext of %ld2 and one of %zextld
;    b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
;       introduce one. This is fine with the current heuristic: neutral.
;    => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
;    a. This creates one sext of %zexta and one of %zextld
;    b. The sext of %zexta does not lead to any load, it stays here, even if it
;       could have been combined with the zext of %a.
;    c. The sext of %zextld leads to %ld and can be combined with it. This is
;       done by promoting %zextld. This is fine with the current heuristic:
;       neutral.
;    => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
;    a. This creates one sext of %b and one of %zextld
;    b. The sext of %b is a dead-end, nothing to be done.
;    c. Same thing as 2.c. happens.
;    => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
;    a. Same thing as 2.c. happens.
;    b. %zextld does not have any other uses. It is dead code and is removed.
;    => We have created a new zext of %ld and we removed a zext of %zextld and
;       a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
;
; OPTALL-LABEL: @severalPromotions
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
; We do not combine this one: see 2.b.
; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADD]] to i64
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDZA]] to i64
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDB]] to i64
;
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
  %ld = load i8, i8* %addr1
  ; %zextld is shared by the three adds below.
  %zextld = zext i8 %ld to i32
  %ld2 = load i32, i32* %addr2
  %add = add nsw i32 %ld2, %zextld
  %sextadd = sext i32 %add to i64
  %zexta = zext i8 %a to i32
  %addza = add nsw i32 %zexta, %zextld
  %sextaddza = sext i32 %addza to i64
  %addb = add nsw i32 %b, %zextld
  %sextaddb = sext i32 %addb to i64
  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
  ret void
}

; External callee used by @severalPromotions to consume the three results.
declare void @dummy(i64, i64, i64)

; Make sure we do not try to promote vector types since the type promotion
; helper does not support them for now.
; OPTALL-LABEL: @vectorPromotion
; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
; OPTALL: ret
define void @vectorPromotion() {
entry:
  ; Vector shl feeding a vector zext: the checks expect both to be left as is.
  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  %b = zext <2 x i32> %a to <2 x i64>
  ret void
}

; Globals referenced by the constant expression in @promotionOfArgEndsUpInValue.
@a = common global i32 0, align 4
@c = common global [2 x i32] zeroinitializer, align 4

; Make sure we support promotion of operands that produces a Value as opposed
; to an instruction.
; This used to cause a crash.
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
;
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
;
; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; OPTALL-NEXT: ret i32 [[RES]]
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
entry:
  %val = load i16, i16* %addr
  ; The second operand is a constant expression, not an instruction.
  %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
  %conv3 = sext i16 %add to i32
  ret i32 %conv3
}

; Check that we see that one zext can be derived from the other for free.
; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
; OPT-NEXT: store i32 [[RES32]], i32* %addr
; OPT-NEXT: store i64 [[RES64]], i64* %q
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
;
; OPTALL-NEXT: ret void
define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  ; %zextt has two users: %add (stored as i32) and %add2 (later zext'ed to i64).
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %add2 = add nuw i32 %zextt, 12
  store i32 %add, i32 *%addr
  %s = zext i32 %add2 to i64
  store i64 %s, i64* %q
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  ; The sext only feeds the index of the GEP used as the store address.
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
  store i32 %add, i32 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  ; The sext only feeds the index of a GEP over i64 elements.
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64
  store i64 %stuff, i64 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  ; The sext only feeds the index of a GEP over i128 elements.
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64
  store i128 %stuff, i128 *%staddr
  ret void
}


; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; NOTE(review): unlike the i32/i64/i128 variants above, the OPT checks below
; expect the promotion to happen in both stress and non-stress modes;
; presumably the sext is not considered free for an i256 access -- confirm.
; OPTALL-LABEL: @promoteSExtFromAddrMode256
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
; OPTALL-NEXT: ret void
define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  ; The sext only feeds the index of a GEP over i256 elements.
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64
  store i256 %stuff, i256 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free zext.
; When we promote all the way through the load, we end up with
; a free zext and a non-free zext (of %b).
; However, the current target lowering says zext i32 to i64 is free
; so the promotion happens because the cost did not change and may
; expose more opportunities.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  ; The zext only feeds the index of the GEP used as the store address.
  %idx64 = zext i32 %add to i64
  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
  store i32 %add, i32 *%staddr
  ret void
}

; Same pattern as doNotPromoteFreeSExtFromAddrMode, except that the sext
; feeds a shift instead of an address computation.
; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  ; The sext only feeds the shift below.
  %idx64 = sext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; Same comment as doNotPromoteFreeZExtFromAddrMode.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  ; The zext only feeds the shift below.
  %idx64 = zext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; The input has one free zext and one non-free sext.
; When we promote all the way through to the load, we end up with
; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
; However, when we generate a load pair, the free sext (%ld1) becomes
; non-free. So technically, we trade one non-free sext for two non-free
; sexts.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
;
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
;
; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
; OPTALL-NEXT: ret i64 [[FINAL]]
define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
  ; Two loads from adjacent i32 slots: %p[0] and %p[1].
  %ld0 = load i32, i32* %p
  %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
  %ld1 = load i32, i32* %idxLd1
  %res = add nsw i32 %ld1, %cst
  %sextres = sext i32 %res to i64
  %zextLd0 = zext i32 %ld0 to i64
  %final = add i64 %sextres, %zextLd0
  ret i64 %final
}
