; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE
;
; Check prefixes used below (see the RUN lines above):
;   OPTALL    - checked by all three runs.
;   OPT       - checked by the default run and the stress run.
;   NONSTRESS - checked by the default run only.
;   STRESS    - checked by the -stress-cgp-ext-ld-promotion run only.
;   DISABLE   - checked by the -disable-cgp-ext-ld-promotion run only.

; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
;
; OPTALL-LABEL: @foo
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %t to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload if an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; Make sure the operation is not promoted when the promotion pass is disabled.
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %add = add nuw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload if an operation with only one
; argument to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteOneArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
  %t = load i8, i8* %p
  %add = add nsw i8 %t, 2
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload if an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
; 1. One for %b.
; 2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
;
; OPTALL-LABEL: @promoteTwoArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  %add = add nuw i8 %t, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload if an operation with two
; arguments to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteTwoArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
  %t = load i8, i8* %p
  %add = add nsw i8 %t, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; OPTALL-LABEL: @promoteThreeArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
;
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: add nuw i8
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
  %t = load i8, i8* %p
  %tmp = add nuw i8 %t, %b
  %add = add nuw i8 %tmp, %c
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i8 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions.
; OPTALL-LABEL: @promoteMergeExtArgZExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  %ext = zext i8 %t to i16
  %add = add nuw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = zext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to form a sextload after promoting and merging
; two extensions.
; Version with sext.
; OPTALL-LABEL: @promoteMergeExtArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
  %t = load i8, i8* %p
  %ext = zext i8 %t to i16
  %add = add nsw i16 %ext, %b
  %a = icmp slt i8 %t, 20
  br i1 %a, label %true, label %false
true:
  %s = sext i16 %add to i32
  store i32 %s, i32* %q
  ret void
false:
  ret void
}

; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
;    a. This creates one sext of %ld2 and one of %zextld
;    b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
;       introduce one. This is fine with the current heuristic: neutral.
;    => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
;    a. This creates one sext of %zexta and one of %zextld
;    b. The sext of %zexta does not lead to any load, it stays here, even if it
;       could have been combined with the zext of %a.
;    c. The sext of %zextld leads to %ld and can be combined with it. This is
;       done by promoting %zextld. This is fine with the current heuristic:
;       neutral.
;    => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
;    a. This creates one sext of %b and one of %zextld
;    b. The sext of %b is a dead-end, nothing to be done.
;    c. Same thing as 2.c. happens.
;    => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
;    a. Same thing as 2.c. happens.
;    b. %zextld does not have any other uses. It is removed as dead code.
;    => We have created a new zext of %ld and we removed a zext of %zextld and
;       a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
;
; OPTALL-LABEL: @severalPromotions
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
; We do not combine this one: see 2.b.
; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
;
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
  %ld = load i8, i8* %addr1
  %zextld = zext i8 %ld to i32
  %ld2 = load i32, i32* %addr2
  %add = add nsw i32 %ld2, %zextld
  %sextadd = sext i32 %add to i64
  %zexta = zext i8 %a to i32
  %addza = add nsw i32 %zexta, %zextld
  %sextaddza = sext i32 %addza to i64
  %addb = add nsw i32 %b, %zextld
  %sextaddb = sext i32 %addb to i64
  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
  ret void
}

; External sink that keeps the three extended results of @severalPromotions
; alive.
declare void @dummy(i64, i64, i64)

; Make sure we do not try to promote vector types since the type promotion
; helper does not support them for now.
; OPTALL-LABEL: @vectorPromotion
; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
; OPTALL: ret
define void @vectorPromotion() {
entry:
  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  %b = zext <2 x i32> %a to <2 x i64>
  ret void
}

@a = common global i32 0, align 4
@c = common global [2 x i32] zeroinitializer, align 4

; Make sure we support promotion of operands that produce a Value as opposed
; to an instruction.
; This used to cause a crash.
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
;
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
;
; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; OPTALL-NEXT: ret i32 [[RES]]
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
entry:
  %val = load i16, i16* %addr
  %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
  %conv3 = sext i16 %add to i32
  ret i32 %conv3
}

; Check that we see that one zext can be derived from the other for free.
; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
; OPT-NEXT: store i32 [[RES32]], i32* %addr
; OPT-NEXT: store i64 [[RES64]], i64* %q
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
;
; OPTALL-NEXT: ret void
define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %add2 = add nuw i32 %zextt, 12
  store i32 %add, i32 *%addr
  %s = zext i32 %add2 to i64
  store i64 %s, i64* %q
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
  store i32 %add, i32 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64
  store i64 %stuff, i64 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64
  store i128 %stuff, i128 *%staddr
  ret void
}


; Check that we do not increase the cost of the code.
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
; NOTE(review): this description looks copied from the tests above. The checks
; for @promoteSExtFromAddrMode256 expect the promotion in both the default and
; the stress run, presumably because an i256 element size cannot be folded
; into the addressing mode and the sext is therefore not free -- confirm.
; OPTALL-LABEL: @promoteSExtFromAddrMode256
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
; OPTALL-NEXT: ret void
define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64
  store i256 %stuff, i256 *%staddr
  ret void
}

; Check that we do not increase the cost of the code.
; The input has two free zexts.
; When we promote all the way through the load, we end up with
; a free zext and a non-free zext (of %b).
; However, the current target lowering says zext i32 to i64 is free
; so the promotion happens because the cost did not change and may
; expose more opportunities.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
; OPTALL-NEXT: ret void
define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %idx64 = zext i32 %add to i64
  %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
  store i32 %add, i32 *%staddr
  ret void
}

; Check the case where the sext feeds a shift instead of an address
; computation: the default run is expected to leave the add/sext sequence
; unpromoted (same output as with promotion disabled), while the stress run
; promotes through to the load.
; NOTE(review): presumably the sext is considered free here because it can be
; folded with the shl -- confirm against the target hooks.
; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nsw i32 %zextt, %b
  %idx64 = sext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; Same comment as doNotPromoteFreeZExtFromAddrMode.
; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
;
; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
; OPTALL-NEXT: ret i64 [[RES64]]
define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
entry:
  %t = load i8, i8* %p
  %zextt = zext i8 %t to i32
  %add = add nuw i32 %zextt, %b
  %idx64 = zext i32 %add to i64
  %staddr = shl i64 %idx64, 12
  ret i64 %staddr
}

; The input has one free zext and one non-free sext.
; When we promote all the way through to the load, we end up with
; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
; However, when we generate a load pair, the free sext (%ld1) becomes
; non-free. So technically, we trade one non-free sext for two non-free
; sexts.
; This would need to be fixed at some point.
; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
;
; This transformation should really happen only for stress mode.
; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
;
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
;
; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
; OPTALL-NEXT: ret i64 [[FINAL]]
define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
  %ld0 = load i32, i32* %p
  %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
  %ld1 = load i32, i32* %idxLd1
  %res = add nsw i32 %ld1, %cst
  %sextres = sext i32 %res to i64
  %zextLd0 = zext i32 %ld0 to i64
  %final = add i64 %sextres, %zextLd0
  ret i64 %final
}