; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE

; rdar://7304838
; The zext below sits in a different block than the load that feeds it.
; CodeGenPrepare should move the zext into the block with the load so that
; SelectionDAG can select it together with the load.
;
; CHECK-LABEL: foo:
; CHECK: movsbl ({{%rdi|%rcx}}), %eax
;
; OPTALL-LABEL: @foo
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
  %byte = load i8, i8* %p
  ; The loaded value is also compared here, so the zext is not its only user.
  %small = icmp slt i8 %byte, 20
  br i1 %small, label %extend, label %done

extend:
  %wide = zext i8 %byte to i32
  store i32 %wide, i32* %q
  ret void

done:
  ret void
}

; Check that we manage to form a zextload when an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; When the promotion is disabled, the add must remain an i8 operation and
; only its result is extended.
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
  %byte = load i8, i8* %p
  ; This add stands between the load and the zext in the other block.
  %sum = add nuw i8 %byte, 2
  %small = icmp slt i8 %byte, 20
  br i1 %small, label %extend, label %done

extend:
  %wide = zext i8 %sum to i32
  store i32 %wide, i32* %q
  ret void

done:
  ret void
}

; Check that we manage to form a sextload when an operation with only one
; argument to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteOneArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
  %byte = load i8, i8* %p
  %sum = add nsw i8 %byte, 2
  %small = icmp slt i8 %byte, 20
  br i1 %small, label %extend, label %done

extend:
  %wide = sext i8 %sum to i32
  store i32 %wide, i32* %q
  ret void

done:
  ret void
}

; Check that we manage to form a zextload when an operation with two
; arguments to explicitly extend is in the way.
; Extending %add will create two extensions:
; 1. One for %b.
; 2. One for %t.
; #1 will not be removed as we do not know anything about %b.
; #2 may not be merged with the load because %t is used in a comparison.
; Since two extensions may be emitted in the end instead of one before the
; transformation, the regular heuristic does not apply the optimization.
;
; OPTALL-LABEL: @promoteTwoArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
  ; %t and %add keep their names: the explanation preceding this test's
  ; check lines refers to them.
  %t = load i8, i8* %p
  %add = add nuw i8 %t, %b
  ; Second use of %t, next to the add.
  %small = icmp slt i8 %t, 20
  br i1 %small, label %extend, label %done

extend:
  %wide = zext i8 %add to i32
  store i32 %wide, i32* %q
  ret void

done:
  ret void
}

; Check that we manage to form a sextload when an operation with two
; arguments to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteTwoArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
;
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
  %byte = load i8, i8* %p
  ; Both operands of this add would need a sext if the add were promoted.
  %sum = add nsw i8 %byte, %b
  %small = icmp slt i8 %byte, 20
  br i1 %small, label %extend, label %done

extend:
  %wide = sext i8 %sum to i32
  store i32 %wide, i32* %q
  ret void

done:
  ret void
}

; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; OPTALL-LABEL: @promoteThreeArgZext
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
;
; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; DISABLE: add nuw i8
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
  %byte = load i8, i8* %p
  ; A chain of two adds: promoting it would require extending %b and %c
  ; in addition to the loaded value.
  %partial = add nuw i8 %byte, %b
  %sum = add nuw i8 %partial, %c
  %small = icmp slt i8 %byte, 20
  br i1 %small, label %extend, label %done

extend:
  %wide = zext i8 %sum to i32
  store i32 %wide, i32* %q
  ret void

done:
  ret void
}

; Check that we manage to form a zextload after promoting and merging
; two extensions.
; OPTALL-LABEL: @promoteMergeExtArgZExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
;
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
  %byte = load i8, i8* %p
  ; First extension (i8 -> i16); the second one (i16 -> i32) is in %extend.
  %half = zext i8 %byte to i16
  %sum = add nuw i16 %half, %b
  %small = icmp slt i8 %byte, 20
  br i1 %small, label %extend, label %done

extend:
  %wide = zext i16 %sum to i32
  store i32 %wide, i32* %q
  ret void

done:
  ret void
}

; Check that we manage to form a sextload after promoting and merging
; two extensions.
; Version with sext.
; OPTALL-LABEL: @promoteMergeExtArgSExt
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
;
; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
; OPTALL: store i32 [[RES]], i32* %q
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
  %byte = load i8, i8* %p
  ; The inner extension is a zext (i8 -> i16); the outer one in %extend is
  ; a sext (i16 -> i32).
  %half = zext i8 %byte to i16
  %sum = add nsw i16 %half, %b
  %small = icmp slt i8 %byte, 20
  br i1 %small, label %extend, label %done

extend:
  %wide = sext i16 %sum to i32
  store i32 %wide, i32* %q
  ret void

done:
  ret void
}

; Check that we manage to catch all the extload opportunities that are exposed
; by the different iterations of codegen prepare.
; Moreover, check that we do not promote more than we need to.
; Here is what is happening in this test (not necessarily in this order):
; 1. We try to promote the operand of %sextadd.
;    a. This creates one sext of %ld2 and one of %zextld.
;    b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
;       introduce one. This is fine with the current heuristic: neutral.
;    => We have one zext of %zextld left and we created one sext of %ld2.
; 2. We try to promote the operand of %sextaddza.
;    a. This creates one sext of %zexta and one of %zextld.
;    b. The sext of %zexta does not lead to any load, so it stays here, even
;       if it could have been combined with the zext of %a.
;    c. The sext of %zextld leads to %ld and can be combined with it. This is
;       done by promoting %zextld. This is fine with the current heuristic:
;       neutral.
;    => We have created a new zext of %ld and we created one sext of %zexta.
; 3. We try to promote the operand of %sextaddb.
;    a. This creates one sext of %b and one of %zextld.
;    b. The sext of %b is a dead end, nothing to be done.
;    c. Same thing as 2.c. happens.
;    => We have created a new zext of %ld and we created one sext of %b.
; 4. We try to promote the operand of the zext of %zextld introduced in #1.
;    a. Same thing as 2.c. happens.
;    b. %zextld does not have any other uses. It is removed as dead code.
;    => We have created a new zext of %ld and we removed a zext of %zextld and
;       a zext of %ld.
; Currently we do not try to reuse existing extensions, so in the end we have
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
;
; OPTALL-LABEL: @severalPromotions
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
; We do not combine this one: see 2.b.
; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
;
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
;
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
  ; Value names are the ones used by the numbered walk-through above.
  ; %zextld is shared by the three adds below.
  %ld = load i8, i8* %addr1
  %zextld = zext i8 %ld to i32

  ; Step 1: second load added to %zextld, then sign-extended.
  %ld2 = load i32, i32* %addr2
  %add = add nsw i32 %ld2, %zextld
  %sextadd = sext i32 %add to i64

  ; Step 2: zero-extended argument %a added to %zextld, then sign-extended.
  %zexta = zext i8 %a to i32
  %addza = add nsw i32 %zexta, %zextld
  %sextaddza = sext i32 %addza to i64

  ; Step 3: plain i32 argument %b added to %zextld, then sign-extended.
  %addb = add nsw i32 %b, %zextld
  %sextaddb = sext i32 %addb to i64

  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
  ret void
}

declare void @dummy(i64, i64, i64)

; Make sure we do not try to promote vector types since the type promotion
; helper does not support them for now.
; OPTALL-LABEL: @vectorPromotion
; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
; OPTALL: ret
define void @vectorPromotion() {
entry:
  ; The shl/zext pair on vectors must come out unchanged (see checks above).
  %shifted = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  %widened = zext <2 x i32> %shifted to <2 x i64>
  ret void
}

@a = common global i32 0, align 4
@c = common global [2 x i32] zeroinitializer, align 4

; PR21978.
; Make sure we support promotion of operands that produce a Value as opposed
; to an instruction.
; This used to cause a crash.
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr

; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
;
; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; OPTALL-NEXT: ret i32 [[RES]]
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
entry:
  %half = load i16, i16* %addr
  ; The second operand is a constant expression, not an instruction.
  %sum = add nuw nsw i16 %half, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
  %wide = sext i16 %sum to i32
  ret i32 %wide
}

; Check that we see that one zext can be derived from the other for free.
; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p

; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
; OPT-NEXT: store i32 [[RES32]], i32* %addr
; OPT-NEXT: store i64 [[RES64]], i64* %q
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
;
; OPTALL-NEXT: ret void
define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
entry:
  %byte = load i8, i8* %p
  ; One i8 -> i32 extension feeds both adds below.
  %word = zext i8 %byte to i32
  %sum = add nuw i32 %word, %b
  %sum12 = add nuw i32 %word, 12
  store i32 %sum, i32* %addr
  ; Only the second add is extended further, to i64.
  %wide = zext i32 %sum12 to i64
  store i64 %wide, i64* %q
  ret void
}
