; RUN: opt -S -loop-sink < %s | FileCheck %s
; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s
; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s

; Tests for the loop-sink pass: each function places a loop-invariant load in
; the loop preheader and uses profile metadata (see the bottom of the file) to
; make some loop blocks much colder than the preheader. The per-block numbers
; listed before each function are the block frequencies the test author
; expects from that profile data.
;
; Every function is anchored with a FileCheck label directive on "@name(" so
; that its checks cannot match inside a neighbouring function. A bare name
; such as t4 or t6 also occurs as a value name (%t4, %t6) in other functions.

@g = global i32 0, align 4

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Sink load to b2
; CHECK-LABEL: @t1(
; CHECK: .b2:
; CHECK: load i32, i32* @g
; CHECK: .b3:
; CHECK-NOT: load i32, i32* @g
define i32 @t1(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, 100
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 500
; b1: 16016
; b3: 8
; b6: 8
; Sink load to b3 and b6
; CHECK-LABEL: @t2(
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g
; CHECK: .b3:
; CHECK: load i32, i32* @g
; CHECK: .b4:
; CHECK: .b6:
; CHECK: load i32, i32* @g
; CHECK: .b7:
define i32 @t2(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, %invariant
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 500
; b3: 8
; b5: 16008
; Do not sink load from preheader.
; The load is also used in b5, which is hotter than the preheader. The
; trailing negative check runs up to the next function label, so it covers
; every loop block.
; CHECK-LABEL: @t3(
; CHECK: .preheader:
; CHECK: load i32, i32* @g
; CHECK: .b1:
; CHECK-NOT: load i32, i32* @g
define i32 @t3(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, %invariant
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, 5
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; For single-BB loop with <=1 avg trip count, sink load to b1
; CHECK-LABEL: @t4(
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g
; CHECK: .b1:
; CHECK: load i32, i32* @g
; CHECK: .exit:
define i32 @t4(i32, i32) #0 !prof !0 {
.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t1, %.b1 ], [ 0, %.preheader ]
  %t1 = add nsw i32 %invariant, %iv
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b1, label %.exit, !prof !1

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; There is a potentially aliasing write in the loop (the call to @foo in b6,
; which is declared without any memory attributes), do not sink load
; CHECK-LABEL: @t5(
; CHECK: .preheader:
; CHECK: load i32, i32* @g
; CHECK: .b1:
; CHECK-NOT: load i32, i32* @g
define i32 @t5(i32, i32*) #0 !prof !0 {
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Regardless of aliasing store in loop this load from constant memory can be sunk.
; CHECK-LABEL: @t5_const_memory(
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g_const
; CHECK: .b2:
; CHECK: load i32, i32* @g_const
; CHECK: br i1 %c2, label %.b3, label %.b4
define i32 @t5_const_memory(i32, i32*) #0 !prof !0 {
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g_const
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; preheader: 1000
; b2: 15
; b3: 7
; Do not sink unordered atomic load to b2
; CHECK-LABEL: @t6(
; CHECK: .preheader:
; CHECK: load atomic i32, i32* @g unordered, align 4
; CHECK: .b2:
; CHECK-NOT: load atomic i32, i32* @g unordered, align 4
define i32 @t6(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load atomic i32, i32* @g unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

@g_const = constant i32 0, align 4

;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; preheader: 1000
; b2: 0.5
; b3: 999.5
; Sink unordered atomic load to b2. It is allowed to sink into loop unordered
; load from constant.
; CHECK-LABEL: @t7(
; CHECK: .preheader:
; CHECK-NOT: load atomic i32, i32* @g_const unordered, align 4
; CHECK: .b2:
; CHECK: load atomic i32, i32* @g_const unordered, align 4
define i32 @t7(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load atomic i32, i32* @g_const unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; External callee with no memory attributes; t5 and t5_const_memory call it
; from inside their loops.
declare i32 @foo()

; Profile metadata shared by all functions above.
; !0: function entry count of 1.
; !1: branch weights 1:2000, i.e. the first successor is almost never taken.
; !2: branch weights 2000:1, i.e. the first successor is almost always taken.
; !3: branch weights 100:1, used on the loop latch branches (first successor
;     is the back edge to b1).
!0 = !{!"function_entry_count", i64 1}
!1 = !{!"branch_weights", i32 1, i32 2000}
!2 = !{!"branch_weights", i32 2000, i32 1}
!3 = !{!"branch_weights", i32 100, i32 1}