; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S | FileCheck %s

; Regression tests for constant propagation in the inline cost analysis.
; Most callees below contain a deliberately long block of arithmetic that is
; only reachable when a branch condition goes one particular way. Each caller
; passes constant arguments that make that block dead, and the test passes
; only if the call is inlined under the low threshold given on the command
; lines above.

; Trivial case: both arguments are constants, so the division folds away.
define internal i32 @callee1(i32 %A, i32 %B) {
  %C = sdiv i32 %A, %B
  ret i32 %C
}

define i32 @caller1() {
; CHECK-LABEL: define i32 @caller1(
; CHECK-NEXT: ret i32 3

  %X = call i32 @callee1( i32 10, i32 3 )
  ret i32 %X
}

define i32 @caller2() {
; Check that we can constant-prop through instructions after inlining callee21
; to get constants in the inlined callsite to callee22.
; FIXME: Currently, the threshold is fixed at 20 because we don't perform
; *recursive* cost analysis to realize that the nested call site will definitely
; inline and be cheap. We should eventually do that and lower the threshold here
; to 1.
;
; The negative pattern below is deliberately just `call`: both callees return
; i32, so a pattern spelled with a `void` return type could never match and
; would let a leftover call go unnoticed.
;
; CHECK-LABEL: @caller2(
; CHECK-NOT: call
; CHECK: ret

  %x = call i32 @callee21(i32 42, i32 48)
  ret i32 %x
}

; Middle layer for caller2: computes %y - %x and forwards it to callee22.
define i32 @callee21(i32 %x, i32 %y) {
  %sub = sub i32 %y, %x
  %result = call i32 @callee22(i32 %sub)
  ret i32 %result
}

; NOTE(review): not referenced by anything visible in this file.
declare i8* @getptr()

; Innermost layer for caller2: the long block is reached only when %x > 42
; (unsigned).
define i32 @callee22(i32 %x) {
  %icmp = icmp ugt i32 %x, 42
  br i1 %icmp, label %bb.true, label %bb.false
bb.true:
  ; This block mustn't be counted in the inline cost.
  %x1 = add i32 %x, 1
  %x2 = add i32 %x1, 1
  %x3 = add i32 %x2, 1
  %x4 = add i32 %x3, 1
  %x5 = add i32 %x4, 1
  %x6 = add i32 %x5, 1
  %x7 = add i32 %x6, 1
  %x8 = add i32 %x7, 1

  ret i32 %x8
bb.false:
  ret i32 %x
}

define i32 @caller3() {
; Check that even if the expensive path is hidden behind several basic blocks,
; it doesn't count toward the inline cost when constant-prop proves those paths
; dead.
;
; CHECK-LABEL: @caller3(
; CHECK-NOT: call
; CHECK: ret i32 6

entry:
  %x = call i32 @callee3(i32 42, i32 48)
  ret i32 %x
}

; With the arguments used by caller3, %sub is 6, so the first branch goes to
; bb.false and everything under bb.true is dead.
define i32 @callee3(i32 %x, i32 %y) {
  %sub = sub i32 %y, %x
  %icmp = icmp ugt i32 %sub, 42
  br i1 %icmp, label %bb.true, label %bb.false

bb.true:
  %icmp2 = icmp ult i32 %sub, 64
  br i1 %icmp2, label %bb.true.true, label %bb.true.false

bb.true.true:
  ; This block mustn't be counted in the inline cost.
  %x1 = add i32 %x, 1
  %x2 = add i32 %x1, 1
  %x3 = add i32 %x2, 1
  %x4 = add i32 %x3, 1
  %x5 = add i32 %x4, 1
  %x6 = add i32 %x5, 1
  %x7 = add i32 %x6, 1
  %x8 = add i32 %x7, 1
  br label %bb.merge

bb.true.false:
  ; This block mustn't be counted in the inline cost.
  %y1 = add i32 %y, 1
  %y2 = add i32 %y1, 1
  %y3 = add i32 %y2, 1
  %y4 = add i32 %y3, 1
  %y5 = add i32 %y4, 1
  %y6 = add i32 %y5, 1
  %y7 = add i32 %y6, 1
  %y8 = add i32 %y7, 1
  br label %bb.merge

bb.merge:
  %result = phi i32 [ %x8, %bb.true.true ], [ %y8, %bb.true.false ]
  ret i32 %result

bb.false:
  ret i32 %sub
}

declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)

define i8 @caller4(i8 %z) {
; Check that we can constant fold through intrinsics such as the
; overflow-detecting arithmetic intrinsics. These are particularly important
; as they are used heavily in standard library code and generic C++ code where
; the arguments are often constant but complete generality is required.
;
; CHECK-LABEL: @caller4(
; CHECK-NOT: call
; CHECK: ret i8 -1

entry:
  %x = call i8 @callee4(i8 254, i8 14, i8 %z)
  ret i8 %x
}

; 254 + 14 overflows an unsigned i8, so with caller4's arguments the overflow
; bit is set and only bb.true is live.
define i8 @callee4(i8 %x, i8 %y, i8 %z) {
  %uadd = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
  %o = extractvalue {i8, i1} %uadd, 1
  br i1 %o, label %bb.true, label %bb.false

bb.true:
  ret i8 -1

bb.false:
  ; This block mustn't be counted in the inline cost.
  %z1 = add i8 %z, 1
  %z2 = add i8 %z1, 1
  %z3 = add i8 %z2, 1
  %z4 = add i8 %z3, 1
  %z5 = add i8 %z4, 1
  %z6 = add i8 %z5, 1
  %z7 = add i8 %z6, 1
  %z8 = add i8 %z7, 1
  ret i8 %z8
}

define i64 @caller5(i64 %y) {
; Check that we can round trip constants through various kinds of casts etc w/o
; losing track of the constant prop in the inline cost analysis.
;
; CHECK-LABEL: @caller5(
; CHECK-NOT: call
; CHECK: ret i64 -1

entry:
  %x = call i64 @callee5(i64 42, i64 %y)
  ret i64 %x
}

; Sends %x through inttoptr, bitcast, ptrtoint, trunc and zext before
; comparing it with 42.
define i64 @callee5(i64 %x, i64 %y) {
  %inttoptr = inttoptr i64 %x to i8*
  %bitcast = bitcast i8* %inttoptr to i32*
  %ptrtoint = ptrtoint i32* %bitcast to i64
  %trunc = trunc i64 %ptrtoint to i32
  %zext = zext i32 %trunc to i64
  %cmp = icmp eq i64 %zext, 42
  br i1 %cmp, label %bb.true, label %bb.false

bb.true:
  ret i64 -1

bb.false:
  ; This block mustn't be counted in the inline cost.
  %y1 = add i64 %y, 1
  %y2 = add i64 %y1, 1
  %y3 = add i64 %y2, 1
  %y4 = add i64 %y3, 1
  %y5 = add i64 %y4, 1
  %y6 = add i64 %y5, 1
  %y7 = add i64 %y6, 1
  %y8 = add i64 %y7, 1
  ret i64 %y8
}

define float @caller6() {
; Check that we can constant-prop through fcmp instructions
;
; CHECK-LABEL: @caller6(
; CHECK-NOT: call
; CHECK: ret
  %x = call float @callee6(float 42.0)
  ret float %x
}

; The long block is reached only when the fcmp ugt against 42.0 is true.
define float @callee6(float %x) {
  %icmp = fcmp ugt float %x, 42.0
  br i1 %icmp, label %bb.true, label %bb.false

bb.true:
  ; This block mustn't be counted in the inline cost.
  %x1 = fadd float %x, 1.0
  %x2 = fadd float %x1, 1.0
  %x3 = fadd float %x2, 1.0
  %x4 = fadd float %x3, 1.0
  %x5 = fadd float %x4, 1.0
  %x6 = fadd float %x5, 1.0
  %x7 = fadd float %x6, 1.0
  %x8 = fadd float %x7, 1.0
  ret float %x8

bb.false:
  ret float %x
}



define i32 @PR13412.main() {
; This is a somewhat complicated three layer subprogram that was reported to
; compute the wrong value for a branch due to assuming that an argument
; mid-inline couldn't be equal to another pointer.
;
; After inlining, the branch should point directly to the exit block, not to
; the intermediate block.
; CHECK: @PR13412.main
; CHECK: br i1 true, label %[[TRUE_DEST:.*]], label %[[FALSE_DEST:.*]]
; CHECK: [[FALSE_DEST]]:
; CHECK-NEXT: call void @PR13412.fail()
; CHECK: [[TRUE_DEST]]:
; CHECK-NEXT: ret i32 0

entry:
  %i1 = alloca i64
  store i64 0, i64* %i1
  %arraydecay = bitcast i64* %i1 to i32*
  ; The same pointer is deliberately passed for both arguments.
  %call = call i1 @PR13412.first(i32* %arraydecay, i32* %arraydecay)
  br i1 %call, label %cond.end, label %cond.false

cond.false:
  call void @PR13412.fail()
  br label %cond.end

cond.end:
  ret i32 0
}

; Middle layer: reports whether the pointer returned by the inner layer
; equals %b.
define internal i1 @PR13412.first(i32* %a, i32* %b) {
entry:
  %call = call i32* @PR13412.second(i32* %a, i32* %b)
  %cmp = icmp eq i32* %call, %b
  ret i1 %cmp
}

declare void @PR13412.fail()

; Inner layer: returns %a only when the element distance between %b and %a
; exceeds 1 and the two pointees compare equal; otherwise returns %b.
define internal i32* @PR13412.second(i32* %a, i32* %b) {
entry:
  %sub.ptr.lhs.cast = ptrtoint i32* %b to i64
  %sub.ptr.rhs.cast = ptrtoint i32* %a to i64
  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
  %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2
  %cmp = icmp ugt i64 %sub.ptr.div, 1
  br i1 %cmp, label %if.then, label %if.end3

if.then:
  %0 = load i32, i32* %a
  %1 = load i32, i32* %b
  %cmp1 = icmp eq i32 %0, %1
  br i1 %cmp1, label %return, label %if.end3

if.end3:
  br label %return

return:
  %retval.0 = phi i32* [ %b, %if.end3 ], [ %a, %if.then ]
  ret i32* %retval.0
}

; The callee feeds a single-entry phi of the constant 0 to an external function
; whose parameter is marked `returned`. After inlining, the call must still be
; present, take the constant directly, and supply the returned value.
; NOTE(review): presumably a regression test for the bug report named in the
; function names; the original report is not visible from this file.
declare i32 @PR28802.external(i32 returned %p1)

define internal i32 @PR28802.callee() {
entry:
  br label %cont

cont:
  %0 = phi i32 [ 0, %entry ]
  %call = call i32 @PR28802.external(i32 %0)
  ret i32 %call
}

define i32 @PR28802() {
entry:
  %call = call i32 @PR28802.callee()
  ret i32 %call
}

; CHECK-LABEL: define i32 @PR28802(
; CHECK: %[[call:.*]] = call i32 @PR28802.external(i32 0)
; CHECK: ret i32 %[[call]]

; With %p2 = 0 and %c = false (the arguments used by the caller below), the phi
; takes the constant 0 from cond.true and the `or` folds to 0.
define internal i32 @PR28848.callee(i32 %p2, i1 %c) {
entry:
  br i1 %c, label %cond.end, label %cond.true

cond.true:
  br label %cond.end

cond.end:
  %cond = phi i32 [ 0, %cond.true ], [ %p2, %entry ]
  %or = or i32 %cond, %p2
  ret i32 %or
}

define i32 @PR28848() {
entry:
  %call = call i32 @PR28848.callee(i32 0, i1 false)
  ret i32 %call
}
; CHECK-LABEL: define i32 @PR28848(
; CHECK: ret i32 0

; The callee computes a value that is never used and returns void. Its second
; argument comes from a call to an external function with a `returned`
; parameter; after inlining only that external call should remain.
define internal void @callee7(i16 %param1, i16 %param2) {
entry:
  br label %bb

bb:
  %phi = phi i16 [ %param2, %entry ]
  %add = add i16 %phi, %param1
  ret void
}

declare i16 @caller7.external(i16 returned)

define void @caller7() {
bb1:
  %call = call i16 @caller7.external(i16 1)
  call void @callee7(i16 0, i16 %call)
  ret void
}
; CHECK-LABEL: define void @caller7(
; CHECK: %call = call i16 @caller7.external(i16 1)
; CHECK-NEXT: ret void

define float @caller8(float %y) {
; Check that we can constant-prop through fneg instructions
;
; CHECK-LABEL: @caller8(
; CHECK-NOT: call
; CHECK: ret
  %x = call float @callee8(float -42.0, float %y)
  ret float %x
}

; Negates %x before the comparison, so the analysis has to fold through fneg
; to resolve the branch.
define float @callee8(float %x, float %y) {
  %neg = fneg float %x
  %icmp = fcmp ugt float %neg, 42.0
  br i1 %icmp, label %bb.true, label %bb.false

bb.true:
  ; This block mustn't be counted in the inline cost.
  %y1 = fadd float %y, 1.0
  %y2 = fadd float %y1, 1.0
  %y3 = fadd float %y2, 1.0
  %y4 = fadd float %y3, 1.0
  %y5 = fadd float %y4, 1.0
  %y6 = fadd float %y5, 1.0
  %y7 = fadd float %y6, 1.0
  %y8 = fadd float %y7, 1.0
  ret float %y8

bb.false:
  ret float %x
}