; RUN: opt -S -codegenprepare < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"

@x = external global [1 x [2 x <4 x float>]]

; Can we sink single addressing mode computation to use?
define void @test1(i1 %cond, i64* %base) {
; CHECK-LABEL: @test1
; CHECK: getelementptr inbounds i8, {{.+}} 40
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
  %v = load i32, i32* %casted, align 4
  br label %fallthrough

fallthrough:
  ret void
}

declare void @foo(i32)

; Make sure sinking two copies of addressing mode into different blocks works
define void @test2(i1 %cond, i64* %base) {
; CHECK-LABEL: @test2
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %next, label %fallthrough

next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v2 = load i32, i32* %casted, align 4
  call void @foo(i32 %v2)
  br label %fallthrough

fallthrough:
  ret void
}

; If we have two loads in the same block, only need one copy of addressing mode
; - instruction selection will duplicate if needed
define void @test3(i1 %cond, i64* %base) {
; CHECK-LABEL: @test3
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v2 = load i32, i32* %casted, align 4
  call void @foo(i32 %v2)
  br label %fallthrough

fallthrough:
  ret void
}

; Can we still sink addressing mode if there's a cold use of the
; address itself?
define void @test4(i1 %cond, i64* %base) {
; CHECK-LABEL: @test4
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v1, i32* %casted) cold
  br label %fallthrough
}

; Negative test - don't want to duplicate addressing into hot path
define void @test5(i1 %cond, i64* %base) {
; CHECK-LABEL: @test5
entry:
; CHECK: %addr = getelementptr inbounds
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, i32* %casted) ;; NOT COLD
  br label %fallthrough
}

; Negative test - opt for size
define void @test6(i1 %cond, i64* %base) minsize {
; CHECK-LABEL: @test6
entry:
; CHECK: %addr = getelementptr
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, i32* %casted) cold
  br label %fallthrough
}

; Negative test - opt for size (same as test6, but via profile-guided size opt)
define void @test6_pgso(i1 %cond, i64* %base) !prof !14 {
; CHECK-LABEL: @test6_pgso
entry:
; CHECK: %addr = getelementptr
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, i32* %casted) cold
  br label %fallthrough
}

; Make sure sinking two copies of addressing mode into different blocks works
; when there are cold paths for each.
define void @test7(i1 %cond, i64* %base) {
; CHECK-LABEL: @test7
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, i32* %casted, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %next

next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v2 = load i32, i32* %casted, align 4
  call void @foo(i32 %v2)
  %cmp2 = icmp eq i32 %v2, 0
  ; Branch to rare.2 (not rare.1) so that each load has its own cold path;
  ; otherwise rare.2 is unreachable, CGP deletes it, and its CHECKs can't match.
  br i1 %cmp2, label %rare.2, label %fallthrough

fallthrough:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v1, i32* %casted) cold
  br label %next

rare.2:
; CHECK-LABEL: rare.2:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v2, i32* %casted) cold
  br label %fallthrough
}

declare void @slowpath(i32, i32*)

; Make sure we don't end up in an infinite loop after we fail to sink.
; CHECK-LABEL: define void @test8
; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
define void @test8() {
allocas:
  %aFOO_load = load float*, float** undef
  %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
  %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
  %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
  %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
  br label %load.i145

load.i145:
  %ptr.i143 = bitcast i8* %ptr to <4 x float>*
  %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
  %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
  br label %pl_loop.i.i122

pl_loop.i.i122:
  br label %pl_loop.i.i122
}

; Make sure we can sink address computation even
; if there is a cycle in phi nodes.
define void @test9(i1 %cond, i64* %base) {
; CHECK-LABEL: @test9
entry:
  %addr = getelementptr inbounds i64, i64* %base, i64 5
  %casted = bitcast i64* %addr to i32*
  br label %header

header:
  %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
  %casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge]
  br i1 %cond, label %if.then, label %backedge

if.then:
  call void @foo(i32 %iv)
  %addr.1 = getelementptr inbounds i64, i64* %base, i64 5
  %casted.1 = bitcast i64* %addr.1 to i32*
  br label %backedge

backedge:
; CHECK-LABEL: backedge:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
  %v = load i32, i32* %casted.merged, align 4
  call void @foo(i32 %v)
  %iv.inc = add i32 %iv, 1
  %cmp = icmp slt i32 %iv.inc, 1000
  br i1 %cmp, label %header, label %exit

exit:
  ret void
}

; Make sure we can eliminate a select when both arguments perform equivalent
; address computation.
define void @test10(i1 %cond, i64* %base) {
; CHECK-LABEL: @test10
; CHECK: getelementptr inbounds i8, {{.+}} 40
; CHECK-NOT: select
entry:
  %gep1 = getelementptr inbounds i64, i64* %base, i64 5
  %gep1.casted = bitcast i64* %gep1 to i32*
  %base.casted = bitcast i64* %base to i32*
  %gep2 = getelementptr inbounds i32, i32* %base.casted, i64 10
  ; Both select arms compute base + 40 bytes, so the select is redundant.
  %casted.merged = select i1 %cond, i32* %gep1.casted, i32* %gep2
  %v = load i32, i32* %casted.merged, align 4
  call void @foo(i32 %v)
  ret void
}

; Found by fuzzer, getSExtValue of > 64 bit constant
define void @i96_mul(i1* %base, i96 %offset) {
BB:
  ;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF
  %B84 = mul i96 %offset, 39614081257132168796771975167
  %G23 = getelementptr i1, i1* %base, i96 %B84
  store i1 false, i1* %G23
  ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}