; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Every test below shares one shape: an address is computed in the entry
; block, a branch on the mbcnt.lo lane id skips or enters the 'if' block, and
; the address is only dereferenced after the branch. The opt runs check
; whether CodeGenPrepare leaves the getelementptr in the entry block or
; rewrites it next to its use; the llc runs check the resulting instructions
; on tahiti, bonaire and tonga.

; Global i32 load at constant element offset 7. The bonaire run expects no
; GEP on %in before the branch and a ptrtoint after it; the tonga run expects
; the GEP to remain.
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
; OPT-CI: ptrtoint

; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at byte offset 65535. The GEP is expected to stay in the
; entry block, and the load takes its offset from an SGPR operand.
; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB1_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at byte offset 4095: the offset is expected to be folded
; into the load as an immediate (offset:4095).
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at byte offset 4096 (one past the previous test): the load
; is expected to take its offset from an SGPR operand instead of an
; immediate.
; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB3_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Alloca access at element index 1023 (byte offset 4092): no GEP on the
; array type is expected before the branch, and the store/load are expected
; to carry offset:4092.
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
; OPT: ptrtoint

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Same as the previous test but at element index 1024: the GEP is expected
; to stay in the entry block, no ptrtoint is expected, and the store/load
; are expected to carry no immediate offset.
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint

; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: {{^}}BB5_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Global i32 load indexed by a zero-extended i32 argument rather than a
; constant. Only the llc output for bonaire and tonga is checked.
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^}}BB6_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
  %offset.ext = zext i32 %offset to i64
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; The remaining branch tests load through an addrspace(2) pointer at
; increasing constant offsets.

; Element offset 7: no addrspace(2) GEP is expected before the branch; the
; tahiti run expects the offset as the immediate 0x7.
; OPT-LABEL: @test_sink_constant_small_offset_i32(
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element offset 255: no addrspace(2) GEP is expected before the branch; the
; tahiti run expects the offset as the immediate 0xff.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32(
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element offset 256: the tahiti run expects the GEP to remain and the byte
; offset 0x400 to be materialized in an SGPR; the bonaire and tonga runs
; expect no GEP before the branch.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400

; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element offset 4294967295: the base address is expected to be adjusted
; with a 64-bit scalar add (low word -4, high word 3).
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element offset 17179869181: the GEP is expected to remain on every
; subtarget and the address is formed with a 64-bit scalar add.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32(
; OPT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element offset 262143 (byte offset 0xffffc): tahiti is expected to
; materialize the byte offset in an SGPR, bonaire to use the immediate
; 0x3ffff, and tonga to use the immediate 0xffffc.
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Element offset 262144 (byte offset 0x100000): tahiti and tonga are
; expected to keep the GEP and materialize the byte offset in an SGPR;
; bonaire is expected to use the immediate 0x40000.
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}

; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

%struct.foo = type { [3 x float], [3 x float] }

; Two addrspace(3) loads from fields of the same struct, with the GEPs in a
; predecessor block. The loads are expected to combine into one ds_read2_b32
; using dword offsets 3 and 5 from the base pointer.
; OPT-LABEL: @sink_ds_address(
; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i64

; GCN-LABEL: {{^}}sink_ds_address:
; GCN: s_load_dword [[SREG1:s[0-9]+]],
; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5
define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
  %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
  %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
  br label %bb32

bb32:
  %a = load float, float addrspace(3)* %x, align 4
  %b = load float, float addrspace(3)* %y, align 4
  %cmp = fcmp one float %a, %b
  br i1 %cmp, label %bb34, label %bb33

bb33:
  unreachable

bb34:
  unreachable
}

declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }