; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s

; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
  store volatile i32 0, i32* %asc
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: enable_sgpr_queue_ptr = 1

; CIVI: s_mov_b64 s[6:7], s[4:5]
; GFX9-NOT: s_mov_b64
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}
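
; The dispatch ID is an i64, so unlike the single-SGPR workgroup IDs tested
; further below, it is passed to the callee in an SGPR pair (s[6:7] here).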
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[6:7]
define void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; No kernarg segment so that there is a mov to check. With kernarg
; pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: enable_sgpr_dispatch_id = 1

; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4
; GCN: ; use s6
; GCN: s_setpc_b64
define void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s6
; GCN: ; use s7
; GCN: ; use s8
define void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}
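
; For the kernel -> function calls below, the kernel only receives the
; workgroup IDs it actually needs (see the enable_sgpr_workgroup_id_* bits)
; and must copy them into the argument SGPRs, starting at s6, that the
; callee expects; the GCN-NOT lines verify no copies for disabled inputs.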
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-NOT: s6
; GCN: s_mov_b32 s33, s7
; GCN-NOT: s6
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s4, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s32, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9
; GCN: s_mov_b32 s6, s7
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s7, s8
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; The argument is already in the right place.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}
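
; The following functions also take an ordinary i32 argument, which arrives
; in v0; the workgroup ID is still expected in the first free SGPR, s6.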
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s7
; GCN-DAG: v_mov_b32_e32 v0, 0x22b

; GCN-NOT: s6
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7

; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}
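
; use_every_sgpr_input consumes every special SGPR input at once, so the
; checks below pin down the full layout: dispatch ptr in s[6:7], queue ptr
; in s[8:9], kernarg segment ptr in s[10:11], dispatch ID in s[12:13], and
; the workgroup IDs in s14, s15 and s16.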
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s10
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use s[12:13]
; GCN: ; use s14
; GCN: ; use s15
; GCN: ; use s16
define void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: enable_sgpr_workgroup_info = 0

; GCN: enable_sgpr_private_segment_buffer = 1
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: enable_sgpr_queue_ptr = 1
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: enable_sgpr_dispatch_id = 1
; GCN: enable_sgpr_flat_scratch_init = 1

; GCN: s_mov_b32 s33, s17
; GCN: s_mov_b64 s[12:13], s[10:11]
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b64 s[8:9], s[6:7]
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
define void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}
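
; When the caller is itself a function, the pointer inputs are already in
; the right registers; only the workgroup IDs have to be shuffled down from
; s14-s16 into the s6-s8 slots that use_workgroup_id_xyz expects.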
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16
; GCN: s_swappc_b64
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}
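
; Same as above, but here the inputs are still used after the call, so they
; must first be copied to SGPRs that survive the s_swappc_b64 (the SAVE_*
; and LO_*/HI_* registers below) and read back from those copies afterwards.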
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN: s_mov_b32 s5, s32
; GCN: s_add_u32 s32, s32, 0x400

; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]

; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16

; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]

; GCN: s_swappc_b64

; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_X]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_X]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Y]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Y]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Z]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Z]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use
; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  call void @use_workgroup_id_xyz()

  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }