; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen test for the legacy llvm.r600.read.* intrinsics.  Every kernel below
; calls exactly one intrinsic and stores its i32 result to %out, so the checks
; can pin down where the backend fetches that single value from.  The file is
; compiled three times: default amdgcn, amdgcn tonga, and r600 redwood.

; Legacy intrinsics that just read implicit parameters

; In this group the amdgcn checks expect an s_load_dword from s[0:1] at a fixed
; offset, copied to a VGPR and stored.  The tonga offsets are four times the
; default-amdgcn ones (presumably byte versus dword units - TODO confirm).
; The r600 checks expect a MOV from consecutive KC0 constant channels.

; FUNC-LABEL: {{^}}ngroups_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
define amdgpu_kernel void @ngroups_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ngroups_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
define amdgpu_kernel void @ngroups_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ngroups_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
define amdgpu_kernel void @ngroups_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
define amdgpu_kernel void @global_size_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
define amdgpu_kernel void @global_size_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
define amdgpu_kernel void @global_size_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
define amdgpu_kernel void @local_size_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
define amdgpu_kernel void @local_size_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
define amdgpu_kernel void @local_size_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Legacy use of r600 intrinsics by GCN

; The tgid values are stored in sgprs offset by the number of user
; sgprs.

; With USER_SGPR checked as 2 below, the first workgroup id is expected in s2.
; The {{$}} anchor on the register keeps e.g. s2 from also matching s20-s29.

; FUNC-LABEL: {{^}}tgid_x_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define amdgpu_kernel void @tgid_x_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; The register is anchored with {{$}} like the x and z variants so that s3
; cannot be satisfied by s30-s39.

; FUNC-LABEL: {{^}}tgid_y_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
define amdgpu_kernel void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Only X and Z are enabled here (TGID_Y_EN is checked as 0), and the z value is
; expected in s3, i.e. directly after the x value.

; FUNC-LABEL: {{^}}tgid_z_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define amdgpu_kernel void @tgid_z_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; The tidig kernels check a pair of .long words in the .AMDGPU.config section:
; 47180 (0xB84C, presumably the COMPUTE_PGM_RSRC2 register id - TODO confirm)
; followed by its value.  The expected value is 132 for x and grows by 2048 for
; each further component (2180 for y, 4228 for z).  The work-item id itself is
; expected to be stored straight from v0, v1 or v2.

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}

; FUNC-LABEL: {{^}}tidig_x_legacy:
; GCN-NOHSA: buffer_store_dword v0
define amdgpu_kernel void @tidig_x_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}

; FUNC-LABEL: {{^}}tidig_y_legacy:

; GCN-NOHSA: buffer_store_dword v1
define amdgpu_kernel void @tidig_y_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}

; FUNC-LABEL: {{^}}tidig_z_legacy:
; GCN-NOHSA: buffer_store_dword v2
define amdgpu_kernel void @tidig_z_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Declarations of every intrinsic exercised above; all share attribute group 0.

declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

attributes #0 = { readnone }