; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=HSA -check-prefix=CI-HSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=HSA -check-prefix=VI-HSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Each kernel below reads one work-item intrinsic and stores the result
; through %out. The checks pin where each target finds the value.
; Do not spell a check prefix directly followed by a colon inside a
; comment, FileCheck would treat it as a directive.

; Number of work groups in X. The r600 run expects the stored value to be
; moved out of KC0[0].X. The non-HSA amdgcn runs expect a scalar load from
; s[0:1] at offset 0. The HSA runs only verify the kernel code object
; header fields for this kernel.

; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X

; HSA: .amd_kernel_code_t

; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: enable_sgpr_dispatch_id = 0
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: enable_sgpr_private_segment_size = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0

; HSA: .end_amd_kernel_code_t


; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

define void @ngroups_x(i32 addrspace(1)* %out) {
entry:
  %ngroups.x = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %ngroups.x, i32 addrspace(1)* %out
  ret void
}

; Number of work groups in Y. From here on the expected tonga offsets are
; four times the expected SI offsets (presumably byte units versus dword
; units, confirm against the ISA documentation).

; FUNC-LABEL: {{^}}ngroups_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @ngroups_y(i32 addrspace(1)* %out) {
entry:
  %ngroups.y = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %ngroups.y, i32 addrspace(1)* %out
  ret void
}

; Number of work groups in Z.

; FUNC-LABEL: {{^}}ngroups_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @ngroups_z(i32 addrspace(1)* %out) {
entry:
  %ngroups.z = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %ngroups.z, i32 addrspace(1)* %out
  ret void
}

; Global size in X. On r600 this is the last component of KC0[0].

; FUNC-LABEL: {{^}}global_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_x(i32 addrspace(1)* %out) {
entry:
  %global.size.x = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %global.size.x, i32 addrspace(1)* %out
  ret void
}

; Global size in Y. On r600 the expected source moves on to KC0[1].

; FUNC-LABEL: {{^}}global_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_y(i32 addrspace(1)* %out) {
entry:
  %global.size.y = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %global.size.y, i32 addrspace(1)* %out
  ret void
}

; Global size in Z.

; FUNC-LABEL: {{^}}global_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_z(i32 addrspace(1)* %out) {
entry:
  %global.size.z = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %global.size.z, i32 addrspace(1)* %out
  ret void
}

; The tgid values are stored in sgprs offset by the number of user
; sgprs.

; Work-group id in X. The expected source register is the first sgpr
; after the user sgprs, so s6 when six user sgprs are reported (HSA runs)
; and s2 when two are reported (non-HSA runs).

; FUNC-LABEL: {{^}}tgid_x:
; HSA: .amd_kernel_code_t
; HSA: compute_pgm_rsrc2_user_sgpr = 6
; HSA: compute_pgm_rsrc2_tgid_x_en = 1
; HSA: compute_pgm_rsrc2_tgid_y_en = 0
; HSA: compute_pgm_rsrc2_tgid_z_en = 0
; HSA: compute_pgm_rsrc2_tg_size_en = 0
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0
; HSA: .end_amd_kernel_code_t

; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}}
; GCN: buffer_store_dword [[VVAL]]

; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Work-group id in Y. X and Y are both enabled, so Y is expected one
; register after X, s7 for the HSA runs and s3 for the non-HSA runs.
; The HSA register check must use the plain HSA prefix. The RUN lines
; never enable a combined GCN plus HSA prefix, so a directive spelled
; that way is silently skipped and the store check would then reuse the
; VVAL captured in tgid_x. The register operands are anchored at end of
; line so that s3 cannot match s30 and s7 cannot match s70.

; FUNC-LABEL: {{^}}tgid_y:
; HSA: compute_pgm_rsrc2_user_sgpr = 6
; HSA: compute_pgm_rsrc2_tgid_x_en = 1
; HSA: compute_pgm_rsrc2_tgid_y_en = 1
; HSA: compute_pgm_rsrc2_tgid_z_en = 0
; HSA: compute_pgm_rsrc2_tg_size_en = 0
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
; GCN: buffer_store_dword [[VVAL]]

; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Work-group id in Z. Y is not enabled here, so Z is expected directly
; after X, which is again s7 for the HSA runs and s3 for the non-HSA runs.

; FUNC-LABEL: {{^}}tgid_z:
; HSA: compute_pgm_rsrc2_user_sgpr = 6
; HSA: compute_pgm_rsrc2_tgid_x_en = 1
; HSA: compute_pgm_rsrc2_tgid_y_en = 0
; HSA: compute_pgm_rsrc2_tgid_z_en = 1
; HSA: compute_pgm_rsrc2_tg_size_en = 0
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: enable_sgpr_dispatch_id = 0
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: enable_sgpr_private_segment_size = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0

; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
; GCN: buffer_store_dword [[VVAL]]

; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Work-item id kernels. Each one is preceded by a check of the non-HSA
; .AMDGPU.config section, placed ahead of the function label
; (NOTE(review) presumably because the config entry for a kernel is
; printed before its label, confirm against the asm printer).
; NOTE(review) 47180 is 0xB84C, which looks like the COMPUTE_PGM_RSRC2
; register offset. The value that follows grows by 2048 for each extra
; work-item id component (132, then 2180, then 4228), matching the
; tidig_comp_cnt values 0, 1 and 2 checked for the HSA runs.

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}

; Work-item id in X, expected to be stored straight from v0.

; FUNC-LABEL: {{^}}tidig_x:
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; GCN: buffer_store_dword v0
define void @tidig_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}

; Work-item id in Y, expected to be stored straight from v1.

; FUNC-LABEL: {{^}}tidig_y:

; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1
; GCN: buffer_store_dword v1
define void @tidig_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}

; Work-item id in Z, expected to be stored straight from v2.

; FUNC-LABEL: {{^}}tidig_z:
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2
; GCN: buffer_store_dword v2
define void @tidig_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Intrinsic declarations used by the kernels in this file.
declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

; Declared but not called by any kernel in this file.
declare i32 @llvm.AMDGPU.read.workdim() #0

attributes #0 = { readnone }