; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI  -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=HSA -check-prefix=CI-HSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=HSA -check-prefix=VI-HSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s


; ngroups_x: stores the value returned by @llvm.r600.read.ngroups.x to %out.
; The r600 run expects the value to be moved out of KC0[0].X. The non-HSA GCN
; runs expect a scalar load from s[0:1] at offset 0 followed by a buffer
; store. The HSA runs only check the enable_sgpr_* fields printed between
; .amd_kernel_code_t and .end_amd_kernel_code_t.
; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X

; HSA: .amd_kernel_code_t

; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: enable_sgpr_dispatch_id = 0
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: enable_sgpr_private_segment_size = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0

; HSA: .end_amd_kernel_code_t


; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; ngroups_y: stores the value returned by @llvm.r600.read.ngroups.y to %out.
; The r600 run expects the value in KC0[0].Y. The non-HSA GCN runs expect a
; scalar load from s[0:1] at offset 0x1 on SI and 0x4 on VI (presumably dword
; vs. byte addressing of the same slot -- confirm against the ISA docs).
; FUNC-LABEL: {{^}}ngroups_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; ngroups_z: stores the value returned by @llvm.r600.read.ngroups.z to %out.
; The r600 run expects the value in KC0[0].Z. The non-HSA GCN runs expect a
; scalar load from s[0:1] at offset 0x2 on SI and 0x8 on VI.
; FUNC-LABEL: {{^}}ngroups_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; global_size_x: stores the value returned by @llvm.r600.read.global.size.x
; to %out. The r600 run expects the value in KC0[0].W. The non-HSA GCN runs
; expect a scalar load from s[0:1] at offset 0x3 on SI and 0xc on VI.
; FUNC-LABEL: {{^}}global_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; global_size_y: stores the value returned by @llvm.r600.read.global.size.y
; to %out. The r600 run expects the value in KC0[1].X. The non-HSA GCN runs
; expect a scalar load from s[0:1] at offset 0x4 on SI and 0x10 on VI.
; FUNC-LABEL: {{^}}global_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; global_size_z: stores the value returned by @llvm.r600.read.global.size.z
; to %out. The r600 run expects the value in KC0[1].Y. The non-HSA GCN runs
; expect a scalar load from s[0:1] at offset 0x5 on SI and 0x14 on VI.
; FUNC-LABEL: {{^}}global_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; The tgid values are stored in sgprs offset by the number of user
; sgprs.

; tgid_x: stores the value returned by @llvm.r600.read.tgid.x to %out.
; With 2 user SGPRs (non-HSA runs) the value is expected in s2; with 6 user
; SGPRs (HSA runs) it is expected in s6. Only TGID_X is expected to be
; enabled, and TIDIG_COMP_CNT is expected to be 0.
; FUNC-LABEL: {{^}}tgid_x:
; HSA: .amd_kernel_code_t
; HSA: compute_pgm_rsrc2_user_sgpr = 6
; HSA: compute_pgm_rsrc2_tgid_x_en = 1
; HSA: compute_pgm_rsrc2_tgid_y_en = 0
; HSA: compute_pgm_rsrc2_tgid_z_en = 0
; HSA: compute_pgm_rsrc2_tg_size_en = 0
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0
; HSA: .end_amd_kernel_code_t

; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}}
; GCN: buffer_store_dword [[VVAL]]

; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; tgid_y: stores the value returned by @llvm.r600.read.tgid.y to %out.
; Both TGID_X and TGID_Y are expected to be enabled, so the y value is
; expected one register past tgid_x, in s3 with 2 user SGPRs (non-HSA runs)
; and in s7 with 6 user SGPRs (HSA runs).
;
; NOTE(review): the HSA register check below previously used the GCN-HSA
; prefix, which no invocation of FileCheck in this file enables, so it was
; never evaluated and the following buffer_store_dword check reused a VVAL
; captured in an earlier function. It now uses the enabled HSA prefix, and
; both register operands are anchored with {{$}} like tgid_x and tgid_z.
; FUNC-LABEL: {{^}}tgid_y:
; HSA: compute_pgm_rsrc2_user_sgpr = 6
; HSA: compute_pgm_rsrc2_tgid_x_en = 1
; HSA: compute_pgm_rsrc2_tgid_y_en = 1
; HSA: compute_pgm_rsrc2_tgid_z_en = 0
; HSA: compute_pgm_rsrc2_tg_size_en = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
; GCN: buffer_store_dword [[VVAL]]

; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; tgid_z: stores the value returned by @llvm.r600.read.tgid.z to %out.
; TGID_X and TGID_Z are expected to be enabled while TGID_Y is not, so the z
; value is expected directly after tgid_x, in s3 (non-HSA runs) or s7 (HSA
; runs).
; FUNC-LABEL: {{^}}tgid_z:
; HSA: compute_pgm_rsrc2_user_sgpr = 6
; HSA: compute_pgm_rsrc2_tgid_x_en = 1
; HSA: compute_pgm_rsrc2_tgid_y_en = 0
; HSA: compute_pgm_rsrc2_tgid_z_en = 1
; HSA: compute_pgm_rsrc2_tg_size_en = 0
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: enable_sgpr_dispatch_id = 0
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: enable_sgpr_private_segment_size = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0

; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
; GCN: buffer_store_dword [[VVAL]]

; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; tidig_x: stores the value returned by @llvm.r600.read.tidig.x to %out.
; The GCN runs expect the stored register to be v0, and the HSA runs expect
; tidig_comp_cnt = 0. The non-HSA runs additionally expect the pair
; (47180, 132) in the .AMDGPU.config section; these checks sit ahead of the
; function label (47180 is presumably the COMPUTE_PGM_RSRC2 register id,
; 0xB84C -- confirm).
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}

; FUNC-LABEL: {{^}}tidig_x:
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; GCN: buffer_store_dword v0
define void @tidig_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; tidig_y: stores the value returned by @llvm.r600.read.tidig.y to %out.
; The GCN runs expect the stored register to be v1, and the HSA runs expect
; tidig_comp_cnt = 1. The non-HSA runs additionally expect the pair
; (47180, 2180) in the .AMDGPU.config section, checked ahead of the function
; label.
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}

; FUNC-LABEL: {{^}}tidig_y:

; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1
; GCN: buffer_store_dword v1
define void @tidig_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; tidig_z: stores the value returned by @llvm.r600.read.tidig.z to %out.
; The GCN runs expect the stored register to be v2, and the HSA runs expect
; tidig_comp_cnt = 2. The non-HSA runs additionally expect the pair
; (47180, 4228) in the .AMDGPU.config section, checked ahead of the function
; label.
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}

; FUNC-LABEL: {{^}}tidig_z:
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2
; GCN: buffer_store_dword v2
define void @tidig_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Declarations of the intrinsics called by the test functions. Every
; declaration uses attribute group #0, defined at the bottom as readnone.

; Number of work-groups per dimension.
declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

; Global size per dimension.
declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

; Work-group id per dimension.
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

; Work-item id within the work-group, per dimension.
declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

; NOTE(review): declared but not called by any function visible in this
; file; kept in case a test outside this view relies on it.
declare i32 @llvm.AMDGPU.read.workdim() #0

attributes #0 = { readnone }
