; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Atomic-increment intrinsics exercised by this file, one overload per pointer
; address space and value width. Every call below passes 0, 0, false for the
; three trailing operands.

; Default address space pointers (used by the flat_* tests).
declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2

; addrspace(1) pointers (used by the global_* tests).
declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2

; addrspace(3) pointers (used by the lds_* tests).
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2

; Workitem index query used by the *_addr64 and *_shl_base_* tests.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; 32-bit atomic increment on an addrspace(3) pointer whose result is stored to
; %out. The bonaire and tonga runs expect a write to m0; the gfx900 run expects
; m0 to be untouched.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %rtn = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %rtn, i32 addrspace(1)* %out
  ret void
}

; Same as lds_atomic_inc_ret_i32, but the pointer is first advanced by four
; i32 elements; the checks expect that to appear as offset:16 on the DS
; instruction.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %ptr.off = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %rtn = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  store i32 %rtn, i32 addrspace(1)* %out
  ret void
}

; 32-bit atomic increment on an addrspace(3) pointer whose result is unused;
; the checks expect the non-returning ds_inc_u32 form, addressed through a VGPR
; copy of the loaded kernel argument.
;
; The data-register check matches the full constant 42 that the call passes.
; A bare "4" would also match 42 (FileCheck matches substrings), so it would
; not catch a wrong constant such as 4 or 40.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Result-unused 32-bit atomic increment on an addrspace(3) pointer advanced by
; four i32 elements; the checks expect ds_inc_u32 with offset:16.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) #0 {
  %ptr.off = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %unused = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; 32-bit atomic increment on an addrspace(1) pointer with the result stored to
; %out. The bonaire and tonga runs expect buffer_atomic_inc with glc; the
; gfx900 run expects global_atomic_inc with glc.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %rtn = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %rtn, i32 addrspace(1)* %out
  ret void
}

; As global_atomic_inc_ret_i32, with the pointer advanced by four i32
; elements; all runs expect the displacement as offset:16 on the instruction.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %ptr.off = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %rtn = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  store i32 %rtn, i32 addrspace(1)* %out
  ret void
}

; Result-unused 32-bit atomic increment on an addrspace(1) pointer; the checks
; expect the instruction without the glc modifier.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GFX9: global_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]], off{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) #0 {
  %unused = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Result-unused 32-bit atomic increment on an addrspace(1) pointer advanced by
; four i32 elements; the checks expect offset:16 and no glc modifier.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GFX9: global_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) #0 {
  %ptr.off = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %unused = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; 32-bit atomic increment on an addrspace(1) pointer indexed by the workitem id
; and then advanced by five i32 elements. The bonaire run expects the addr64
; buffer form with offset:20; the tonga run expects flat_atomic_inc. Only the
; constant materialisation is checked for the gfx900 run.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %wi = call i32 @llvm.amdgcn.workitem.id.x()
  %ptr.wi = getelementptr i32, i32 addrspace(1)* %ptr, i32 %wi
  %out.wi = getelementptr i32, i32 addrspace(1)* %out, i32 %wi
  %ptr.off = getelementptr i32, i32 addrspace(1)* %ptr.wi, i32 5
  %rtn = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  store i32 %rtn, i32 addrspace(1)* %out.wi
  ret void
}

; Result-unused variant of global_atomic_inc_ret_i32_offset_addr64: the
; address is %ptr indexed by the workitem id plus five i32 elements, and the
; checks expect the instruction without glc.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %wi = call i32 @llvm.amdgcn.workitem.id.x()
  %ptr.wi = getelementptr i32, i32 addrspace(1)* %ptr, i32 %wi
  %ptr.off = getelementptr i32, i32 addrspace(1)* %ptr.wi, i32 5
  %unused = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; addrspace(3) array of 512 i32 elements indexed by the test below.
@lds0 = addrspace(3) global [512 x i32] undef, align 4

; Atomic increment on element (workitem id + 2) of @lds0. The index is also
; stored to %add_use, so the add has a second user besides the address. The
; checks expect a left shift by 2 for the address and the +2 elements folded
; into offset:8.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %wi.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %elt = add nsw i32 %wi.x, 2
  %slot = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %elt
  %rtn = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %slot, i32 9, i32 0, i32 0, i1 false)
  store i32 %elt, i32 addrspace(1)* %add_use
  store i32 %rtn, i32 addrspace(1)* %out
  ret void
}

; 64-bit atomic increment on an addrspace(3) pointer with the result stored to
; %out. The checks expect the constant 42 as a lo/hi VGPR pair (42, 0) feeding
; ds_inc_rtn_u64.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %rtn = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %rtn, i64 addrspace(1)* %out
  ret void
}

; As lds_atomic_inc_ret_i64, with the pointer advanced by four i64 elements;
; the checks expect offset:32 on the DS instruction.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %ptr.off = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %rtn = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  store i64 %rtn, i64 addrspace(1)* %out
  ret void
}

; Result-unused 64-bit atomic increment on an addrspace(3) pointer; the checks
; expect the non-returning ds_inc_u64 form.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) #0 {
  %unused = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Result-unused 64-bit atomic increment on an addrspace(3) pointer advanced by
; four i64 elements; the checks expect ds_inc_u64 with offset:32.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) #0 {
  %ptr.off = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %unused = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; 64-bit atomic increment on an addrspace(1) pointer with the result stored to
; %out. The bonaire and tonga runs expect buffer_atomic_inc_x2 with glc; the
; gfx900 run expects global_atomic_inc_x2 with glc.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %rtn = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %rtn, i64 addrspace(1)* %out
  ret void
}

; As global_atomic_inc_ret_i64, with the pointer advanced by four i64
; elements; all runs expect the displacement as offset:32.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %ptr.off = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %rtn = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  store i64 %rtn, i64 addrspace(1)* %out
  ret void
}

; Result-unused 64-bit atomic increment on an addrspace(1) pointer; the checks
; expect the instruction without the glc modifier.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) #0 {
  %unused = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Result-unused 64-bit atomic increment on an addrspace(1) pointer advanced by
; four i64 elements; the checks expect offset:32 and no glc modifier.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) #0 {
  %ptr.off = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %unused = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; 64-bit atomic increment on an addrspace(1) pointer indexed by the workitem id
; and then advanced by five i64 elements. The bonaire run expects the addr64
; buffer form with offset:40 (and one extra zero move before the high half of
; the constant); the tonga run expects flat_atomic_inc_x2. Only the constant
; materialisation is checked for the gfx900 run.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %wi = call i32 @llvm.amdgcn.workitem.id.x()
  %ptr.wi = getelementptr i64, i64 addrspace(1)* %ptr, i32 %wi
  %out.wi = getelementptr i64, i64 addrspace(1)* %out, i32 %wi
  %ptr.off = getelementptr i64, i64 addrspace(1)* %ptr.wi, i32 5
  %rtn = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  store i64 %rtn, i64 addrspace(1)* %out.wi
  ret void
}

; Result-unused variant of global_atomic_inc_ret_i64_offset_addr64: the
; address is %ptr indexed by the workitem id plus five i64 elements, and the
; checks expect the instruction without glc.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %wi = call i32 @llvm.amdgcn.workitem.id.x()
  %ptr.wi = getelementptr i64, i64 addrspace(1)* %ptr, i32 %wi
  %ptr.off = getelementptr i64, i64 addrspace(1)* %ptr.wi, i32 5
  %unused = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; 32-bit atomic increment on a default-address-space pointer with the result
; stored to %out; every run expects flat_atomic_inc with glc.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
  %rtn = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %rtn, i32* %out
  ret void
}

; As flat_atomic_inc_ret_i32, with the pointer advanced by four i32 elements.
; The bonaire and tonga runs expect no offset on the flat instruction; the
; gfx900 run expects offset:16.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
  %ptr.off = getelementptr i32, i32* %ptr, i32 4
  %rtn = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  store i32 %rtn, i32* %out
  ret void
}

; Result-unused 32-bit atomic increment on a default-address-space pointer;
; every run expects flat_atomic_inc without glc.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) #0 {
  %unused = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Result-unused 32-bit flat atomic increment with the pointer advanced by four
; i32 elements. Only the gfx900 run expects the displacement as offset:16.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) #0 {
  %ptr.off = getelementptr i32, i32* %ptr, i32 4
  %unused = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; 32-bit flat atomic increment on %ptr indexed by the workitem id and then
; advanced by five i32 elements; the result goes to the matching %out slot.
; Only the gfx900 run expects the constant part as offset:20.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
  %wi = call i32 @llvm.amdgcn.workitem.id.x()
  %ptr.wi = getelementptr i32, i32* %ptr, i32 %wi
  %out.wi = getelementptr i32, i32* %out, i32 %wi
  %ptr.off = getelementptr i32, i32* %ptr.wi, i32 5
  %rtn = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  store i32 %rtn, i32* %out.wi
  ret void
}

; Result-unused variant of flat_atomic_inc_ret_i32_offset_addr64; the checks
; expect flat_atomic_inc without glc, with offset:20 only on the gfx900 run.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
  %wi = call i32 @llvm.amdgcn.workitem.id.x()
  %ptr.wi = getelementptr i32, i32* %ptr, i32 %wi
  %ptr.off = getelementptr i32, i32* %ptr.wi, i32 5
  %unused = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr.off, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; addrspace(3) array of 512 i64 elements indexed by the test below.
@lds1 = addrspace(3) global [512 x i64] undef, align 8

; Atomic increment on element (workitem id + 2) of @lds1. The index is also
; stored to %add_use, so the add has a second user besides the address. The
; checks expect a left shift by 3 for the address and the +2 elements folded
; into offset:16.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %wi.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %elt = add nsw i32 %wi.x, 2
  %slot = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %elt
  %rtn = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %slot, i64 9, i32 0, i32 0, i1 false)
  store i32 %elt, i32 addrspace(1)* %add_use
  store i64 %rtn, i64 addrspace(1)* %out
  ret void
}

; 64-bit atomic increment on a default-address-space pointer with the result
; stored to %out; every run expects flat_atomic_inc_x2 with glc.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
  %rtn = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %rtn, i64* %out
  ret void
}

; As flat_atomic_inc_ret_i64, with the pointer advanced by four i64 elements.
; The bonaire and tonga runs expect no offset on the flat instruction; the
; gfx900 run expects offset:32.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
  %ptr.off = getelementptr i64, i64* %ptr, i32 4
  %rtn = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  store i64 %rtn, i64* %out
  ret void
}

; Result-unused 64-bit atomic increment on a default-address-space pointer;
; every run expects flat_atomic_inc_x2 without glc.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) #0 {
  %unused = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Result-unused 64-bit flat atomic increment with the pointer advanced by four
; i64 elements. Only the gfx900 run expects the displacement as offset:32.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) #0 {
  %ptr.off = getelementptr i64, i64* %ptr, i32 4
  %unused = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; 64-bit flat atomic increment on %ptr indexed by the workitem id and then
; advanced by five i64 elements; the result goes to the matching %out slot.
; Only the gfx900 run expects the constant part as offset:40.
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
  %wi = call i32 @llvm.amdgcn.workitem.id.x()
  %ptr.wi = getelementptr i64, i64* %ptr, i32 %wi
  %out.wi = getelementptr i64, i64* %out, i32 %wi
  %ptr.off = getelementptr i64, i64* %ptr.wi, i32 5
  %rtn = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  store i64 %rtn, i64* %out.wi
  ret void
}

; Result-unused variant of flat_atomic_inc_ret_i64_offset_addr64; the checks
; expect flat_atomic_inc_x2 without glc, with offset:40 only on the gfx900 run.
; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
  %wi = call i32 @llvm.amdgcn.workitem.id.x()
  %ptr.wi = getelementptr i64, i64* %ptr, i32 %wi
  %ptr.off = getelementptr i64, i64* %ptr.wi, i32 5
  %unused = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr.off, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Two identical atomic-increment calls on the same addrspace(3) pointer, each
; result stored to its own output. The checks expect two separate
; ds_inc_rtn_u32 instructions, i.e. the calls must not be merged into one.
; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
  %first = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  %second = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)

  store i32 %first, i32 addrspace(1)* %out0
  store i32 %second, i32 addrspace(1)* %out1
  ret void
}

; Attribute groups referenced above:
;   #0 - the test kernels
;   #1 - llvm.amdgcn.workitem.id.x
;   #2 - the llvm.amdgcn.atomic.inc.* declarations
attributes #0 = { nounwind }
attributes #1 = { readnone nounwind }
attributes #2 = { argmemonly nounwind }