; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Instruction-selection tests for the llvm.amdgcn.atomic.inc intrinsics.
;
; The kernels below call the i32 and i64 forms of the intrinsic through
; addrspace(3) pointers (lds_* kernels), addrspace(1) pointers (global_*
; kernels) and default-address-space pointers (flat_* kernels).  "ret" kernels
; store the value returned by the intrinsic, "noret" kernels leave it unused,
; "offset" kernels add a constant getelementptr in front of the call, and
; "addr64" kernels additionally index the pointer by the workitem id.
;
; The check lines in front of each kernel pin the instruction expected for each
; of the three -mcpu values in the RUN lines above.  FileCheck variables are
; scoped per label because of -enable-var-scope, so names such as K, KLO and
; KHI are redefined freely from test to test.

declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2

declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2

declare i32 @llvm.amdgcn.workitem.id.x() #1

; ---- i32, addrspace(3) pointer ----

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; The data-operand check below spells out the full constant 42 that the kernel
; passes to the intrinsic, so that a different immediate that merely starts
; with the digit 4 cannot satisfy it.

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; ---- i32, addrspace(1) pointer ----

; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GFX9: global_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]], off{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GFX9: global_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; ---- i32, addrspace(3) array indexed by (workitem id + 2) ----
; The index is also stored to %add_use, giving it a second use besides the
; address computation.

@lds0 = addrspace(3) global [512 x i32] undef, align 4

; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}

; ---- i64, addrspace(3) pointer ----

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---- i64, addrspace(1) pointer ----

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}

; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---- i32, default-address-space pointer ----

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %out.gep = getelementptr i32, i32* %out, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; ---- i64, addrspace(3) array indexed by (workitem id + 2) ----
; The index is also stored to %add_use, giving it a second use besides the
; address computation.

@lds1 = addrspace(3) global [512 x i64] undef, align 8

; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i64 %val0, i64 addrspace(1)* %out
  ret void
}

; ---- i64, default-address-space pointer ----

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %out.gep = getelementptr i64, i64* %out, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---- two identical calls on the same pointer ----
; The checks expect one ds_inc_rtn_u32 per call, i.e. the second call is not
; folded into the first.

; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)

  store i32 %result0, i32 addrspace(1)* %out0
  store i32 %result1, i32 addrspace(1)* %out1
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind argmemonly }