; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s

; Crash test only: no FileCheck patterns are used, so each run passes as long
; as llc compiles the module and -verify-machineinstrs reports no errors.

; This works because the promote alloca pass replaces these with LDS atomics.

; Private atomics have no real use, but the backend should at least not crash
; on them.

; atomicrmw add on an element of an addrspace(5) alloca, selected by the
; runtime value %in. The value returned by the atomicrmw is stored to %out.
define amdgpu_kernel void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
  %tmp = alloca [2 x i32], addrspace(5)
  %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
  %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
  ; Initialize both elements so the atomic operates on defined memory.
  store i32 0, i32 addrspace(5)* %tmp1
  store i32 1, i32 addrspace(5)* %tmp2
  ; Index with the kernel argument rather than a constant.
  %tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
  %tmp4 = atomicrmw add i32 addrspace(5)* %tmp3, i32 7 acq_rel
  store i32 %tmp4, i32 addrspace(1)* %out
  ret void
}

; cmpxchg (expected 0, new 1) on an element of an addrspace(5) alloca, selected
; by the runtime value %in. Field 0 of the { i32, i1 } result is stored to %out.
define amdgpu_kernel void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
  %tmp = alloca [2 x i32], addrspace(5)
  %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
  %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
  ; Initialize both elements so the atomic operates on defined memory.
  store i32 0, i32 addrspace(5)* %tmp1
  store i32 1, i32 addrspace(5)* %tmp2
  ; Index with the kernel argument rather than a constant.
  %tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
  %tmp4 = cmpxchg i32 addrspace(5)* %tmp3, i32 0, i32 1 acq_rel monotonic
  ; Only the i32 field is used; the i1 success flag is ignored.
  %val = extractvalue { i32, i1 } %tmp4, 0
  store i32 %val, i32 addrspace(1)* %out
  ret void
}