; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Instruction-selection test for the llvm.amdgcn.atomic.dec intrinsics.
; Each function calls the intrinsic on a pointer in address space 3 (lds_*),
; 1 (global_*) or 4 (flat_*), with or without a constant offset, and either
; stores the returned value (ret) or discards it (noret).
;
; Only the check prefixes enabled by the RUN lines above (GCN plus one of
; CI / VI) are honoured by FileCheck; a directive written with any other
; prefix is silently ignored. Every function therefore opens its own
; label block with an enabled prefix, and every pattern variable is captured
; inside the function that uses it.

declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2

declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2

declare i32 @llvm.amdgcn.workitem.id.x() #1

; ---------------------------------------------------------------------------
; i32, addrspace(3)
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; The data operand is the constant 42 passed in the IR below. The two
; v_mov_b32 instructions do not depend on each other, so their relative
; order is not constrained.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32:
; GCN: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
  ret void
}

; ---------------------------------------------------------------------------
; i32, addrspace(1)
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; K is captured here rather than inherited from an earlier function.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
  ret void
}

; The address is indexed by the workitem id plus a constant 5 elements; the
; bonaire run expects the addr64 buffer form, the tonga run the flat form.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
  store i32 %result, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
  ret void
}

; ---------------------------------------------------------------------------
; i32, addrspace(4)
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define void @flat_atomic_dec_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42)
  store i32 %result, i32 addrspace(4)* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define void @flat_atomic_dec_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
  store i32 %result, i32 addrspace(4)* %out
  ret void
}

; K is captured here rather than inherited from an earlier function.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define void @flat_atomic_dec_noret_i32(i32 addrspace(4)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define void @flat_atomic_dec_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define void @flat_atomic_dec_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
  %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id
  %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
  store i32 %result, i32 addrspace(4)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define void @flat_atomic_dec_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
  %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
  ret void
}

; ---------------------------------------------------------------------------
; i64, addrspace(4)
; The 64-bit constant 42 is expected as a register pair (low = 42, high = 0).
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define void @flat_atomic_dec_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42)
  store i64 %result, i64 addrspace(4)* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define void @flat_atomic_dec_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
  store i64 %result, i64 addrspace(4)* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define void @flat_atomic_dec_noret_i64(i64 addrspace(4)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define void @flat_atomic_dec_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define void @flat_atomic_dec_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
  %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id
  %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
  store i64 %result, i64 addrspace(4)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define void @flat_atomic_dec_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
  %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
  ret void
}

; ---------------------------------------------------------------------------
; i32, addrspace(3) array indexed by (workitem id + 2)
; ---------------------------------------------------------------------------

@lds0 = addrspace(3) global [512 x i32] undef

; The index (tid + 2) has a second use (the store to %add_use). The checks
; expect the constant part to appear as an 8-byte instruction offset
; (2 elements * 4 bytes) applied to the shifted workitem id.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; i64, addrspace(3)
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
  ret void
}

; The functions below exercise the 64-bit llvm.amdgcn.atomic.dec intrinsic.
; The 64-bit constant 42 is expected as a register pair (low = 42, high = 0).
; Every label uses a check prefix that the RUN lines actually enable, so each
; function's patterns are matched only against that function's own output.

; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
  ret void
}

; ---------------------------------------------------------------------------
; i64, addrspace(1)
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
define void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
define void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
define void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
  ret void
}

; The address is indexed by the workitem id plus a constant 5 elements; the
; bonaire run expects the addr64 buffer form, the tonga run the flat form.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
  store i64 %result, i64 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
  ret void
}

; ---------------------------------------------------------------------------
; i64, addrspace(3) array indexed by (workitem id + 2)
; ---------------------------------------------------------------------------

@lds1 = addrspace(3) global [512 x i64] undef, align 8

; The index (tid + 2) has a second use (the store to %add_use). The checks
; expect the constant part to appear as a 16-byte instruction offset
; (2 elements * 8 bytes) applied to the shifted workitem id.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i64 %val0, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind argmemonly }