; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s

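; SI, CI, and VI must initialize m0 before any DS (LDS) access, which the
; SICIVI "s_mov_b32 m0" checks below verify; GFX9 no longer uses m0 for LDS
; addressing, hence the GFX9-NOT checks.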

; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
; EG: LDS_WRXCHG_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_WRXCHG_RET *
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_f32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_WRXCHG_RET *
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_f32_offset(float addrspace(1)* %out, float addrspace(3)* %ptr) nounwind {
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg float addrspace(3)* %gep, float 4.0 seq_cst
  store float %result, float addrspace(1)* %out, align 4
  ret void
}

; XXX - Is it really necessary to load 4 into VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
; EG: LDS_ADD_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_ADD_RET *
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32:
; EG: LDS_ADD_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset:
; EG: LDS_ADD_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
; EG: LDS_SUB_RET *

; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
; EG: LDS_SUB_RET *

; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32:
; EG: LDS_SUB_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset:
; EG: LDS_SUB_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
; EG: LDS_AND_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_AND_RET *
; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_OR_RET *
; GCN: ds_or_rtn_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_OR_RET *
; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_XOR_RET *
; GCN: ds_xor_rtn_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_XOR_RET *
; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
; define amdgpu_kernel void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
;   store i32 %result, i32 addrspace(1)* %out, align 4
;   ret void
; }

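; A rough illustrative sketch (kept commented out and not exercised by any RUN
; line) of one way the nand could be expanded into a cmpxchg loop; the
; @lds_atomic_nand_ret_i32_expanded name is made up for this example:
; define amdgpu_kernel void @lds_atomic_nand_ret_i32_expanded(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
; entry:
;   %init = load i32, i32 addrspace(3)* %ptr, align 4
;   br label %loop
; loop:
;   %old = phi i32 [ %init, %entry ], [ %loaded, %loop ]
;   %and = and i32 %old, 4
;   %nand = xor i32 %and, -1
;   %pair = cmpxchg i32 addrspace(3)* %ptr, i32 %old, i32 %nand seq_cst seq_cst
;   %loaded = extractvalue { i32, i1 } %pair, 0
;   %success = extractvalue { i32, i1 } %pair, 1
;   br i1 %success, label %done, label %loop
; done:
;   store i32 %loaded, i32 addrspace(1)* %out, align 4
;   ret void
; }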

; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_INT_RET *
; GCN: ds_min_rtn_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_INT_RET *
; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_INT_RET *
; GCN: ds_max_rtn_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_INT_RET *
; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_UINT_RET *
; GCN: ds_min_rtn_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_UINT_RET *
; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_UINT_RET *
; GCN: ds_max_rtn_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_UINT_RET *
; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_add_u32 [[VPTR]], [[DATA]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
; define amdgpu_kernel void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
;   ret void
; }

; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}