; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s

; Codegen tests for 32-bit atomicrmw on addrspace(3) pointers.
;
; Prefix usage, as set up by the invocations above:
;   GCN     - every amdgcn invocation
;   SI      - default amdgcn target only
;   CIVI    - bonaire and tonga
;   SICIVI  - default amdgcn target, bonaire and tonga; these checks expect an
;             "s_mov_b32 m0" to be emitted
;   GFX9    - gfx900; these checks expect that m0 is not mentioned
;   EG      - r600 / redwood
;   FUNC    - all invocations (function labels)

; xchg with the old value stored to %out: the returning DS exchange is
; expected, fed by the constant 4 and the pointer copied from an SGPR.
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
; EG: LDS_WRXCHG_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as above, but the address is %ptr + 4 elements (16 bytes); the checks
; expect the constant to be folded into the instruction as offset:16.
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_WRXCHG_RET *
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; XXX - Is it really necessary to load 4 into VGPR?
; Returning add of the constant 4: the previous memory value is written to
; %out, so the checks require the *_rtn_* form and a store of its result.
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
; EG: LDS_ADD_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %old = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %old, i32 addrspace(1)* %out, align 4
  ret void
}

; Constant element index 4 (16 bytes) is expected to become offset:16.
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_ADD_RET *
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %elt.ptr = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %old = atomicrmw add i32 addrspace(3)* %elt.ptr, i32 4 seq_cst
  store i32 %old, i32 addrspace(1)* %out, align 4
  ret void
}

; The index is (%a - %b) + 4, i.e. a variable part plus a constant. The CI/VI
; checks require the constant to be folded as offset:16, while the SI check
; line accepts the instruction without an offset operand.
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %diff = sub i32 %a, %b
  %idx = add i32 %diff, 4
  %elt.ptr = getelementptr i32, i32 addrspace(3)* %ptr, i32 %idx
  %old = atomicrmw add i32 addrspace(3)* %elt.ptr, i32 4 seq_cst
  store i32 %old, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32:
; EG: LDS_ADD_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; add of 1 at %ptr + 4 elements: still a plain returning add of a
; materialized 1, with the constant index folded as offset:16.
; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset:
; EG: LDS_ADD_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Variable index plus constant 4: CI/VI must fold offset:16, the SI check
; line accepts the instruction without an offset operand.
; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning sub of the constant 4.
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
; EG: LDS_SUB_RET *

; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
; EG: LDS_SUB_RET *

; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning sub of 1: expected as a plain sub with a materialized 1.
; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32:
; EG: LDS_SUB_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset:
; EG: LDS_SUB_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning bitwise and / or / xor, each with and without a constant offset.
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
; EG: LDS_AND_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_AND_RET *
; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_OR_RET *
; GCN: ds_or_rtn_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_OR_RET *
; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_XOR_RET *
; GCN: ds_xor_rtn_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_XOR_RET *
; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; The nand test below is disabled: the function is commented out and its
; label directive is spelled XFUNC-LABEL so that no FUNC check is registered.
; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
; define amdgpu_kernel void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
;   store i32 %result, i32 addrspace(1)* %out, align 4
;   ret void
; }

; Returning signed min / max: the checks require the *_i32 opcodes on GCN and
; the *_INT_RET opcodes on r600.
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_INT_RET *
; GCN: ds_min_rtn_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_INT_RET *
; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_INT_RET *
; GCN: ds_max_rtn_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_INT_RET *
; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Returning unsigned min / max: the checks require the *_u32 opcodes on GCN
; and the *_UINT_RET opcodes on r600.
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_UINT_RET *
; GCN: ds_min_rtn_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_UINT_RET *
; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_UINT_RET *
; GCN: ds_max_rtn_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_UINT_RET *
; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; From here on the atomicrmw result is unused ("noret" kernels take only the
; LDS pointer and store nothing). No r600 checks are present for these.
;
; For xchg the checks still expect the returning ds_wrxchg_rtn_b32 form even
; though the result is unused.
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; Unused-result add: the two-operand, non-returning ds_add_u32 is expected.
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_add_u32 [[VPTR]], [[DATA]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; Variable index plus constant 4: CI/VI must fold offset:16, the SI check
; line accepts the instruction without an offset operand.
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; Unused-result add of 1: expected as a plain add with a materialized 1.
; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  ret void
}

; Unused-result sub, with the constants 4 and 1.
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
  ret void
}

; Unused-result bitwise and / or / xor, each with and without an offset.
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; The nand test below is disabled: the function is commented out and its
; label directive is spelled XFUNC-LABEL so that no FUNC check is registered.
; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
; define amdgpu_kernel void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
;   ret void
; }

; Unused-result signed min / max (ds_min_i32 / ds_max_i32 expected).
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; Unused-result unsigned min / max (ds_min_u32 / ds_max_u32 expected).
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}