; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,VI,SICIVI,GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(double addrspace(1)* %out, double addrspace(3)* %ptr) nounwind {
  %gep = getelementptr double, double addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg double addrspace(3)* %gep, double 4.0 seq_cst
  store double %result, double addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
; define amdgpu_kernel void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
;   store i64 %result, i64 addrspace(1)* %out, align 8
;   ret void
; }

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
; define amdgpu_kernel void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
;   ret void
; }

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}