; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX908 %s

; Exercises selection of the llvm.amdgcn.global.atomic.fadd intrinsics (f32 and
; <2 x half>) through GlobalISel on gfx908. Every function discards the
; intrinsic's result (%ret is never used) and the expected assembly uses the
; non-returning form of the atomic with the "off" SGPR-base operand.

; Baseline f32 case: the pointer argument is used directly, with no offset.
define void @global_atomic_fadd_f32(float addrspace(1)* %ptr, float %data) {
; GFX908-LABEL: global_atomic_fadd_f32:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    global_atomic_add_f32 v[0:1], v2, off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
  ret void
}

; f32 at element index 512, i.e. a byte offset of 2048 (0x800). The expected
; output materializes the offset in registers and performs a 64-bit add on the
; address instead of encoding it in the atomic instruction.
; Presumably 2048 is outside the immediate range that is folded for this
; target - confirm against the backend before relying on that explanation.
define void @global_atomic_fadd_f32_off_2048(float addrspace(1)* %ptr, float %data) {
; GFX908-LABEL: global_atomic_fadd_f32_off_2048:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    s_movk_i32 s4, 0x800
; GFX908-NEXT:    s_mov_b32 s5, 0
; GFX908-NEXT:    v_mov_b32_e32 v3, s4
; GFX908-NEXT:    v_mov_b32_e32 v4, s5
; GFX908-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
; GFX908-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
; GFX908-NEXT:    global_atomic_add_f32 v[0:1], v2, off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 512
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data)
  ret void
}

; f32 at element index -511. The 64-bit offset constant is built from
; 0xfffff804 (low half) and -1 (high half) and added to the address.
; Review note: -511 * 4 bytes is -2044, not the -2047 suggested by the function
; name; the name is kept as-is so the symbol and its label stay stable.
define void @global_atomic_fadd_f32_off_neg2047(float addrspace(1)* %ptr, float %data) {
; GFX908-LABEL: global_atomic_fadd_f32_off_neg2047:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    s_mov_b32 s4, 0xfffff804
; GFX908-NEXT:    s_mov_b32 s5, -1
; GFX908-NEXT:    v_mov_b32_e32 v3, s4
; GFX908-NEXT:    v_mov_b32_e32 v4, s5
; GFX908-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
; GFX908-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
; GFX908-NEXT:    global_atomic_add_f32 v[0:1], v2, off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 -511
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data)
  ret void
}

; Kernel (amdgpu_kernel) variant of the 2048-byte offset case. The expected
; output loads both arguments with scalar loads from s[4:5], adds 0x800 to the
; pointer with scalar add/addc, then copies address and data into VGPRs for the
; atomic. No wait is expected between the atomic and s_endpgm.
define amdgpu_kernel void @global_atomic_fadd_f32_off_ss(float addrspace(1)* %ptr, float %data) {
; GFX908-LABEL: global_atomic_fadd_f32_off_ss:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX908-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
; GFX908-NEXT:    s_add_u32 s0, s0, 0x800
; GFX908-NEXT:    s_addc_u32 s1, s1, 0
; GFX908-NEXT:    v_mov_b32_e32 v0, s0
; GFX908-NEXT:    v_mov_b32_e32 v1, s1
; GFX908-NEXT:    v_mov_b32_e32 v2, s2
; GFX908-NEXT:    global_atomic_add_f32 v[0:1], v2, off
; GFX908-NEXT:    s_endpgm
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 512
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data)
  ret void
}

; Baseline packed-half case: <2 x half> data, pointer used directly; the
; expected instruction is global_atomic_pk_add_f16.
define void @global_atomic_fadd_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
; GFX908-LABEL: global_atomic_fadd_v2f16:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)
  ret void
}

; Packed-half case at element index -511. The expected address arithmetic is
; the same as in the f32 negative-offset test (constant 0xfffff804 / -1).
; Review note: as with the f32 variant, the byte offset is -2044 rather than
; the -2047 in the function name.
define void @global_atomic_fadd_v2f16_off_neg2047(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
; GFX908-LABEL: global_atomic_fadd_v2f16_off_neg2047:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    s_mov_b32 s4, 0xfffff804
; GFX908-NEXT:    s_mov_b32 s5, -1
; GFX908-NEXT:    v_mov_b32_e32 v3, s4
; GFX908-NEXT:    v_mov_b32_e32 v4, s5
; GFX908-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
; GFX908-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
; GFX908-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -511
  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %gep, <2 x half> %data)
  ret void
}

; Intrinsic declarations shared by the tests above; both use attribute group #0.
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #0
declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* nocapture, <2 x half>) #0

attributes #0 = { argmemonly nounwind willreturn }