; RUN: llc < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs | FileCheck %s -check-prefix=GCN

; Selection tests for the floating-point atomic add intrinsics:
;   * llvm.amdgcn.buffer.atomic.fadd  (f32 and v2f16)
;   * llvm.amdgcn.global.atomic.fadd  (f32 and v2f16)
; Every call leaves its result unused; each check line below expects a single
; atomic instruction with the operands shown.

declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)
declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float)
declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>)

;------------------------------------------------------------------------------
; Buffer atomics. The resource descriptor is passed inreg; the last two
; intrinsic operands are the immediate offset (i32) and an i1 flag that the
; checks expect to appear as "slc" when set.
;------------------------------------------------------------------------------

; f32, offset 0, flag clear.
; GCN-LABEL: {{^}}buffer_atomic_add_f32:
; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen
define amdgpu_ps void @buffer_atomic_add_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
main_body:
  %ret = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
  ret void
}

; f32, offset 4, flag set.
; GCN-LABEL: {{^}}buffer_atomic_add_f32_off4_slc:
; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen offset:4 slc
define amdgpu_ps void @buffer_atomic_add_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
main_body:
  %ret = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
  ret void
}

; v2f16, offset 0, flag clear.
; GCN-LABEL: {{^}}buffer_atomic_pk_add_v2f16:
; GCN: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 idxen
define amdgpu_ps void @buffer_atomic_pk_add_v2f16(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %vindex) {
main_body:
  %ret = call <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
  ret void
}

; v2f16, offset 4, flag set.
; GCN-LABEL: {{^}}buffer_atomic_pk_add_v2f16_off4_slc:
; GCN: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 idxen offset:4 slc
define amdgpu_ps void @buffer_atomic_pk_add_v2f16_off4_slc(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %vindex) {
main_body:
  %ret = call <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
  ret void
}

;------------------------------------------------------------------------------
; Global atomics. The pointer is a kernel argument; a getelementptr of +1 / -1
; element (4 bytes for both float and <2 x half>) is expected to be folded
; into the instruction as offset:4 / offset:-4. Checks are anchored at end of
; line so that an unexpected trailing operand or modifier fails the test.
;------------------------------------------------------------------------------

; f32, no offset.
; GCN-LABEL: {{^}}global_atomic_add_f32:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_add_f32(float addrspace(1)* %ptr, float %data) {
main_body:
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
  ret void
}

; f32, pointer advanced by one element.
; GCN-LABEL: {{^}}global_atomic_add_f32_off4:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4{{$}}
define amdgpu_kernel void @global_atomic_add_f32_off4(float addrspace(1)* %ptr, float %data) {
main_body:
  %p = getelementptr float, float addrspace(1)* %ptr, i64 1
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
  ret void
}

; f32, pointer moved back by one element.
; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}}
define amdgpu_kernel void @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {
main_body:
  %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
  ret void
}

; v2f16, no offset.
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
main_body:
  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)
  ret void
}

; v2f16, pointer advanced by one element.
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_off4:
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4{{$}}
define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
main_body:
  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 1
  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
  ret void
}

; v2f16, pointer moved back by one element.
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}}
define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
main_body:
  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1
  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
  ret void
}

; Make sure the instruction is still selected for a function whose target-cpu
; is an incorrect subtarget (gfx803), as long as the required feature
; (+atomic-fadd-insts) is present in its feature set.
; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspace(1)* %ptr, float %data) #0 {
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
  ret void
}

; Per-function override used by the wrong-subtarget test above.
attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }