; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s

; Instruction-selection tests for integer add on amdgcn.
;
; Check prefixes used below:
;   GCN, FUNC - enabled for every RUN line
;   SIVI      - enabled for the verde and tonga RUN lines
;   SI        - enabled for the verde RUN line only
;   VI        - enabled for the tonga RUN line only
;   GFX9      - enabled for the gfx900 RUN line only

; i32 add of two values loaded through a kernel-argument pointer. Expects a
; scalar add whose result is copied into a VGPR that feeds the store.
; FUNC-LABEL: {{^}}s_add_i32:
; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}}
; GCN: v_mov_b32_e32 v[[V_REG:[0-9]+]], s[[REG]]
; GCN: buffer_store_dword v[[V_REG]],
define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %a = load i32, i32 addrspace(1)* %in
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; <2 x i32> add: expects one scalar add per element.
; FUNC-LABEL: {{^}}s_add_v2i32:
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = add <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> add: expects one scalar add per element.
; FUNC-LABEL: {{^}}s_add_v4i32:
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = add <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; <8 x i32> add of two by-value kernel arguments: expects eight scalar adds.
; FUNC-LABEL: {{^}}s_add_v8i32:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
  %0 = add <8 x i32> %a, %b
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; <16 x i32> add of two by-value kernel arguments: expects sixteen scalar adds.
; FUNC-LABEL: {{^}}s_add_v16i32:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
  %0 = add <16 x i32> %a, %b
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

; i32 add of two values loaded from addresses indexed by the workitem id.
; Expects a vector add of the two loaded registers. The verde/tonga checks
; expect an explicit vcc operand; the gfx900 check expects none.
; FUNC-LABEL: {{^}}v_add_i32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[A]], [[B]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %b = load volatile i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; i32 add of a loaded value and the constant 123. Expects the constant to
; appear as the literal operand 0x7b of the vector add.
; FUNC-LABEL: {{^}}v_add_imm_i32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]]
define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %a = load volatile i32, i32 addrspace(1)* %gep
  %result = add i32 %a, 123
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; i64 add of two by-value kernel arguments: expects a scalar add followed by
; a scalar add-with-carry.
; FUNC-LABEL: {{^}}add64:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %add = add i64 %a, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; The v_addc_u32 and v_add_i32 instruction can't read SGPRs, because they
; use VCC. The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the compiler will be forced to copy %a
; to a VGPR before doing the add.

; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
; GCN-NOT: v_addc_u32_e32 s
define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  %1 = add i64 %a, %0
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Test i64 add inside a branch.
; FUNC-LABEL: {{^}}add64_in_branch:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
  %0 = icmp eq i64 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = load i64, i64 addrspace(1)* %in
  br label %endif

else:
  %2 = add i64 %a, %b
  br label %endif

endif:
  %3 = phi i64 [%1, %if], [%2, %else]
  store i64 %3, i64 addrspace(1)* %out
  ret void
}

; Make sure the VOP3 form of add is initially selected. Otherwise a pair
; of copies from/to VCC would be necessary.
;
; The inline asm defines vcc before the add and uses it afterwards, so vcc
; is live across the add. The per-target check lines below rely on the SI
; and VI prefixes being enabled in the RUN lines at the top of the file.

; GCN-LABEL: {{^}}add_select_vop3:
; SI: v_add_i32_e64 v0, s[0:1], s0, v0
; VI: v_add_u32_e64 v0, s[0:1], s0, v0
; GFX9: v_add_u32_e32 v0, s0, v0

; GCN: ; def vcc
; GCN: ds_write_b32
; GCN: ; use vcc
define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) {
  %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"()
  %add = add i32 %v, %s
  store i32 %add, i32 addrspace(3)* undef
  call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc)
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }