; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}s_add_i32:
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}}
; GCN: v_mov_b32_e32 v[[V_REG:[0-9]+]], s[[REG]]
; GCN: buffer_store_dword v[[V_REG]],
define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %a = load i32, i32 addrspace(1)* %in
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v2i32:
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = add <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v4i32:
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = add <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v8i32:
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT

; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
  %0 = add <8 x i32> %a, %b
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v16i32:
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT

; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
  %0 = add <16 x i32> %a, %b
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_add_i32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[B]], [[A]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %b = load volatile i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_add_imm_i32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]]
define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %result = add i32 %a, 123
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}add64:
; GCN: s_add_u32
; GCN: s_addc_u32

; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]]
; EG-DAG: ADD_INT {{[* ]*}}
; EG-DAG: ADDC_UINT
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %add = add i64 %a, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; The v_addc_u32 and v_add_i32 instructions can't read SGPRs, because they
; use VCC. The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the compiler will be forced to copy %a
; to a VGPR before doing the add.

; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
; GCN-NOT: v_addc_u32_e32 s

; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]]
; EG-DAG: ADD_INT {{[* ]*}}
; EG-DAG: ADDC_UINT
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  %1 = add i64 %a, %0
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Test i64 add inside a branch.
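; %a and %b are kernel arguments, so the add in the else block is uniform;
; the GCN checks below expect it to still select to the scalar
; s_add_u32/s_addc_u32 pair even though it sits inside the branch.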
; FUNC-LABEL: {{^}}add64_in_branch:
; GCN: s_add_u32
; GCN: s_addc_u32

; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]]
; EG-DAG: ADD_INT {{[* ]*}}
; EG-DAG: ADDC_UINT
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
  %0 = icmp eq i64 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = load i64, i64 addrspace(1)* %in
  br label %endif

else:
  %2 = add i64 %a, %b
  br label %endif

endif:
  %3 = phi i64 [%1, %if], [%2, %else]
  store i64 %3, i64 addrspace(1)* %out
  ret void
}

declare i32 @llvm.r600.read.tidig.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }