; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s

; FUNC-LABEL: {{^}}s_add_i32:
; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}}
; GCN: v_mov_b32_e32 v[[V_REG:[0-9]+]], s[[REG]]
; GCN: buffer_store_dword v[[V_REG]],
define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %a = load i32, i32 addrspace(1)* %in
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v2i32:
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = add <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v4i32:
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = add <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v8i32:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
  %0 = add <8 x i32> %a, %b
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v16i32:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
  %0 = add <16 x i32> %a, %b
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_add_i32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[A]], [[B]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %b = load volatile i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_add_imm_i32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]]
define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %result = add i32 %a, 123
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}add64:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %add = add i64 %a, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; The v_addc_u32 and v_add_i32 instructions can't read SGPRs, because they
; use VCC.  The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the compiler will be forced to copy %a
; to a VGPR before doing the add.

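; For illustration only, a rough sketch of the kind of sequence expected here
; (the register numbers below are assumptions, not something this test checks):
; the SGPR half gets copied into a VGPR before the carry add, e.g.
;   v_mov_b32_e32 v2, s1
;   v_addc_u32_e32 v1, vcc, v2, v1, vcc
; rather than a v_addc_u32 that reads the SGPR operand directly.
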
; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
; GCN-NOT: v_addc_u32_e32 s
define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  %1 = add i64 %a, %0
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Test i64 add inside a branch.
; FUNC-LABEL: {{^}}add64_in_branch:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
  %0 = icmp eq i64 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = load i64, i64 addrspace(1)* %in
  br label %endif

else:
  %2 = add i64 %a, %b
  br label %endif

endif:
  %3 = phi i64 [%1, %if], [%2, %else]
  store i64 %3, i64 addrspace(1)* %out
  ret void
}

; Make sure the VOP3 form of add is initially selected. Otherwise a pair
; of copies from/to VCC would be necessary.

; GCN-LABEL: {{^}}add_select_vop3:
; SI: v_add_i32_e64 v0, s[0:1], s0, v0
; VI: v_add_u32_e64 v0, s[0:1], s0, v0
; GFX9: v_add_u32_e32 v0, s0, v0

; GCN: ; def vcc
; GCN: ds_write_b32
; GCN: ; use vcc
define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) {
  %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"()
  %sub = add i32 %v, %s
  store i32 %sub, i32 addrspace(3)* undef
  call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc)
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }