; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC

; Intrinsic called by @work_item_info below.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; A private [5 x i32] array is written at two indices loaded from %in and then
; read back at the constant indices 0 and 1.  The checks below expect two
; LDS writes followed by two LDS reads on R600, and two DS writes followed by
; two DS reads on SI.

; FUNC-LABEL: @mova_same_clause

; R600-CHECK: LDS_WRITE
; R600-CHECK: LDS_WRITE
; R600-CHECK: LDS_READ
; R600-CHECK: LDS_READ

; SI-CHECK: DS_WRITE_B32
; SI-CHECK: DS_WRITE_B32
; SI-CHECK: DS_READ_B32
; SI-CHECK: DS_READ_B32
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
  %stack = alloca [5 x i32], align 4
  ; stack[in[0]] = 4
  %0 = load i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
  store i32 4, i32* %arrayidx1, align 4
  ; stack[in[1]] = 5
  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
  %1 = load i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
  store i32 5, i32* %arrayidx3, align 4
  ; out[0] = stack[0]
  %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
  %2 = load i32* %arrayidx10, align 4
  store i32 %2, i32 addrspace(1)* %out, align 4
  ; out[1] = stack[1]
  %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
  %3 = load i32* %arrayidx12
  %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
  store i32 %3, i32 addrspace(1)* %arrayidx13
  ret void
}

; This test checks that the stack offset is calculated correctly for structs.
; All register loads/stores should be optimized away, so there shouldn't be
; any MOVA instructions.
;
; XXX: The generated code has unnecessary MOVs; we should be able to optimize
; this.

; FUNC-LABEL: @multiple_structs
; R600-CHECK-NOT: MOVA_INT
; SI-CHECK-NOT: V_MOVREL
%struct.point = type { i32, i32 }

define void @multiple_structs(i32 addrspace(1)* %out) {
entry:
  ; Two separate private structs, so each one needs its own stack offset.
  %a = alloca %struct.point
  %b = alloca %struct.point
  %a.x.ptr = getelementptr %struct.point* %a, i32 0, i32 0
  %a.y.ptr = getelementptr %struct.point* %a, i32 0, i32 1
  %b.x.ptr = getelementptr %struct.point* %b, i32 0, i32 0
  %b.y.ptr = getelementptr %struct.point* %b, i32 0, i32 1
  store i32 0, i32* %a.x.ptr
  store i32 1, i32* %a.y.ptr
  store i32 2, i32* %b.x.ptr
  store i32 3, i32* %b.y.ptr
  ; Re-derive pointers to field 0 of each struct (the same locations as
  ; %a.x.ptr and %b.x.ptr) and read the stored values back.
  %a.indirect.ptr = getelementptr %struct.point* %a, i32 0, i32 0
  %b.indirect.ptr = getelementptr %struct.point* %b, i32 0, i32 0
  %a.indirect = load i32* %a.indirect.ptr
  %b.indirect = load i32* %b.indirect.ptr
  %0 = add i32 %a.indirect, %b.indirect
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Test direct access of a private array inside a loop.  The private array
; loads and stores should be lowered to copies, so there shouldn't be any
; MOVA instructions.

; FUNC-LABEL: @direct_loop
; R600-CHECK-NOT: MOVA_INT
; SI-CHECK-NOT: V_MOVREL

define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %prv_array_const = alloca [2 x i32]
  %prv_array = alloca [2 x i32]
  ; prv_array_const[0] = in[0]; prv_array_const[1] = in[1]
  %a = load i32 addrspace(1)* %in
  %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
  %b = load i32 addrspace(1)* %b_src_ptr
  %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
  store i32 %a, i32* %a_dst_ptr
  %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
  store i32 %b, i32* %b_dst_ptr
  br label %for.body

for.body:
  ; Each iteration does prv_array[0] += prv_array_const[0]; only constant
  ; indices are used.
  ; NOTE(review): %prv_array is not stored to before the first load of %y_ptr
  ; in this function -- confirm that reading it uninitialized is intended.
  %inc = phi i32 [0, %entry], [%count, %for.body]
  %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
  %x = load i32* %x_ptr
  %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
  %y = load i32* %y_ptr
  %xy = add i32 %x, %y
  store i32 %xy, i32* %y_ptr
  %count = add i32 %inc, 1
  ; Leave the loop once the incremented counter reaches 4095.
  %done = icmp eq i32 %count, 4095
  br i1 %done, label %for.end, label %for.body

for.end:
  ; out[0] = prv_array[0]
  %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
  %value = load i32* %value_ptr
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Private [2 x i16] array read at the index given by the %index argument.
; The checks below expect a MOVA_INT on R600 and a V_MOVRELS_B32_e32 on SI.

; FUNC-LABEL: @short_array

; R600-CHECK: MOVA_INT

; SI-CHECK: V_MOVRELS_B32_e32
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %0 = alloca [2 x i16]
  %1 = getelementptr [2 x i16]* %0, i32 0, i32 0
  %2 = getelementptr [2 x i16]* %0, i32 0, i32 1
  store i16 0, i16* %1
  store i16 1, i16* %2
  ; Load the element selected by %index and sign-extend it to i32.
  %3 = getelementptr [2 x i16]* %0, i32 0, i32 %index
  %4 = load i16* %3
  %5 = sext i16 %4 to i32
  store i32 %5, i32 addrspace(1)* %out
  ret void
}

; Private [2 x i8] array read at the index given by the %index argument.
; The checks below expect a MOVA_INT on R600; on SI they expect a V_OR_B32_e32
; with the constant 0x100 followed by a V_MOVRELS_B32_e32.

; FUNC-LABEL: @char_array

; R600-CHECK: MOVA_INT

; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100
; SI-CHECK: V_MOVRELS_B32_e32
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %0 = alloca [2 x i8]
  %1 = getelementptr [2 x i8]* %0, i32 0, i32 0
  %2 = getelementptr [2 x i8]* %0, i32 0, i32 1
  store i8 0, i8* %1
  store i8 1, i8* %2
  ; Load the element selected by %index and sign-extend it to i32.
  %3 = getelementptr [2 x i8]* %0, i32 0, i32 %index
  %4 = load i8* %3
  %5 = sext i8 %4 to i32
  store i32 %5, i32 addrspace(1)* %out
  ret void
}

; Make sure we don't overwrite workitem information with private memory.
; The function mixes a dynamically indexed private array with the result of
; the tidig.x intrinsic; the checks reject a MOV into T0.X on R600 and a
; V_MOV_B32 into v0 on SI.

; FUNC-LABEL: @work_item_info
; R600-CHECK-NOT: MOV T0.X
; Additional check in case the move ends up in the last slot.  (The register
; name is T0 with the digit zero.)
; R600-CHECK-NOT: MOV * T0.X

; SI-CHECK-NOT: V_MOV_B32_e{{(32|64)}} v0
define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
  %0 = alloca [2 x i32]
  %1 = getelementptr [2 x i32]* %0, i32 0, i32 0
  %2 = getelementptr [2 x i32]* %0, i32 0, i32 1
  store i32 0, i32* %1
  store i32 1, i32* %2
  ; Dynamically indexed read of the private array.
  %3 = getelementptr [2 x i32]* %0, i32 0, i32 %in
  %4 = load i32* %3
  ; Combine the loaded value with the intrinsic's result.
  %5 = call i32 @llvm.r600.read.tidig.x()
  %6 = add i32 %4, %5
  store i32 %6, i32 addrspace(1)* %out
  ret void
}

; Test that two stack objects are not stored in the same register
; The second stack object should be in T3.X
;
; NOTE(review): the two checks below spelled 'R600_CHECK' (underscore) do not
; match any prefix given on the FileCheck command lines in this file, so
; FileCheck appears to skip them and the CHAN variable is never defined.
; They are left unchanged here; confirm the intended behavior before
; correcting the spelling.
; FUNC-LABEL: @no_overlap
; R600_CHECK: MOV
; R600_CHECK: [[CHAN:[XYZW]]]+
; R600-CHECK-NOT: [[CHAN]]+
; SI-CHECK: V_MOV_B32_e32 v3
define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
entry:
  ; Two byte arrays of different sizes (3 and 2 elements), both align 1.
  %0 = alloca [3 x i8], align 1
  %1 = alloca [2 x i8], align 1
  %2 = getelementptr [3 x i8]* %0, i32 0, i32 0
  %3 = getelementptr [3 x i8]* %0, i32 0, i32 1
  %4 = getelementptr [3 x i8]* %0, i32 0, i32 2
  %5 = getelementptr [2 x i8]* %1, i32 0, i32 0
  %6 = getelementptr [2 x i8]* %1, i32 0, i32 1
  store i8 0, i8* %2
  store i8 1, i8* %3
  store i8 2, i8* %4
  store i8 1, i8* %5
  store i8 0, i8* %6
  ; Read both arrays at the same dynamic index %in and add the results.
  %7 = getelementptr [3 x i8]* %0, i32 0, i32 %in
  %8 = getelementptr [2 x i8]* %1, i32 0, i32 %in
  %9 = load i8* %7
  %10 = load i8* %8
  %11 = add i8 %9, %10
  %12 = sext i8 %11 to i32
  store i32 %12, i32 addrspace(1)* %out
  ret void
}

; Dynamic index into the inner dimension of a private [2 x [2 x i8]] array.
; No check lines accompany this function here, so it only exercises that the
; llc invocations process it.
define void @char_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x [2 x i8]]
  %gep0 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
  store i8 0, i8* %gep0
  store i8 1, i8* %gep1
  ; Read alloca[0][%index] and sign-extend it to i32.
  %gep2 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i8* %gep2
  %sext = sext i8 %load to i32
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}

; Dynamic index into the inner dimension of a private [2 x [2 x i32]] array.
; No check lines accompany this function here, so it only exercises that the
; llc invocations process it.
define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x [2 x i32]]
  %gep0 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  ; Read alloca[0][%index].
  %gep2 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

; Dynamic index into the inner dimension of a private [2 x [2 x i64]] array.
; No check lines accompany this function here, so it only exercises that the
; llc invocations process it.
define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x [2 x i64]]
  %gep0 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
  store i64 0, i64* %gep0
  store i64 1, i64* %gep1
  ; Read alloca[0][%index].
  %gep2 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i64* %gep2
  store i64 %load, i64 addrspace(1)* %out
  ret void
}

; Two-field i32 struct used as the element type in @struct_array_array and
; @struct_pair32_array.
%struct.pair32 = type { i32, i32 }

; Dynamic index into the inner dimension of a private 2x2 array of
; %struct.pair32.  No check lines accompany this function here, so it only
; exercises that the llc invocations process it.
define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x [2 x %struct.pair32]]
  ; Stores go to field 1 of alloca[0][0] and alloca[0][1].
  %gep0 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
  %gep1 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  ; The load reads field 0 of alloca[0][%index], which none of the stores
  ; above write.
  %gep2 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
  %load = load i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

; Dynamic index into a private [2 x %struct.pair32] array.  No check lines
; accompany this function here, so it only exercises that the llc invocations
; process it.
define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x %struct.pair32]
  ; Stores go to field 1 of element 0 and field 0 of element 1.
  %gep0 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
  %gep1 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  ; Read field 0 of the element selected by %index.
  %gep2 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
  %load = load i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

; Loads through a pointer chosen by a select between two elements of a private
; array.  No check lines accompany this function here, so it only exercises
; that the llc invocations process it.
define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
  %tmp = alloca [2 x i32]
  %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
  %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
  store i32 0, i32* %tmp1
  store i32 1, i32* %tmp2
  ; Pick the pointer to element 0 when %in == 0, otherwise element 1.
  %cmp = icmp eq i32 %in, 0
  %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
  %load = load i32* %sel
  store i32 %load, i32 addrspace(1)* %out
  ret void
}