1 /*
2 * Copyright 2021 Collabora, Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "agx_builder.h"
7 #include "agx_compiler.h"
8 #include "agx_test.h"
9
10 #include <gtest/gtest.h>
11
/*
 * Run one lowering test: emit the parallel copy list `copies` through
 * agx_emit_parallel_copies() into shader A, build the hand-written
 * `expected` instruction sequence into shader B (with `b` bound to B's
 * builder so the agx_*_to helpers target it), and assert the two shaders
 * are identical.
 *
 * scratch_size is set nonzero so copies with memory (stack) destinations
 * are legal — required by the StackCopies test.
 */
#define CASE(copies, expected)                                                 \
   do {                                                                        \
      agx_builder *A = agx_test_builder(mem_ctx);                              \
      agx_builder *B = agx_test_builder(mem_ctx);                              \
                                                                               \
      A->shader->scratch_size = 2000;                                          \
      agx_emit_parallel_copies(A, copies, ARRAY_SIZE(copies));                 \
                                                                               \
      {                                                                        \
         agx_builder *b = B;                                                   \
         expected;                                                             \
      }                                                                        \
                                                                               \
      ASSERT_SHADER_EQUAL(A->shader, B->shader);                               \
   } while (0)
27
28 static inline void
extr_swap(agx_builder * b,agx_index x)29 extr_swap(agx_builder *b, agx_index x)
30 {
31 x.size = AGX_SIZE_32;
32 agx_extr_to(b, x, x, x, agx_immediate(16), 0);
33 }
34
35 static inline void
xor_swap(agx_builder * b,agx_index x,agx_index y)36 xor_swap(agx_builder *b, agx_index x, agx_index y)
37 {
38 agx_xor_to(b, x, x, y);
39 agx_xor_to(b, y, x, y);
40 agx_xor_to(b, x, x, y);
41 }
42
43 class LowerParallelCopy : public testing::Test {
44 protected:
LowerParallelCopy()45 LowerParallelCopy()
46 {
47 mem_ctx = ralloc_context(NULL);
48 }
49
~LowerParallelCopy()50 ~LowerParallelCopy()
51 {
52 ralloc_free(mem_ctx);
53 }
54
55 void *mem_ctx;
56 };
57
/* Copies whose sources and destinations are all disjoint lower to plain
 * moves in the original order, for both 32-bit and 16-bit registers.
 */
TEST_F(LowerParallelCopy, UnrelatedCopies)
{
   struct agx_copy test_1[] = {
      {.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
      {.dest = 4, .src = agx_register(6, AGX_SIZE_32)},
   };

   CASE(test_1, {
      agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
      agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(6, AGX_SIZE_32));
   });

   struct agx_copy test_2[] = {
      {.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
      {.dest = 4, .src = agx_register(5, AGX_SIZE_16)},
   };

   CASE(test_2, {
      agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
      agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(5, AGX_SIZE_16));
   });
}
80
/* Multiple copies reading the same source do not conflict: the source is
 * never overwritten, so the lowering emits straightforward moves in order.
 */
TEST_F(LowerParallelCopy, RelatedSource)
{
   struct agx_copy test_1[] = {
      {.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
      {.dest = 4, .src = agx_register(2, AGX_SIZE_32)},
   };

   CASE(test_1, {
      agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
      agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
   });

   struct agx_copy test_2[] = {
      {.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
      {.dest = 4, .src = agx_register(1, AGX_SIZE_16)},
   };

   CASE(test_2, {
      agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
      agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
   });
}
103
/* One copy's destination (r0) is another copy's source: the lowering must
 * emit the reader (r4 <- r0) before the writer (r0 <- r2) so the old value
 * of r0 is not clobbered first.
 */
TEST_F(LowerParallelCopy, DependentCopies)
{
   struct agx_copy test_1[] = {
      {.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
      {.dest = 4, .src = agx_register(0, AGX_SIZE_32)},
   };

   CASE(test_1, {
      agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(0, AGX_SIZE_32));
      agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
   });

   struct agx_copy test_2[] = {
      {.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
      {.dest = 4, .src = agx_register(0, AGX_SIZE_16)},
   };

   CASE(test_2, {
      agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(0, AGX_SIZE_16));
      agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
   });
}
126
/* A chain of dependencies (2 -> 0 -> 4 -> 6 -> 8) with no cycle: the
 * lowering emits the moves in an order where each destination is read
 * (if needed) before being overwritten — i.e. from the end of the chain
 * backwards.
 */
TEST_F(LowerParallelCopy, ManyDependentCopies)
{
   struct agx_copy test_1[] = {
      {.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
      {.dest = 4, .src = agx_register(0, AGX_SIZE_32)},
      {.dest = 8, .src = agx_register(6, AGX_SIZE_32)},
      {.dest = 6, .src = agx_register(4, AGX_SIZE_32)},
   };

   CASE(test_1, {
      agx_mov_to(b, agx_register(8, AGX_SIZE_32), agx_register(6, AGX_SIZE_32));
      agx_mov_to(b, agx_register(6, AGX_SIZE_32), agx_register(4, AGX_SIZE_32));
      agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(0, AGX_SIZE_32));
      agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
   });

   struct agx_copy test_2[] = {
      {.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
      {.dest = 2, .src = agx_register(0, AGX_SIZE_16)},
      {.dest = 4, .src = agx_register(3, AGX_SIZE_16)},
      {.dest = 3, .src = agx_register(2, AGX_SIZE_16)},
   };

   CASE(test_2, {
      agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(3, AGX_SIZE_16));
      agx_mov_to(b, agx_register(3, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
      agx_mov_to(b, agx_register(2, AGX_SIZE_16), agx_register(0, AGX_SIZE_16));
      agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
   });
}
157
/* A 2-cycle (true swap) cannot be lowered with moves alone.  32-bit swaps
 * use the XOR trick; a swap of the two 16-bit halves of one 32-bit register
 * (h0 <-> h1 here) uses the single-instruction extr rotate instead.
 */
TEST_F(LowerParallelCopy, Swap)
{
   struct agx_copy test_1[] = {
      {.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
      {.dest = 2, .src = agx_register(0, AGX_SIZE_32)},
   };

   CASE(test_1, {
      xor_swap(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
   });

   struct agx_copy test_2[] = {
      {.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
      {.dest = 1, .src = agx_register(0, AGX_SIZE_16)},
   };

   CASE(test_2, { extr_swap(b, agx_register(0, AGX_SIZE_16)); });
}
176
/* A 3-cycle (h0 <- h1 <- h2 <- h0) decomposes into two transpositions:
 * first the extr rotate of the h0/h1 pair, then an XOR swap of h1/h2.
 */
TEST_F(LowerParallelCopy, Cycle3)
{
   struct agx_copy test[] = {
      {.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
      {.dest = 1, .src = agx_register(2, AGX_SIZE_16)},
      {.dest = 2, .src = agx_register(0, AGX_SIZE_16)},
   };

   CASE(test, {
      extr_swap(b, agx_register(0, AGX_SIZE_16));
      xor_swap(b, agx_register(1, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
   });
}
190
/* A 64-bit immediate copy is split into two 32-bit immediate moves: the
 * low half gets the value (10) and the high half gets zero.
 */
TEST_F(LowerParallelCopy, Immediate64)
{
   agx_index imm = agx_immediate(10);
   imm.size = AGX_SIZE_64;

   struct agx_copy test_1[] = {
      {.dest = 4, .src = imm},
   };

   CASE(test_1, {
      agx_mov_imm_to(b, agx_register(4, AGX_SIZE_32), 10);
      agx_mov_imm_to(b, agx_register(6, AGX_SIZE_32), 0);
   });
}
205
/* Test case from Hack et al: a 3-cycle (r2 -> r4 -> r6 -> r2) plus a
 * self-copy (r8 <- r8).  The self-copy is dropped entirely and the cycle
 * lowers to two XOR swaps.
 */
TEST_F(LowerParallelCopy, TwoSwaps)
{
   struct agx_copy test[] = {
      {.dest = 4, .src = agx_register(2, AGX_SIZE_32)},
      {.dest = 6, .src = agx_register(4, AGX_SIZE_32)},
      {.dest = 2, .src = agx_register(6, AGX_SIZE_32)},
      {.dest = 8, .src = agx_register(8, AGX_SIZE_32)},
   };

   CASE(test, {
      xor_swap(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
      xor_swap(b, agx_register(6, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
   });
}
221
/* Adjacent 16-bit copies with 32-bit-aligned, contiguous sources and
 * destinations are fused into single 32-bit moves — for both register and
 * uniform sources.
 */
TEST_F(LowerParallelCopy, VectorizeAlignedHalfRegs)
{
   struct agx_copy test[] = {
      {.dest = 0, .src = agx_register(10, AGX_SIZE_16)},
      {.dest = 1, .src = agx_register(11, AGX_SIZE_16)},
      {.dest = 2, .src = agx_uniform(8, AGX_SIZE_16)},
      {.dest = 3, .src = agx_uniform(9, AGX_SIZE_16)},
   };

   CASE(test, {
      agx_mov_to(b, agx_register(0, AGX_SIZE_32),
                 agx_register(10, AGX_SIZE_32));
      agx_mov_to(b, agx_register(2, AGX_SIZE_32), agx_uniform(8, AGX_SIZE_32));
   });
}
237
/* Copies involving spilled (memory) operands: fills, spills, memory-to-
 * memory copies, and memory-to-memory swaps.  Memory operands cannot be
 * moved directly, so the lowering stages them through GPRs (r0..r3 here)
 * and saves/restores those GPRs through a scratch slot — presumably at
 * memory register 1000, inside the scratch_size = 2000 region set by CASE
 * (NOTE(review): staging-register and scratch-slot choice inferred from
 * the expected sequence; confirm against agx_emit_parallel_copies).
 */
TEST_F(LowerParallelCopy, StackCopies)
{
   struct agx_copy test[] = {
      /* Spills: GPR -> memory (16-bit and 32-bit) */
      {.dest = 21, .dest_mem = true, .src = agx_register(20, AGX_SIZE_16)},
      {.dest = 22, .dest_mem = true, .src = agx_register(22, AGX_SIZE_32)},
      /* Fills: memory -> GPR, vectorizable 16-bit pair */
      {.dest = 0, .src = agx_memory_register(10, AGX_SIZE_16)},
      {.dest = 1, .src = agx_memory_register(11, AGX_SIZE_16)},
      /* Memory -> memory copy, vectorizable 16-bit pair */
      {.dest = 0, .dest_mem = true, .src = agx_memory_register(12, AGX_SIZE_16)},
      {.dest = 1, .dest_mem = true, .src = agx_memory_register(13, AGX_SIZE_16)},
      /* Memory <-> memory swap, 32-bit */
      {.dest = 2,
       .dest_mem = true,
       .src = agx_memory_register(804, AGX_SIZE_32)},
      {.dest = 804,
       .dest_mem = true,
       .src = agx_memory_register(2, AGX_SIZE_32)},
      /* Memory <-> memory swap, 16-bit */
      {.dest = 807,
       .dest_mem = true,
       .src = agx_memory_register(808, AGX_SIZE_16)},
      {.dest = 808,
       .dest_mem = true,
       .src = agx_memory_register(807, AGX_SIZE_16)},
   };

   CASE(test, {
      /* Vectorized fill */
      agx_mov_to(b, agx_register(0, AGX_SIZE_32),
                 agx_memory_register(10, AGX_SIZE_32));

      /* Regular spills */
      agx_mov_to(b, agx_memory_register(21, AGX_SIZE_16),
                 agx_register(20, AGX_SIZE_16));
      agx_mov_to(b, agx_memory_register(22, AGX_SIZE_32),
                 agx_register(22, AGX_SIZE_32));

      /* Vectorized stack->stack copy: save r0 to scratch, stage the copy
       * through r0, then restore r0 (which holds the fill result above).
       */
      agx_mov_to(b, agx_memory_register(1000, AGX_SIZE_32),
                 agx_register(0, AGX_SIZE_32));

      agx_mov_to(b, agx_register(0, AGX_SIZE_32),
                 agx_memory_register(12, AGX_SIZE_32));

      agx_mov_to(b, agx_memory_register(0, AGX_SIZE_32),
                 agx_register(0, AGX_SIZE_32));

      agx_mov_to(b, agx_register(0, AGX_SIZE_32),
                 agx_memory_register(1000, AGX_SIZE_32));

      /* Stack swap: 32-bit.  Spill the r0:r1 temp pair to scratch, swap
       * the two memory slots through temps, then restore the pair.
       */
      agx_index temp1 = agx_register(0, AGX_SIZE_32);
      agx_index temp2 = agx_register(2, AGX_SIZE_32);
      agx_index spilled_gpr_vec2 = agx_register(0, AGX_SIZE_32);
      agx_index scratch_vec2 = agx_memory_register(1000, AGX_SIZE_32);
      /* Widen to 2-channel vectors so save/restore covers both temps. */
      spilled_gpr_vec2.channels_m1++;
      scratch_vec2.channels_m1++;

      agx_mov_to(b, scratch_vec2, spilled_gpr_vec2);
      agx_mov_to(b, temp1, agx_memory_register(2, AGX_SIZE_32));
      agx_mov_to(b, temp2, agx_memory_register(804, AGX_SIZE_32));
      agx_mov_to(b, agx_memory_register(804, AGX_SIZE_32), temp1);
      agx_mov_to(b, agx_memory_register(2, AGX_SIZE_32), temp2);
      agx_mov_to(b, spilled_gpr_vec2, scratch_vec2);

      /* Stack swap: 16-bit — same pattern with 16-bit operands. */
      spilled_gpr_vec2.size = AGX_SIZE_16;
      scratch_vec2.size = AGX_SIZE_16;
      temp1.size = AGX_SIZE_16;
      temp2.size = AGX_SIZE_16;

      agx_mov_to(b, scratch_vec2, spilled_gpr_vec2);
      agx_mov_to(b, temp1, agx_memory_register(807, AGX_SIZE_16));
      agx_mov_to(b, temp2, agx_memory_register(808, AGX_SIZE_16));
      agx_mov_to(b, agx_memory_register(808, AGX_SIZE_16), temp1);
      agx_mov_to(b, agx_memory_register(807, AGX_SIZE_16), tem2);
      agx_mov_to(b, spilled_gpr_vec2, scratch_vec2);
   });
}
314
/* NOTE(review): disabled test.  r0/r2 form a swap but r2 is also copied to
 * r4, so copying r2 out first would let the "swap" be done with three plain
 * moves instead of XORs.  Presumably the current lowering does not implement
 * this optimization — confirm before enabling.
 */
#if 0
TEST_F(LowerParallelCopy, LooksLikeASwap) {
   struct agx_copy test[] = {
      { .dest = 0, .src = agx_register(2, AGX_SIZE_32) },
      { .dest = 2, .src = agx_register(0, AGX_SIZE_32) },
      { .dest = 4, .src = agx_register(2, AGX_SIZE_32) },
   };

   CASE(test, {
      agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
      agx_mov_to(b, agx_register(2, AGX_SIZE_32), agx_register(0, AGX_SIZE_32));
      agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(4, AGX_SIZE_32));
   });
}
#endif
330