/*
 * Copyright 2021 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "agx_builder.h"
#include "agx_compiler.h"
#include "agx_test.h"

#include <gtest/gtest.h>

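/*
 * Run both optimizer passes followed by dead code elimination, so the
 * expected programs below can be written without the dead copies that DCE
 * would clean up.
 */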
static void
agx_optimize_and_dce(agx_context *ctx)
{
   agx_optimizer_backward(ctx);
   agx_optimizer_forward(ctx);
   agx_dce(ctx, true);
}

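/*
 * CASE builds an input program and an expected program, runs the optimizer on
 * the former, and asserts that the result matches the latter. NEGCASE asserts
 * that the optimizer leaves the given program unchanged. The numeric suffixes
 * select the size of the "out" destination.
 */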
#define CASE(instr, expected, size, returns)                                   \
   INSTRUCTION_CASE(                                                           \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         instr;                                                                \
         if (returns)                                                          \
            agx_unit_test(b, out);                                             \
      },                                                                       \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         expected;                                                             \
         if (returns)                                                          \
            agx_unit_test(b, out);                                             \
      },                                                                       \
      agx_optimize_and_dce)

#define NEGCASE(instr, size) CASE(instr, instr, size, true)

#define CASE16(instr, expected) CASE(instr, expected, 16, true)
#define CASE32(instr, expected) CASE(instr, expected, 32, true)
#define CASE64(instr, expected) CASE(instr, expected, 64, true)

#define CASE_NO_RETURN(instr, expected)                                        \
   CASE(instr, expected, 32 /* irrelevant */, false)

#define NEGCASE16(instr) NEGCASE(instr, 16)
#define NEGCASE32(instr) NEGCASE(instr, 32)

static inline agx_index
agx_fmov(agx_builder *b, agx_index s0)
{
   agx_index tmp = agx_temp(b->shader, s0.size);
   agx_fmov_to(b, tmp, s0);
   return tmp;
}

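/*
 * Fixture providing commonly used operands: dN are 64-bit registers, wN are
 * 32-bit, and hN are 16-bit.
 */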
class Optimizer : public testing::Test {
 protected:
   Optimizer()
   {
      mem_ctx = ralloc_context(NULL);

      dx = agx_register(0, AGX_SIZE_64);
      dz = agx_register(4, AGX_SIZE_64);

      wx = agx_register(0, AGX_SIZE_32);
      wy = agx_register(2, AGX_SIZE_32);
      wz = agx_register(4, AGX_SIZE_32);

      hx = agx_register(0, AGX_SIZE_16);
      hy = agx_register(1, AGX_SIZE_16);
      hz = agx_register(2, AGX_SIZE_16);
   }

   ~Optimizer()
   {
      ralloc_free(mem_ctx);
   }

   void *mem_ctx;

   agx_index dx, dz, wx, wy, wz, hx, hy, hz;
};

TEST_F(Optimizer, FloatCopyprop)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));
}

TEST_F(Optimizer, FloatConversion)
{
   CASE32(
      {
         agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, cvt, hx);
         agx_fadd_to(b, out, cvt, wy);
      },
      { agx_fadd_to(b, out, hx, wy); });

   CASE16(
      {
         agx_index sum = agx_temp(b->shader, AGX_SIZE_32);
         agx_fadd_to(b, sum, wx, wy);
         agx_fmov_to(b, out, sum);
      },
      { agx_fadd_to(b, out, wx, wy); });
}

TEST_F(Optimizer, FusedFABSNEG)
{
   CASE32(agx_fadd_to(b, out, agx_fmov(b, agx_abs(wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fmul_to(b, out, wx, agx_fmov(b, agx_neg(agx_abs(wx)))),
          agx_fmul_to(b, out, wx, agx_neg(agx_abs(wx))));
}

TEST_F(Optimizer, FusedFabsAbsorb)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, agx_abs(wx))), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));
}

TEST_F(Optimizer, FusedFnegCancel)
{
   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(wx)))),
          agx_fmul_to(b, out, wx, wx));

   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(agx_abs(wx))))),
          agx_fmul_to(b, out, wx, agx_abs(wx)));
}

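/*
 * NOT on a source or destination folds into the negated opcode variants
 * (nand, nor, xnor, andn1/andn2, orn2).
 */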
TEST_F(Optimizer, FusedNot)
{
   CASE32(agx_not_to(b, out, agx_and(b, wx, wx)), agx_nand_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_or(b, wx, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_xor(b, wx, wx)), agx_xnor_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_xor_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, agx_not(b, wx), wx), agx_xnor_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, wx, agx_not(b, wx)), agx_xnor_to(b, out, wx, wx));

   CASE32(agx_nand_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_or_to(b, out, wx, wx));

   CASE32(agx_andn1_to(b, out, agx_not(b, wx), wx), agx_and_to(b, out, wx, wx));

   CASE32(agx_andn1_to(b, out, wx, agx_not(b, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, agx_not(b, wx), wx), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, wx, agx_not(b, wx)), agx_and_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_uniform(8, AGX_SIZE_32)),
          agx_xnor_to(b, out, wx, agx_uniform(8, AGX_SIZE_32)));

   CASE32(agx_or_to(b, out, agx_immediate(123), agx_not(b, wx)),
          agx_orn2_to(b, out, agx_immediate(123), wx));

   CASE32(agx_xor_to(b, out, wx, agx_not(b, wy)), agx_xnor_to(b, out, wx, wy));

   CASE32(agx_xor_to(b, out, wy, agx_not(b, wx)), agx_xnor_to(b, out, wy, wx));

   CASE32(agx_and_to(b, out, agx_not(b, wx), wy), agx_andn1_to(b, out, wx, wy));

   CASE32(agx_or_to(b, out, wx, agx_not(b, wy)), agx_orn2_to(b, out, wx, wy));
}

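/*
 * A saturating fmov fuses into the fmul producing its source, along with the
 * f32->f16 conversion on the output.
 */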
TEST_F(Optimizer, FmulFsatF2F16)
{
   CASE16(
      {
         agx_index tmp = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, tmp, agx_fmul(b, wx, wy))->saturate = true;
         agx_fmov_to(b, out, tmp);
      },
      { agx_fmul_to(b, out, wx, wy)->saturate = true; });
}

TEST_F(Optimizer, FsatWithPhi)
{
   /*
    * Construct the loop:
    *
    *    A:
    *       ...
    *
    *    B:
    *       phi ..., u
    *       u = wx * phi
    *       out = fsat u
    *       --> B
    *
    * This example shows that phi sources are read at the end of the
    * predecessor, not at the start of the successor. If phis are not handled
    * properly, the fsat would be fused incorrectly.
    *
    * This reproduces an issue hit in a Control shader. Astonishingly, it is
    * not hit anywhere in CTS.
    */
   NEGCASE32({
      agx_block *A = agx_start_block(b->shader);
      agx_block *B = agx_test_block(b->shader);

      agx_block_add_successor(A, B);
      agx_block_add_successor(B, B);

      b->cursor = agx_after_block(B);
      agx_index u = agx_temp(b->shader, AGX_SIZE_32);

      agx_instr *phi = agx_phi_to(b, agx_temp(b->shader, AGX_SIZE_32), 2);
      phi->src[0] = wx;
      phi->src[1] = u;

      agx_fmul_to(b, u, wx, phi->dest[0]);
      agx_fmov_to(b, out, u)->saturate = true;
   });
}

TEST_F(Optimizer, Copyprop)
{
   CASE32(agx_fmul_to(b, out, wx, agx_mov(b, wy)), agx_fmul_to(b, out, wx, wy));
   CASE32(agx_fmul_to(b, out, agx_mov(b, wx), agx_mov(b, wy)),
          agx_fmul_to(b, out, wx, wy));
}

TEST_F(Optimizer, SourceZeroExtend)
{
   CASE32(
      {
         agx_index t = agx_temp(b->shader, AGX_SIZE_32);
         agx_mov_to(b, t, hy);
         agx_ffs_to(b, out, t);
      },
      agx_ffs_to(b, out, hy));
}

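/*
 * As the next two tests show, a narrower iadd source sign-extends by default,
 * while the abs modifier marks zero-extension: a zero-extending mov folds to
 * abs(hy) and a sign-extending one folds to the plain source.
 */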
TEST_F(Optimizer, AddSourceZeroExtend)
{
   CASE32(
      {
         agx_index t = agx_temp(b->shader, AGX_SIZE_32);
         agx_mov_to(b, t, hy);
         agx_iadd_to(b, out, wx, t, 1);
      },
      agx_iadd_to(b, out, wx, agx_abs(hy), 1));
}

TEST_F(Optimizer, AddSourceSignExtend)
{
   CASE32(
      {
         agx_index t = agx_temp(b->shader, AGX_SIZE_32);
         agx_signext_to(b, t, hy);
         agx_iadd_to(b, out, wx, t, 1);
      },
      agx_iadd_to(b, out, wx, hy, 1));
}

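/*
 * Moves of small negative immediates fold into iadd/imad as negated inline
 * immediates.
 */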
TEST_F(Optimizer, SubInlineImmediate)
{
   CASE16(agx_iadd_to(b, out, hx, agx_mov_imm(b, 16, -2), 0),
          agx_iadd_to(b, out, hx, agx_neg(agx_immediate(2)), 0));

   CASE32(agx_iadd_to(b, out, wx, agx_mov_imm(b, 32, -1), 0),
          agx_iadd_to(b, out, wx, agx_neg(agx_immediate(1)), 0));

   CASE64(agx_iadd_to(b, out, dx, agx_mov_imm(b, 64, -17), 0),
          agx_iadd_to(b, out, dx, agx_neg(agx_immediate(17)), 0));

   CASE16(agx_imad_to(b, out, hx, hy, agx_mov_imm(b, 16, -2), 0),
          agx_imad_to(b, out, hx, hy, agx_neg(agx_immediate(2)), 0));

   CASE32(agx_imad_to(b, out, wx, wy, agx_mov_imm(b, 32, -1), 0),
          agx_imad_to(b, out, wx, wy, agx_neg(agx_immediate(1)), 0));

   CASE64(agx_imad_to(b, out, dx, dz, agx_mov_imm(b, 64, -17), 0),
          agx_imad_to(b, out, dx, dz, agx_neg(agx_immediate(17)), 0));
}

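/* collect cannot take immediate sources, so the mov must not be folded. */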
TEST_F(Optimizer, InlineHazards)
{
   NEGCASE32({
      agx_index zero = agx_mov_imm(b, AGX_SIZE_32, 0);
      agx_instr *I = agx_collect_to(b, out, 4);

      I->src[0] = zero;
      I->src[1] = wy;
      I->src[2] = wz;
      I->src[3] = wz;
   });
}

TEST_F(Optimizer, CopypropRespectsAbsNeg)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_abs(agx_mov(b, wx))), wy),
          agx_fadd_to(b, out, agx_neg(agx_abs(wx)), wy));
}

TEST_F(Optimizer, IntCopyprop)
{
   CASE32(agx_xor_to(b, out, agx_mov(b, wx), wy), agx_xor_to(b, out, wx, wy));
}

TEST_F(Optimizer, CopypropSplitMovedUniform64)
{
   CASE32(
      {
         /* emit_load_preamble puts in the move, so we do too */
         agx_index mov = agx_mov(b, agx_uniform(40, AGX_SIZE_64));
         agx_instr *spl = agx_split(b, 2, mov);
         spl->dest[0] = agx_temp(b->shader, AGX_SIZE_32);
         spl->dest[1] = agx_temp(b->shader, AGX_SIZE_32);
         agx_xor_to(b, out, spl->dest[0], spl->dest[1]);
      },
      {
         agx_xor_to(b, out, agx_uniform(40, AGX_SIZE_32),
                    agx_uniform(42, AGX_SIZE_32));
      });
}

TEST_F(Optimizer, IntCopypropDoesntConvert)
{
   NEGCASE32({
      agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
      agx_mov_to(b, cvt, hx);
      agx_fmul_to(b, out, cvt, wy);
   });
}

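/* Preloaded registers must be left intact rather than copy-propagated. */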
TEST_F(Optimizer, SkipPreloads)
{
   NEGCASE32({
      agx_index preload = agx_preload(b, agx_register(0, AGX_SIZE_32));
      agx_xor_to(b, out, preload, wy);
   });
}

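/*
 * Size conversions cannot fuse into the sources or destination of 16-bit ALU
 * instructions, so these conversions must be preserved.
 */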
TEST_F(Optimizer, NoConversionsOn16BitALU)
{
   NEGCASE16({
      agx_index cvt = agx_temp(b->shader, AGX_SIZE_16);
      agx_fmov_to(b, cvt, wx);
      agx_fadd_to(b, out, cvt, hy);
   });

   NEGCASE32(agx_fmov_to(b, out, agx_fadd(b, hx, hy)));
}

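/*
 * A comparison feeding a (quad) ballot fuses into a combined
 * compare-and-ballot instruction.
 */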
TEST_F(Optimizer, BallotCondition)
{
   CASE32(agx_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GE, false)),
          agx_fcmp_ballot_to(b, out, wx, wy, AGX_FCOND_GE, false));

   CASE32(agx_quad_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_quad_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_quad_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GT, false)),
          agx_fcmp_quad_ballot_to(b, out, wx, wy, AGX_FCOND_GT, false));
}

TEST_F(Optimizer, BallotMultipleUses)
{
   CASE32(
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, cmp);
         agx_fadd_to(b, out, cmp, ballot);
      },
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot =
            agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
         agx_fadd_to(b, out, cmp, ballot);
      });
}

/*
 * We had a bug where the ballot optimization didn't check the agx_index's
 * type, so it would fuse constants with overlapping values. An unrelated
 * common code change surfaced this in the CTS case:
 *
 *    dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bool_fragment
 *
 * We had passed the Vulkan CTS without hitting it, though, hence the
 * targeted test.
 */
TEST_F(Optimizer, BallotConstant)
{
   CASE32(
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
         agx_index ballot2 = agx_quad_ballot(b, cmp);
         agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
      },
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
         agx_index ballot2 =
            agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
         agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
      });
}

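/* A comparison used as an if condition fuses into the if instruction itself. */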
TEST_F(Optimizer, IfCondition)
{
   CASE_NO_RETURN(agx_if_icmp(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, true, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, true, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, false, NULL));
}

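/*
 * A comparison feeding icmpsel fuses into the select, with the arms swapped
 * as needed to preserve the sense of the original compare-with-zero.
 */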
TEST_F(Optimizer, SelectCondition)
{
   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wx, wz, AGX_ICOND_UEQ));

   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wz, wx, AGX_ICOND_UEQ));

   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wx, wz, AGX_FCOND_EQ));

   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_LT, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wz, wx, AGX_FCOND_LT));
}

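/* XOR with 1 inverts a boolean, which folds into the if's invert flag. */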
TEST_F(Optimizer, IfInverted)
{
   CASE_NO_RETURN(
      agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(), 1,
                  AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, false, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(),
                              1, AGX_ICOND_UEQ, false, NULL),
                  agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, true, NULL));
}

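/*
 * Combining the two above: an inverted comparison feeding an if fuses into a
 * single compare-and-branch with the condition's sense flipped.
 */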
TEST_F(Optimizer, IfInvertedCondition)
{
   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, false, NULL));

   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, false, NULL));

   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, true, NULL));
}