/*
 * Copyright 2021 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "agx_builder.h"
#include "agx_compiler.h"
#include "agx_test.h"

#include <gtest/gtest.h>
11 
/*
 * Pass pipeline under test: run the backward then forward optimizer, then
 * dead-code eliminate so the optimized program can be compared against the
 * hand-built expected program with dead temporaries stripped.
 */
static void
agx_optimize_and_dce(agx_context *ctx)
{
   agx_optimizer_backward(ctx);
   agx_optimizer_forward(ctx);
   agx_dce(ctx, true);
}
19 
/*
 * Core test macro: build `instr`, build `expected`, run agx_optimize_and_dce
 * on the former, and assert the resulting IR matches the latter.  Each side
 * gets a fresh temporary `out` of the given AGX_SIZE_##size; when `returns`
 * is set, `out` is passed to agx_unit_test so the result is kept live across
 * DCE.
 */
#define CASE(instr, expected, size, returns)                                   \
   INSTRUCTION_CASE(                                                           \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         instr;                                                                \
         if (returns)                                                          \
            agx_unit_test(b, out);                                             \
      },                                                                       \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         expected;                                                             \
         if (returns)                                                          \
            agx_unit_test(b, out);                                             \
      },                                                                       \
      agx_optimize_and_dce)

/* Negative case: the optimizer must leave `instr` unchanged. */
#define NEGCASE(instr, size) CASE(instr, instr, size, true)

/* Size-specific shorthands for value-producing cases. */
#define CASE16(instr, expected) CASE(instr, expected, 16, true)
#define CASE32(instr, expected) CASE(instr, expected, 32, true)
#define CASE64(instr, expected) CASE(instr, expected, 64, true)

/* For cases whose result is control flow rather than a value (if-conditions),
 * so nothing is fed to agx_unit_test.
 */
#define CASE_NO_RETURN(instr, expected)                                        \
   CASE(instr, expected, 32 /* irrelevant */, false)

#define NEGCASE16(instr) NEGCASE(instr, 16)
#define NEGCASE32(instr) NEGCASE(instr, 32)
47 
/* Emit an fmov of s0 into a fresh temporary of the same size and return the
 * temporary.  Used to construct the copies the optimizer is expected to
 * propagate away.
 */
static inline agx_index
agx_fmov(agx_builder *b, agx_index s0)
{
   agx_index tmp = agx_temp(b->shader, s0.size);
   agx_fmov_to(b, tmp, s0);
   return tmp;
}
55 
/*
 * Test fixture: owns a ralloc context and provides commonly used register
 * operands.  Naming convention: d* are 64-bit, w* are 32-bit, h* are 16-bit
 * registers (register numbers appear to be in 16-bit units — a 32-bit
 * register spans two, a 64-bit register four).
 */
class Optimizer : public testing::Test {
 protected:
   Optimizer()
   {
      mem_ctx = ralloc_context(NULL);

      dx = agx_register(0, AGX_SIZE_64);
      dz = agx_register(4, AGX_SIZE_64);

      wx = agx_register(0, AGX_SIZE_32);
      wy = agx_register(2, AGX_SIZE_32);
      wz = agx_register(4, AGX_SIZE_32);

      hx = agx_register(0, AGX_SIZE_16);
      hy = agx_register(1, AGX_SIZE_16);
      hz = agx_register(2, AGX_SIZE_16);
   }

   ~Optimizer()
   {
      /* Frees everything allocated against mem_ctx. */
      ralloc_free(mem_ctx);
   }

   void *mem_ctx;

   agx_index dx, dz, wx, wy, wz, hx, hy, hz;
};
83 
/* A floating-point copy (fmov) is propagated into its use, with the abs/neg
 * modifier on the use preserved on the propagated source.
 */
TEST_F(Optimizer, FloatCopyprop)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));
}
92 
/* Size-converting fmovs fold into the neighbouring float ALU op: a 16->32
 * convert folds into the consumer, and a 32->16 convert folds into the
 * producer (float ops support mixed-size operands/destinations).
 */
TEST_F(Optimizer, FloatConversion)
{
   CASE32(
      {
         agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, cvt, hx);
         agx_fadd_to(b, out, cvt, wy);
      },
      { agx_fadd_to(b, out, hx, wy); });

   CASE16(
      {
         agx_index sum = agx_temp(b->shader, AGX_SIZE_32);
         agx_fadd_to(b, sum, wx, wy);
         agx_fmov_to(b, out, sum);
      },
      { agx_fadd_to(b, out, wx, wy); });
}
111 
/* abs/neg modifiers already folded into an fmov source propagate onto the
 * instruction consuming the moved value.
 */
TEST_F(Optimizer, FusedFABSNEG)
{
   CASE32(agx_fadd_to(b, out, agx_fmov(b, agx_abs(wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fmul_to(b, out, wx, agx_fmov(b, agx_neg(agx_abs(wx)))),
          agx_fmul_to(b, out, wx, agx_neg(agx_abs(wx))));
}
120 
/* abs(abs(x)) == abs(x): an outer abs absorbs an abs already fused into the
 * copied source.
 */
TEST_F(Optimizer, FusedFabsAbsorb)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, agx_abs(wx))), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));
}
126 
/* neg(neg(x)) cancels when a fused neg meets a neg on the use; an abs under
 * the double negation is preserved.
 */
TEST_F(Optimizer, FusedFnegCancel)
{
   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(wx)))),
          agx_fmul_to(b, out, wx, wx));

   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(agx_abs(wx))))),
          agx_fmul_to(b, out, wx, agx_abs(wx)));
}
135 
/* Bitwise `not` fuses with neighbouring logic ops into the complemented
 * opcode (nand/nor/xnor) or into the operand-inverting variants
 * (andn1/andn2/orn2); double inversions cancel.
 */
TEST_F(Optimizer, FusedNot)
{
   /* not(op(x, y)) -> complemented opcode */
   CASE32(agx_not_to(b, out, agx_and(b, wx, wx)), agx_nand_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_or(b, wx, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_xor(b, wx, wx)), agx_xnor_to(b, out, wx, wx));

   /* xor(not(x), not(y)) == xor(x, y): the two inversions cancel */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_xor_to(b, out, wx, wx));

   /* xor with a single inverted operand -> xnor */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), wx), agx_xnor_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, wx, agx_not(b, wx)), agx_xnor_to(b, out, wx, wx));

   /* nand(not(x), not(y)) == or(x, y) (De Morgan) */
   CASE32(agx_nand_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_or_to(b, out, wx, wx));

   /* not folds into/out of the already-inverting andn variants */
   CASE32(agx_andn1_to(b, out, agx_not(b, wx), wx), agx_and_to(b, out, wx, wx));

   CASE32(agx_andn1_to(b, out, wx, agx_not(b, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, agx_not(b, wx), wx), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, wx, agx_not(b, wx)), agx_and_to(b, out, wx, wx));

   /* Fusion also applies with uniform and immediate operands */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_uniform(8, AGX_SIZE_32)),
          agx_xnor_to(b, out, wx, agx_uniform(8, AGX_SIZE_32)));

   CASE32(agx_or_to(b, out, agx_immediate(123), agx_not(b, wx)),
          agx_orn2_to(b, out, agx_immediate(123), wx));

   /* ...and with distinct register operands, in either position */
   CASE32(agx_xor_to(b, out, wx, agx_not(b, wy)), agx_xnor_to(b, out, wx, wy));

   CASE32(agx_xor_to(b, out, wy, agx_not(b, wx)), agx_xnor_to(b, out, wy, wx));

   CASE32(agx_and_to(b, out, agx_not(b, wx), wy), agx_andn1_to(b, out, wx, wy));

   CASE32(agx_or_to(b, out, wx, agx_not(b, wy)), agx_orn2_to(b, out, wx, wy));
}
176 
/* A saturating 32-bit fmov of an fmul result, followed by a 32->16 convert,
 * collapses into a single saturating 16-bit fmul.
 */
TEST_F(Optimizer, FmulFsatF2F16)
{
   CASE16(
      {
         agx_index tmp = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, tmp, agx_fmul(b, wx, wy))->saturate = true;
         agx_fmov_to(b, out, tmp);
      },
      { agx_fmul_to(b, out, wx, wy)->saturate = true; });
}
187 
TEST_F(Optimizer, FsatWithPhi)
{
   /*
    * Construct the loop:
    *
    * A:
    *   ...
    *
    * B:
    *    phi ..., u
    *    u = wx * phi
    *    out = fsat u
    *    --> B
    *
    * This example shows that phi sources are read at the end of the
    * predecessor, not at the start of the successor. If phis are not handled
    * properly, the fsat would be fused incorrectly.
    *
    * This reproduces an issue hit in a Control shader. Astonishingly, it is not
    * hit anywhere in CTS.
    */
   NEGCASE32({
      agx_block *A = agx_start_block(b->shader);
      agx_block *B = agx_test_block(b->shader);

      /* A -> B, with B looping back to itself */
      agx_block_add_successor(A, B);
      agx_block_add_successor(B, B);

      b->cursor = agx_after_block(B);
      agx_index u = agx_temp(b->shader, AGX_SIZE_32);

      /* phi over (initial wx, loop-carried u) */
      agx_instr *phi = agx_phi_to(b, agx_temp(b->shader, AGX_SIZE_32), 2);
      phi->src[0] = wx;
      phi->src[1] = u;

      agx_fmul_to(b, u, wx, phi->dest[0]);
      /* The fsat must NOT fuse into the fmul: u feeds the phi. */
      agx_fmov_to(b, out, u)->saturate = true;
   });
}
227 
/* Plain (integer) movs are copy-propagated into float users, including when
 * both sources are copies.
 */
TEST_F(Optimizer, Copyprop)
{
   CASE32(agx_fmul_to(b, out, wx, agx_mov(b, wy)), agx_fmul_to(b, out, wx, wy));
   CASE32(agx_fmul_to(b, out, agx_mov(b, wx), agx_mov(b, wy)),
          agx_fmul_to(b, out, wx, wy));
}
234 
/* A 16->32 zero-extending mov folds into an instruction (ffs) that accepts a
 * narrower source directly.
 */
TEST_F(Optimizer, SourceZeroExtend)
{
   CASE32(
      {
         agx_index t = agx_temp(b->shader, AGX_SIZE_32);
         agx_mov_to(b, t, hy);
         agx_ffs_to(b, out, t);
      },
      agx_ffs_to(b, out, hy));
}
245 
/* A 16->32 zero-extending mov folds into an iadd source; in this IR the
 * zero-extension on an integer add source is expressed with the abs modifier
 * (see expected pattern).
 */
TEST_F(Optimizer, AddSourceZeroExtend)
{
   CASE32(
      {
         agx_index t = agx_temp(b->shader, AGX_SIZE_32);
         agx_mov_to(b, t, hy);
         agx_iadd_to(b, out, wx, t, 1);
      },
      agx_iadd_to(b, out, wx, agx_abs(hy), 1));
}
256 
/* A 16->32 sign-extending mov folds into an iadd source as a bare 16-bit
 * operand (sign-extension being the add's default for narrow sources —
 * contrast with AddSourceZeroExtend above).
 */
TEST_F(Optimizer, AddSourceSignExtend)
{
   CASE32(
      {
         agx_index t = agx_temp(b->shader, AGX_SIZE_32);
         agx_signext_to(b, t, hy);
         agx_iadd_to(b, out, wx, t, 1);
      },
      agx_iadd_to(b, out, wx, hy, 1));
}
267 
/* A mov of a small negative constant folds into iadd/imad as a negated
 * positive inline immediate, at every operand size.
 */
TEST_F(Optimizer, SubInlineImmediate)
{
   CASE16(agx_iadd_to(b, out, hx, agx_mov_imm(b, 16, -2), 0),
          agx_iadd_to(b, out, hx, agx_neg(agx_immediate(2)), 0));

   CASE32(agx_iadd_to(b, out, wx, agx_mov_imm(b, 32, -1), 0),
          agx_iadd_to(b, out, wx, agx_neg(agx_immediate(1)), 0));

   CASE64(agx_iadd_to(b, out, dx, agx_mov_imm(b, 64, -17), 0),
          agx_iadd_to(b, out, dx, agx_neg(agx_immediate(17)), 0));

   CASE16(agx_imad_to(b, out, hx, hy, agx_mov_imm(b, 16, -2), 0),
          agx_imad_to(b, out, hx, hy, agx_neg(agx_immediate(2)), 0));

   CASE32(agx_imad_to(b, out, wx, wy, agx_mov_imm(b, 32, -1), 0),
          agx_imad_to(b, out, wx, wy, agx_neg(agx_immediate(1)), 0));

   CASE64(agx_imad_to(b, out, dx, dz, agx_mov_imm(b, 64, -17), 0),
          agx_imad_to(b, out, dx, dz, agx_neg(agx_immediate(17)), 0));
}
288 
/* A mov of an immediate must NOT be folded into a collect source (collect
 * cannot encode inline immediates), so the program is left unchanged.
 */
TEST_F(Optimizer, InlineHazards)
{
   NEGCASE32({
      agx_index zero = agx_mov_imm(b, AGX_SIZE_32, 0);
      agx_instr *I = agx_collect_to(b, out, 4);

      I->src[0] = zero;
      I->src[1] = wy;
      /* NOTE(review): src[2] and src[3] both read wz — presumably fine for
       * this hazard test, but confirm it is intentional. */
      I->src[2] = wz;
      I->src[3] = wz;
   });
}
301 
/* Copy-propagating an integer mov must keep the abs/neg float modifiers that
 * the use applied to the copy.
 */
TEST_F(Optimizer, CopypropRespectsAbsNeg)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));

   CASE32(agx_fadd_to(b, out, agx_neg(agx_abs(agx_mov(b, wx))), wy),
          agx_fadd_to(b, out, agx_neg(agx_abs(wx)), wy));
}
313 
/* Movs copy-propagate into integer/logic users too. */
TEST_F(Optimizer, IntCopyprop)
{
   CASE32(agx_xor_to(b, out, agx_mov(b, wx), wy), agx_xor_to(b, out, wx, wy));
}
318 
/* Splitting a moved 64-bit uniform folds to the two 32-bit uniform halves
 * directly (slots 40 and 42 — uniforms appear to be indexed in 16-bit units,
 * so a 32-bit word spans two slots).
 */
TEST_F(Optimizer, CopypropSplitMovedUniform64)
{
   CASE32(
      {
         /* emit_load_preamble puts in the move, so we do too */
         agx_index mov = agx_mov(b, agx_uniform(40, AGX_SIZE_64));
         agx_instr *spl = agx_split(b, 2, mov);
         spl->dest[0] = agx_temp(b->shader, AGX_SIZE_32);
         spl->dest[1] = agx_temp(b->shader, AGX_SIZE_32);
         agx_xor_to(b, out, spl->dest[0], spl->dest[1]);
      },
      {
         agx_xor_to(b, out, agx_uniform(40, AGX_SIZE_32),
                    agx_uniform(42, AGX_SIZE_32));
      });
}
335 
/* An integer mov is not a float conversion: propagating a 16-bit source into
 * a 32-bit float op would change semantics, so the mov must stay.
 */
TEST_F(Optimizer, IntCopypropDoesntConvert)
{
   NEGCASE32({
      agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
      agx_mov_to(b, cvt, hx);
      agx_fmul_to(b, out, cvt, wy);
   });
}
344 
/* Preloaded registers must not be copy-propagated away. */
TEST_F(Optimizer, SkipPreloads)
{
   NEGCASE32({
      agx_index preload = agx_preload(b, agx_register(0, AGX_SIZE_32));
      agx_xor_to(b, out, preload, wy);
   });
}
352 
/* 16-bit float ALU instructions cannot absorb size conversions: neither a
 * 32->16 converting fmov feeding a 16-bit fadd, nor a 16-bit fadd feeding a
 * 16->32 converting fmov, may be fused.
 */
TEST_F(Optimizer, NoConversionsOn16BitALU)
{
   NEGCASE16({
      agx_index cvt = agx_temp(b->shader, AGX_SIZE_16);
      agx_fmov_to(b, cvt, wx);
      agx_fadd_to(b, out, cvt, hy);
   });

   NEGCASE32(agx_fmov_to(b, out, agx_fadd(b, hx, hy)));
}
363 
/* A comparison feeding a (quad_)ballot fuses into the combined
 * icmp/fcmp_(quad_)ballot instruction, preserving condition and invert flag.
 */
TEST_F(Optimizer, BallotCondition)
{
   CASE32(agx_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GE, false)),
          agx_fcmp_ballot_to(b, out, wx, wy, AGX_FCOND_GE, false));

   CASE32(agx_quad_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_quad_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_quad_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GT, false)),
          agx_fcmp_quad_ballot_to(b, out, wx, wy, AGX_FCOND_GT, false));
}
378 
/* When the compare has another use besides the ballot, the ballot still fuses
 * (duplicating the comparison) while the original compare stays for its other
 * user.
 */
TEST_F(Optimizer, BallotMultipleUses)
{
   CASE32(
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, cmp);
         agx_fadd_to(b, out, cmp, ballot);
      },
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot =
            agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
         agx_fadd_to(b, out, cmp, ballot);
      });
}
394 
/*
 * We had a bug where the ballot optimization didn't check the agx_index's type
 * so would fuse constants with overlapping values. An unrelated common code
 * change surfaced this in CTS case:
 *
 *    dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bool_fragment
 *
 * We passed Vulkan CTS without hitting it though, hence the targeted test.
 */
/* An immediate whose raw value collides with the compare's SSA value must NOT
 * be fused: only the ballot of the actual compare result fuses, the ballot of
 * the same-valued immediate stays as-is.
 */
TEST_F(Optimizer, BallotConstant)
{
   CASE32(
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         /* Immediate deliberately aliases cmp.value but is not cmp */
         agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
         agx_index ballot2 = agx_quad_ballot(b, cmp);
         agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
      },
      {
         agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
         agx_index ballot2 =
            agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
         agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
      });
}
421 
/* A compare feeding `if (cond != 0)` fuses into if_icmp/if_fcmp with the
 * original comparison's operands, condition, and invert flag.
 */
TEST_F(Optimizer, IfCondition)
{
   CASE_NO_RETURN(agx_if_icmp(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, true, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, true, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, false, NULL));
}
436 
/* A compare feeding a select-against-zero fuses into icmpsel/fcmpsel on the
 * original operands.  Note the select arms: a non-inverted compare swaps
 * them, an inverted compare keeps them (the inversion is absorbed by the arm
 * order).
 */
TEST_F(Optimizer, SelectCondition)
{
   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wx, wz, AGX_ICOND_UEQ));

   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wz, wx, AGX_ICOND_UEQ));

   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wx, wz, AGX_FCOND_EQ));

   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_LT, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wz, wx, AGX_FCOND_LT));
}
455 
/* `xor cond, 1` is a boolean not: it folds into the if by flipping the
 * invert_cond flag, in both directions.
 */
TEST_F(Optimizer, IfInverted)
{
   CASE_NO_RETURN(
      agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(), 1,
                  AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, false, NULL));

   CASE_NO_RETURN(agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(),
                              1, AGX_ICOND_UEQ, false, NULL),
                  agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, true, NULL));
}
467 
/* Combination of the two previous optimizations: a compare, boolean-inverted
 * with `xor 1`, feeding an if fuses into if_icmp/if_fcmp with the compare's
 * invert flag flipped.
 */
TEST_F(Optimizer, IfInvertedCondition)
{
   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, false, NULL));

   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, false, NULL));

   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, true, NULL));
}
491