• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 #include "helpers.h"
25 
26 using namespace aco;
27 
28 BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
29    /* Registers of operands should be "recycled" for the output. But if the
30     * input is smaller than the output, that's not generally possible. The
31     * first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0
32     * while the lower 16 bits are still live, so the output must be stored in
33     * a register other than v0. For the second v_cvt_f32_f16, the original
34     * value stored in v0 is no longer used and hence it's safe to store the
35     * result in v0.
36     */
37 
38    /* TODO: is this possible to do on GFX11? */
39    for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) {
40       for (bool pessimistic : { false, true }) {
41          const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
42 
43          //>> v1: %_:v[#a] = p_startpgm
44          if (!setup_cs("v1", (amd_gfx_level)cc, CHIP_UNKNOWN, subvariant))
45             return;
46 
47          //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
48          Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
49 
50          //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
51          //! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
52          //; success = (b != a)
53          auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp());
54          auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp());
55          writeout(0, result1);
56          writeout(1, result2);
57 
58          finish_ra_test(ra_test_policy { pessimistic });
59       }
60    }
61 END_TEST
62 
63 BEGIN_TEST(regalloc.32bit_partial_write)
64    //>> v1: %_:v[0] = p_startpgm
65    if (!setup_cs("v1", GFX10))
66       return;
67 
68    /* ensure high 16 bits are occupied */
69    //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
70    Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
71 
72    /* This test checks if this instruction uses SDWA. */
73    //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
74    Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero());
75 
76    //! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32]
77    bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);
78 
79    finish_ra_test(ra_test_policy());
80 END_TEST
81 
82 BEGIN_TEST(regalloc.precolor.swap)
83    //>> s2: %op0:s[0-1] = p_startpgm
84    if (!setup_cs("s2", GFX10))
85       return;
86 
87    program->dev.sgpr_limit = 4;
88 
89    //! s2: %op1:s[2-3] = p_unit_test
90    Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
91 
92    //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
93    //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
94    Operand op(inputs[0]);
95    op.setFixed(PhysReg(2));
96    bld.pseudo(aco_opcode::p_unit_test, op, op1);
97 
98    finish_ra_test(ra_test_policy());
99 END_TEST
100 
101 BEGIN_TEST(regalloc.precolor.blocking_vector)
102    //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm
103    if (!setup_cs("s2 s1", GFX10))
104       return;
105 
106    //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2]
107    //! p_unit_test %tmp1_2:s[1]
108    Operand op(inputs[1]);
109    op.setFixed(PhysReg(1));
110    bld.pseudo(aco_opcode::p_unit_test, op);
111 
112    //! p_unit_test %tmp0_2:s[2-3]
113    bld.pseudo(aco_opcode::p_unit_test, inputs[0]);
114 
115    finish_ra_test(ra_test_policy());
116 END_TEST
117 
118 BEGIN_TEST(regalloc.precolor.vector.test)
119    //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
120    if (!setup_cs("s2 s1 s1", GFX10))
121       return;
122 
123    //! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1]
124    //! p_unit_test %tmp0_2:s[2-3]
125    Operand op(inputs[0]);
126    op.setFixed(PhysReg(2));
127    bld.pseudo(aco_opcode::p_unit_test, op);
128 
129    //! p_unit_test %tmp2_2:s[0]
130    bld.pseudo(aco_opcode::p_unit_test, inputs[2]);
131 
132    finish_ra_test(ra_test_policy());
133 END_TEST
134 
135 BEGIN_TEST(regalloc.precolor.vector.collect)
136    //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
137    if (!setup_cs("s2 s1 s1", GFX10))
138       return;
139 
140    //! s1: %tmp1_2:s[0], s1: %tmp2_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp2:s[3], %tmp0:s[0-1]
141    //! p_unit_test %tmp0_2:s[2-3]
142    Operand op(inputs[0]);
143    op.setFixed(PhysReg(2));
144    bld.pseudo(aco_opcode::p_unit_test, op);
145 
146    //! p_unit_test %tmp1_2:s[0], %tmp2_2:s[1]
147    bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]);
148 
149    finish_ra_test(ra_test_policy());
150 END_TEST
151 
152 BEGIN_TEST(regalloc.scratch_sgpr.create_vector)
153    if (!setup_cs("v1 s1", GFX7))
154       return;
155 
156    Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::zero());
157 
158    //>> v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
159    //! s1: %0:s[1] = s_mov_b32 0x1000001
160    //! v1: %0:v[0] = v_mul_lo_u32 %0:s[1], %_:v[0][0:8]
161    bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand(v3b), Operand(tmp));
162 
163    //! p_unit_test %_:s[0]
164    //! s_endpgm
165    bld.pseudo(aco_opcode::p_unit_test, inputs[1]);
166 
167    finish_ra_test(ra_test_policy(), true);
168 END_TEST
169 
170 BEGIN_TEST(regalloc.scratch_sgpr.create_vector_sgpr_operand)
171    if (!setup_cs("v2 s1", GFX7))
172       return;
173 
174    Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(4u));
175 
176    //>> v1: %0:v[0] = v_mov_b32 %_:s[0]
177    //! v3b: %0:v[1][0:24] = v_and_b32 0xffffff, %0:v[1][0:24]
178    //! s1: %0:s[1] = s_mov_b32 0x1000001
179    //! v1: %0:v[1] = v_mul_lo_u32 %0:s[1], %_:v[1][0:8]
180    bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[1], Operand(v3b), Operand(tmp));
181 
182    //! p_unit_test %_:s[0]
183    //! s_endpgm
184    bld.pseudo(aco_opcode::p_unit_test, inputs[1]);
185 
186    finish_ra_test(ra_test_policy(), true);
187 END_TEST
188 
189 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.fixed_def)
190    //>> p_startpgm
191    if (!setup_cs("", GFX10))
192       return;
193 
194    PhysReg reg_v0{256};
195    PhysReg reg_v1{257};
196 
197    //! lv1: %tmp1:v[0] = p_unit_test
198    Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0));
199 
200    //! lv1: %tmp2:v[1] = p_parallelcopy %tmp1:v[0]
201    //! v1: %_:v[0] = p_unit_test
202    bld.pseudo(aco_opcode::p_unit_test, Definition(reg_v0, v1));
203 
204    //! p_unit_test %tmp2:v[1]
205    bld.pseudo(aco_opcode::p_unit_test, tmp);
206 
207    finish_ra_test(ra_test_policy());
208 END_TEST
209 
210 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
211    //>> p_startpgm
212    if (!setup_cs("", GFX10))
213       return;
214 
215    program->dev.vgpr_limit = 3;
216 
217    PhysReg reg_v1{257};
218 
219    //! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test
220    Temp s0_tmp = bld.tmp(s1);
221    Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1));
222 
223    //! lv1: %tmp1:v[1] = p_unit_test
224    Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1));
225 
226    //! lv1: %tmp2:v[2] = p_parallelcopy %tmp1:v[1]
227    //! v2: %_:v[0-1] = p_unit_test
228    bld.pseudo(aco_opcode::p_unit_test, bld.def(v2));
229 
230    //! p_unit_test %tmp2:v[2], %scc_tmp:scc, %1:s[0]
231    bld.pseudo(aco_opcode::p_unit_test, tmp, scc_tmp, s0_tmp);
232 
233    finish_ra_test(ra_test_policy());
234 
235    //>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1
236    Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo();
237    aco_print_instr(&parallelcopy, output);
238    fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg());
239 END_TEST
240 
241 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies)
242    //>> p_startpgm
243    if (!setup_cs("", GFX10))
244       return;
245 
246    program->dev.vgpr_limit = 6;
247 
248    PhysReg reg_v2{258};
249    PhysReg reg_v4{260};
250 
251    //! lv1: %lin_tmp1:v[4] = p_unit_test
252    Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v4));
253    //! v2: %log_tmp1:v[2-3] = p_unit_test
254    Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, reg_v2));
255 
256    //! lv1: %lin_tmp2:v[0], v2: %log_tmp2:v[4-5] = p_parallelcopy %lin_tmp1:v[4], %log_tmp1:v[2-3]
257    //! v3: %_:v[1-3] = p_unit_test
258    bld.pseudo(aco_opcode::p_unit_test, bld.def(v3));
259 
260    //! p_unit_test %log_tmp2:v[4-5], %lin_tmp2:v[0]
261    bld.pseudo(aco_opcode::p_unit_test, log_tmp, lin_tmp);
262 
263    finish_ra_test(ra_test_policy());
264 END_TEST
265 
266 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_create_vector)
267    //>> p_startpgm
268    if (!setup_cs("", GFX10))
269       return;
270 
271    program->dev.vgpr_limit = 4;
272 
273    PhysReg reg_v0{256};
274    PhysReg reg_v1{257};
275 
276    //! lv1: %lin_tmp1:v[0] = p_unit_test
277    Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0));
278    //! v1: %log_tmp:v[1] = p_unit_test
279    Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, reg_v1));
280 
281    //! lv1: %lin_tmp2:v[2] = p_parallelcopy %lin_tmp1:v[0]
282    //! v2: %_:v[0-1] = p_create_vector v1: undef, %log_tmp:v[1]
283    bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(v1), log_tmp);
284 
285    //! p_unit_test %lin_tmp2:v[2]
286    bld.pseudo(aco_opcode::p_unit_test, lin_tmp);
287 
288    finish_ra_test(ra_test_policy());
289 END_TEST
290 
291 BEGIN_TEST(regalloc.branch_def_phis_at_merge_block)
292    //>> p_startpgm
293    if (!setup_cs("", GFX10))
294       return;
295 
296    //! s2: %_:s[2-3] = p_branch
297    bld.branch(aco_opcode::p_branch, bld.def(s2));
298 
299    //! BB1
300    //! /* logical preds: / linear preds: BB0, / kind: uniform, */
301    bld.reset(program->create_and_insert_block());
302    program->blocks[1].linear_preds.push_back(0);
303 
304    //! s2: %tmp:s[0-1] = p_linear_phi 0
305    Temp tmp = bld.pseudo(aco_opcode::p_linear_phi, bld.def(s2), Operand::c64(0u));
306 
307    //! p_unit_test %tmp:s[0-1]
308    bld.pseudo(aco_opcode::p_unit_test, tmp);
309 
310    finish_ra_test(ra_test_policy());
311 END_TEST
312 
313 BEGIN_TEST(regalloc.branch_def_phis_at_branch_block)
314    //>> p_startpgm
315    if (!setup_cs("", GFX10))
316       return;
317 
318    //! s2: %tmp:s[0-1] = p_unit_test
319    Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
320 
321    //! s2: %_:s[2-3] = p_cbranch_z %0:scc
322    bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1));
323 
324    //! BB1
325    //! /* logical preds: / linear preds: BB0, / kind: */
326    bld.reset(program->create_and_insert_block());
327    program->blocks[1].linear_preds.push_back(0);
328 
329    //! p_unit_test %tmp:s[0-1]
330    bld.pseudo(aco_opcode::p_unit_test, tmp);
331    bld.branch(aco_opcode::p_branch, bld.def(s2));
332 
333    bld.reset(program->create_and_insert_block());
334    program->blocks[2].linear_preds.push_back(0);
335 
336    bld.branch(aco_opcode::p_branch, bld.def(s2));
337 
338    bld.reset(program->create_and_insert_block());
339    program->blocks[3].linear_preds.push_back(1);
340    program->blocks[3].linear_preds.push_back(2);
341 
342    finish_ra_test(ra_test_policy());
343 END_TEST
344