• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 #include "helpers.h"
25 
26 using namespace aco;
27 
28 BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
29    /* Registers of operands should be "recycled" for the output. But if the
30     * input is smaller than the output, that's not generally possible. The
31     * first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0
32     * while the lower 16 bits are still live, so the output must be stored in
33     * a register other than v0. For the second v_cvt_f32_f16, the original
34     * value stored in v0 is no longer used and hence it's safe to store the
35     * result in v0.
36     */
37 
38    for (chip_class cc = GFX8; cc < NUM_GFX_VERSIONS; cc = (chip_class)((unsigned)cc + 1)) {
39       for (bool pessimistic : { false, true }) {
40          const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
41 
42          //>> v1: %_:v[#a] = p_startpgm
43          if (!setup_cs("v1", (chip_class)cc, CHIP_UNKNOWN, subvariant))
44             return;
45 
46          //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
47          Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
48 
49          //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
50          //! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
51          //; success = (b != a)
52          auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp());
53          auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp());
54          writeout(0, result1);
55          writeout(1, result2);
56 
57          finish_ra_test(ra_test_policy { pessimistic });
58       }
59    }
60 END_TEST
61 
62 BEGIN_TEST(regalloc.32bit_partial_write)
63    //>> v1: %_:v[0] = p_startpgm
64    if (!setup_cs("v1", GFX10))
65       return;
66 
67    /* ensure high 16 bits are occupied */
68    //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
69    Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
70 
71    /* This test checks if this instruction uses SDWA. */
72    //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
73    Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero());
74 
75    //! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32]
76    bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);
77 
78    finish_ra_test(ra_test_policy());
79 END_TEST
80 
81 BEGIN_TEST(regalloc.precolor.swap)
82    //>> s2: %op0:s[0-1] = p_startpgm
83    if (!setup_cs("s2", GFX10))
84       return;
85 
86    program->dev.sgpr_limit = 4;
87 
88    //! s2: %op1:s[2-3] = p_unit_test
89    Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
90 
91    //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
92    //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
93    Operand op(inputs[0]);
94    op.setFixed(PhysReg(2));
95    bld.pseudo(aco_opcode::p_unit_test, op, op1);
96 
97    finish_ra_test(ra_test_policy());
98 END_TEST
99 
100 BEGIN_TEST(regalloc.precolor.blocking_vector)
101    //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm
102    if (!setup_cs("s2 s1", GFX10))
103       return;
104 
105    //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2]
106    //! p_unit_test %tmp1_2:s[1]
107    Operand op(inputs[1]);
108    op.setFixed(PhysReg(1));
109    bld.pseudo(aco_opcode::p_unit_test, op);
110 
111    //! p_unit_test %tmp0_2:s[2-3]
112    bld.pseudo(aco_opcode::p_unit_test, inputs[0]);
113 
114    finish_ra_test(ra_test_policy());
115 END_TEST
116 
117 BEGIN_TEST(regalloc.precolor.vector.test)
118    //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
119    if (!setup_cs("s2 s1 s1", GFX10))
120       return;
121 
122    //! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1]
123    //! p_unit_test %tmp0_2:s[2-3]
124    Operand op(inputs[0]);
125    op.setFixed(PhysReg(2));
126    bld.pseudo(aco_opcode::p_unit_test, op);
127 
128    //! p_unit_test %tmp2_2:s[0]
129    bld.pseudo(aco_opcode::p_unit_test, inputs[2]);
130 
131    finish_ra_test(ra_test_policy());
132 END_TEST
133 
134 BEGIN_TEST(regalloc.precolor.vector.collect)
135    //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
136    if (!setup_cs("s2 s1 s1", GFX10))
137       return;
138 
139    //! s1: %tmp2_2:s[0], s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp1:s[2], %tmp0:s[0-1]
140    //! p_unit_test %tmp0_2:s[2-3]
141    Operand op(inputs[0]);
142    op.setFixed(PhysReg(2));
143    bld.pseudo(aco_opcode::p_unit_test, op);
144 
145    //! p_unit_test %tmp1_2:s[1], %tmp2_2:s[0]
146    bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]);
147 
148    finish_ra_test(ra_test_policy());
149 END_TEST
150 
151 BEGIN_TEST(regalloc.scratch_sgpr.create_vector)
152    if (!setup_cs("v1 s1", GFX7))
153       return;
154 
155    Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::zero());
156 
157    //>> v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
158    //! s1: %0:s[1] = s_mov_b32 0x1000001
159    //! v1: %0:v[0] = v_mul_lo_u32 %0:s[1], %_:v[0][0:8]
160    bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand(v3b), Operand(tmp));
161 
162    //! p_unit_test %_:s[0]
163    //! s_endpgm
164    bld.pseudo(aco_opcode::p_unit_test, inputs[1]);
165 
166    finish_ra_test(ra_test_policy(), true);
167 END_TEST
168 
169 BEGIN_TEST(regalloc.scratch_sgpr.create_vector_sgpr_operand)
170    if (!setup_cs("v2 s1", GFX7))
171       return;
172 
173    Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(4u));
174 
175    //>> v1: %0:v[0] = v_mov_b32 %_:s[0]
176    //! v3b: %0:v[1][0:24] = v_and_b32 0xffffff, %0:v[1][0:24]
177    //! s1: %0:s[1] = s_mov_b32 0x1000001
178    //! v1: %0:v[1] = v_mul_lo_u32 %0:s[1], %_:v[1][0:8]
179    bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[1], Operand(v3b), Operand(tmp));
180 
181    //! p_unit_test %_:s[0]
182    //! s_endpgm
183    bld.pseudo(aco_opcode::p_unit_test, inputs[1]);
184 
185    finish_ra_test(ra_test_policy(), true);
186 END_TEST
187 
188 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.fixed_def)
189    //>> p_startpgm
190    if (!setup_cs("", GFX10))
191       return;
192 
193    PhysReg reg_v0{256};
194    PhysReg reg_v1{257};
195 
196    //! lv1: %tmp1:v[0] = p_unit_test
197    Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0));
198 
199    //! lv1: %tmp2:v[1] = p_parallelcopy %tmp1:v[0]
200    //! v1: %_:v[0] = p_unit_test
201    bld.pseudo(aco_opcode::p_unit_test, Definition(reg_v0, v1));
202 
203    //! p_unit_test %tmp2:v[1]
204    bld.pseudo(aco_opcode::p_unit_test, tmp);
205 
206    finish_ra_test(ra_test_policy());
207 END_TEST
208 
209 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
210    //>> p_startpgm
211    if (!setup_cs("", GFX10))
212       return;
213 
214    program->dev.vgpr_limit = 3;
215 
216    PhysReg reg_v1{257};
217 
218    //! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test
219    Temp s0_tmp = bld.tmp(s1);
220    Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1));
221 
222    //! lv1: %tmp1:v[1] = p_unit_test
223    Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1));
224 
225    //! lv1: %tmp2:v[2] = p_parallelcopy %tmp1:v[1]
226    //! v2: %_:v[0-1] = p_unit_test
227    bld.pseudo(aco_opcode::p_unit_test, bld.def(v2));
228 
229    //! p_unit_test %tmp2:v[2], %scc_tmp:scc, %1:s[0]
230    bld.pseudo(aco_opcode::p_unit_test, tmp, scc_tmp, s0_tmp);
231 
232    finish_ra_test(ra_test_policy());
233 
234    //>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1
235    Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo();
236    aco_print_instr(&parallelcopy, output);
237    fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg());
238 END_TEST
239 
240 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies)
241    //>> p_startpgm
242    if (!setup_cs("", GFX10))
243       return;
244 
245    program->dev.vgpr_limit = 6;
246 
247    PhysReg reg_v2{258};
248    PhysReg reg_v4{260};
249 
250    //! lv1: %lin_tmp1:v[4] = p_unit_test
251    Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v4));
252    //! v2: %log_tmp1:v[2-3] = p_unit_test
253    Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, reg_v2));
254 
255    //! lv1: %lin_tmp2:v[0], v2: %log_tmp2:v[4-5] = p_parallelcopy %lin_tmp1:v[4], %log_tmp1:v[2-3]
256    //! v3: %_:v[1-3] = p_unit_test
257    bld.pseudo(aco_opcode::p_unit_test, bld.def(v3));
258 
259    //! p_unit_test %log_tmp2:v[4-5], %lin_tmp2:v[0]
260    bld.pseudo(aco_opcode::p_unit_test, log_tmp, lin_tmp);
261 
262    finish_ra_test(ra_test_policy());
263 END_TEST
264 
265 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_create_vector)
266    //>> p_startpgm
267    if (!setup_cs("", GFX10))
268       return;
269 
270    program->dev.vgpr_limit = 4;
271 
272    PhysReg reg_v0{256};
273    PhysReg reg_v1{257};
274 
275    //! lv1: %lin_tmp1:v[0] = p_unit_test
276    Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0));
277    //! v1: %log_tmp:v[1] = p_unit_test
278    Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, reg_v1));
279 
280    //! lv1: %lin_tmp2:v[2] = p_parallelcopy %lin_tmp1:v[0]
281    //! v2: %_:v[0-1] = p_create_vector v1: undef, %log_tmp:v[1]
282    bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(v1), log_tmp);
283 
284    //! p_unit_test %lin_tmp2:v[2]
285    bld.pseudo(aco_opcode::p_unit_test, lin_tmp);
286 
287    finish_ra_test(ra_test_policy());
288 END_TEST
289