1 /* 2 * Copyright © 2020 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 #include "helpers.h" 25 26 using namespace aco; 27 28 BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands) 29 /* Registers of operands should be "recycled" for the output. But if the 30 * input is smaller than the output, that's not generally possible. The 31 * first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0 32 * while the lower 16 bits are still live, so the output must be stored in 33 * a register other than v0. For the second v_cvt_f32_f16, the original 34 * value stored in v0 is no longer used and hence it's safe to store the 35 * result in v0. 36 */ 37 38 for (chip_class cc = GFX8; cc < NUM_GFX_VERSIONS; cc = (chip_class)((unsigned)cc + 1)) { 39 for (bool pessimistic : { false, true }) { 40 const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic"; 41 42 //>> v1: %_:v[#a] = p_startpgm 43 if (!setup_cs("v1", (chip_class)cc, CHIP_UNKNOWN, subvariant)) 44 return; 45 46 //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a] 47 Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]); 48 49 //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1 50 //! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16] 51 //; success = (b != a) 52 auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp()); 53 auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp()); 54 writeout(0, result1); 55 writeout(1, result2); 56 57 finish_ra_test(ra_test_policy { pessimistic }); 58 } 59 } 60 END_TEST 61 62 BEGIN_TEST(regalloc.32bit_partial_write) 63 //>> v1: %_:v[0] = p_startpgm 64 if (!setup_cs("v1", GFX10)) 65 return; 66 67 /* ensure high 16 bits are occupied */ 68 //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0] 69 Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp(); 70 71 /* This test checks if this instruction uses SDWA. */ 72 //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword 73 Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero()); 74 75 //! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32] 76 bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi); 77 78 finish_ra_test(ra_test_policy()); 79 END_TEST 80 81 BEGIN_TEST(regalloc.precolor.swap) 82 //>> s2: %op0:s[0-1] = p_startpgm 83 if (!setup_cs("s2", GFX10)) 84 return; 85 86 program->dev.sgpr_limit = 4; 87 88 //! s2: %op1:s[2-3] = p_unit_test 89 Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2)); 90 91 //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1] 92 //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1] 93 Operand op(inputs[0]); 94 op.setFixed(PhysReg(2)); 95 bld.pseudo(aco_opcode::p_unit_test, op, op1); 96 97 finish_ra_test(ra_test_policy()); 98 END_TEST 99 100 BEGIN_TEST(regalloc.precolor.blocking_vector) 101 //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm 102 if (!setup_cs("s2 s1", GFX10)) 103 return; 104 105 //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2] 106 //! p_unit_test %tmp1_2:s[1] 107 Operand op(inputs[1]); 108 op.setFixed(PhysReg(1)); 109 bld.pseudo(aco_opcode::p_unit_test, op); 110 111 //! p_unit_test %tmp0_2:s[2-3] 112 bld.pseudo(aco_opcode::p_unit_test, inputs[0]); 113 114 finish_ra_test(ra_test_policy()); 115 END_TEST 116 117 BEGIN_TEST(regalloc.precolor.vector.test) 118 //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm 119 if (!setup_cs("s2 s1 s1", GFX10)) 120 return; 121 122 //! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1] 123 //! p_unit_test %tmp0_2:s[2-3] 124 Operand op(inputs[0]); 125 op.setFixed(PhysReg(2)); 126 bld.pseudo(aco_opcode::p_unit_test, op); 127 128 //! p_unit_test %tmp2_2:s[0] 129 bld.pseudo(aco_opcode::p_unit_test, inputs[2]); 130 131 finish_ra_test(ra_test_policy()); 132 END_TEST 133 134 BEGIN_TEST(regalloc.precolor.vector.collect) 135 //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm 136 if (!setup_cs("s2 s1 s1", GFX10)) 137 return; 138 139 //! s1: %tmp2_2:s[0], s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp1:s[2], %tmp0:s[0-1] 140 //! p_unit_test %tmp0_2:s[2-3] 141 Operand op(inputs[0]); 142 op.setFixed(PhysReg(2)); 143 bld.pseudo(aco_opcode::p_unit_test, op); 144 145 //! p_unit_test %tmp1_2:s[1], %tmp2_2:s[0] 146 bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]); 147 148 finish_ra_test(ra_test_policy()); 149 END_TEST 150 151 BEGIN_TEST(regalloc.scratch_sgpr.create_vector) 152 if (!setup_cs("v1 s1", GFX7)) 153 return; 154 155 Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::zero()); 156 157 //>> v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24] 158 //! s1: %0:s[1] = s_mov_b32 0x1000001 159 //! v1: %0:v[0] = v_mul_lo_u32 %0:s[1], %_:v[0][0:8] 160 bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand(v3b), Operand(tmp)); 161 162 //! p_unit_test %_:s[0] 163 //! s_endpgm 164 bld.pseudo(aco_opcode::p_unit_test, inputs[1]); 165 166 finish_ra_test(ra_test_policy(), true); 167 END_TEST 168 169 BEGIN_TEST(regalloc.scratch_sgpr.create_vector_sgpr_operand) 170 if (!setup_cs("v2 s1", GFX7)) 171 return; 172 173 Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(4u)); 174 175 //>> v1: %0:v[0] = v_mov_b32 %_:s[0] 176 //! v3b: %0:v[1][0:24] = v_and_b32 0xffffff, %0:v[1][0:24] 177 //! s1: %0:s[1] = s_mov_b32 0x1000001 178 //! v1: %0:v[1] = v_mul_lo_u32 %0:s[1], %_:v[1][0:8] 179 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[1], Operand(v3b), Operand(tmp)); 180 181 //! p_unit_test %_:s[0] 182 //! s_endpgm 183 bld.pseudo(aco_opcode::p_unit_test, inputs[1]); 184 185 finish_ra_test(ra_test_policy(), true); 186 END_TEST 187 188 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.fixed_def) 189 //>> p_startpgm 190 if (!setup_cs("", GFX10)) 191 return; 192 193 PhysReg reg_v0{256}; 194 PhysReg reg_v1{257}; 195 196 //! lv1: %tmp1:v[0] = p_unit_test 197 Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0)); 198 199 //! lv1: %tmp2:v[1] = p_parallelcopy %tmp1:v[0] 200 //! v1: %_:v[0] = p_unit_test 201 bld.pseudo(aco_opcode::p_unit_test, Definition(reg_v0, v1)); 202 203 //! p_unit_test %tmp2:v[1] 204 bld.pseudo(aco_opcode::p_unit_test, tmp); 205 206 finish_ra_test(ra_test_policy()); 207 END_TEST 208 209 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl) 210 //>> p_startpgm 211 if (!setup_cs("", GFX10)) 212 return; 213 214 program->dev.vgpr_limit = 3; 215 216 PhysReg reg_v1{257}; 217 218 //! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test 219 Temp s0_tmp = bld.tmp(s1); 220 Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1)); 221 222 //! lv1: %tmp1:v[1] = p_unit_test 223 Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1)); 224 225 //! lv1: %tmp2:v[2] = p_parallelcopy %tmp1:v[1] 226 //! v2: %_:v[0-1] = p_unit_test 227 bld.pseudo(aco_opcode::p_unit_test, bld.def(v2)); 228 229 //! p_unit_test %tmp2:v[2], %scc_tmp:scc, %1:s[0] 230 bld.pseudo(aco_opcode::p_unit_test, tmp, scc_tmp, s0_tmp); 231 232 finish_ra_test(ra_test_policy()); 233 234 //>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1 235 Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo(); 236 aco_print_instr(¶llelcopy, output); 237 fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg()); 238 END_TEST 239 240 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies) 241 //>> p_startpgm 242 if (!setup_cs("", GFX10)) 243 return; 244 245 program->dev.vgpr_limit = 6; 246 247 PhysReg reg_v2{258}; 248 PhysReg reg_v4{260}; 249 250 //! lv1: %lin_tmp1:v[4] = p_unit_test 251 Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v4)); 252 //! v2: %log_tmp1:v[2-3] = p_unit_test 253 Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, reg_v2)); 254 255 //! lv1: %lin_tmp2:v[0], v2: %log_tmp2:v[4-5] = p_parallelcopy %lin_tmp1:v[4], %log_tmp1:v[2-3] 256 //! v3: %_:v[1-3] = p_unit_test 257 bld.pseudo(aco_opcode::p_unit_test, bld.def(v3)); 258 259 //! p_unit_test %log_tmp2:v[4-5], %lin_tmp2:v[0] 260 bld.pseudo(aco_opcode::p_unit_test, log_tmp, lin_tmp); 261 262 finish_ra_test(ra_test_policy()); 263 END_TEST 264 265 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_create_vector) 266 //>> p_startpgm 267 if (!setup_cs("", GFX10)) 268 return; 269 270 program->dev.vgpr_limit = 4; 271 272 PhysReg reg_v0{256}; 273 PhysReg reg_v1{257}; 274 275 //! lv1: %lin_tmp1:v[0] = p_unit_test 276 Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0)); 277 //! v1: %log_tmp:v[1] = p_unit_test 278 Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, reg_v1)); 279 280 //! lv1: %lin_tmp2:v[2] = p_parallelcopy %lin_tmp1:v[0] 281 //! v2: %_:v[0-1] = p_create_vector v1: undef, %log_tmp:v[1] 282 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(v1), log_tmp); 283 284 //! p_unit_test %lin_tmp2:v[2] 285 bld.pseudo(aco_opcode::p_unit_test, lin_tmp); 286 287 finish_ra_test(ra_test_policy()); 288 END_TEST 289