1 /* 2 * Copyright © 2020 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 #include "helpers.h" 25 26 using namespace aco; 27 28 BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands) 29 /* Registers of operands should be "recycled" for the output. But if the 30 * input is smaller than the output, that's not generally possible. The 31 * first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0 32 * while the lower 16 bits are still live, so the output must be stored in 33 * a register other than v0. For the second v_cvt_f32_f16, the original 34 * value stored in v0 is no longer used and hence it's safe to store the 35 * result in v0. 36 */ 37 38 /* TODO: is this possible to do on GFX11? */ 39 for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) { 40 for (bool pessimistic : { false, true }) { 41 const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic"; 42 43 //>> v1: %_:v[#a] = p_startpgm 44 if (!setup_cs("v1", (amd_gfx_level)cc, CHIP_UNKNOWN, subvariant)) 45 return; 46 47 //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a] 48 Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]); 49 50 //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1 51 //! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16] 52 //; success = (b != a) 53 auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp()); 54 auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp()); 55 writeout(0, result1); 56 writeout(1, result2); 57 58 finish_ra_test(ra_test_policy { pessimistic }); 59 } 60 } 61 END_TEST 62 63 BEGIN_TEST(regalloc.32bit_partial_write) 64 //>> v1: %_:v[0] = p_startpgm 65 if (!setup_cs("v1", GFX10)) 66 return; 67 68 /* ensure high 16 bits are occupied */ 69 //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0] 70 Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp(); 71 72 /* This test checks if this instruction uses SDWA. */ 73 //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword 74 Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero()); 75 76 //! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32] 77 bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi); 78 79 finish_ra_test(ra_test_policy()); 80 END_TEST 81 82 BEGIN_TEST(regalloc.precolor.swap) 83 //>> s2: %op0:s[0-1] = p_startpgm 84 if (!setup_cs("s2", GFX10)) 85 return; 86 87 program->dev.sgpr_limit = 4; 88 89 //! s2: %op1:s[2-3] = p_unit_test 90 Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2)); 91 92 //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1] 93 //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1] 94 Operand op(inputs[0]); 95 op.setFixed(PhysReg(2)); 96 bld.pseudo(aco_opcode::p_unit_test, op, op1); 97 98 finish_ra_test(ra_test_policy()); 99 END_TEST 100 101 BEGIN_TEST(regalloc.precolor.blocking_vector) 102 //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm 103 if (!setup_cs("s2 s1", GFX10)) 104 return; 105 106 //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2] 107 //! p_unit_test %tmp1_2:s[1] 108 Operand op(inputs[1]); 109 op.setFixed(PhysReg(1)); 110 bld.pseudo(aco_opcode::p_unit_test, op); 111 112 //! p_unit_test %tmp0_2:s[2-3] 113 bld.pseudo(aco_opcode::p_unit_test, inputs[0]); 114 115 finish_ra_test(ra_test_policy()); 116 END_TEST 117 118 BEGIN_TEST(regalloc.precolor.vector.test) 119 //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm 120 if (!setup_cs("s2 s1 s1", GFX10)) 121 return; 122 123 //! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1] 124 //! p_unit_test %tmp0_2:s[2-3] 125 Operand op(inputs[0]); 126 op.setFixed(PhysReg(2)); 127 bld.pseudo(aco_opcode::p_unit_test, op); 128 129 //! p_unit_test %tmp2_2:s[0] 130 bld.pseudo(aco_opcode::p_unit_test, inputs[2]); 131 132 finish_ra_test(ra_test_policy()); 133 END_TEST 134 135 BEGIN_TEST(regalloc.precolor.vector.collect) 136 //>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm 137 if (!setup_cs("s2 s1 s1", GFX10)) 138 return; 139 140 //! s1: %tmp1_2:s[0], s1: %tmp2_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp2:s[3], %tmp0:s[0-1] 141 //! p_unit_test %tmp0_2:s[2-3] 142 Operand op(inputs[0]); 143 op.setFixed(PhysReg(2)); 144 bld.pseudo(aco_opcode::p_unit_test, op); 145 146 //! p_unit_test %tmp1_2:s[0], %tmp2_2:s[1] 147 bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]); 148 149 finish_ra_test(ra_test_policy()); 150 END_TEST 151 152 BEGIN_TEST(regalloc.scratch_sgpr.create_vector) 153 if (!setup_cs("v1 s1", GFX7)) 154 return; 155 156 Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::zero()); 157 158 //>> v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24] 159 //! s1: %0:s[1] = s_mov_b32 0x1000001 160 //! v1: %0:v[0] = v_mul_lo_u32 %0:s[1], %_:v[0][0:8] 161 bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand(v3b), Operand(tmp)); 162 163 //! p_unit_test %_:s[0] 164 //! s_endpgm 165 bld.pseudo(aco_opcode::p_unit_test, inputs[1]); 166 167 finish_ra_test(ra_test_policy(), true); 168 END_TEST 169 170 BEGIN_TEST(regalloc.scratch_sgpr.create_vector_sgpr_operand) 171 if (!setup_cs("v2 s1", GFX7)) 172 return; 173 174 Temp tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(4u)); 175 176 //>> v1: %0:v[0] = v_mov_b32 %_:s[0] 177 //! v3b: %0:v[1][0:24] = v_and_b32 0xffffff, %0:v[1][0:24] 178 //! s1: %0:s[1] = s_mov_b32 0x1000001 179 //! v1: %0:v[1] = v_mul_lo_u32 %0:s[1], %_:v[1][0:8] 180 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[1], Operand(v3b), Operand(tmp)); 181 182 //! p_unit_test %_:s[0] 183 //! s_endpgm 184 bld.pseudo(aco_opcode::p_unit_test, inputs[1]); 185 186 finish_ra_test(ra_test_policy(), true); 187 END_TEST 188 189 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.fixed_def) 190 //>> p_startpgm 191 if (!setup_cs("", GFX10)) 192 return; 193 194 PhysReg reg_v0{256}; 195 PhysReg reg_v1{257}; 196 197 //! lv1: %tmp1:v[0] = p_unit_test 198 Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0)); 199 200 //! lv1: %tmp2:v[1] = p_parallelcopy %tmp1:v[0] 201 //! v1: %_:v[0] = p_unit_test 202 bld.pseudo(aco_opcode::p_unit_test, Definition(reg_v0, v1)); 203 204 //! p_unit_test %tmp2:v[1] 205 bld.pseudo(aco_opcode::p_unit_test, tmp); 206 207 finish_ra_test(ra_test_policy()); 208 END_TEST 209 210 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl) 211 //>> p_startpgm 212 if (!setup_cs("", GFX10)) 213 return; 214 215 program->dev.vgpr_limit = 3; 216 217 PhysReg reg_v1{257}; 218 219 //! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test 220 Temp s0_tmp = bld.tmp(s1); 221 Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1)); 222 223 //! lv1: %tmp1:v[1] = p_unit_test 224 Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1)); 225 226 //! lv1: %tmp2:v[2] = p_parallelcopy %tmp1:v[1] 227 //! v2: %_:v[0-1] = p_unit_test 228 bld.pseudo(aco_opcode::p_unit_test, bld.def(v2)); 229 230 //! p_unit_test %tmp2:v[2], %scc_tmp:scc, %1:s[0] 231 bld.pseudo(aco_opcode::p_unit_test, tmp, scc_tmp, s0_tmp); 232 233 finish_ra_test(ra_test_policy()); 234 235 //>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1 236 Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo(); 237 aco_print_instr(¶llelcopy, output); 238 fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg()); 239 END_TEST 240 241 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies) 242 //>> p_startpgm 243 if (!setup_cs("", GFX10)) 244 return; 245 246 program->dev.vgpr_limit = 6; 247 248 PhysReg reg_v2{258}; 249 PhysReg reg_v4{260}; 250 251 //! lv1: %lin_tmp1:v[4] = p_unit_test 252 Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v4)); 253 //! v2: %log_tmp1:v[2-3] = p_unit_test 254 Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, reg_v2)); 255 256 //! lv1: %lin_tmp2:v[0], v2: %log_tmp2:v[4-5] = p_parallelcopy %lin_tmp1:v[4], %log_tmp1:v[2-3] 257 //! v3: %_:v[1-3] = p_unit_test 258 bld.pseudo(aco_opcode::p_unit_test, bld.def(v3)); 259 260 //! p_unit_test %log_tmp2:v[4-5], %lin_tmp2:v[0] 261 bld.pseudo(aco_opcode::p_unit_test, log_tmp, lin_tmp); 262 263 finish_ra_test(ra_test_policy()); 264 END_TEST 265 266 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_create_vector) 267 //>> p_startpgm 268 if (!setup_cs("", GFX10)) 269 return; 270 271 program->dev.vgpr_limit = 4; 272 273 PhysReg reg_v0{256}; 274 PhysReg reg_v1{257}; 275 276 //! lv1: %lin_tmp1:v[0] = p_unit_test 277 Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0)); 278 //! v1: %log_tmp:v[1] = p_unit_test 279 Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, reg_v1)); 280 281 //! lv1: %lin_tmp2:v[2] = p_parallelcopy %lin_tmp1:v[0] 282 //! v2: %_:v[0-1] = p_create_vector v1: undef, %log_tmp:v[1] 283 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(v1), log_tmp); 284 285 //! p_unit_test %lin_tmp2:v[2] 286 bld.pseudo(aco_opcode::p_unit_test, lin_tmp); 287 288 finish_ra_test(ra_test_policy()); 289 END_TEST 290 291 BEGIN_TEST(regalloc.branch_def_phis_at_merge_block) 292 //>> p_startpgm 293 if (!setup_cs("", GFX10)) 294 return; 295 296 //! s2: %_:s[2-3] = p_branch 297 bld.branch(aco_opcode::p_branch, bld.def(s2)); 298 299 //! BB1 300 //! /* logical preds: / linear preds: BB0, / kind: uniform, */ 301 bld.reset(program->create_and_insert_block()); 302 program->blocks[1].linear_preds.push_back(0); 303 304 //! s2: %tmp:s[0-1] = p_linear_phi 0 305 Temp tmp = bld.pseudo(aco_opcode::p_linear_phi, bld.def(s2), Operand::c64(0u)); 306 307 //! p_unit_test %tmp:s[0-1] 308 bld.pseudo(aco_opcode::p_unit_test, tmp); 309 310 finish_ra_test(ra_test_policy()); 311 END_TEST 312 313 BEGIN_TEST(regalloc.branch_def_phis_at_branch_block) 314 //>> p_startpgm 315 if (!setup_cs("", GFX10)) 316 return; 317 318 //! s2: %tmp:s[0-1] = p_unit_test 319 Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2)); 320 321 //! s2: %_:s[2-3] = p_cbranch_z %0:scc 322 bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1)); 323 324 //! BB1 325 //! /* logical preds: / linear preds: BB0, / kind: */ 326 bld.reset(program->create_and_insert_block()); 327 program->blocks[1].linear_preds.push_back(0); 328 329 //! p_unit_test %tmp:s[0-1] 330 bld.pseudo(aco_opcode::p_unit_test, tmp); 331 bld.branch(aco_opcode::p_branch, bld.def(s2)); 332 333 bld.reset(program->create_and_insert_block()); 334 program->blocks[2].linear_preds.push_back(0); 335 336 bld.branch(aco_opcode::p_branch, bld.def(s2)); 337 338 bld.reset(program->create_and_insert_block()); 339 program->blocks[3].linear_preds.push_back(1); 340 program->blocks[3].linear_preds.push_back(2); 341 342 finish_ra_test(ra_test_policy()); 343 END_TEST 344