/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include "helpers.h"

using namespace aco;

/*
 * Sub-dword swap/copy test.
 *
 * Each case emits a p_unit_test marker carrying the case index, followed by
 * one p_parallelcopy / p_create_vector / p_split_vector pseudo-instruction
 * whose definitions and operands overlap at 16-bit or 8-bit granularity.
 * The line comments directly above each case list the instruction sequence
 * expected for it; they are presumably matched against the output produced
 * by finish_to_hw_instr_test() (TODO confirm against the test harness), so
 * their text must not be edited.
 *
 * Two groups of hardware generations are covered separately because their
 * expected sequences differ: GFX6-GFX7 and GFX8-GFX9.
 */
BEGIN_TEST(to_hw_instr.swap_subdword)
   /* Register aliases. Judging by the expected output, register 256 is
    * printed as v[0], 257 as v[1], and so on. */
   PhysReg v0_lo{256};
   PhysReg v0_hi{256};
   PhysReg v0_b1{256};
   PhysReg v0_b3{256};
   PhysReg v1_lo{257};
   PhysReg v1_hi{257};
   PhysReg v1_b1{257};
   PhysReg v1_b3{257};
   PhysReg v2_lo{258};
   PhysReg v3_lo{259};
   /* Advance reg_b so the *_hi / *_b1 / *_b3 aliases start 2 / 1 / 3 bytes
    * into their register (a v2b at v0_hi is printed as v[0][16:32]). */
   v0_hi.reg_b += 2;
   v1_hi.reg_b += 2;
   v0_b1.reg_b += 1;
   v1_b1.reg_b += 1;
   v0_b3.reg_b += 3;
   v1_b3.reg_b += 3;

   /* GFX6 and GFX7: the expected sequences below use only full-register
    * VALU instructions (xor, shifts, v_alignbyte_b32, ...). */
   for (unsigned i = GFX6; i <= GFX7; i++) {
      /* Skip this generation when setup_cs() reports failure. */
      if (!setup_cs(NULL, (chip_class)i))
         continue;

      /* Case 0: swap the low 16 bits of v0 and v1. */
      //~gfx[67]>> p_unit_test 0
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v1_lo, v2b),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b));

      /* Case 1: build dword v0 from two 16-bit halves (v1_lo, v0_lo); the
       * second source is the low half of the destination itself. */
      //~gfx[67]! p_unit_test 1
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v1),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b));

      /* Case 2: as case 1, with a third 16-bit element (v2_lo) making a
       * 6-byte vector. */
      //~gfx[67]! p_unit_test 2
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v6b), Operand(v1_lo, v2b),
                 Operand(v0_lo, v2b), Operand(v2_lo, v2b));

      /* Case 3: four 16-bit elements (v1_lo, v0_lo, v2_lo, v3_lo) into the
       * two-dword vector starting at v0. */
      //~gfx[67]! p_unit_test 3
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
      //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v2),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b),
                 Operand(v2_lo, v2b), Operand(v3_lo, v2b));

      /* Case 4: as case 3 but with element order (v1_lo, v2_lo, v0_lo,
       * v3_lo); the expected sequence ends with a full swap of v0 and v1. */
      //~gfx[67]! p_unit_test 4
      //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
      //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[2][0:16], %0:v[1][16:32], 2
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v2),
                 Operand(v1_lo, v2b), Operand(v2_lo, v2b),
                 Operand(v0_lo, v2b), Operand(v3_lo, v2b));

      /* Case 5: split dword v0 into two 16-bit halves written to v1_lo and
       * v0_lo (the second destination overlaps the source). */
      //~gfx[67]! p_unit_test 5
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Operand(v0_lo, v1));

      /* Case 6: split a 6-byte vector at v0 into three 16-bit pieces. */
      //~gfx[67]! p_unit_test 6
      //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Definition(v2_lo, v2b), Operand(v0_lo, v6b));

      /* Case 7: split the two-dword vector at v0 into four 16-bit pieces
       * (v1_lo, v0_lo, v2_lo, v3_lo). */
      //~gfx[67]! p_unit_test 7
      //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Definition(v2_lo, v2b), Definition(v3_lo, v2b),
                 Operand(v0_lo, v2));

      /* Case 8: as case 7 but with destination order (v1_lo, v2_lo, v0_lo,
       * v3_lo); the expected sequence ends with a full swap of v0 and v1. */
      //~gfx[67]! p_unit_test 8
      //~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
      //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v2_lo, v2b),
                 Definition(v0_lo, v2b), Definition(v3_lo, v2b),
                 Operand(v0_lo, v2));

      /* Case 9: swap the low bytes of v0 and v1. */
      //~gfx[67]! p_unit_test 9
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1b), Definition(v1_lo, v1b),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b));

      /* Case 10: build a 16-bit value at v0 from two bytes (v1_lo, v0_lo). */
      //~gfx[67]! p_unit_test 10
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v2b),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b));

      /* Case 11: as case 10 with a third byte (v2_lo), giving a 3-byte
       * vector. */
      //~gfx[67]! p_unit_test 11
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v3b), Operand(v1_lo, v1b),
                 Operand(v0_lo, v1b), Operand(v2_lo, v1b));

      /* Case 12: as case 11 with a fourth byte (v3_lo), filling dword v0. */
      //~gfx[67]! p_unit_test 12
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
      //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
      bld.pseudo(aco_opcode::p_unit_test, Operand(12u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v1),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b),
                 Operand(v2_lo, v1b), Operand(v3_lo, v1b));

      /* Case 13: build dword v0 from four copies of its own low byte. */
      //~gfx[67]! p_unit_test 13
      //~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mul_u32_u24 0x101, %0:v[0][0:8]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_and_b32 0xffff, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_cvt_pk_u16_u32 %0:v[0][0:16], %0:v[0][0:8]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
      //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
      //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
      bld.pseudo(aco_opcode::p_unit_test, Operand(13u));
      Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
                                       Definition(v0_lo, v1),
                                       Operand(v0_lo, v1b), Operand(v0_lo, v1b),
                                       Operand(v0_lo, v1b), Operand(v0_lo, v1b));
      /* Name m0 as this pseudo-instruction's scratch SGPR; the expected
       * sequence above loads the constant 0x1000001 into m0. */
      static_cast<Pseudo_instruction*>(pseudo)->scratch_sgpr = m0;

      /* Case 14: split the low 16 bits of v0 into two bytes (v1_lo, v0_lo). */
      //~gfx[67]! p_unit_test 14
      //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
      //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand(14u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v1b), Definition(v0_lo, v1b),
                 Operand(v0_lo, v2b));

      /* Case 15: split dword v0 into four bytes (v1_lo, v0_lo, v2_lo,
       * v3_lo). */
      //~gfx[67]! p_unit_test 15
      //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
      //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
      //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
      //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand(15u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v1b), Definition(v0_lo, v1b),
                 Definition(v2_lo, v1b), Definition(v3_lo, v1b),
                 Operand(v0_lo, v1));

      //~gfx[67]! s_endpgm

      finish_to_hw_instr_test();
   }

   /* GFX8 and GFX9: the expected sequences use partial-register writes
    * (marked dst_preserve); GFX9 additionally expects v_swap_b32 and
    * v_pack_b32_f16 where GFX8 expects xor swaps / v_alignbyte_b32. */
   for (unsigned i = GFX8; i <= GFX9; i++) {
      /* Skip this generation when setup_cs() reports failure. */
      if (!setup_cs(NULL, (chip_class)i))
         continue;

      /* Case 0: swap the two 16-bit halves of v0. */
      //~gfx[89]>> p_unit_test 0
      //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx9! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand(v0_hi, v2b), Operand(v0_lo, v2b));

      /* Case 1: dword copy v1 -> v0 together with a 16-bit copy
       * v0_lo -> v1_lo. */
      //~gfx[89]! p_unit_test 1
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v2b),
                 Operand(v1_lo, v1), Operand(v0_lo, v2b));

      /* Case 2: as case 1, and v0_lo is additionally copied to v1_hi. */
      //~gfx[89]! p_unit_test 2
      //~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_preserve
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_preserve
      //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
      //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
      //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
                 Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));

      /* Case 3: dword copy v1 -> v0 together with a copy of byte 3 of v0
       * into byte 3 of v1. */
      //~gfx[89]! p_unit_test 3
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_preserve
      //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_b3, v1b),
                 Operand(v1_lo, v1), Operand(v0_b3, v1b));

      /* Case 4: dword copy v1 -> v0 together with a copy of byte 0 of v0
       * into byte 0 of v1. */
      //~gfx[89]! p_unit_test 4
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_preserve
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v1b),
                 Operand(v1_lo, v1), Operand(v0_lo, v1b));

      /* Case 5: bytes 0 and 2 of v1 are copied into bytes 0 and 2 of v0
       * while dword v0 is copied to v1. */
      //~gfx[89]! p_unit_test 5
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
      //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_preserve
      //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
                 Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));

      /* Case 6: both halves of v1 are copied to the matching halves of v0
       * while dword v0 is copied to v1; a single full swap is expected. */
      //~gfx[89]! p_unit_test 6
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
                 Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));

      /* Case 7: as case 6 but the halves of v1 land in v0 crossed
       * (v1_hi -> v0_lo, v1_lo -> v0_hi). */
      //~gfx[89]! p_unit_test 7
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
      //~gfx[89]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
                 Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));

      /* Case 8: swap the low three bytes of v0 and v1. */
      //~gfx[89]! p_unit_test 8
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
      //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
      //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v3b), Definition(v1_lo, v3b),
                 Operand(v1_lo, v3b), Operand(v0_lo, v3b));

      /* Case 9: as case 8, and byte 3 of v1 is also copied to byte 3 of
       * v0. */
      //~gfx[89]! p_unit_test 9
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
                 Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));

      /* Case 10: swap the 16-bit values starting at byte 1 of v0 and v1;
       * the expected sequence swaps them one byte at a time. */
      //~gfx[89]! p_unit_test 10
      //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve
      //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve
      //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve
      //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
      //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
      //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_b1, v2b), Definition(v1_b1, v2b),
                 Operand(v1_b1, v2b), Operand(v0_b1, v2b));

      /* Case 11: v0 receives the constant 42 while v0_hi is copied to
       * v1_lo; the expected sequence reads v0_hi before overwriting v0. */
      //~gfx[89]! p_unit_test 11
      //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_preserve
      //~gfx[89]! v1: %0:v[0] = v_mov_b32 42
      bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v2b),
                 Operand(42u), Operand(v0_hi, v2b));

      //~gfx[89]! s_endpgm

      finish_to_hw_instr_test();
   }
END_TEST

/*
 * Sub-dword constant copy test.
 *
 * Each case emits a p_unit_test marker followed by a p_parallelcopy that
 * writes a 16-bit or 8-bit constant into part of v0, optionally together
 * with a 16-bit register copy into the other half. The line comments above
 * each case list the expected instruction sequence for GFX9 and GFX10;
 * lines without a gfx9/gfx10 prefix are expected on both. Their text must
 * not be edited.
 */
BEGIN_TEST(to_hw_instr.subdword_constant)
   /* Register aliases: v0_hi / v1_hi start 2 bytes and v0_b1 starts 1 byte
    * into register 256 / 257 (printed as v[0] / v[1] in the expectations). */
   PhysReg v0_lo{256};
   PhysReg v0_hi{256};
   PhysReg v0_b1{256};
   PhysReg v1_hi{257};
   v0_hi.reg_b += 2;
   v0_b1.reg_b += 1;
   v1_hi.reg_b += 2;

   for (unsigned i = GFX9; i <= GFX10; i++) {
      /* Skip this generation when setup_cs() reports failure. */
      if (!setup_cs(NULL, (chip_class)i))
         continue;

      /* 16-bit pack */
      /* Case 0: constant 0x3800 (shown as 0.5 in the expected output) into
       * v0_lo, and v1_hi into v0_hi. */
      //>> p_unit_test 0
      //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand((uint16_t)0x3800), Operand(v1_hi, v2b));

      /* Case 1: as case 0 with constant 0x4205, which appears as a literal
       * in the expected output; GFX9 and GFX10 expectations differ. */
      //! p_unit_test 1
      //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]
      //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]
      //~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand((uint16_t)0x4205), Operand(v1_hi, v2b));

      /* Case 2: constant 0x4205 into v0_lo while the old v0_lo is copied to
       * v0_hi. */
      //! p_unit_test 2
      //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
      //~gfx10! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand((uint16_t)0x4205), Operand(v0_lo, v2b));

      /* Cases 3-5: two 16-bit constants into v0_lo and v0_hi; a single
       * 32-bit move of the combined value is expected. */
      //! p_unit_test 3
      //! v1: %_:v[0] = v_mov_b32 0x3c003800
      bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand((uint16_t)0x3800), Operand((uint16_t)0x3c00));

      //! p_unit_test 4
      //! v1: %_:v[0] = v_mov_b32 0x43064205
      bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand((uint16_t)0x4205), Operand((uint16_t)0x4306));

      //! p_unit_test 5
      //! v1: %_:v[0] = v_mov_b32 0x38004205
      bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand((uint16_t)0x4205), Operand((uint16_t)0x3800));

      /* 16-bit copy */
      /* Case 6: constant 0x3800 into v0_lo only. */
      //! p_unit_test 6
      //! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Operand((uint16_t)0x3800));

      /* Case 7: constant 0x4205 into v0_lo only. */
      //! p_unit_test 7
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
      //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Operand((uint16_t)0x4205));

      /* Case 8: constant 0x4205 into v0_hi only. */
      //! p_unit_test 8
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
      //~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205
      bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_hi, v2b), Operand((uint16_t)0x4205));

      /* Cases 9-10: 16-bit constant written at byte offset 1 of v0; the
       * expected sequence writes it as two separate bytes. */
      //! p_unit_test 9
      //! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_preserve
      //! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_b1, v2b), Operand((uint16_t)0x3800));

      //! p_unit_test 10
      //! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_preserve
      //! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_b1, v2b), Operand((uint16_t)0x4205));

      /* 8-bit copy */
      /* Case 11: byte constant 0x42 into byte 0 of v0; the expected
       * instruction forms it as the product 2 * 33. */
      //! p_unit_test 11
      //! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_preserve
      bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1b), Operand((uint8_t)0x42));

      //! s_endpgm

      finish_to_hw_instr_test();
   }
END_TEST