/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include "helpers.h"

using namespace aco;

/*
 * Sub-dword swaps and moves.
 *
 * Emits p_parallelcopy, p_create_vector and p_split_vector pseudo-instructions
 * whose definitions and operands are v1b/v2b/v3b/v6b pieces of v0..v3, once per
 * hardware generation: GFX6-GFX7 in the first loop, GFX8/GFX9/GFX11 in the
 * second. Every group starts with a p_unit_test marker carrying its index.
 *
 * NOTE(review): the line comments that precede each group look like the
 * expected instruction stream checked after finish_to_hw_instr_test(); treat
 * them as test data and keep them in the same order as the bld.pseudo() calls
 * -- confirm against the test framework.
 */
BEGIN_TEST(to_hw_instr.swap_subdword)
   /* All v0_* names start at register 256 and all v1_* names at 257; the
    * reg_b adjustments below then offset them by 1, 2 or 3.
    * NOTE(review): presumably reg_b is a byte address, so _b1/_hi/_b3 select
    * byte 1, the upper 16 bits and byte 3 of the register -- confirm. */
   PhysReg v0_lo{256};
   PhysReg v0_hi{256};
   PhysReg v0_b1{256};
   PhysReg v0_b3{256};
   PhysReg v1_lo{257};
   PhysReg v1_hi{257};
   PhysReg v1_b1{257};
   PhysReg v1_b3{257};
   PhysReg v2_lo{258};
   PhysReg v3_lo{259};
   v0_hi.reg_b += 2;
   v1_hi.reg_b += 2;
   v0_b1.reg_b += 1;
   v1_b1.reg_b += 1;
   v0_b3.reg_b += 3;
   v1_b3.reg_b += 3;

   /* GFX6 and GFX7; a level for which setup_cs() returns false is skipped. */
   for (unsigned i = GFX6; i <= GFX7; i++) {
      if (!setup_cs(NULL, (amd_gfx_level)i))
         continue;

      //~gfx[67]>> p_unit_test 0
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v1_lo, v2b),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b));

      //~gfx[67]! p_unit_test 1
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v1),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b));

      //~gfx[67]! p_unit_test 2
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v6b), Operand(v1_lo, v2b),
                 Operand(v0_lo, v2b), Operand(v2_lo, v2b));

      //~gfx[67]! p_unit_test 3
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
      //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v2),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b),
                 Operand(v2_lo, v2b), Operand(v3_lo, v2b));

      //~gfx[67]! p_unit_test 4
      //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
      //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[2][0:16], %0:v[1][16:32], 2
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v2),
                 Operand(v1_lo, v2b), Operand(v2_lo, v2b),
                 Operand(v0_lo, v2b), Operand(v3_lo, v2b));

      //~gfx[67]! p_unit_test 5
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Operand(v0_lo, v1));

      //~gfx[67]! p_unit_test 6
      //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Definition(v2_lo, v2b), Operand(v0_lo, v6b));

      //~gfx[67]! p_unit_test 7
      //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Definition(v2_lo, v2b), Definition(v3_lo, v2b),
                 Operand(v0_lo, v2));

      //~gfx[67]! p_unit_test 8
      //~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
      //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v2_lo, v2b),
                 Definition(v0_lo, v2b), Definition(v3_lo, v2b),
                 Operand(v0_lo, v2));

      //~gfx[67]! p_unit_test 9
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1b), Definition(v1_lo, v1b),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b));

      //~gfx[67]! p_unit_test 10
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v2b),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b));

      //~gfx[67]! p_unit_test 11
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v3b), Operand(v1_lo, v1b),
                 Operand(v0_lo, v1b), Operand(v2_lo, v1b));

      //~gfx[67]! p_unit_test 12
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
      //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v1),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b),
                 Operand(v2_lo, v1b), Operand(v3_lo, v1b));

      //~gfx[67]! p_unit_test 13
      //~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mul_u32_u24 0x101, %0:v[0][0:8]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_and_b32 0xffff, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_cvt_pk_u16_u32 %0:v[0][0:16], %0:v[0][0:8]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
      //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
      //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
      Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
                                       Definition(v0_lo, v1),
                                       Operand(v0_lo, v1b), Operand(v0_lo, v1b),
                                       Operand(v0_lo, v1b), Operand(v0_lo, v1b));
      /* All four source bytes are v0's low byte; m0 is handed over as the
       * scratch SGPR for this instruction. */
      pseudo->pseudo().scratch_sgpr = m0;

      //~gfx[67]! p_unit_test 14
      //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
      //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v1b), Definition(v0_lo, v1b),
                 Operand(v0_lo, v2b));

      //~gfx[67]! p_unit_test 15
      //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
      //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
      //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
      //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v1b), Definition(v0_lo, v1b),
                 Definition(v2_lo, v1b), Definition(v3_lo, v1b),
                 Operand(v0_lo, v1));

      //~gfx[67]! s_endpgm

      finish_to_hw_instr_test();
   }

   /* GFX8, GFX9 and GFX11; a level for which setup_cs() returns false is skipped. */
   for (amd_gfx_level lvl : {GFX8, GFX9, GFX11}) {
      if (!setup_cs(NULL, lvl))
         continue;

      //~gfx(8|9|11)>> p_unit_test 0
      //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand(v0_hi, v2b), Operand(v0_lo, v2b));

      //~gfx(8|9|11)! p_unit_test 1
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
      //~gfx11! v2b: %0:v[1][16:32] = v_add_u16_e64 hi(%0:v[0][16:32]), 0 opsel_hi
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v2b),
                 Operand(v1_lo, v1), Operand(v0_lo, v2b));

      //~gfx(8|9|11)! p_unit_test 2
      //~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_sel:uword1 dst_preserve src0_sel:uword0
      //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
      //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
      //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
      //~gfx11! v2b: %0:v[0][16:32] = v_add_u16_e64 hi(%0:v[1][16:32]), 0 opsel_hi
      //~gfx11! v2b: %0:v[1][16:32] = v_add_u16_e64 %0:v[0][0:16], 0 opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
      //~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
                 Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));

      //~gfx(8|9|11)! p_unit_test 3
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0
      //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2
      //~gfx11! v2b: %0:v[1][0:16] = v_add_u16_e64 %0:v[0][0:16], 0
      //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_b3, v1b),
                 Operand(v1_lo, v1), Operand(v0_b3, v1b));

      //~gfx(8|9|11)! p_unit_test 4
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
      //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104
      //~gfx11! v2b: %0:v[1][16:32] = v_add_u16_e64 hi(%0:v[0][16:32]), 0 opsel_hi
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v1b),
                 Operand(v1_lo, v1), Operand(v0_lo, v1b));

      //~gfx(8|9|11)! p_unit_test 5
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
      //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
                 Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));

      //~gfx(8|9|11)! p_unit_test 6
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
                 Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));

      //~gfx(8|9|11)! p_unit_test 7
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
      //~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
                 Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));

      //~gfx(8|9|11)! p_unit_test 8
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
      //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
      //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v3b), Definition(v1_lo, v3b),
                 Operand(v1_lo, v3b), Operand(v0_lo, v3b));

      //~gfx(8|9|11)! p_unit_test 9
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
      //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
                 Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));

      //~gfx(8|9|11)! p_unit_test 10
      //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
      //~gfx11! v2b: %0:v[0][16:32] = v_add_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
      //~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16]
      //~gfx11! v2b: %0:v[0][16:32] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
      //~gfx11! v2b: %0:v[0][16:32] = v_add_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
      //~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16]
      //~gfx11! v2b: %0:v[0][16:32] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
      //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
      //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
      //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x7040506
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_b1, v2b), Definition(v1_b1, v2b),
                 Operand(v1_b1, v2b), Operand(v0_b1, v2b));

      //~gfx(8|9|11)! p_unit_test 11
      //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_sel:uword0 dst_preserve src0_sel:uword1
      //~gfx11! v2b: %0:v[1][0:16] = v_add_u16_e64 hi(%0:v[0][16:32]), 0
      //~gfx(8|9|11)! v1: %0:v[0] = v_mov_b32 42
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
                 Operand::c32(42u), Operand(v0_hi, v2b));

      //~gfx(8|9|11)! p_unit_test 12
      //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_b1, v1b), Definition(v0_b3, v1b),
                 Operand(v0_b3, v1b), Operand(v0_b1, v1b));

      //~gfx(8|9|11)! s_endpgm

      finish_to_hw_instr_test();
   }
END_TEST

/*
 * Sub-dword constant copies.
 *
 * Emits p_parallelcopy instructions whose sources are 8-bit or 16-bit
 * constants (Operand::c8 / Operand::c16 / Operand::zero) and whose
 * destinations are v1b or v2b pieces of v0/v1, for GFX9, GFX10 and GFX11.
 * From p_unit_test 13 onwards the copies are emitted into a second block whose
 * fp_mode.denorm16_64 is set to fp_denorm_flush.
 */
BEGIN_TEST(to_hw_instr.subdword_constant)
   /* NOTE(review): as in the previous test, reg_b is presumably a byte
    * address, making _hi the upper half and _b1 byte 1 -- confirm. */
   PhysReg v0_lo{256};
   PhysReg v0_hi{256};
   PhysReg v0_b1{256};
   PhysReg v1_lo{257};
   PhysReg v1_hi{257};
   v0_hi.reg_b += 2;
   v0_b1.reg_b += 1;
   v1_hi.reg_b += 2;

   for (amd_gfx_level lvl : {GFX9, GFX10, GFX11}) {
      if (!setup_cs(NULL, lvl))
         continue;

      /* 16-bit pack */
      //>> p_unit_test 0
      //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x3800), Operand(v1_hi, v2b));

      //! p_unit_test 1
      //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]
      //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]
      //~gfx(10|11)! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand(v1_hi, v2b));

      //! p_unit_test 2
      //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
      //~gfx(10|11)! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand(v0_lo, v2b));

      //! p_unit_test 3
      //! v1: %_:v[0] = v_mov_b32 0x3c003800
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x3800), Operand::c16(0x3c00));

      //! p_unit_test 4
      //! v1: %_:v[0] = v_mov_b32 0x43064205
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand::c16(0x4306));

      //! p_unit_test 5
      //! v1: %_:v[0] = v_mov_b32 0x38004205
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand::c16(0x3800));

      /* 16-bit copy */
      //! p_unit_test 6
      //~gfx(9|10)! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:dword
      //~gfx11! v2b: %_:v[0][0:16] = v_pack_b32_f16 0.5, hi(%_:v[0][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800));

      //! p_unit_test 7
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
      //~gfx(10|11)! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205));

      //! p_unit_test 8
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
      //~gfx(10|11)! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205));

      //! p_unit_test 9
      //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_sel:ubyte1 dst_preserve src0_sel:dword
      //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_sel:ubyte2 dst_preserve src0_sel:dword
      //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c04
      //~gfx11! v1: %_:v[0] = v_and_b32 0xff00ffff, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_or_b32 0x380000, %_:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800));

      //! p_unit_test 10
      //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_sel:ubyte1 dst_preserve src0_sel:dword
      //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_sel:ubyte2 dst_preserve src0_sel:dword src1_sel:dword
      //~gfx11! v1: %_:v[0] = v_and_b32 0xffff00ff, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_or_b32 0x500, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_and_b32 0xff00ffff, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_or_b32 0x420000, %_:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205));

      /* 8-bit copy */
      //! p_unit_test 11
      //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_sel:ubyte0 dst_preserve src0_sel:dword src1_sel:dword
      //~gfx11! v1: %_:v[0] = v_and_b32 0xffffff00, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_or_b32 0x42, %_:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42));

      /* 32-bit and 8-bit copy */
      //! p_unit_test 12
      //! v1: %_:v[0] = v_mov_b32 0
      //~gfx(9|10)! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
      //~gfx11! v1: %_:v[1] = v_perm_b32 %_:v[1], 0, 0x706050c
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
                 Operand::zero(), Operand::zero(1));

      /* Continue in a freshly created block and record the linear edge
       * between block 0 and block 1 by hand. */
      bld.reset(program->create_and_insert_block());
      program->blocks[0].linear_succs.push_back(1);
      program->blocks[1].linear_preds.push_back(0);

      /* Prevent usage of v_pack_b32_f16, so we use v_perm_b32 instead. */
      program->blocks[1].fp_mode.denorm16_64 = fp_denorm_flush;

      //>> p_unit_test 13
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0xff, %_:v[0]
      //~gfx(10|11)! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0d
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
                 Operand::c16(0x00ff));

      //! p_unit_test 14
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0xff000000, %_:v[0]
      //~gfx(10|11)! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0xd0c0504
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b),
                 Operand::c16(0xff00));

      //! p_unit_test 15
      //~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
      //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0c
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
                 Operand::zero(2));

      //! p_unit_test 16
      //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword
      //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050d
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
                 Operand::c8(0xff));

      //! p_unit_test 17
      //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
      //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050c
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
                 Operand::zero(1));

      //! s_endpgm

      finish_to_hw_instr_test();
   }
END_TEST

/*
 * Self-intersecting swap (GFX9 only).
 *
 * A single p_parallelcopy whose copies overlap each other: a two-register copy
 * v[1:2] = v[2:3] together with v3 = v7 and v7 = v1. The listed expectation is
 * a chain of three v_swap_b32 instructions.
 */
BEGIN_TEST(to_hw_instr.self_intersecting_swap)
   if (!setup_cs(NULL, GFX9))
      return;

   PhysReg reg_v1{257};
   PhysReg reg_v2{258};
   PhysReg reg_v3{259};
   PhysReg reg_v7{263};

   //>> p_unit_test 0
   //! v1: %0:v[1], v1: %0:v[2] = v_swap_b32 %0:v[2], %0:v[1]
   //! v1: %0:v[2], v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2]
   //! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
   //! s_endpgm
   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
   //v[1:2] = v[2:3]
   //v3 = v7
   //v7 = v1
   bld.pseudo(aco_opcode::p_parallelcopy,
              Definition(reg_v1, v2), Definition(reg_v3, v1), Definition(reg_v7, v1),
              Operand(reg_v2, v2), Operand(reg_v7, v1), Operand(reg_v1, v1));

   finish_to_hw_instr_test();
END_TEST

/*
 * p_extract.
 *
 * Runs on GFX7, GFX8, GFX9 and GFX11, once with is_signed = 0 and once with
 * is_signed = 1; the variant suffix "_unsigned" / "_signed" is passed to
 * setup_cs(). The EXT macro is redefined three times so the same call sites
 * emit a VGPR extract, an SGPR extract (with an scc definition) and an 8-bit
 * extract into a v2b definition.
 */
BEGIN_TEST(to_hw_instr.extract)
   PhysReg s0_lo{0};
   PhysReg s1_lo{1};
   PhysReg v0_lo{256};
   PhysReg v1_lo{257};

   for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) {
      for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
         if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
            continue;

/* VGPR form: v0 = extract(v1, idx, size, is_signed). */
#define EXT(idx, size)                                                                             \
   bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
              Operand::c32(size), Operand::c32(is_signed));

         //; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
         //; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
         //; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
         //; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
         //; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)

         //>> p_unit_test 0
         bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
         //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
         EXT(0, 8)
         //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
         EXT(1, 8)
         //! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
         EXT(2, 8)
         //! v1: %_:v[0] = @v_shr 24, %_:v[1]
         EXT(3, 8)
         //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
         EXT(0, 16)
         //! v1: %_:v[0] = @v_shr 16, %_:v[1]
         EXT(1, 16)

#undef EXT

/* SGPR form: s0 (plus an scc definition) = extract(s1, idx, size, is_signed). */
#define EXT(idx, size)                                                                             \
   bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1),                   \
              Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));

         //>> p_unit_test 2
         bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
         //~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000
         //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
         EXT(0, 8)
         //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008
         EXT(1, 8)
         //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010
         EXT(2, 8)
         //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24
         EXT(3, 8)
         //~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000
         //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
         EXT(0, 16)
         //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16
         EXT(1, 16)

#undef EXT

/* 16-bit form: size is fixed at 8 and the source is v1 advanced by src_b. */
#define EXT(idx, src_b)                                                                            \
   bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b),   \
              Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));

         //>> p_unit_test 4
         bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
         //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
         EXT(0, 0)
         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
         /* The extracts from the offset source (src_b = 2) are not emitted on GFX7. */
         if (lvl != GFX7)
            EXT(0, 2)
         //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
         EXT(1, 0)
         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
         if (lvl != GFX7)
            EXT(1, 2)

#undef EXT

         finish_to_hw_instr_test();

         //! s_endpgm
      }
   }
END_TEST

/*
 * p_insert.
 *
 * Runs on GFX7, GFX8, GFX9 and GFX11. The INS macro is defined first for the
 * VGPR form (v0 from v1) and then for the SGPR form (s0 plus an scc
 * definition, from s1); both take the index and size as constant operands.
 */
BEGIN_TEST(to_hw_instr.insert)
   PhysReg s0_lo{0};
   PhysReg s1_lo{1};
   PhysReg v0_lo{256};
   PhysReg v1_lo{257};

   for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) {
      if (!setup_cs(NULL, lvl))
         continue;

#define INS(idx, size)                                                                             \
   bld.pseudo(aco_opcode::p_insert, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx),  \
              Operand::c32(size));

      //>> p_unit_test 0
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      //! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8
      INS(0, 8)
      //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
      //~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0]
      //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1 src0_sel:dword
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0c000c
      INS(1, 8)
      //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
      //~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0]
      //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2 src0_sel:dword
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000c0c
      INS(2, 8)
      //! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1]
      INS(3, 8)
      //! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 16
      INS(0, 16)
      //! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
      INS(1, 16)

#undef INS

#define INS(idx, size)                                                                             \
   bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1),                    \
              Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size));

      //>> p_unit_test 1
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
      INS(0, 8)
      //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
      //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 8
      INS(1, 8)
      //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
      //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 16
      INS(2, 8)
      //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 24
      INS(3, 8)
      //! 
s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x100000 751 INS(0, 16) 752 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16 753 INS(1, 16) 754 755 #undef INS 756 757 #define INS(idx, def_b) \ 758 bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \ 759 Operand::c32(idx), Operand::c32(8u)); 760 761 //>> p_unit_test 2 762 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 763 //~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8 764 //~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 765 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c00 766 INS(0, 0) 767 //~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 768 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000504 769 if (lvl != GFX7) 770 INS(0, 2) 771 //~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16] 772 //~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 773 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706000c 774 INS(1, 0) 775 //~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 776 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0504 777 if (lvl != GFX7) 778 INS(1, 2) 779 780 #undef INS 781 782 finish_to_hw_instr_test(); 783 784 //! s_endpgm 785 } 786 END_TEST 787 788 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc) 789 if (!setup_cs(NULL, GFX10)) 790 return; 791 792 PhysReg reg_s0{0}; 793 PhysReg reg_s1{1}; 794 PhysReg v0_lo{256}; 795 PhysReg v0_b3{256}; 796 v0_b3.reg_b += 3; 797 PhysReg v1_lo{257}; 798 799 //>> p_unit_test 0 800 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 801 802 /* It would be better if the scc=s0 copy was done later, but handle_operands() is complex 803 * enough 804 */ 805 806 //! 
s1: %0:scc = s_cmp_lg_i32 %0:s[0], 0 807 //! s1: %0:m0 = s_mov_b32 %0:scc 808 //! lv1: %0:v[0] = v_mov_b32 %0:v[1] 809 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 810 //! lv1: %0:v[0] = v_mov_b32 %0:v[1] 811 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 812 //! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0 813 Instruction *instr = bld.pseudo( 814 aco_opcode::p_parallelcopy, 815 Definition(scc, s1), Definition(v0_lo, v1.as_linear()), 816 Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear())); 817 instr->pseudo().scratch_sgpr = m0; 818 819 finish_to_hw_instr_test(); 820 END_TEST 821 822 BEGIN_TEST(to_hw_instr.swap_linear_vgpr) 823 if (!setup_cs(NULL, GFX10)) 824 return; 825 826 PhysReg reg_v0{256}; 827 PhysReg reg_v1{257}; 828 RegClass v1_linear = v1.as_linear(); 829 830 //>> p_unit_test 0 831 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 832 833 Instruction *instr = bld.pseudo( 834 aco_opcode::p_parallelcopy, 835 Definition(reg_v0, v1_linear), Definition(reg_v1, v1_linear), 836 Operand(reg_v1, v1_linear), Operand(reg_v0, v1_linear)); 837 instr->pseudo().scratch_sgpr = m0; 838 839 finish_to_hw_instr_test(); 840 END_TEST 841 842 BEGIN_TEST(to_hw_instr.pack2x16_alignbyte_constant) 843 PhysReg v0_lo{256}; 844 PhysReg v0_hi{256}; 845 PhysReg v1_hi{257}; 846 v0_hi.reg_b += 2; 847 v1_hi.reg_b += 2; 848 849 if (!setup_cs(NULL, GFX10)) 850 return; 851 852 /* prevent usage of v_pack_b32_f16 */ 853 program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush; 854 855 //>> p_unit_test 0 856 //! v1: %_:v[0] = v_alignbyte_b32 0x3800, %_:v[1][16:32], 2 857 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 858 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 859 Operand(v1_hi, v2b), Operand::c16(0x3800)); 860 861 //! s_endpgm 862 863 finish_to_hw_instr_test(); 864 END_TEST 865