/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include "helpers.h"

using namespace aco;

/*
 * Test to_hw_instr.swap_subdword
 *
 * Emits p_parallelcopy, p_create_vector and p_split_vector pseudo
 * instructions whose definitions and operands are sub-dword pieces
 * (v1b/v2b/v3b/v6b) or whole dwords (v1/v2) of registers 256..259, arranged
 * so that sources and destinations overlap or trade places. Every case is
 * preceded by a p_unit_test marker carrying the case number.
 *
 * The first loop runs on GFX6 and GFX7, the second on GFX8, GFX9 and GFX11.
 * A level for which setup_cs() returns false is skipped.
 *
 * NOTE(review): the comment lines that begin with two slashes followed by
 * '!', '>>', '~' or ';' look like expected-output directives consumed by the
 * test harness (which lives outside this file), not documentation. Confirm
 * before rewording, reflowing or reordering any of them.
 */
BEGIN_TEST(to_hw_instr.swap_subdword)
   /* All aliases of one register start at the same PhysReg value. */
   PhysReg v0_lo{256};
   PhysReg v0_hi{256};
   PhysReg v0_b1{256};
   PhysReg v0_b3{256};
   PhysReg v1_lo{257};
   PhysReg v1_hi{257};
   PhysReg v1_b1{257};
   PhysReg v1_b3{257};
   PhysReg v2_lo{258};
   PhysReg v3_lo{259};
   /* Move the _hi/_b1/_b3 aliases 2/1/3 units into their register. Judging by
    * the [16:32], [8:16] and [24:32] bit ranges in the check lines below,
    * reg_b is presumably a byte offset -- TODO confirm. */
   v0_hi.reg_b += 2;
   v1_hi.reg_b += 2;
   v0_b1.reg_b += 1;
   v1_b1.reg_b += 1;
   v0_b3.reg_b += 3;
   v1_b3.reg_b += 3;

   /* The level is iterated as an unsigned, hence the cast back to
    * amd_gfx_level when calling setup_cs(). */
   for (unsigned i = GFX6; i <= GFX7; i++) {
      if (!setup_cs(NULL, (amd_gfx_level)i))
         continue;

      //~gfx[67]>> p_unit_test 0
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v1_lo, v2b),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b));

      //~gfx[67]! p_unit_test 1
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v2b),
                 Operand(v0_lo, v2b));

      //~gfx[67]! p_unit_test 2
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v6b), Operand(v1_lo, v2b),
                 Operand(v0_lo, v2b), Operand(v2_lo, v2b));

      //~gfx[67]! p_unit_test 3
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
      //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
                 Operand(v0_lo, v2b), Operand(v2_lo, v2b), Operand(v3_lo, v2b));

      //~gfx[67]! p_unit_test 4
      //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
      //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[2][0:16], %0:v[1][16:32], 2
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
                 Operand(v2_lo, v2b), Operand(v0_lo, v2b), Operand(v3_lo, v2b));

      //~gfx[67]! p_unit_test 5
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Operand(v0_lo, v1));

      //~gfx[67]! p_unit_test 6
      //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Definition(v2_lo, v2b), Operand(v0_lo, v6b));

      //~gfx[67]! p_unit_test 7
      //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Definition(v2_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));

      //~gfx[67]! p_unit_test 8
      //~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
      //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v2_lo, v2b),
                 Definition(v0_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));

      //~gfx[67]! p_unit_test 9
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v1_lo, v1b),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b));

      //~gfx[67]! p_unit_test 10
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2b), Operand(v1_lo, v1b),
                 Operand(v0_lo, v1b));

      //~gfx[67]! p_unit_test 11
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v3b), Operand(v1_lo, v1b),
                 Operand(v0_lo, v1b), Operand(v2_lo, v1b));

      //~gfx[67]! p_unit_test 12
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
      //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v1b),
                 Operand(v0_lo, v1b), Operand(v2_lo, v1b), Operand(v3_lo, v1b));

      //~gfx[67]! p_unit_test 13
      //~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mul_u32_u24 0x101, %0:v[0][0:8]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_and_b32 0xffff, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_cvt_pk_u16_u32 %0:v[0][0:16], %0:v[0][0:8]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
      //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
      //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
      Instruction* pseudo =
         bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v0_lo, v1b),
                    Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b));
      /* The check lines for this case load a constant into m0, so the pseudo
       * instruction is given m0 as its scratch SGPR. */
      pseudo->pseudo().scratch_sgpr = m0;

      //~gfx[67]! p_unit_test 14
      //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
      //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
                 Operand(v0_lo, v2b));

      //~gfx[67]! p_unit_test 15
      //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
      //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
      //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
      //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
                 Definition(v2_lo, v1b), Definition(v3_lo, v1b), Operand(v0_lo, v1));

      //~gfx[67]! s_endpgm

      finish_to_hw_instr_test();
   }

   for (amd_gfx_level lvl : {GFX8, GFX9, GFX11}) {
      if (!setup_cs(NULL, lvl))
         continue;

      //~gfx(8|9|11)>> p_unit_test 0
      //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand(v0_hi, v2b), Operand(v0_lo, v2b));

      //~gfx(8|9|11)! p_unit_test 1
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
      //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
                 Operand(v1_lo, v1), Operand(v0_lo, v2b));

      //~gfx(8|9|11)! p_unit_test 2
      //~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_sel:uword1 dst_preserve src0_sel:uword0
      //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
      //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
      //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
      //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 hi(%0:v[1][16:32]) opsel_hi
      //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 %0:v[0][0:16] opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
      //~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
                 Definition(v1_hi, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b),
                 Operand(v0_lo, v2b));

      //~gfx(8|9|11)! p_unit_test 3
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0
      //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2
      //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 %0:v[0][0:16]
      //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_b3, v1b),
                 Operand(v1_lo, v1), Operand(v0_b3, v1b));

      //~gfx(8|9|11)! p_unit_test 4
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
      //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104
      //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
                 Operand(v1_lo, v1), Operand(v0_lo, v1b));

      //~gfx(8|9|11)! p_unit_test 5
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
      //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v0_hi, v1b),
                 Definition(v1_lo, v1), Operand(v1_lo, v1b), Operand(v1_hi, v1b),
                 Operand(v0_lo, v1));

      //~gfx(8|9|11)! p_unit_test 6
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Definition(v1_lo, v1), Operand(v1_lo, v2b), Operand(v1_hi, v2b),
                 Operand(v0_lo, v1));

      //~gfx(8|9|11)! p_unit_test 7
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
      //~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Definition(v1_lo, v1), Operand(v1_hi, v2b), Operand(v1_lo, v2b),
                 Operand(v0_lo, v1));

      //~gfx(8|9|11)! p_unit_test 8
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
      //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
      //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
                 Operand(v1_lo, v3b), Operand(v0_lo, v3b));

      //~gfx(8|9|11)! p_unit_test 9
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
      //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
                 Definition(v0_b3, v1b), Operand(v1_lo, v3b), Operand(v0_lo, v3b),
                 Operand(v1_b3, v1b));

      //~gfx(8|9|11)! p_unit_test 10
      //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
      //~gfx11! v2b: %0:v[0][16:32] = v_add_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
      //~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16]
      //~gfx11! v2b: %0:v[0][16:32] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
      //~gfx11! v2b: %0:v[0][16:32] = v_add_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
      //~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16]
      //~gfx11! v2b: %0:v[0][16:32] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
      //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
      //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
      //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x7040506
      //~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
      //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Definition(v1_b1, v2b),
                 Operand(v1_b1, v2b), Operand(v0_b1, v2b));

      //~gfx(8|9|11)! p_unit_test 11
      //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_sel:uword0 dst_preserve src0_sel:uword1
      //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 hi(%0:v[0][16:32])
      //~gfx(8|9|11)! v1: %0:v[0] = v_mov_b32 42
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
                 Operand::c32(42u), Operand(v0_hi, v2b));

      //~gfx(8|9|11)! p_unit_test 12
      //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b),
                 Operand(v0_b3, v1b), Operand(v0_b1, v1b));

      //~gfx(8|9|11)! s_endpgm

      finish_to_hw_instr_test();
   }
END_TEST

/*
 * Test to_hw_instr.subdword_constant
 *
 * Uses p_parallelcopy to write 16-bit and 8-bit constants (Operand::c16,
 * Operand::c8, Operand::zero(n)) into the low half, the high half or the
 * offset-by-one position of register 256, sometimes together with a register
 * source or a second constant. Runs on GFX9, GFX10 and GFX11.
 *
 * Cases 13 and later are emitted into a second block that is created and
 * linked after block 0 and has fp_mode.denorm16_64 set to fp_denorm_flush.
 */
BEGIN_TEST(to_hw_instr.subdword_constant)
   PhysReg v0_lo{256};
   PhysReg v0_hi{256};
   PhysReg v0_b1{256};
   PhysReg v1_lo{257};
   PhysReg v1_hi{257};
   v0_hi.reg_b += 2;
   v0_b1.reg_b += 1;
   v1_hi.reg_b += 2;

   for (amd_gfx_level lvl : {GFX9, GFX10, GFX11}) {
      if (!setup_cs(NULL, lvl))
         continue;

      /* 16-bit pack */
      //>> p_unit_test 0
      //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x3800), Operand(v1_hi, v2b));

      //! p_unit_test 1
      //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]
      //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]
      //~gfx(10|11)! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand(v1_hi, v2b));

      //! p_unit_test 2
      //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
      //~gfx(10|11)! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand(v0_lo, v2b));

      //! p_unit_test 3
      //! v1: %_:v[0] = v_mov_b32 0x3c003800
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x3800), Operand::c16(0x3c00));

      //! p_unit_test 4
      //! v1: %_:v[0] = v_mov_b32 0x43064205
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand::c16(0x4306));

      //! p_unit_test 5
      //! v1: %_:v[0] = v_mov_b32 0x38004205
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand::c16(0x3800));

      /* 16-bit copy */
      //! p_unit_test 6
      //~gfx(9|10)! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:dword
      //~gfx11! v2b: %0:v[0][0:16] = v_add_f16 0.5, 0
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800));

      //! p_unit_test 7
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
      //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
      //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0x4205
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205));

      //! p_unit_test 8
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
      //~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205
      //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0x4205 opsel_hi
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205));

      //! p_unit_test 9
      //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_sel:ubyte1 dst_preserve src0_sel:dword
      //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_sel:ubyte2 dst_preserve src0_sel:dword
      //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c04
      //~gfx11! v1: %_:v[0] = v_and_b32 0xff00ffff, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_or_b32 0x380000, %_:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800));

      //! p_unit_test 10
      //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_sel:ubyte1 dst_preserve src0_sel:dword
      //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_sel:ubyte2 dst_preserve src0_sel:dword src1_sel:dword
      //~gfx11! v1: %_:v[0] = v_and_b32 0xffff00ff, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_or_b32 0x500, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_and_b32 0xff00ffff, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_or_b32 0x420000, %_:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205));

      /* 8-bit copy */
      //! p_unit_test 11
      //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_sel:ubyte0 dst_preserve src0_sel:dword src1_sel:dword
      //~gfx11! v1: %_:v[0] = v_and_b32 0xffffff00, %_:v[0]
      //~gfx11! v1: %_:v[0] = v_or_b32 0x42, %_:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42));

      /* 32-bit and 8-bit copy */
      //! p_unit_test 12
      //! v1: %_:v[0] = v_mov_b32 0
      //~gfx(9|10)! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
      //~gfx11! v1: %_:v[1] = v_perm_b32 %_:v[1], 0, 0x706050c
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
                 Operand::zero(), Operand::zero(1));

      /* Continue in a fresh block and record the linear edge from block 0 to
       * block 1 on both sides. */
      bld.reset(program->create_and_insert_block());
      program->blocks[0].linear_succs.push_back(1);
      program->blocks[1].linear_preds.push_back(0);

      /* Prevent usage of v_pack_b32_f16, so we use v_perm_b32 instead. */
      program->blocks[1].fp_mode.denorm16_64 = fp_denorm_flush;

      //>> p_unit_test 13
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0xff, %_:v[0]
      //~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0d
      //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0xff
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x00ff));

      //! p_unit_test 14
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0xff000000, %_:v[0]
      //~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0xd0c0504
      //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0xffffff00 opsel_hi
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0xff00));

      //! p_unit_test 15
      //~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
      //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::zero(2));

      //! p_unit_test 16
      //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword
      //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050d
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0xff));

      //! p_unit_test 17
      //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
      //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050c
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1));

      //! s_endpgm

      finish_to_hw_instr_test();
   }
END_TEST

/*
 * Test to_hw_instr.self_intersecting_swap
 *
 * GFX9 only; returns immediately if setup_cs() fails. A single
 * p_parallelcopy copies the two-dword source starting at register 258 into
 * the two-dword destination starting at register 257 (so source and
 * destination overlap), while also moving 263 into 259 and 257 into 263.
 * The check lines list three v_swap_b32 instructions for it.
 */
BEGIN_TEST(to_hw_instr.self_intersecting_swap)
   if (!setup_cs(NULL, GFX9))
      return;

   PhysReg reg_v1{257};
   PhysReg reg_v2{258};
   PhysReg reg_v3{259};
   PhysReg reg_v7{263};

   //>> p_unit_test 0
   //! v1: %0:v[1], v1: %0:v[2] = v_swap_b32 %0:v[2], %0:v[1]
   //! v1: %0:v[2], v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2]
   //! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
   //! s_endpgm
   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
   // v[1:2] = v[2:3]
   // v3 = v7
   // v7 = v1
   bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v1, v2), Definition(reg_v3, v1),
              Definition(reg_v7, v1), Operand(reg_v2, v2), Operand(reg_v7, v1),
              Operand(reg_v1, v1));

   finish_to_hw_instr_test();
END_TEST

/*
 * Test to_hw_instr.extract
 *
 * For GFX7, GFX8, GFX9 and GFX11, once with is_signed = 0 and once with
 * is_signed = 1, emits p_extract pseudo instructions. The string "_signed"
 * or "_unsigned" is passed as the fourth argument of setup_cs().
 *
 * EXT is defined three times, each time #undef'd before the next:
 *   1. 32-bit VGPR destination (register 256) from register 257,
 *      parameterised by (index, size);
 *   2. SGPR destination (register 0) with an additional scc definition,
 *      from SGPR 1, parameterised by (index, size);
 *   3. 16-bit VGPR destination extracting 8 bits from register 257 advanced
 *      by src_b (presumably bytes -- TODO confirm), parameterised by
 *      (index, src_b).
 *
 * NOTE(review): the directive lines below that begin with two slashes and
 * ';' appear to define per-variant substitutions (funcs[...]) that the
 * '@name' tokens in the later check lines refer to, selecting the signed or
 * unsigned mnemonic from the variant suffix. Confirm against the harness.
 */
BEGIN_TEST(to_hw_instr.extract)
   PhysReg s0_lo{0};
   PhysReg s1_lo{1};
   PhysReg v0_lo{256};
   PhysReg v1_lo{257};

   for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) {
      for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
         if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
            continue;

#define EXT(idx, size) \
   bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
              Operand::c32(size), Operand::c32(is_signed));

         //; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
         //; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
         //; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
         //; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
         //; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)

         //>> p_unit_test 0
         bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
         //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
         EXT(0, 8)
         //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
         EXT(1, 8)
         //! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
         EXT(2, 8)
         //! v1: %_:v[0] = @v_shr 24, %_:v[1]
         EXT(3, 8)
         //~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
         //~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
         //~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
         EXT(0, 16)
         //! v1: %_:v[0] = @v_shr 16, %_:v[1]
         EXT(1, 16)

#undef EXT

#define EXT(idx, size) \
   bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \
              Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));

         //>> p_unit_test 2
         bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
         //~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000
         //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
         EXT(0, 8)
         //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008
         EXT(1, 8)
         //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010
         EXT(2, 8)
         //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24
         EXT(3, 8)
         //~gfx(7|8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000
         //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0
         //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
         EXT(0, 16)
         //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16
         EXT(1, 16)

#undef EXT

#define EXT(idx, src_b) \
   bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \
              Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));

         //>> p_unit_test 4
         bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
         //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
         EXT(0, 0)
         /* EXT expands to one complete statement, so each unbraced if below
          * guards exactly one p_extract; the src_b = 2 cases are not emitted
          * on GFX7. */
         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
         if (lvl != GFX7)
            EXT(0, 2)
         //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
         EXT(1, 0)
         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
         if (lvl != GFX7)
            EXT(1, 2)

#undef EXT

         finish_to_hw_instr_test();

         //! s_endpgm
      }
   }
END_TEST

BEGIN_TEST(to_hw_instr.insert)
   PhysReg s0_lo{0};
   PhysReg s1_lo{1};
   PhysReg v0_lo{256};
   PhysReg v1_lo{257};

   for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) {
      if (!setup_cs(NULL, lvl))
         continue;

#define INS(idx, size) \
   bld.pseudo(aco_opcode::p_insert, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
              Operand::c32(size));

      //>> p_unit_test 0
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      //! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8
      INS(0, 8)
      //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
      //~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0]
      //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1 src0_sel:dword
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0c000c
      INS(1, 8)
      //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
      //~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0]
      //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2 src0_sel:dword
      //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000c0c
      INS(2, 8)
      //! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1]
      INS(3, 8)
      //! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 16
      INS(0, 16)
      //! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
      INS(1, 16)

#undef INS

#define INS(idx, size) \
   bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \
              Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size));

      //>> p_unit_test 1
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
      INS(0, 8)
      //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
      //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 8
      INS(1, 8)
      //! 
s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000 719 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 16 720 INS(2, 8) 721 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 24 722 INS(3, 8) 723 //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x100000 724 INS(0, 16) 725 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16 726 INS(1, 16) 727 728 #undef INS 729 730 #define INS(idx, def_b) \ 731 bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \ 732 Operand::c32(idx), Operand::c32(8u)); 733 734 //>> p_unit_test 2 735 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 736 //~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8 737 //~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 738 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c00 739 INS(0, 0) 740 //~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 741 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000504 742 if (lvl != GFX7) 743 INS(0, 2) 744 //~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16] 745 //~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 746 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706000c 747 INS(1, 0) 748 //~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 749 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0504 750 if (lvl != GFX7) 751 INS(1, 2) 752 753 #undef INS 754 755 finish_to_hw_instr_test(); 756 757 //! 
s_endpgm 758 } 759 END_TEST 760 761 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc) 762 if (!setup_cs(NULL, GFX10)) 763 return; 764 765 PhysReg reg_s0{0}; 766 PhysReg v0_lo{256}; 767 PhysReg v0_b3{256}; 768 v0_b3.reg_b += 3; 769 PhysReg v1_lo{257}; 770 771 //>> p_unit_test 0 772 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 773 774 /* It would be better if the scc=s0 copy was done later, but handle_operands() is complex 775 * enough 776 */ 777 778 //! s1: %0:scc = s_cmp_lg_i32 %0:s[0], 0 779 //! s1: %0:m0 = s_mov_b32 %0:scc 780 //! lv1: %0:v[0] = v_mov_b32 %0:v[1] 781 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 782 //! lv1: %0:v[0] = v_mov_b32 %0:v[1] 783 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 784 //! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0 785 Instruction* instr = 786 bld.pseudo(aco_opcode::p_parallelcopy, Definition(scc, s1), Definition(v0_lo, v1.as_linear()), 787 Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear())); 788 instr->pseudo().scratch_sgpr = m0; 789 790 finish_to_hw_instr_test(); 791 END_TEST 792 793 BEGIN_TEST(to_hw_instr.swap_linear_vgpr) 794 if (!setup_cs(NULL, GFX10)) 795 return; 796 797 PhysReg reg_v0{256}; 798 PhysReg reg_v1{257}; 799 RegClass v1_linear = v1.as_linear(); 800 801 //>> p_unit_test 0 802 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 803 804 //! lv1: %0:v[0], lv1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 805 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 806 //! lv1: %0:v[0], lv1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 807 //! 
s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 808 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear), 809 Definition(reg_v1, v1_linear), Operand(reg_v1, v1_linear), 810 Operand(reg_v0, v1_linear)); 811 instr->pseudo().scratch_sgpr = m0; 812 813 finish_to_hw_instr_test(); 814 END_TEST 815 816 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_v3) 817 if (!setup_cs(NULL, GFX10)) 818 return; 819 820 PhysReg reg_v0{256}; 821 PhysReg reg_v4{256 + 4}; 822 RegClass v3_linear = v3.as_linear(); 823 824 //>> p_unit_test 0 825 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 826 827 //! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 828 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 829 //! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 830 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 831 //! lv1: %0:v[2] = v_mov_b32 %0:v[6] 832 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 833 //! lv1: %0:v[2] = v_mov_b32 %0:v[6] 834 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 835 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v3_linear), 836 Operand(reg_v4, v3_linear)); 837 instr->pseudo().scratch_sgpr = m0; 838 839 finish_to_hw_instr_test(); 840 END_TEST 841 842 BEGIN_TEST(to_hw_instr.pack2x16_constant) 843 PhysReg v0_lo{256}; 844 PhysReg v0_hi{256}; 845 PhysReg v1_lo{257}; 846 PhysReg v1_hi{257}; 847 v0_hi.reg_b += 2; 848 v1_hi.reg_b += 2; 849 850 for (amd_gfx_level lvl : {GFX10, GFX11}) { 851 if (!setup_cs(NULL, lvl)) 852 continue; 853 854 /* prevent usage of v_pack_b32_f16 */ 855 program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush; 856 857 //>> p_unit_test 0 858 //! v1: %_:v[0] = v_alignbyte_b32 0x3800, %_:v[1][16:32], 2 859 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 860 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 861 Operand(v1_hi, v2b), Operand::c16(0x3800)); 862 863 //! p_unit_test 1 864 //! 
v2b: %_:v[0][0:16] = v_lshrrev_b32 16, %_:v[1][16:32] 865 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); 866 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 867 Operand(v1_hi, v2b), Operand::zero(2)); 868 869 //! p_unit_test 2 870 //~gfx10! v2b: %_:v[0][0:16] = v_and_b32 0xffff, %_:v[1][0:16] 871 //~gfx11! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1][0:16] 872 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); 873 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 874 Operand(v1_lo, v2b), Operand::zero(2)); 875 876 //! p_unit_test 3 877 //! v2b: %_:v[0][16:32] = v_and_b32 0xffff0000, %_:v[1][16:32] 878 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); 879 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 880 Operand::zero(2), Operand(v1_hi, v2b)); 881 882 //! p_unit_test 4 883 //! v2b: %_:v[0][16:32] = v_lshlrev_b32 16, %_:v[1][0:16] 884 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); 885 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 886 Operand::zero(2), Operand(v1_lo, v2b)); 887 888 //! s_endpgm 889 890 finish_to_hw_instr_test(); 891 } 892 END_TEST 893