1 /* 2 * Copyright © 2020 Valve Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 #include "helpers.h" 7 8 using namespace aco; 9 10 BEGIN_TEST(to_hw_instr.swap_subdword) 11 PhysReg v0_lo{256}; 12 PhysReg v0_hi{256}; 13 PhysReg v0_b1{256}; 14 PhysReg v0_b3{256}; 15 PhysReg v1_lo{257}; 16 PhysReg v1_hi{257}; 17 PhysReg v1_b1{257}; 18 PhysReg v1_b3{257}; 19 PhysReg v128_lo{256 + 128}; 20 PhysReg v128_hi{256 + 128}; 21 PhysReg v129_lo{256 + 129}; 22 PhysReg v129_hi{256 + 129}; 23 v0_hi.reg_b += 2; 24 v1_hi.reg_b += 2; 25 v0_b1.reg_b += 1; 26 v1_b1.reg_b += 1; 27 v0_b3.reg_b += 3; 28 v1_b3.reg_b += 3; 29 v128_hi.reg_b += 2; 30 v129_hi.reg_b += 2; 31 32 for (amd_gfx_level lvl : {GFX8, GFX9, GFX11}) { 33 if (!setup_cs(NULL, lvl)) 34 continue; 35 36 //~gfx(8|9|11)>> p_unit_test 0 37 //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 38 //~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16] 39 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 40 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 41 Operand(v0_hi, v2b), Operand(v0_lo, v2b)); 42 43 //~gfx(8|9|11)! p_unit_test 1 44 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 45 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 46 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 47 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 48 //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1 49 //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi 50 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); 51 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b), 52 Operand(v1_lo, v1), Operand(v0_lo, v2b)); 53 54 //~gfx(8|9|11)! p_unit_test 2 55 //~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1 56 //~gfx[89]! 
v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_sel:uword1 dst_preserve src0_sel:uword0 57 //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0 58 //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0 59 //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0 60 //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 hi(%0:v[1][16:32]) opsel_hi 61 //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 %0:v[0][0:16] opsel_hi 62 //~gfx11! v2b: %0:v[0][0:16], v2b: %0:v[1][0:16] = v_swap_b16 %0:v[1][0:16], %0:v[0][0:16] 63 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 64 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b), 65 Definition(v1_hi, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b), 66 Operand(v0_lo, v2b)); 67 68 //~gfx(8|9|11)! p_unit_test 3 69 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 70 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 71 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 72 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 73 //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 74 //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 75 //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 %0:v[0][0:16] 76 //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504 77 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); 78 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_b3, v1b), 79 Operand(v1_lo, v1), Operand(v0_b3, v1b)); 80 81 //~gfx(8|9|11)! p_unit_test 4 82 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 83 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 84 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 85 //~gfx(9|11)! 
v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 86 //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 87 //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1 88 //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104 89 //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi 90 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); 91 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b), 92 Operand(v1_lo, v1), Operand(v0_lo, v1b)); 93 94 //~gfx(8|9|11)! p_unit_test 5 95 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] 96 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1] 97 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] 98 //~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1] 99 //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 100 //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 101 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104 102 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504 103 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); 104 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v0_hi, v1b), 105 Definition(v1_lo, v1), Operand(v1_lo, v1b), Operand(v1_hi, v1b), 106 Operand(v0_lo, v1)); 107 108 //~gfx(8|9|11)! p_unit_test 6 109 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 110 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 111 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 112 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 113 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); 114 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 115 Definition(v1_lo, v1), Operand(v1_lo, v2b), Operand(v1_hi, v2b), 116 Operand(v0_lo, v1)); 117 118 //~gfx(8|9|11)! 
p_unit_test 7 119 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] 120 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1] 121 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] 122 //~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1] 123 //~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 124 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); 125 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 126 Definition(v1_lo, v1), Operand(v1_hi, v2b), Operand(v1_lo, v2b), 127 Operand(v0_lo, v1)); 128 129 //~gfx(8|9|11)! p_unit_test 8 130 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 131 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 132 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 133 //~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 134 //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3 135 //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3 136 //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3 137 //~gfx11! v2b: %0:v[0][0:16], v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16] 138 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704 139 //~gfx11! v2b: %0:v[0][0:16], v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16] 140 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); 141 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b), 142 Operand(v1_lo, v3b), Operand(v0_lo, v3b)); 143 144 //~gfx(8|9|11)! p_unit_test 9 145 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 146 //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] 147 //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] 148 //~gfx(9|11)! 
v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 149 //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 150 //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504 151 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); 152 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b), 153 Definition(v0_b3, v1b), Operand(v1_lo, v3b), Operand(v0_lo, v3b), 154 Operand(v1_b3, v1b)); 155 156 //~gfx(8|9|11)! p_unit_test 10 157 //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1 158 //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1 159 //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1 160 //~gfx11! v2b: %0:v[0][16:32], v2b: %0:v[1][0:16] = v_swap_b16 %0:v[1][0:16], %0:v[0][16:32] opsel_hi 161 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704 162 //~gfx11! v2b: %0:v[0][16:32], v2b: %0:v[1][0:16] = v_swap_b16 %0:v[1][0:16], %0:v[0][16:32] opsel_hi 163 //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2 164 //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2 165 //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2 166 //~gfx11! v2b: %0:v[0][0:16], v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16] 167 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x7040506 168 //~gfx11! 
v2b: %0:v[0][0:16], v2b: %0:v[1][16:32] = v_swap_b16 hi(%0:v[1][16:32]), %0:v[0][0:16] 169 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); 170 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Definition(v1_b1, v2b), 171 Operand(v1_b1, v2b), Operand(v0_b1, v2b)); 172 173 //~gfx(8|9|11)! p_unit_test 11 174 //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_sel:uword0 dst_preserve src0_sel:uword1 175 //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 hi(%0:v[0][16:32]) 176 //~gfx(8|9|11)! v1: %0:v[0] = v_mov_b32 42 177 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); 178 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b), 179 Operand::c32(42u), Operand(v0_hi, v2b)); 180 181 //~gfx(8|9|11)! p_unit_test 12 182 //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1 183 //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1 184 //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1 185 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704 186 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u)); 187 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b), 188 Operand(v0_b3, v1b), Operand(v0_b1, v1b)); 189 190 //~gfx(8|9|11)! p_unit_test 13 191 //~gfx[89]! v2b: %0:v[129][16:32] = v_xor_b32 %0:v[129][16:32], %0:v[128][0:16] dst_sel:uword1 dst_preserve src0_sel:uword1 src1_sel:uword0 192 //~gfx[89]! v2b: %0:v[128][0:16] = v_xor_b32 %0:v[129][16:32], %0:v[128][0:16] dst_sel:uword0 dst_preserve src0_sel:uword1 src1_sel:uword0 193 //~gfx[89]! v2b: %0:v[129][16:32] = v_xor_b32 %0:v[129][16:32], %0:v[128][0:16] dst_sel:uword1 dst_preserve src0_sel:uword1 src1_sel:uword0 194 //~gfx11! 
v2b: %0:v[128][0:16] = v_xor_b16 hi(%0:v[129][16:32]), %0:v[128][0:16] 195 //~gfx11! v2b: %0:v[129][16:32] = v_xor_b16 hi(%0:v[129][16:32]), %0:v[128][0:16] opsel_hi 196 //~gfx11! v2b: %0:v[128][0:16] = v_xor_b16 hi(%0:v[129][16:32]), %0:v[128][0:16] 197 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u)); 198 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v128_lo, v2b), Definition(v129_hi, v2b), 199 Operand(v129_hi, v2b), Operand(v128_lo, v2b)); 200 201 //~gfx(8|9|11)! p_unit_test 14 202 //~gfx[89]! v2b: %0:v[129][0:16] = v_xor_b32 %0:v[129][0:16], %0:v[128][16:32] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword1 203 //~gfx[89]! v2b: %0:v[128][16:32] = v_xor_b32 %0:v[129][0:16], %0:v[128][16:32] dst_sel:uword1 dst_preserve src0_sel:uword0 src1_sel:uword1 204 //~gfx[89]! v2b: %0:v[129][0:16] = v_xor_b32 %0:v[129][0:16], %0:v[128][16:32] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword1 205 //~gfx11! v2b: %0:v[128][16:32] = v_xor_b16 %0:v[129][0:16], hi(%0:v[128][16:32]) opsel_hi 206 //~gfx11! v2b: %0:v[129][0:16] = v_xor_b16 %0:v[129][0:16], hi(%0:v[128][16:32]) 207 //~gfx11! v2b: %0:v[128][16:32] = v_xor_b16 %0:v[129][0:16], hi(%0:v[128][16:32]) opsel_hi 208 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u)); 209 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v128_hi, v2b), Definition(v129_lo, v2b), 210 Operand(v129_lo, v2b), Operand(v128_hi, v2b)); 211 212 //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) 213 //~gfx(8|9|11)! s_endpgm 214 215 finish_to_hw_instr_test(); 216 } 217 END_TEST 218 219 BEGIN_TEST(to_hw_instr.subdword_constant) 220 PhysReg v0_lo{256}; 221 PhysReg v0_hi{256}; 222 PhysReg v0_b1{256}; 223 PhysReg v1_lo{257}; 224 PhysReg v1_hi{257}; 225 v0_hi.reg_b += 2; 226 v0_b1.reg_b += 1; 227 v1_hi.reg_b += 2; 228 229 for (amd_gfx_level lvl : {GFX9, GFX10, GFX11}) { 230 if (!setup_cs(NULL, lvl)) 231 continue; 232 233 /* 16-bit pack */ 234 //>> p_unit_test 0 235 //! 
v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32]) 236 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 237 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 238 Operand::c16(0x3800), Operand(v1_hi, v2b)); 239 240 //! p_unit_test 1 241 //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32] 242 //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0] 243 //~gfx(10|11)! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32]) 244 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); 245 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 246 Operand::c16(0x4205), Operand(v1_hi, v2b)); 247 248 //! p_unit_test 2 249 //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] 250 //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0] 251 //~gfx(10|11)! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16] 252 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 253 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 254 Operand::c16(0x4205), Operand(v0_lo, v2b)); 255 256 //! p_unit_test 3 257 //! v1: %_:v[0] = v_mov_b32 0x3c003800 258 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); 259 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 260 Operand::c16(0x3800), Operand::c16(0x3c00)); 261 262 //! p_unit_test 4 263 //! v1: %_:v[0] = v_mov_b32 0x43064205 264 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); 265 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 266 Operand::c16(0x4205), Operand::c16(0x4306)); 267 268 //! p_unit_test 5 269 //! v1: %_:v[0] = v_mov_b32 0x38004205 270 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); 271 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 272 Operand::c16(0x4205), Operand::c16(0x3800)); 273 274 /* 16-bit copy */ 275 //! p_unit_test 6 276 //~gfx(9|10)! 
v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:dword 277 //~gfx11! v2b: %0:v[0][0:16] = v_add_f16 0.5, 0 278 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); 279 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800)); 280 281 //! p_unit_test 7 282 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0] 283 //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0] 284 //~gfx10! v2b: %_:v[0][0:16] = v_add_u16_e64 0x4205, 0 285 //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0x4205 286 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); 287 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205)); 288 289 //! p_unit_test 8 290 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0] 291 //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0] 292 //~gfx10! v2b: %_:v[0][16:32] = v_add_u16_e64 0x4205, 0 opsel_hi 293 //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0x4205 opsel_hi 294 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); 295 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205)); 296 297 //! p_unit_test 9 298 //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_sel:ubyte1 dst_preserve src0_sel:dword 299 //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_sel:ubyte2 dst_preserve src0_sel:dword 300 //~gfx11! v1b: %_:v[0][8:16] = v_cvt_pk_u8_f32 0, 1, %_:v[0] 301 //~gfx11! v1b: %_:v[0][16:24] = v_cvt_pk_u8_f32 0x42600000, 2, %_:v[0] 302 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); 303 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800)); 304 305 //! p_unit_test 10 306 //~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_sel:ubyte1 dst_preserve src0_sel:dword 307 //~gfx(9|10)! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_sel:ubyte2 dst_preserve src0_sel:dword src1_sel:dword 308 //~gfx11! v1b: %_:v[0][8:16] = v_cvt_pk_u8_f32 0x40a00000, 1, %_:v[0] 309 //~gfx11! 
v1b: %_:v[0][16:24] = v_cvt_pk_u8_f32 0x42840000, 2, %_:v[0] 310 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); 311 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205)); 312 313 /* 8-bit copy */ 314 //! p_unit_test 11 315 //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_sel:ubyte0 dst_preserve src0_sel:dword src1_sel:dword 316 //~gfx11! v1b: %_:v[0][0:8] = v_cvt_pk_u8_f32 0x42840000, 0, %_:v[0] 317 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); 318 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42)); 319 320 /* 32-bit and 8-bit copy */ 321 //! p_unit_test 12 322 //! v1: %_:v[0] = v_mov_b32 0 323 //~gfx(9|10)! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword 324 //~gfx11! v1b: %_:v[1][0:8] = v_cvt_pk_u8_f32 0, 0, %_:v[1] 325 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u)); 326 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b), 327 Operand::zero(), Operand::zero(1)); 328 329 //! p_unit_test 13 330 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0] 331 //~gfx9! v1: %_:v[0] = v_or_b32 0xff, %_:v[0] 332 //~gfx10! v2b: %_:v[0][0:16] = v_add_u16_e64 0xff, 0 333 //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0xff 334 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u)); 335 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x00ff)); 336 337 //! p_unit_test 14 338 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0] 339 //~gfx9! v1: %_:v[0] = v_or_b32 0xff000000, %_:v[0] 340 //~gfx10! v2b: %_:v[0][16:32] = v_add_u16_e64 0xff00, 0 opsel_hi 341 //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0xffffff00 opsel_hi 342 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u)); 343 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0xff00)); 344 345 //! p_unit_test 15 346 //~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword 347 //~gfx11! 
v2b: %0:v[0][0:16] = v_mov_b16 0 348 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u)); 349 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::zero(2)); 350 351 //! p_unit_test 16 352 //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword 353 //~gfx11! v1b: %_:v[0][0:8] = v_cvt_pk_u8_f32 0x437f0000, 0, %_:v[0] 354 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u)); 355 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0xff)); 356 357 //! p_unit_test 17 358 //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword 359 //~gfx11! v1b: %_:v[0][0:8] = v_cvt_pk_u8_f32 0, 0, %_:v[0] 360 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u)); 361 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1)); 362 363 //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) 364 //! s_endpgm 365 366 finish_to_hw_instr_test(); 367 } 368 END_TEST 369 370 BEGIN_TEST(to_hw_instr.self_intersecting_swap) 371 if (!setup_cs(NULL, GFX9)) 372 return; 373 374 PhysReg reg_v1{257}; 375 PhysReg reg_v2{258}; 376 PhysReg reg_v3{259}; 377 PhysReg reg_v7{263}; 378 379 //>> p_unit_test 0 380 //! v1: %0:v[1], v1: %0:v[2] = v_swap_b32 %0:v[2], %0:v[1] 381 //! v1: %0:v[2], v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2] 382 //! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3] 383 //! 
s_endpgm 384 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 385 // v[1:2] = v[2:3] 386 // v3 = v7 387 // v7 = v1 388 bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v1, v2), Definition(reg_v3, v1), 389 Definition(reg_v7, v1), Operand(reg_v2, v2), Operand(reg_v7, v1), 390 Operand(reg_v1, v1)); 391 392 finish_to_hw_instr_test(); 393 END_TEST 394 395 BEGIN_TEST(to_hw_instr.extract) 396 PhysReg s0_lo{0}; 397 PhysReg s1_lo{1}; 398 PhysReg v0_lo{256}; 399 PhysReg v1_lo{257}; 400 401 for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) { 402 for (unsigned is_signed = 0; is_signed <= 1; is_signed++) { 403 if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned")) 404 continue; 405 406 #define EXT(idx, size) \ 407 bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \ 408 Operand::c32(size), Operand::c32(is_signed)); 409 410 //; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32' 411 //; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32' 412 //; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32' 413 //; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32' 414 //; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n) 415 416 //>> p_unit_test 0 417 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 418 //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8 419 EXT(0, 8) 420 //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8 421 EXT(1, 8) 422 //! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8 423 EXT(2, 8) 424 //! v1: %_:v[0] = @v_shr 24, %_:v[1] 425 EXT(3, 8) 426 //~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16 427 //~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1] 428 //~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1] 429 EXT(0, 16) 430 //! 
v1: %_:v[0] = @v_shr 16, %_:v[1] 431 EXT(1, 16) 432 433 #undef EXT 434 435 #define EXT(idx, size) \ 436 bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \ 437 Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed)); 438 439 //>> p_unit_test 2 440 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 441 //~gfx.*_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000 442 //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1] 443 EXT(0, 8) 444 //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008 445 EXT(1, 8) 446 //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010 447 EXT(2, 8) 448 //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24 449 EXT(3, 8) 450 //~gfx(7|8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000 451 //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0 452 //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1] 453 EXT(0, 16) /* high 16 bits: gfx7/8 expect the @s_shr shift for both signedness variants; only unsigned gfx9/11 expects s_pack_hh_b32_b16 */ 454 //~gfx(7|8)_unsigned! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16 455 //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_hh_b32_b16 %_:s[1], 0 456 //~gfx.*_signed! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16 457 EXT(1, 16) 458 459 #undef EXT 460 461 #define EXT(idx, src_b) \ 462 bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \ 463 Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed)); 464 465 //>> p_unit_test 4 466 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); 467 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0) 468 //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00 469 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000 470 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04 471 if (lvl != GFX7) 472 EXT(0, 0) 473 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2) 474 //~gfx11_unsigned! 
v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02 475 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202 476 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04 477 if (lvl != GFX7) 478 EXT(0, 2) 479 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1) 480 //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01 481 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801 482 if (lvl != GFX7) 483 EXT(1, 0) 484 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3) 485 //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03 486 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903 487 if (lvl != GFX7) 488 EXT(1, 2) 489 490 #undef EXT 491 492 finish_to_hw_instr_test(); 493 494 //~gfx11_.*! s_sendmsg sendmsg(dealloc_vgprs) 495 //! s_endpgm 496 } 497 } 498 END_TEST 499 500 BEGIN_TEST(to_hw_instr.insert) 501 PhysReg s0_lo{0}; 502 PhysReg s1_lo{1}; 503 PhysReg v0_lo{256}; 504 PhysReg v1_lo{257}; 505 506 for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) { 507 if (!setup_cs(NULL, lvl)) 508 continue; 509 510 #define INS(idx, size) \ 511 bld.pseudo(aco_opcode::p_insert, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \ 512 Operand::c32(size)); 513 514 //>> p_unit_test 0 515 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 516 //! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8 517 INS(0, 8) 518 //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8 519 //~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0] 520 //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1 src0_sel:dword 521 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0c000c 522 INS(1, 8) 523 //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8 524 //~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0] 525 //~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2 src0_sel:dword 526 //~gfx11! 
v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000c0c 527 INS(2, 8) 528 //! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1] 529 INS(3, 8) 530 //! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 16 531 INS(0, 16) 532 //! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1] 533 INS(1, 16) 534 535 #undef INS 536 537 #define INS(idx, size) \ 538 bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \ 539 Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size)); 540 541 //>> p_unit_test 1 542 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); 543 //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000 544 INS(0, 8) 545 //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000 546 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 8 547 INS(1, 8) 548 //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000 549 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 16 550 INS(2, 8) 551 //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 24 552 INS(3, 8) 553 //~gfx(7|8)! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x100000 554 //~gfx(9|11)! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0 555 INS(0, 16) 556 //~gfx(7|8)! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16 557 //~gfx(9|11)! s1: %_:s[0] = s_pack_ll_b32_b16 0, %_:s[1] 558 INS(1, 16) 559 560 #undef INS 561 562 #define INS(idx, def_b, op_b) \ 563 bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), \ 564 Operand(v1_lo.advance(op_b), v2b), Operand::c32(idx), Operand::c32(8u)); 565 566 //>> p_unit_test 2 567 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); 568 //~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:ubyte0 569 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c00 570 if (lvl != GFX7) 571 INS(0, 0, 0) 572 //~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:ubyte0 573 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000504 574 if (lvl != GFX7) 575 INS(0, 2, 0) 576 //~gfx(8|9)! 
v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:ubyte2 577 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c02 578 if (lvl != GFX7) 579 INS(0, 0, 2) 580 //~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:ubyte2 581 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc020504 582 if (lvl != GFX7) 583 INS(0, 2, 2) 584 //~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte0 585 //~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1] 586 //~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 587 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706000c 588 if (lvl != GFX7) 589 INS(1, 0, 0) 590 //~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte0 591 //~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1] 592 //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 593 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0504 594 if (lvl != GFX7) 595 INS(1, 2, 0) 596 //~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte1 dst_preserve src0_sel:ubyte2 597 //~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1] 598 //~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte2 599 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706020c 600 if (lvl != GFX7) 601 INS(1, 0, 2) 602 //~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][16:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte2 603 //~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1] 604 //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte2 605 //~gfx11! 
v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x20c0504 606 if (lvl != GFX7) 607 INS(1, 2, 2) 608 #undef INS 609 610 finish_to_hw_instr_test(); 611 612 //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) 613 //! s_endpgm 614 } 615 END_TEST 616 617 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc) 618 if (!setup_cs(NULL, GFX10)) 619 return; 620 621 PhysReg v0_lo{256}; 622 PhysReg v1_lo{257}; 623 624 //>> p_unit_test 0 625 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 626 627 /* It would be better if the scc=s0 copy was done later, but handle_operands() is complex 628 * enough 629 */ 630 631 //! v1: %0:v[0] = v_mov_b32 %0:v[1] 632 //! s1: %0:m0 = s_mov_b32 %0:scc 633 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 634 //! v1: %0:v[0] = v_mov_b32 %0:v[1] 635 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 636 //! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0 637 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1.as_linear()), 638 Operand(v1_lo, v1.as_linear())); 639 instr->pseudo().scratch_sgpr = m0; 640 instr->pseudo().needs_scratch_reg = true; 641 642 finish_to_hw_instr_test(); 643 END_TEST 644 645 BEGIN_TEST(to_hw_instr.swap_linear_vgpr) 646 if (!setup_cs(NULL, GFX10)) 647 return; 648 649 PhysReg reg_v0{256}; 650 PhysReg reg_v1{257}; 651 RegClass v1_linear = v1.as_linear(); 652 653 //>> p_unit_test 0 654 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 655 656 //! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 657 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 658 //! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] 659 //! 
s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 660 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear), 661 Definition(reg_v1, v1_linear), Operand(reg_v1, v1_linear), 662 Operand(reg_v0, v1_linear)); 663 instr->pseudo().scratch_sgpr = scc; 664 instr->pseudo().needs_scratch_reg = true; 665 666 finish_to_hw_instr_test(); 667 END_TEST 668 669 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_v3) 670 if (!setup_cs(NULL, GFX10)) 671 return; 672 673 PhysReg reg_v0{256}; 674 PhysReg reg_v4{256 + 4}; 675 RegClass v3_linear = v3.as_linear(); 676 677 //>> p_unit_test 0 678 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 679 680 //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 681 //! v1: %0:v[2] = v_mov_b32 %0:v[6] 682 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 683 //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 684 //! v1: %0:v[2] = v_mov_b32 %0:v[6] 685 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 686 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v3_linear), 687 Operand(reg_v4, v3_linear)); 688 instr->pseudo().scratch_sgpr = scc; 689 instr->pseudo().needs_scratch_reg = true; 690 691 finish_to_hw_instr_test(); 692 END_TEST 693 694 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_coalesce) 695 if (!setup_cs(NULL, GFX10)) 696 return; 697 698 PhysReg reg_v0{256}; 699 PhysReg reg_v1{256 + 1}; 700 PhysReg reg_v4{256 + 4}; 701 PhysReg reg_v5{256 + 5}; 702 RegClass v1_linear = v1.as_linear(); 703 704 //>> p_unit_test 0 705 //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 706 //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 707 //! v2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] 708 //! 
s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec 709 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 710 711 Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear), 712 Definition(reg_v1, v1_linear), Operand(reg_v4, v1_linear), 713 Operand(reg_v5, v1_linear)); 714 instr->pseudo().scratch_sgpr = scc; 715 instr->pseudo().needs_scratch_reg = true; 716 717 finish_to_hw_instr_test(); 718 END_TEST 719 720 BEGIN_TEST(to_hw_instr.pack2x16_constant) 721 PhysReg v0_lo{256}; 722 PhysReg v0_hi{256}; 723 PhysReg v1_lo{257}; 724 PhysReg v1_hi{257}; 725 v0_hi.reg_b += 2; 726 v1_hi.reg_b += 2; 727 728 for (amd_gfx_level lvl : {GFX10, GFX11}) { 729 if (!setup_cs(NULL, lvl)) 730 continue; 731 732 /* prevent usage of v_pack_b32_f16 */ 733 program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush; 734 735 //>> p_unit_test 0 736 //! v1: %_:v[0] = v_alignbyte_b32 0x3800, %_:v[1][16:32], 2 737 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 738 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 739 Operand(v1_hi, v2b), Operand::c16(0x3800)); 740 741 //! p_unit_test 1 742 //! v2b: %_:v[0][0:16] = v_lshrrev_b32 16, %_:v[1][16:32] 743 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); 744 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 745 Operand(v1_hi, v2b), Operand::zero(2)); 746 747 //! p_unit_test 2 748 //~gfx10! v2b: %_:v[0][0:16] = v_and_b32 0xffff, %_:v[1][0:16] 749 //~gfx11! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1][0:16] 750 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); 751 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 752 Operand(v1_lo, v2b), Operand::zero(2)); 753 754 //! p_unit_test 3 755 //! 
v2b: %_:v[0][16:32] = v_and_b32 0xffff0000, %_:v[1][16:32] 756 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); 757 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 758 Operand::zero(2), Operand(v1_hi, v2b)); 759 760 //! p_unit_test 4 761 //! v2b: %_:v[0][16:32] = v_lshlrev_b32 16, %_:v[1][0:16] 762 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); 763 bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), 764 Operand::zero(2), Operand(v1_lo, v2b)); 765 766 //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) 767 //! s_endpgm 768 769 finish_to_hw_instr_test(); 770 } 771 END_TEST 772 773 BEGIN_TEST(to_hw_instr.mov_b16_sgpr_src) 774 if (!setup_cs(NULL, GFX11)) 775 return; 776 777 //>> p_unit_test 0 778 //! v2b: %0:v[0][0:16] = v_mov_b16 hi(%0:s[0][16:32]) 779 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 780 bld.pseudo(aco_opcode::p_extract_vector, Definition(PhysReg(256), v2b), Operand(PhysReg(0), s1), 781 Operand::c32(1)); 782 783 //! s_sendmsg sendmsg(dealloc_vgprs) 784 //! s_endpgm 785 786 finish_to_hw_instr_test(); 787 788 for (aco_ptr<Instruction>& instr : program->blocks[0].instructions) { 789 if (instr->opcode == aco_opcode::v_mov_b16 && instr->format != asVOP3(Format::VOP1)) { 790 fail_test("v_mov_b16 must be be VOP3"); 791 return; 792 } 793 } 794 END_TEST 795