• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 #include "helpers.h"
25 
26 using namespace aco;
27 
/*
 * to_hw_instr.swap_subdword
 *
 * Builds p_parallelcopy, p_create_vector and p_split_vector pseudo
 * instructions whose definitions and operands use the sub-dword register
 * classes v1b/v2b/v3b/v6b as well as v1/v2, in two passes: GFX6-GFX7 and
 * GFX8-GFX9. Each pass ends with finish_to_hw_instr_test().
 *
 * The comment lines that start with "//" followed by "~", "!" or ">>" list the
 * instructions expected for each case. NOTE(review): they are presumably
 * consumed by the test framework and matched in order against the generated
 * program, so treat their text and position as part of the test -- confirm
 * against the framework sources before editing them.
 */
28 BEGIN_TEST(to_hw_instr.swap_subdword)
   /* All handles for one VGPR are created from the same register index
    * (256..259, which the expectation lines print as v[0]..v[3]). The reg_b
    * adjustments below then move the _b1, _hi and _b3 handles by 1, 2 and 3;
    * the expectations show them as bit ranges starting at 8, 16 and 24, so
    * reg_b presumably is a byte address (TODO confirm in PhysReg). */
29    PhysReg v0_lo{256};
30    PhysReg v0_hi{256};
31    PhysReg v0_b1{256};
32    PhysReg v0_b3{256};
33    PhysReg v1_lo{257};
34    PhysReg v1_hi{257};
35    PhysReg v1_b1{257};
36    PhysReg v1_b3{257};
37    PhysReg v2_lo{258};
38    PhysReg v3_lo{259};
39    v0_hi.reg_b += 2;
40    v1_hi.reg_b += 2;
41    v0_b1.reg_b += 1;
42    v1_b1.reg_b += 1;
43    v0_b3.reg_b += 3;
44    v1_b3.reg_b += 3;
45 
   /* Pass 1: GFX6 and GFX7. None of the expected instructions in this pass
    * carries the dst_preserve flag seen in the GFX8/GFX9 pass; sub-dword
    * results are expected to be assembled with shifts, v_alignbyte_b32 and
    * whole-register moves/xors instead. */
46    for (unsigned i = GFX6; i <= GFX7; i++) {
      /* Skip this generation when setup_cs() reports failure. */
47       if (!setup_cs(NULL, (chip_class)i))
48          continue;
49 
      /* Every case below is introduced by a p_unit_test pseudo instruction
       * carrying the case number, followed by the pseudo instruction under
       * test. */

      /* Case 0: swap the low 16 bits of v0 and v1. */
50       //~gfx[67]>>  p_unit_test 0
51       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
52       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
53       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
54       bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
55       bld.pseudo(aco_opcode::p_parallelcopy,
56                  Definition(v0_lo, v2b), Definition(v1_lo, v2b),
57                  Operand(v1_lo, v2b), Operand(v0_lo, v2b));
58 
      /* Cases 1-4: p_create_vector from 16-bit operands, where the
       * destination overlaps one or more of the operands. */
59       //~gfx[67]! p_unit_test 1
60       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
61       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
62       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
63       bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
64       bld.pseudo(aco_opcode::p_create_vector,
65                  Definition(v0_lo, v1),
66                  Operand(v1_lo, v2b), Operand(v0_lo, v2b));
67 
68       //~gfx[67]! p_unit_test 2
69       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
70       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
71       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
72       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
73       bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
74       bld.pseudo(aco_opcode::p_create_vector,
75                  Definition(v0_lo, v6b), Operand(v1_lo, v2b),
76                  Operand(v0_lo, v2b), Operand(v2_lo, v2b));
77 
78       //~gfx[67]! p_unit_test 3
79       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
80       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
81       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
82       //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
83       //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
84       bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
85       bld.pseudo(aco_opcode::p_create_vector,
86                  Definition(v0_lo, v2),
87                  Operand(v1_lo, v2b), Operand(v0_lo, v2b),
88                  Operand(v2_lo, v2b), Operand(v3_lo, v2b));
89 
90       //~gfx[67]! p_unit_test 4
91       //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
92       //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[2][0:16], %0:v[1][16:32], 2
93       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
94       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:16], %0:v[0][16:32], 2
95       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
96       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
97       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
98       bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
99       bld.pseudo(aco_opcode::p_create_vector,
100                  Definition(v0_lo, v2),
101                  Operand(v1_lo, v2b), Operand(v2_lo, v2b),
102                  Operand(v0_lo, v2b), Operand(v3_lo, v2b));
103 
      /* Cases 5-8: p_split_vector into 16-bit definitions, where the source
       * vector overlaps one or more of the destinations. */
104       //~gfx[67]! p_unit_test 5
105       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
106       //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
107       bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
108       bld.pseudo(aco_opcode::p_split_vector,
109                  Definition(v1_lo, v2b), Definition(v0_lo, v2b),
110                  Operand(v0_lo, v1));
111 
112       //~gfx[67]! p_unit_test 6
113       //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
114       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
115       //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
116       bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
117       bld.pseudo(aco_opcode::p_split_vector,
118                  Definition(v1_lo, v2b), Definition(v0_lo, v2b),
119                  Definition(v2_lo, v2b), Operand(v0_lo, v6b));
120 
121       //~gfx[67]! p_unit_test 7
122       //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
123       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
124       //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
125       //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
126       bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
127       bld.pseudo(aco_opcode::p_split_vector,
128                  Definition(v1_lo, v2b), Definition(v0_lo, v2b),
129                  Definition(v2_lo, v2b), Definition(v3_lo, v2b),
130                  Operand(v0_lo, v2));
131 
132       //~gfx[67]! p_unit_test 8
133       //~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
134       //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
135       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
136       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
137       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
138       bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
139       bld.pseudo(aco_opcode::p_split_vector,
140                  Definition(v1_lo, v2b), Definition(v2_lo, v2b),
141                  Definition(v0_lo, v2b), Definition(v3_lo, v2b),
142                  Operand(v0_lo, v2));
143 
      /* Cases 9-15 repeat the swap / create_vector / split_vector patterns
       * with 8-bit (v1b) pieces. */
144       //~gfx[67]! p_unit_test 9
145       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
146       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
147       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
148       bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
149       bld.pseudo(aco_opcode::p_parallelcopy,
150                  Definition(v0_lo, v1b), Definition(v1_lo, v1b),
151                  Operand(v1_lo, v1b), Operand(v0_lo, v1b));
152 
153       //~gfx[67]! p_unit_test 10
154       //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
155       //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
156       //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
157       bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
158       bld.pseudo(aco_opcode::p_create_vector,
159                  Definition(v0_lo, v2b),
160                  Operand(v1_lo, v1b), Operand(v0_lo, v1b));
161 
162       //~gfx[67]! p_unit_test 11
163       //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
164       //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
165       //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
166       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
167       //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
168       bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
169       bld.pseudo(aco_opcode::p_create_vector,
170                  Definition(v0_lo, v3b), Operand(v1_lo, v1b),
171                  Operand(v0_lo, v1b), Operand(v2_lo, v1b));
172 
173       //~gfx[67]! p_unit_test 12
174       //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
175       //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
176       //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
177       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
178       //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
179       //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
180       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
181       bld.pseudo(aco_opcode::p_unit_test, Operand(12u));
182       bld.pseudo(aco_opcode::p_create_vector,
183                  Definition(v0_lo, v1),
184                  Operand(v1_lo, v1b), Operand(v0_lo, v1b),
185                  Operand(v2_lo, v1b), Operand(v3_lo, v1b));
186 
      /* Case 13: the same byte of v0 is used for all four elements. The
       * pseudo instruction is given m0 as scratch_sgpr; the expected sequence
       * loads 0x1000001 into m0 and multiplies by it as the final step. */
187       //~gfx[67]! p_unit_test 13
188       //~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
189       //~gfx[67]! v2b: %0:v[0][0:16] = v_mul_u32_u24 0x101, %0:v[0][0:8]
190       //~gfx[67]! v2b: %0:v[0][0:16] = v_and_b32 0xffff, %0:v[0][0:16]
191       //~gfx[67]! v3b: %0:v[0][0:24] = v_cvt_pk_u16_u32 %0:v[0][0:16], %0:v[0][0:8]
192       //~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
193       //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
194       //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
195       bld.pseudo(aco_opcode::p_unit_test, Operand(13u));
196       Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
197                                        Definition(v0_lo, v1),
198                                        Operand(v0_lo, v1b), Operand(v0_lo, v1b),
199                                        Operand(v0_lo, v1b), Operand(v0_lo, v1b));
200       static_cast<Pseudo_instruction*>(pseudo)->scratch_sgpr = m0;
201 
202       //~gfx[67]! p_unit_test 14
203       //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
204       //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
205       bld.pseudo(aco_opcode::p_unit_test, Operand(14u));
206       bld.pseudo(aco_opcode::p_split_vector,
207                  Definition(v1_lo, v1b), Definition(v0_lo, v1b),
208                  Operand(v0_lo, v2b));
209 
210       //~gfx[67]! p_unit_test 15
211       //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
212       //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
213       //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
214       //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
215       bld.pseudo(aco_opcode::p_unit_test, Operand(15u));
216       bld.pseudo(aco_opcode::p_split_vector,
217                  Definition(v1_lo, v1b), Definition(v0_lo, v1b),
218                  Definition(v2_lo, v1b), Definition(v3_lo, v1b),
219                  Operand(v0_lo, v1));
220 
221       //~gfx[67]! s_endpgm
222 
223       finish_to_hw_instr_test();
224    }
225 
   /* Pass 2: GFX8 and GFX9. Where the two generations differ, a full-register
    * swap is expected as three v_xor_b32 on GFX8 and as one v_swap_b32 on
    * GFX9; the remaining sub-dword fix-ups are expected as dst_preserve
    * instructions on both. */
226    for (unsigned i = GFX8; i <= GFX9; i++) {
      /* Skip this generation when setup_cs() reports failure. */
227       if (!setup_cs(NULL, (chip_class)i))
228          continue;
229 
      /* Case 0: swap the two 16-bit halves of v0. */
230       //~gfx[89]>> p_unit_test 0
231       //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
232       //~gfx9! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
233       bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
234       bld.pseudo(aco_opcode::p_parallelcopy,
235                  Definition(v0_lo, v2b), Definition(v0_hi, v2b),
236                  Operand(v0_hi, v2b), Operand(v0_lo, v2b));
237 
      /* Cases 1-9: copies between v0 and v1 in which a 32-bit or 24-bit piece
       * moves one way and a smaller piece (or pieces) moves the other way. */
238       //~gfx[89]! p_unit_test 1
239       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
240       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
241       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
242       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
243       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve
244       bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
245       bld.pseudo(aco_opcode::p_parallelcopy,
246                  Definition(v0_lo, v1), Definition(v1_lo, v2b),
247                  Operand(v1_lo, v1), Operand(v0_lo, v2b));
248 
249       //~gfx[89]! p_unit_test 2
250       //~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_preserve
251       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_preserve
252       //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
253       //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
254       //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
255       bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
256       bld.pseudo(aco_opcode::p_parallelcopy,
257                  Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
258                  Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));
259 
260       //~gfx[89]! p_unit_test 3
261       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
262       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
263       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
264       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
265       //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_preserve
266       //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_preserve
267       bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
268       bld.pseudo(aco_opcode::p_parallelcopy,
269                  Definition(v0_lo, v1), Definition(v1_b3, v1b),
270                  Operand(v1_lo, v1), Operand(v0_b3, v1b));
271 
272       //~gfx[89]! p_unit_test 4
273       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
274       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
275       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
276       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
277       //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_preserve
278       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve
279       bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
280       bld.pseudo(aco_opcode::p_parallelcopy,
281                  Definition(v0_lo, v1), Definition(v1_lo, v1b),
282                  Operand(v1_lo, v1), Operand(v0_lo, v1b));
283 
284       //~gfx[89]! p_unit_test 5
285       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
286       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
287       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
288       //~gfx9! v1: %0:v[1],  v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
289       //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_preserve
290       //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_preserve
291       bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
292       bld.pseudo(aco_opcode::p_parallelcopy,
293                  Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
294                  Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));
295 
296       //~gfx[89]! p_unit_test 6
297       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
298       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
299       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
300       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
301       bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
302       bld.pseudo(aco_opcode::p_parallelcopy,
303                  Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
304                  Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));
305 
306       //~gfx[89]! p_unit_test 7
307       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
308       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
309       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
310       //~gfx9! v1: %0:v[1],  v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
311       //~gfx[89]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
312       bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
313       bld.pseudo(aco_opcode::p_parallelcopy,
314                  Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
315                  Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));
316 
317       //~gfx[89]! p_unit_test 8
318       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
319       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
320       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
321       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
322       //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
323       //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
324       //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
325       bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
326       bld.pseudo(aco_opcode::p_parallelcopy,
327                  Definition(v0_lo, v3b), Definition(v1_lo, v3b),
328                  Operand(v1_lo, v3b), Operand(v0_lo, v3b));
329 
330       //~gfx[89]! p_unit_test 9
331       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
332       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
333       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
334       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
335       //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_preserve
336       bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
337       bld.pseudo(aco_opcode::p_parallelcopy,
338                  Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
339                  Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));
340 
      /* Case 10: swap 16-bit pieces that start at byte 1, so each piece
       * straddles the two halves of its register; the expectation is two
       * independent byte-wide xor swaps. */
341       //~gfx[89]! p_unit_test 10
342       //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve
343       //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve
344       //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve
345       //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
346       //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
347       //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
348       bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
349       bld.pseudo(aco_opcode::p_parallelcopy,
350                  Definition(v0_lo, v2b), Definition(v1_lo, v2b),
351                  Operand(v1_b1, v2b), Operand(v0_b1, v2b));
352 
      /* Case 11: v0 is overwritten with the constant 42 while its upper half
       * is still needed as a source; the expectation reads v0 first. */
353       //~gfx[89]! p_unit_test 11
354       //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_preserve
355       //~gfx[89]! v1: %0:v[0] = v_mov_b32 42
356       bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
357       bld.pseudo(aco_opcode::p_parallelcopy,
358                  Definition(v0_lo, v1), Definition(v1_lo, v2b),
359                  Operand(42u), Operand(v0_hi, v2b));
360 
361       //~gfx[89]! s_endpgm
362 
363       finish_to_hw_instr_test();
364    }
365 END_TEST
366 
/*
 * to_hw_instr.subdword_constant
 *
 * Builds p_parallelcopy pseudo instructions that write 16-bit and 8-bit
 * constants into pieces of v0, for GFX9 and GFX10, and finishes each pass
 * with finish_to_hw_instr_test().
 *
 * The comment lines that start with "//" followed by "~", "!" or ">>" list the
 * expected instructions. NOTE(review): they are presumably consumed by the
 * test framework, and lines without a "~gfx" prefix presumably apply to both
 * generations -- confirm against the framework sources before editing them.
 */
367 BEGIN_TEST(to_hw_instr.subdword_constant)
   /* Handles into v0 and v1 (register indices 256 and 257). reg_b is bumped
    * by 1 or 2 for the _b1 and _hi handles, which the expectation lines show
    * as bit ranges starting at 8 and 16 (reg_b presumably is a byte address;
    * TODO confirm in PhysReg). */
368    PhysReg v0_lo{256};
369    PhysReg v0_hi{256};
370    PhysReg v0_b1{256};
371    PhysReg v1_hi{257};
372    v0_hi.reg_b += 2;
373    v0_b1.reg_b += 1;
374    v1_hi.reg_b += 2;
375 
376    for (unsigned i = GFX9; i <= GFX10; i++) {
      /* Skip this generation when setup_cs() reports failure. */
377       if (!setup_cs(NULL, (chip_class)i))
378          continue;
379 
      /* Every case is introduced by a p_unit_test pseudo instruction carrying
       * the case number. Two constants are used throughout: 0x3800, which the
       * expectations print as the half-float value 0.5, and 0x4205, which the
       * expectations print as a plain hexadecimal literal. */
380       /* 16-bit pack */
381       //>> p_unit_test 0
382       //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
383       bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
384       bld.pseudo(aco_opcode::p_parallelcopy,
385                  Definition(v0_lo, v2b), Definition(v0_hi, v2b),
386                  Operand((uint16_t)0x3800), Operand(v1_hi, v2b));
387 
388       //! p_unit_test 1
389       //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]
390       //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]
391       //~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
392       bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
393       bld.pseudo(aco_opcode::p_parallelcopy,
394                  Definition(v0_lo, v2b), Definition(v0_hi, v2b),
395                  Operand((uint16_t)0x4205), Operand(v1_hi, v2b));
396 
397       //! p_unit_test 2
398       //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
399       //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
400       //~gfx10! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
401       bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
402       bld.pseudo(aco_opcode::p_parallelcopy,
403                  Definition(v0_lo, v2b), Definition(v0_hi, v2b),
404                  Operand((uint16_t)0x4205), Operand(v0_lo, v2b));
405 
      /* Cases 3-5: both halves are constants; a single 32-bit v_mov_b32 with
       * the combined literal is expected. */
406       //! p_unit_test 3
407       //! v1: %_:v[0] = v_mov_b32 0x3c003800
408       bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
409       bld.pseudo(aco_opcode::p_parallelcopy,
410                  Definition(v0_lo, v2b), Definition(v0_hi, v2b),
411                  Operand((uint16_t)0x3800), Operand((uint16_t)0x3c00));
412 
413       //! p_unit_test 4
414       //! v1: %_:v[0] = v_mov_b32 0x43064205
415       bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
416       bld.pseudo(aco_opcode::p_parallelcopy,
417                  Definition(v0_lo, v2b), Definition(v0_hi, v2b),
418                  Operand((uint16_t)0x4205), Operand((uint16_t)0x4306));
419 
420       //! p_unit_test 5
421       //! v1: %_:v[0] = v_mov_b32 0x38004205
422       bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
423       bld.pseudo(aco_opcode::p_parallelcopy,
424                  Definition(v0_lo, v2b), Definition(v0_hi, v2b),
425                  Operand((uint16_t)0x4205), Operand((uint16_t)0x3800));
426 
427       /* 16-bit copy */
428       //! p_unit_test 6
429       //! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_preserve
430       bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
431       bld.pseudo(aco_opcode::p_parallelcopy,
432                  Definition(v0_lo, v2b), Operand((uint16_t)0x3800));
433 
434       //! p_unit_test 7
435       //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
436       //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
437       //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
438       bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
439       bld.pseudo(aco_opcode::p_parallelcopy,
440                  Definition(v0_lo, v2b), Operand((uint16_t)0x4205));
441 
442       //! p_unit_test 8
443       //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
444       //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
445       //~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205
446       bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
447       bld.pseudo(aco_opcode::p_parallelcopy,
448                  Definition(v0_hi, v2b), Operand((uint16_t)0x4205));
449 
      /* Cases 9-10: the 16-bit destination starts at byte 1, so the constant
       * is expected to be written as two separate bytes (low byte first):
       * 0x3800 -> 0 and 56 (0x38); 0x4205 -> 5 and 66 (0x42, formed as
       * 2 * 33 in the expected instruction). */
450       //! p_unit_test 9
451       //! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_preserve
452       //! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_preserve
453       bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
454       bld.pseudo(aco_opcode::p_parallelcopy,
455                  Definition(v0_b1, v2b), Operand((uint16_t)0x3800));
456 
457       //! p_unit_test 10
458       //! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_preserve
459       //! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_preserve
460       bld.pseudo(aco_opcode::p_unit_test, Operand(10u));
461       bld.pseudo(aco_opcode::p_parallelcopy,
462                  Definition(v0_b1, v2b), Operand((uint16_t)0x4205));
463 
464       /* 8-bit copy */
465       //! p_unit_test 11
466       //! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_preserve
467       bld.pseudo(aco_opcode::p_unit_test, Operand(11u));
468       bld.pseudo(aco_opcode::p_parallelcopy,
469                  Definition(v0_lo, v1b), Operand((uint8_t)0x42));
470 
471       //! s_endpgm
472 
473       finish_to_hw_instr_test();
474    }
475 END_TEST
476