• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 #include <llvm/Config/llvm-config.h>
25 
26 #include "helpers.h"
27 #include "sid.h"
28 
29 using namespace aco;
30 
/*
 * Assembles a single s_memtime SMEM instruction whose s2 definition is fixed
 * to PhysReg 0, once for every gfx level from GFX6 through GFX10 for which
 * setup_cs() succeeds (levels where it fails are skipped).
 *
 * The special comment lines inside the test body appear to be the expected
 * output patterns, selected per gfx level by their "~gfx..." prefix; keep
 * them verbatim. NOTE(review): the gfx6-7 patterns list only raw dwords,
 * presumably because no disassembly text is available there - confirm.
 */
31 BEGIN_TEST(assembler.s_memtime)
32    for (unsigned i = GFX6; i <= GFX10; i++) {
33       if (!setup_cs(NULL, (amd_gfx_level)i))
34          continue;
35 
36       //~gfx[6-7]>> c7800000
37       //~gfx[6-7]!  bf810000
38       //~gfx[8-9]>> s_memtime s[0:1] ; c0900000 00000000
39       //~gfx10>> s_memtime s[0:1] ; f4900000 fa000000
40       bld.smem(aco_opcode::s_memtime, bld.def(s2)).def(0).setFixed(PhysReg{0});
41 
42       finish_assembler_test();
43    }
44 END_TEST
45 
/*
 * GFX10 only. Block 0 holds an s_branch to block 1 followed by 0x3f v_nop
 * instructions; block 1 is created empty and gets block 0 registered as its
 * linear predecessor.
 *
 * NOTE(review): the expected output shows a branch immediate of 0x40 and an
 * s_nop that this test never emits, so the assembler presumably inserts
 * padding to avoid a branch offset of 0x3f - confirm against the assembler.
 */
46 BEGIN_TEST(assembler.branch_3f)
47    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
48       return;
49 
50    //! BB0:
51    //! s_branch BB1                                                ; bf820040
52    //! s_nop 0                                                     ; bf800000
53    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
54 
55    for (unsigned i = 0; i < 0x3f; i++)
56       bld.vop1(aco_opcode::v_nop);
57 
58    bld.reset(program->create_and_insert_block());
59 
60    program->blocks[1].linear_preds.push_back(0u);
61 
62    finish_assembler_test();
63 END_TEST
64 
/*
 * GFX10 only. Block 0 contains an unconditional s_branch to block 2, block 1
 * is filled with INT16_MAX + 1 s_nop instructions, and block 2 is left empty.
 * Blocks 0 and 1 are registered as linear predecessors of block 2.
 *
 * The expected output replaces the s_branch with an
 * s_getpc_b64 / s_addc_u32 / s_bitcmp1_b32 / s_bitset0_b32 / s_setpc_b64
 * sequence using s[0:1], the register pair passed as the branch definition.
 * NOTE(review): presumably this is the "long jump" lowering used when the
 * target is out of reach of the 16-bit branch immediate - confirm.
 */
65 BEGIN_TEST(assembler.long_jump.unconditional_forwards)
66    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
67       return;
68 
69    //!BB0:
70    //! s_getpc_b64 s[0:1]                                          ; be801f00
71    //! s_addc_u32 s0, s0, 0x20014                                  ; 8200ff00 00020014
72    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
73    //! s_bitset0_b32 s0, 0                                         ; be801b80
74    //! s_setpc_b64 s[0:1]                                          ; be802000
75    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
76 
77    bld.reset(program->create_and_insert_block());
78 
79    //! s_nop 0                                                     ; bf800000
80    //!(then repeated 32767 times)
81    for (unsigned i = 0; i < INT16_MAX + 1; i++)
82       bld.sopp(aco_opcode::s_nop, -1, 0);
83 
84    //! BB2:
85    //! s_endpgm                                                    ; bf810000
86    bld.reset(program->create_and_insert_block());
87 
88    program->blocks[2].linear_preds.push_back(0u);
89    program->blocks[2].linear_preds.push_back(1u);
90 
91    finish_assembler_test();
92 END_TEST
93 
/*
 * GFX10 only. Same layout as the unconditional forwards case, but block 0
 * ends in a conditional s_cbranch_scc0 to block 2. Block 1 holds
 * INT16_MAX + 1 s_nop instructions and is registered as a successor of
 * block 0; block 2 has blocks 0 and 1 as linear predecessors.
 *
 * The expected output starts with the opposite condition (s_cbranch_scc1 to
 * BB1) followed by the s_getpc_b64 ... s_setpc_b64 sequence.
 * NOTE(review): presumably the inverted short branch skips over the long
 * jump when the original condition is false - confirm.
 */
94 BEGIN_TEST(assembler.long_jump.conditional_forwards)
95    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
96       return;
97 
98    //! BB0:
99    //! s_cbranch_scc1 BB1                                          ; bf850006
100    //! s_getpc_b64 s[0:1]                                          ; be801f00
101    //! s_addc_u32 s0, s0, 0x20014                                  ; 8200ff00 00020014
102    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
103    //! s_bitset0_b32 s0, 0                                         ; be801b80
104    //! s_setpc_b64 s[0:1]                                          ; be802000
105    bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2);
106 
107    bld.reset(program->create_and_insert_block());
108 
109    //! BB1:
110    //! s_nop 0 ; bf800000
111    //!(then repeated 32767 times)
112    for (unsigned i = 0; i < INT16_MAX + 1; i++)
113       bld.sopp(aco_opcode::s_nop, -1, 0);
114 
115    //! BB2:
116    //! s_endpgm                                                    ; bf810000
117    bld.reset(program->create_and_insert_block());
118 
119    program->blocks[1].linear_preds.push_back(0u);
120    program->blocks[2].linear_preds.push_back(0u);
121    program->blocks[2].linear_preds.push_back(1u);
122 
123    finish_assembler_test();
124 END_TEST
125 
/*
 * GFX10 only. Block 0 is filled with INT16_MAX + 1 s_nop instructions and
 * then ends in an unconditional s_branch back to block 0 itself; block 1 is
 * created empty. Block 0 is registered as a linear predecessor of both
 * block 0 and block 1.
 *
 * The expected output replaces the branch with the
 * s_getpc_b64 ... s_setpc_b64 sequence, this time with a negative
 * (0xfffdfffc) addend in the s_addc_u32.
 */
126 BEGIN_TEST(assembler.long_jump.unconditional_backwards)
127    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
128       return;
129 
130    //!BB0:
131    //! s_nop 0                                                     ; bf800000
132    //!(then repeated 32767 times)
133    for (unsigned i = 0; i < INT16_MAX + 1; i++)
134       bld.sopp(aco_opcode::s_nop, -1, 0);
135 
136    //! s_getpc_b64 s[0:1]                                          ; be801f00
137    //! s_addc_u32 s0, s0, 0xfffdfffc                               ; 8200ff00 fffdfffc
138    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
139    //! s_bitset0_b32 s0, 0                                         ; be801b80
140    //! s_setpc_b64 s[0:1]                                          ; be802000
141    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 0);
142 
143    //! BB1:
144    //! s_endpgm                                                    ; bf810000
145    bld.reset(program->create_and_insert_block());
146 
147    program->blocks[0].linear_preds.push_back(0u);
148    program->blocks[1].linear_preds.push_back(0u);
149 
150    finish_assembler_test();
151 END_TEST
152 
/*
 * GFX10 only. Block 0 is filled with INT16_MAX + 1 s_nop instructions and
 * then ends in a conditional s_cbranch_execnz back to block 0 itself;
 * block 1 is created empty. Block 0 is registered as a linear predecessor of
 * both block 0 and block 1.
 *
 * The expected output starts with the opposite condition
 * (s_cbranch_execz to BB1) followed by the s_getpc_b64 ... s_setpc_b64
 * sequence with a negative (0xfffdfff8) addend.
 */
153 BEGIN_TEST(assembler.long_jump.conditional_backwards)
154    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
155       return;
156 
157    //!BB0:
158    //! s_nop 0                                                     ; bf800000
159    //!(then repeated 32767 times)
160    for (unsigned i = 0; i < INT16_MAX + 1; i++)
161       bld.sopp(aco_opcode::s_nop, -1, 0);
162 
163    //! s_cbranch_execz BB1                                         ; bf880006
164    //! s_getpc_b64 s[0:1]                                          ; be801f00
165    //! s_addc_u32 s0, s0, 0xfffdfff8                               ; 8200ff00 fffdfff8
166    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
167    //! s_bitset0_b32 s0, 0                                         ; be801b80
168    //! s_setpc_b64 s[0:1]                                          ; be802000
169    bld.sopp(aco_opcode::s_cbranch_execnz, Definition(PhysReg(0), s2), 0);
170 
171    //! BB1:
172    //! s_endpgm                                                    ; bf810000
173    bld.reset(program->create_and_insert_block());
174 
175    program->blocks[0].linear_preds.push_back(0u);
176    program->blocks[1].linear_preds.push_back(0u);
177 
178    finish_assembler_test();
179 END_TEST
180 
/*
 * GFX10 only. Block 0 holds an s_branch to block 1, then 0x3f - 6 v_nop
 * instructions, then a second s_branch to block 2. Block 1 holds
 * INT16_MAX + 1 v_nop instructions and block 2 is empty. Block 0 is a linear
 * predecessor of blocks 1 and 2, and block 1 of block 2.
 *
 * The expected output is the same as for the plain 0x3f branch test: the
 * first branch has immediate 0x40 and is followed by an s_nop.
 * NOTE(review): presumably this checks the 0x3f-offset handling once the
 * second branch has been expanded into a 6-dword long jump - confirm.
 *
 * NOTE(review): the test name contains a space before ".3f" (possibly
 * introduced by an auto-formatter that read ".3f" as a float literal). It is
 * left untouched because the name identifies the test - confirm whether the
 * space is intended.
 */
181 BEGIN_TEST(assembler.long_jump .3f)
182    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
183       return;
184 
185    //! BB0:
186    //! s_branch BB1                                                ; bf820040
187    //! s_nop 0                                                     ; bf800000
188    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
189 
190    for (unsigned i = 0; i < 0x3f - 6; i++) // a unconditional long jump is 6 dwords
191       bld.vop1(aco_opcode::v_nop);
192    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
193 
194    bld.reset(program->create_and_insert_block());
195    for (unsigned i = 0; i < INT16_MAX + 1; i++)
196       bld.vop1(aco_opcode::v_nop);
197    bld.reset(program->create_and_insert_block());
198 
199    program->blocks[1].linear_preds.push_back(0u);
200    program->blocks[2].linear_preds.push_back(0u);
201    program->blocks[2].linear_preds.push_back(1u);
202 
203    finish_assembler_test();
204 END_TEST
205 
/*
 * GFX10 only. Block 0 branches to block 2 across a block 1 containing
 * INT16_MAX + 1 s_nop instructions. Block 2 then emits a p_constaddr_getpc /
 * p_constaddr_addlo pair targeting s0 / s[0:1] with zero operands.
 *
 * Only three lines of output are checked: the first s_getpc_b64, a later
 * s_getpc_b64, and the s_add_u32 directly after it, whose literal is
 * expected to be 32.
 * NOTE(review): presumably this verifies that the constant-address offset
 * stays correct when a long jump earlier in the program changes the code
 * size - confirm.
 */
206 BEGIN_TEST(assembler.long_jump.constaddr)
207    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
208       return;
209 
210    //>> s_getpc_b64 s[0:1]                                          ; be801f00
211    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
212 
213    bld.reset(program->create_and_insert_block());
214 
215    for (unsigned i = 0; i < INT16_MAX + 1; i++)
216       bld.sopp(aco_opcode::s_nop, -1, 0);
217 
218    bld.reset(program->create_and_insert_block());
219 
220    //>> s_getpc_b64 s[0:1]                                          ; be801f00
221    //! s_add_u32 s0, s0, 32                                         ; 8000ff00 00000020
222    bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero());
223    bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
224             Operand(PhysReg(0), s1), Operand::zero(), Operand::zero());
225 
226    program->blocks[2].linear_preds.push_back(0u);
227    program->blocks[2].linear_preds.push_back(1u);
228 
229    finish_assembler_test();
230 END_TEST
231 
/*
 * GFX10 only. Block 0 ends in an s_cbranch_scc0 to block 2; unlike the other
 * long-jump tests in this file, the branch is built without a Definition.
 * Block 1 holds INT16_MAX "s_nop 1" instructions, and block 2 is marked as
 * block_kind_discard_early_exit. Block 0 is the only linear predecessor
 * registered for blocks 1 and 2.
 *
 * The expected output contains the long-jump sequence in BB0 and an
 * s_endpgm at the end of both BB1 and BB2.
 */
232 BEGIN_TEST(assembler.long_jump.discard_early_exit)
233    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
234       return;
235 
236    //! BB0:
237    //! s_cbranch_scc1 BB1                                          ; bf850006
238    //! s_getpc_b64 s[0:1]                                          ; be801f00
239    //! s_addc_u32 s0, s0, 0x20014                                  ; 8200ff00 00020014
240    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
241    //! s_bitset0_b32 s0, 0                                         ; be801b80
242    //! s_setpc_b64 s[0:1]                                          ; be802000
243    bld.sopp(aco_opcode::s_cbranch_scc0, 2);
244 
245    bld.reset(program->create_and_insert_block());
246 
247    //! BB1:
248    //! s_nop 1                                                     ; bf800001
249    //!(then repeated 32766 times)
250    //! s_endpgm                                                    ; bf810000
251    for (unsigned i = 0; i < INT16_MAX; i++)
252       bld.sopp(aco_opcode::s_nop, -1, 1);
253 
254    //! BB2:
255    //! s_endpgm                                                    ; bf810000
256    bld.reset(program->create_and_insert_block());
257 
258    program->blocks[1].linear_preds.push_back(0u);
259    program->blocks[2].linear_preds.push_back(0u);
260    program->blocks[2].kind = block_kind_discard_early_exit;
261 
262    finish_assembler_test();
263 END_TEST
264 
/*
 * For GFX9 and GFX10 (skipping levels where setup_cs() fails), builds a
 * v_add3_u32 instruction by hand in VOP3 format with three zero operands and
 * a v1 definition fixed to PhysReg(0), inserts it through the builder and
 * checks the per-level encoding.
 */
265 BEGIN_TEST(assembler.v_add3)
266    for (unsigned i = GFX9; i <= GFX10; i++) {
267       if (!setup_cs(NULL, (amd_gfx_level)i))
268          continue;
269 
270       //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
271       //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
272       aco_ptr<VALU_instruction> add3{
273          create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
274       add3->operands[0] = Operand::zero();
275       add3->operands[1] = Operand::zero();
276       add3->operands[2] = Operand::zero();
277       add3->definitions[0] = Definition(PhysReg(0), v1);
278       bld.insert(std::move(add3));
279 
280       finish_assembler_test();
281    }
282 END_TEST
283 
/*
 * Same as the plain v_add3 test, but with the clamp flag set on the
 * hand-built VOP3 v_add3_u32 instruction. Compared with that test, the
 * expected first dword differs only by the 0x8000 bit.
 *
 * NOTE(review): the expected text is "integer addition + clamp" rather than
 * a mnemonic; presumably that is what the disassembler prints for this
 * encoding - confirm before changing the patterns.
 */
284 BEGIN_TEST(assembler.v_add3_clamp)
285    for (unsigned i = GFX9; i <= GFX10; i++) {
286       if (!setup_cs(NULL, (amd_gfx_level)i))
287          continue;
288 
289       //~gfx9>> integer addition + clamp ; d1ff8000 02010080
290       //~gfx10>> integer addition + clamp ; d76d8000 02010080
291       aco_ptr<VALU_instruction> add3{
292          create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
293       add3->operands[0] = Operand::zero();
294       add3->operands[1] = Operand::zero();
295       add3->operands[2] = Operand::zero();
296       add3->definitions[0] = Definition(PhysReg(0), v1);
297       add3->clamp = 1;
298       bld.insert(std::move(add3));
299 
300       finish_assembler_test();
301    }
302 END_TEST
303 
/*
 * For GFX9 and GFX10 (skipping levels where setup_cs() fails), assembles
 * s_load_dword from s[6:7] into s7 with three kinds of offset:
 *   1. an SGPR offset (s5),
 *   2. a constant offset (0x42),
 *   3. both a constant offset and an SGPR offset.
 */
304 BEGIN_TEST(assembler.smem_offset)
305    for (unsigned i = GFX9; i <= GFX10; i++) {
306       if (!setup_cs(NULL, (amd_gfx_level)i))
307          continue;
308 
309       Definition dst(PhysReg(7), s1);
310       Operand sbase(PhysReg(6), s2);
311       Operand offset(PhysReg(5), s1);
312 
313       //~gfx9>> s_load_dword s7, s[6:7], s5 ; c00001c3 00000005
314       //~gfx10>> s_load_dword s7, s[6:7], s5 ; f40001c3 0a000000
315       bld.smem(aco_opcode::s_load_dword, dst, sbase, offset);
316       //~gfx9! s_load_dword s7, s[6:7], 0x42 ; c00201c3 00000042
317       //~gfx10! s_load_dword s7, s[6:7], 0x42 ; f40001c3 fa000042
318       bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42));
      /* The loop above starts at GFX9, so this condition always holds here. */
319       if (i >= GFX9) {
320          //~gfx9! s_load_dword s7, s[6:7], s5 offset:0x42 ; c00241c3 0a000042
321          //~gfx10! s_load_dword s7, s[6:7], s5 offset:0x42 ; f40001c3 0a000042
322          bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42), offset);
323       }
324 
325       finish_assembler_test();
326    }
327 END_TEST
328 
/*
 * GFX9 only. Emits two p_constaddr pseudo instructions, one into s[0:1]
 * with a zero operand and one into s[2:3] with operand 32, runs
 * lower_to_hw_instr() on the program and then finishes the assembler test.
 *
 * Each pseudo is expected to become an s_getpc_b64 followed by an s_add_u32
 * with a literal (44 for the first, 64 for the second).
 * NOTE(review): the literals presumably combine the distance to the end of
 * the code with the requested constant offset - confirm.
 */
329 BEGIN_TEST(assembler.p_constaddr)
330    if (!setup_cs(NULL, GFX9))
331       return;
332 
333    Definition dst0 = bld.def(s2);
334    Definition dst1 = bld.def(s2);
335    dst0.setFixed(PhysReg(0));
336    dst1.setFixed(PhysReg(2));
337 
338    //>> s_getpc_b64 s[0:1] ; be801c00
339    //! s_add_u32 s0, s0, 44 ; 8000ff00 0000002c
340    bld.pseudo(aco_opcode::p_constaddr, dst0, Operand::zero());
341 
342    //! s_getpc_b64 s[2:3] ; be821c00
343    //! s_add_u32 s2, s2, 64 ; 8002ff02 00000040
344    bld.pseudo(aco_opcode::p_constaddr, dst1, Operand::c32(32));
345 
346    aco::lower_to_hw_instr(program.get());
347    finish_assembler_test();
348 END_TEST
349 
/*
 * For GFX9 and GFX10 (skipping levels where setup_cs() fails), assembles
 * SDWA-encoded VOPC compares with zero operands:
 *   - v_cmp_lt_u32 writing vcc, s[44:45] (PhysReg 0x2c) and exec;
 *   - v_cmpx_lt_u32, built with only an exec definition on GFX10 and with an
 *     additional vcc or s[44:45] definition on the other level.
 */
350 BEGIN_TEST(assembler.vopc_sdwa)
351    for (unsigned i = GFX9; i <= GFX10; i++) {
352       if (!setup_cs(NULL, (amd_gfx_level)i))
353          continue;
354 
355       //~gfx9>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 86860080
356       //~gfx10>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD   ; 7d8300f9 86860080
357       bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(),
358                     Operand::zero());
359 
360       //~gfx9! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686ac80
361       //~gfx10! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686ac80
362       bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(),
363                     Operand::zero());
364 
365       //~gfx9! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686fe80
366       //~gfx10! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD  ; 7d8300f9 8686fe80
367       bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(),
368                     Operand::zero());
369 
370       if (i == GFX10) {
371          //~gfx10! v_cmpx_lt_u32_sdwa 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7da300f9 86860080
372          bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(),
373                        Operand::zero());
374       } else {
375          //~gfx9! v_cmpx_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 86860080
376          bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2),
377                        Operand::zero(), Operand::zero());
378 
379          //~gfx9! v_cmpx_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 8686ac80
380          bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2),
381                        Definition(exec, s2), Operand::zero(), Operand::zero());
382       }
383 
384       finish_assembler_test();
385    }
386 END_TEST
387 
/*
 * GFX11 only. Checks SMEM encodings using fixed registers: destination s4,
 * an s1 operand at s8, an s2 operand at s[16:17] and an s4 operand at
 * s[32:35].
 *
 * Covered cases: s_dcache_inv without operands; s_load_dword with a constant
 * offset (42), with an SGPR offset, and with both; s_buffer_load_dword with
 * the glc flag and with the dlc flag. The expected disassembly uses the
 * s_load_b32 / s_buffer_load_b32 names for these opcodes.
 */
388 BEGIN_TEST(assembler.gfx11.smem)
389    if (!setup_cs(NULL, GFX11))
390       return;
391 
392    Definition dst = bld.def(s1);
393    dst.setFixed(PhysReg(4));
394 
395    Operand op_s1(bld.tmp(s1));
396    op_s1.setFixed(PhysReg(8));
397 
398    Operand op_s2(bld.tmp(s2));
399    op_s2.setFixed(PhysReg(16));
400 
401    Operand op_s4(bld.tmp(s4));
402    op_s4.setFixed(PhysReg(32));
403 
404    //>> s_dcache_inv                                                ; f4840000 f8000000
405    bld.smem(aco_opcode::s_dcache_inv);
406 
407    //! s_load_b32 s4, s[16:17], 0x2a                               ; f4000108 f800002a
408    bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42));
409 
410    //! s_load_b32 s4, s[16:17], s8                                 ; f4000108 10000000
411    bld.smem(aco_opcode::s_load_dword, dst, op_s2, op_s1);
412 
413    //! s_load_b32 s4, s[16:17], s8 offset:0x2a                     ; f4000108 1000002a
414    bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42), op_s1);
415 
416    //! s_buffer_load_b32 s4, s[32:35], s8 glc                      ; f4204110 10000000
417    bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().glc = true;
418 
419    //! s_buffer_load_b32 s4, s[32:35], s8 dlc                      ; f4202110 10000000
420    bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().dlc = true;
421 
422    finish_assembler_test();
423 END_TEST
424 
/*
 * GFX11 only. Checks MUBUF encodings using fixed registers: destination v42,
 * resource s[32:35], address VGPRs v10 and v[20:21], an SGPR offset s30 and
 * an m0 operand.
 *
 * Sections: addressing modes (no address, constant soffset, offen, idxen,
 * both, immediate offset 84), cache/tfe flags, LDS loads (lds flag set, m0
 * passed as the last operand) and stores.
 *
 * The LLVM major version is printed to "output" first. NOTE(review): the
 * "#llvm_ver" pattern presumably captures it so that the scripted pattern
 * lines in the tfe case can pick the expected disassembly per LLVM
 * version - confirm.
 * NOTE(review): Operand(v1) is passed wherever the expected disassembly
 * shows "off", so it presumably stands for an undefined address operand -
 * confirm.
 */
425 BEGIN_TEST(assembler.gfx11.mubuf)
426    if (!setup_cs(NULL, GFX11))
427       return;
428 
429    Definition dst = bld.def(v1);
430    dst.setFixed(PhysReg(256 + 42));
431 
432    Operand op_s4(bld.tmp(s4));
433    op_s4.setFixed(PhysReg(32));
434 
435    Operand op_v1(bld.tmp(v1));
436    op_v1.setFixed(PhysReg(256 + 10));
437 
438    Operand op_v2(bld.tmp(v2));
439    op_v2.setFixed(PhysReg(256 + 20));
440 
441    Operand op_s1(bld.tmp(s1));
442    op_s1.setFixed(PhysReg(30));
443 
444    Operand op_m0(bld.tmp(s1));
445    op_m0.setFixed(m0);
446 
447    //! llvm_version: #llvm_ver
448    fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
449 
450    /* Addressing */
451    //>> buffer_load_b32 v42, off, s[32:35], s30                     ; e0500000 1e082a80
452    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 0, false);
453 
454    //! buffer_load_b32 v42, off, s[32:35], 42                      ; e0500000 aa082a80
455    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::c32(42), 0, false);
456 
457    //! buffer_load_b32 v42, v10, s[32:35], s30 offen               ; e0500000 1e482a0a
458    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true);
459 
460    //! buffer_load_b32 v42, v10, s[32:35], s30 idxen               ; e0500000 1e882a0a
461    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen =
462       true;
463 
464    //! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen    ; e0500000 1ec82a14
465    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen =
466       true;
467 
468    //! buffer_load_b32 v42, off, s[32:35], s30 offset:84           ; e0500054 1e082a80
469    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
470 
471    /* Various flags */
472    //! buffer_load_b32 v42, off, s[32:35], 0 glc                   ; e0504000 80082a80
473    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
474       ->mubuf()
475       .glc = true;
476 
477    //! buffer_load_b32 v42, off, s[32:35], 0 dlc                   ; e0502000 80082a80
478    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
479       ->mubuf()
480       .dlc = true;
481 
482    //! buffer_load_b32 v42, off, s[32:35], 0 slc                   ; e0501000 80082a80
483    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
484       ->mubuf()
485       .slc = true;
486 
487    //; if llvm_ver >= 16:
488    //;    insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe              ; e0500000 80282a80')
489    //; else:
490    //;    insert_pattern('buffer_load_b32 v42, off, s[32:35], 0 tfe                   ; e0500000 80282a80')
491    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
492       ->mubuf()
493       .tfe = true;
494 
495    /* LDS */
496    //! buffer_load_lds_b32 off, s[32:35], 0                        ; e0c40000 80080080
497    bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
498       ->mubuf()
499       .lds = true;
500 
501    //! buffer_load_lds_i8 off, s[32:35], 0                         ; e0b80000 80080080
502    bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
503       ->mubuf()
504       .lds = true;
505 
506    //! buffer_load_lds_i16 off, s[32:35], 0                        ; e0c00000 80080080
507    bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
508       ->mubuf()
509       .lds = true;
510 
511    //! buffer_load_lds_u8 off, s[32:35], 0                         ; e0b40000 80080080
512    bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
513       ->mubuf()
514       .lds = true;
515 
516    //! buffer_load_lds_u16 off, s[32:35], 0                        ; e0bc0000 80080080
517    bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
518       ->mubuf()
519       .lds = true;
520 
521    //! buffer_load_lds_format_x off, s[32:35], 0                   ; e0c80000 80080080
522    bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
523       ->mubuf()
524       .lds = true;
525 
526    /* Stores */
527    //! buffer_store_b32 v10, off, s[32:35], s30                    ; e0680000 1e080a80
528    bld.mubuf(aco_opcode::buffer_store_dword, op_s4, Operand(v1), op_s1, op_v1, 0, false);
529 
530    //! buffer_store_b64 v[20:21], v10, s[32:35], s30 offen         ; e06c0000 1e48140a
531    bld.mubuf(aco_opcode::buffer_store_dwordx2, op_s4, op_v1, op_s1, op_v2, 0, true);
532 
533    finish_assembler_test();
534 END_TEST
535 
/*
 * GFX11 only. Checks MTBUF (typed buffer) encodings using the same fixed
 * registers as the MUBUF test: destination v42, resource s[32:35], address
 * VGPRs v10 and v[20:21], and SGPR offset s30. Every instruction is built
 * with data format V_008F0C_BUF_DATA_FORMAT_32_32 and number format
 * V_008F0C_BUF_NUM_FORMAT_FLOAT, which the expected disassembly shows as
 * BUF_FMT_32_32_FLOAT.
 *
 * Sections: addressing modes, cache/tfe flags and stores.
 *
 * The LLVM major version is printed to "output" first. NOTE(review): the
 * "#llvm_ver" pattern presumably captures it so that the scripted pattern
 * lines in the tfe case can pick the expected disassembly per LLVM
 * version - confirm.
 */
536 BEGIN_TEST(assembler.gfx11.mtbuf)
537    if (!setup_cs(NULL, GFX11))
538       return;
539 
540    Definition dst = bld.def(v1);
541    dst.setFixed(PhysReg(256 + 42));
542 
543    Operand op_s4(bld.tmp(s4));
544    op_s4.setFixed(PhysReg(32));
545 
546    Operand op_v1(bld.tmp(v1));
547    op_v1.setFixed(PhysReg(256 + 10));
548 
549    Operand op_v2(bld.tmp(v2));
550    op_v2.setFixed(PhysReg(256 + 20));
551 
552    Operand op_s1(bld.tmp(s1));
553    op_s1.setFixed(PhysReg(30));
554 
555    unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_32_32;
556    unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_FLOAT;
557 
558    //! llvm_version: #llvm_ver
559    fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
560 
561    /* Addressing */
562    //>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80
563    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0,
564              false);
565 
566    //! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80
567    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt,
568              nfmt, 0, false);
569 
570    //! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a
571    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true);
572 
573    //! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a
574    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)
575       ->mtbuf()
576       .idxen = true;
577 
578    //! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14
579    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)
580       ->mtbuf()
581       .idxen = true;
582 
583    //! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80
584    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84,
585              false);
586 
587    /* Various flags */
588    //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
589    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
590              nfmt, 0, false)
591       ->mtbuf()
592       .glc = true;
593 
594    //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
595    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
596              nfmt, 0, false)
597       ->mtbuf()
598       .dlc = true;
599 
600    //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
601    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
602              nfmt, 0, false)
603       ->mtbuf()
604       .slc = true;
605 
606    //; if llvm_ver >= 16:
607    //;    insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
608    //; else:
609    //;    insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80')
610    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
611              nfmt, 0, false)
612       ->mtbuf()
613       .tfe = true;
614 
615    /* Stores */
616    //! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80
617    bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0,
618              false);
619 
620    //! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a
621    bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0, true);
622 
623    finish_assembler_test();
624 END_TEST
625 
/*
 * GFX11 only. Checks MIMG encodings using fixed registers: destinations v42
 * (v1) and v[84:87] (v4), an s4 operand at s[32:35], an s8 operand at
 * s[64:71], and address/data VGPRs v10, v[20:21] and v[30:33].
 *
 * Sections: image_sample with default settings, with dim set to ac_image_2d
 * and with dmask 0x1; one image_sample per flag (dlc, glc, slc, tfe, lwe,
 * r128, a16, d16); an NSA form with two separate address VGPRs (v10, v40);
 * image_store and image_atomic_add.
 *
 * NOTE(review): in the tfe and d16 cases the expected destination range
 * (v[84:88] / v[84:85]) differs from the v4 definition passed in; presumably
 * that is how the disassembler prints those flags - confirm.
 */
626 BEGIN_TEST(assembler.gfx11.mimg)
627    if (!setup_cs(NULL, GFX11))
628       return;
629 
630    Definition dst_v1 = bld.def(v1);
631    dst_v1.setFixed(PhysReg(256 + 42));
632 
633    Definition dst_v4 = bld.def(v4);
634    dst_v4.setFixed(PhysReg(256 + 84));
635 
636    Operand op_s4(bld.tmp(s4));
637    op_s4.setFixed(PhysReg(32));
638 
639    Operand op_s8(bld.tmp(s8));
640    op_s8.setFixed(PhysReg(64));
641 
642    Operand op_v1(bld.tmp(v1));
643    op_v1.setFixed(PhysReg(256 + 10));
644 
645    Operand op_v2(bld.tmp(v2));
646    op_v2.setFixed(PhysReg(256 + 20));
647 
648    Operand op_v4(bld.tmp(v4));
649    op_v4.setFixed(PhysReg(256 + 30));
650 
651    //>> image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D ; f06c0f00 2010540a
652    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1);
653 
654    //! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414
655    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim =
656       ac_image_2d;
657 
658    //! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a
659    bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask = 0x1;
660 
661    /* Various flags */
662    //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a
663    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().dlc = true;
664 
665    //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a
666    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().glc = true;
667 
668    //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a
669    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().slc = true;
670 
671    //! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a
672    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().tfe = true;
673 
674    //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; f06c0f00 2050540a
675    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().lwe = true;
676 
677    //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D r128 ; f06c8f00 2010540a
678    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().r128 = true;
679 
680    //! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; f06d0f00 2010540a
681    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().a16 = true;
682 
683    //! image_sample v[84:85], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D d16 ; f06e0f00 2010540a
684    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().d16 = true;
685 
686    /* NSA */
687    //! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028
688    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1,
689             Operand(bld.tmp(v1), PhysReg(256 + 40)))
690       ->mimg()
691       .dim = ac_image_2d;
692 
693    /* Stores */
694    //! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a
695    bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1);
696 
697    //! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14
698    bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
699             op_v1, op_v2)
700       ->mimg()
701       .dim = ac_image_2d;
702 
703    finish_assembler_test();
704 END_TEST
705 
/*
 * GFX11 only. Checks FLAT, GLOBAL and SCRATCH encodings using fixed
 * registers: destination v42, an s1 operand at s32, an s2 operand at
 * s[64:65], and address VGPRs v10 and v[20:21].
 *
 * Sections: addressing modes (flat load; global load with a VGPR pair
 * address and with a VGPR plus SGPR pair address; scratch load with a VGPR,
 * an SGPR or both; global loads with immediate offsets -42 and 84), one
 * flat load per flag (slc, glc, dlc), and a flat store.
 *
 * NOTE(review): Operand(s1) / Operand(v1) are passed where the expected
 * disassembly shows "off" or no operand, so they presumably stand for
 * undefined operands - confirm.
 */
706 BEGIN_TEST(assembler.gfx11.flat)
707    if (!setup_cs(NULL, GFX11))
708       return;
709 
710    Definition dst_v1 = bld.def(v1);
711    dst_v1.setFixed(PhysReg(256 + 42));
712 
713    Operand op_s1(bld.tmp(s1));
714    op_s1.setFixed(PhysReg(32));
715 
716    Operand op_s2(bld.tmp(s2));
717    op_s2.setFixed(PhysReg(64));
718 
719    Operand op_v1(bld.tmp(v1));
720    op_v1.setFixed(PhysReg(256 + 10));
721 
722    Operand op_v2(bld.tmp(v2));
723    op_v2.setFixed(PhysReg(256 + 20));
724 
725    /* Addressing */
726    //>> flat_load_b32 v42, v[20:21]                                 ; dc500000 2a7c0014
727    bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1));
728 
729    //! global_load_b32 v42, v[20:21], off                          ; dc520000 2a7c0014
730    bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1));
731 
732    //! global_load_b32 v42, v10, s[64:65]                          ; dc520000 2a40000a
733    bld.global(aco_opcode::global_load_dword, dst_v1, op_v1, op_s2);
734 
735    //! scratch_load_b32 v42, v10, off                              ; dc510000 2afc000a
736    bld.scratch(aco_opcode::scratch_load_dword, dst_v1, op_v1, Operand(s1));
737 
738    //! scratch_load_b32 v42, off, s32                              ; dc510000 2a200080
739    bld.scratch(aco_opcode::scratch_load_dword, dst_v1, Operand(v1), op_s1);
740 
741    //! scratch_load_b32 v42, v10, s32                              ; dc510000 2aa0000a
742    bld.scratch(aco_opcode::scratch_load_dword, dst_v1, op_v1, op_s1);
743 
744    //! global_load_b32 v42, v[20:21], off offset:-42               ; dc521fd6 2a7c0014
745    bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), -42);
746 
747    //! global_load_b32 v42, v[20:21], off offset:84                ; dc520054 2a7c0014
748    bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), 84);
749 
750    /* Various flags */
751    //! flat_load_b32 v42, v[20:21] slc                             ; dc508000 2a7c0014
752    bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().slc = true;
753 
754    //! flat_load_b32 v42, v[20:21] glc                             ; dc504000 2a7c0014
755    bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().glc = true;
756 
757    //! flat_load_b32 v42, v[20:21] dlc                             ; dc502000 2a7c0014
758    bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().dlc = true;
759 
760    /* Stores */
761    //! flat_store_b32 v[20:21], v10                                ; dc680000 007c0a14
762    bld.flat(aco_opcode::flat_store_dword, op_v2, Operand(s1), op_v1);
763 
764    finish_assembler_test();
765 END_TEST
766 
/* GFX11 assembler test for the exp instruction: a full four-slot export, a
 * partially enabled export, the done modifier and the row_en modifier. The
 * pattern comment ahead of each builder call holds the expected disassembly and
 * encoding.
 */
767 BEGIN_TEST(assembler.gfx11.exp)
768    if (!setup_cs(NULL, GFX11))
769       return;
770 
       /* op[i] is fixed to PhysReg(256 + i), printed as v0..v3 in the expected text. */
771    Operand op[4];
772    for (unsigned i = 0; i < 4; i++)
773       op[i] = Operand(PhysReg(256 + i), v1);
774 
       /* Operand fixed to m0; only the row_en case below passes it to the builder. */
775    Operand op_m0(bld.tmp(s1));
776    op_m0.setFixed(m0);
777 
       /* The operands are deliberately passed out of order (1, 0, 3, 2) and show up
        * in that order in the expected text. The trailing 3 matches "mrt3".
        */
778    //>> exp mrt3 v1, v0, v3, v2                                     ; f800003f 02030001
779    bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], 0xf, 3);
780 
       /* With mask 0x5 only slots 0 and 2 carry a register; slots 1 and 3 are given
        * an unset Operand(v1) and read "off" in the expected text.
        */
781    //! exp mrt3 v1, off, v0, off                                   ; f8000035 80008001
782    bld.exp(aco_opcode::exp, op[1], Operand(v1), op[0], Operand(v1), 0x5, 3);
783 
       /* The extra (false, true) arguments are what produce "done" in the expected text. */
784    //! exp mrt3 v1, v0, v3, v2 done                                ; f800083f 02030001
785    bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], 0xf, 3, false, true);
786 
       /* row_en is set on the created instruction; op_m0 is passed as a fifth
        * operand but does not appear in the expected text.
        */
787    //>> exp mrt3 v1, v0, v3, v2 row_en                              ; f800203f 02030001
788    bld.exp(aco_opcode::exp, op[1], op[0], op[3], op[2], op_m0, 0xf, 3)->exp().row_en = true;
789 
790    finish_assembler_test();
791 END_TEST
792 
/* GFX11 assembler test for the v_interp_*_inreg opcodes: covers the wait_exp
 * value, per-operand neg modifiers, op_sel and clamp. The pattern comment ahead
 * of each builder call holds the expected disassembly and encoding.
 */
793 BEGIN_TEST(assembler.gfx11.vinterp)
794    if (!setup_cs(NULL, GFX11))
795       return;
796 
       /* Destination and the three sources are fixed to PhysReg(256 + 42/10/20/30),
        * printed as v42, v10, v20 and v30 in the expected text.
        */
797    Definition dst = bld.def(v1);
798    dst.setFixed(PhysReg(256 + 42));
799 
800    Operand op0(bld.tmp(v1));
801    op0.setFixed(PhysReg(256 + 10));
802 
803    Operand op1(bld.tmp(v1));
804    op1.setFixed(PhysReg(256 + 20));
805 
806    Operand op2(bld.tmp(v1));
807    op2.setFixed(PhysReg(256 + 30));
808 
       /* Writes the LLVM major version into the test output.
        * NOTE(review): the pattern on the next line appears to capture it as
        * llvm_ver for the scripted check further down - confirm with the harness.
        */
809    //! llvm_version: #llvm_ver
810    fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
811 
       /* No wait_exp argument is passed here; the expected text shows wait_exp:7. */
812    //>> v_interp_p10_f32 v42, v10, v20, v30 wait_exp:7              ; cd00072a 047a290a
813    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2);
814 
815    //! v_interp_p10_f32 v42, v10, v20, v30 wait_exp:6              ; cd00062a 047a290a
816    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 6);
817 
       /* For wait_exp 0 the expected text depends on llvm_ver: 18 and newer print
        * "wait_exp:0", older versions omit it. The expected encoding is the same.
        */
818    //; if llvm_ver >= 18:
819    //;    insert_pattern('v_interp_p2_f32 v42, v10, v20, v30 wait_exp:0               ; cd01002a 047a290a')
820    //; else:
821    //;    insert_pattern('v_interp_p2_f32 v42, v10, v20, v30                          ; cd01002a 047a290a')
822    bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0);
823 
       /* neg[0], neg[1] and neg[2] are set one at a time; each negates the matching
        * source in the expected text and changes the top nibble of the second dword
        * (0x2, 0x4, 0x8).
        */
824    //! v_interp_p10_f32 v42, -v10, v20, v30 wait_exp:6             ; cd00062a 247a290a
825    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 6)
826       ->vinterp_inreg()
827       .neg[0] = true;
828 
829    //! v_interp_p10_f32 v42, v10, -v20, v30 wait_exp:6             ; cd00062a 447a290a
830    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 6)
831       ->vinterp_inreg()
832       .neg[1] = true;
833 
834    //! v_interp_p10_f32 v42, v10, v20, -v30 wait_exp:6             ; cd00062a 847a290a
835    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 6)
836       ->vinterp_inreg()
837       .neg[2] = true;
838 
       /* The last argument (0x1, 0x2, 0x4, 0x8) selects which op_sel entry is 1 in
        * the expected text; each call also uses a different f16 opcode.
        */
839    //! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0] wait_exp:6 ; cd020e2a 047a290a
840    bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 6, 0x1);
841 
842    //! v_interp_p2_f16_f32 v42, v10, v20, v30 op_sel:[0,1,0,0] wait_exp:6 ; cd03162a 047a290a
843    bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, dst, op0, op1, op2, 6, 0x2);
844 
845    //! v_interp_p10_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,1,0] wait_exp:6 ; cd04262a 047a290a
846    bld.vinterp_inreg(aco_opcode::v_interp_p10_rtz_f16_f32_inreg, dst, op0, op1, op2, 6, 0x4);
847 
848    //! v_interp_p2_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,0,1] wait_exp:6 ; cd05462a 047a290a
849    bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 6, 0x8);
850 
       /* clamp is set on the created instruction and is expected as 0x8000 in the
        * first dword.
        */
851    //! v_interp_p10_f32 v42, v10, v20, v30 clamp wait_exp:6        ; cd00862a 047a290a
852    bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 6)
853       ->vinterp_inreg()
854       .clamp = true;
855 
856    finish_assembler_test();
857 END_TEST
858 
/* GFX11 assembler test for lds_direct_load and lds_param_load: covers several
 * wait_vdst values and the attribute/channel selection of lds_param_load. The
 * pattern comment ahead of each builder call holds the expected disassembly and
 * encoding.
 */
859 BEGIN_TEST(assembler.gfx11.ldsdir)
860    if (!setup_cs(NULL, GFX11))
861       return;
862 
       /* Destination fixed to PhysReg(256 + 42), printed as v42. */
863    Definition dst = bld.def(v1);
864    dst.setFixed(PhysReg(256 + 42));
865 
       /* Operand fixed to m0; it is passed to every builder call below but never
        * appears in the expected text.
        */
866    Operand op(bld.tmp(s1));
867    op.setFixed(m0);
868 
       /* Writes the LLVM major version into the test output.
        * NOTE(review): the pattern on the next line appears to capture it as
        * llvm_ver for the scripted checks further down - confirm with the harness.
        */
869    //! llvm_version: #llvm_ver
870    fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
871 
       /* wait_vdst is assigned on the created instruction after the builder call. */
872    //>> lds_direct_load v42 wait_vdst:15                            ; ce1f002a
873    bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 15;
874 
875    //! lds_direct_load v42 wait_vdst:6                             ; ce16002a
876    bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 6;
877 
       /* For wait_vdst 0 the expected text depends on llvm_ver: 18 and newer print
        * "wait_vdst:0", older versions omit it. The expected encoding is the same.
        */
878    //; if llvm_ver >= 18:
879    //;    insert_pattern('lds_direct_load v42 wait_vdst:0                             ; ce10002a')
880    //; else:
881    //;    insert_pattern('lds_direct_load v42                                         ; ce10002a')
882    bld.ldsdir(aco_opcode::lds_direct_load, dst, op)->ldsdir().wait_vdst = 0;
883 
       /* The two trailing integers select the attribute and its channel: (56, 0),
        * (34, 1) and (12, 2) are expected as attr56.x, attr34.y and attr12.z.
        */
884    //! lds_param_load v42, attr56.x wait_vdst:8                    ; ce08e02a
885    bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0)->ldsdir().wait_vdst = 8;
886 
887    //; if llvm_ver >= 18:
888    //;    insert_pattern('lds_param_load v42, attr56.x wait_vdst:0                    ; ce00e02a')
889    //; else:
890    //;    insert_pattern('lds_param_load v42, attr56.x                                ; ce00e02a')
891    bld.ldsdir(aco_opcode::lds_param_load, dst, op, 56, 0)->ldsdir().wait_vdst = 0;
892 
893    //! lds_param_load v42, attr34.y wait_vdst:8                    ; ce08892a
894    bld.ldsdir(aco_opcode::lds_param_load, dst, op, 34, 1)->ldsdir().wait_vdst = 8;
895 
896    //! lds_param_load v42, attr12.z wait_vdst:8                    ; ce08322a
897    bld.ldsdir(aco_opcode::lds_param_load, dst, op, 12, 2)->ldsdir().wait_vdst = 8;
898 
899    finish_assembler_test();
900 END_TEST
901 
/* GFX11 assembler test for f16 VOP1/VOP2/VOPC instructions that use registers
 * v128 and above. The only call restricted to v0..v2 is expected in the _e32
 * form; every call that touches v128, v129 or v130 is expected in an _e64 or
 * _e64_dpp form (or as v_fma_f16 for the fmaak/fmamk opcodes). The pattern
 * comment ahead of each builder call holds the expected disassembly and encoding.
 */
902 BEGIN_TEST(assembler.gfx11.vop12c_v128)
903    if (!setup_cs(NULL, GFX11))
904       return;
905 
       /* Definitions and operands fixed to PhysReg(256 + n), printed as vN: one low
        * and one high destination, two low sources and two high sources.
        */
906    Definition dst_v0 = bld.def(v1);
907    dst_v0.setFixed(PhysReg(256));
908 
909    Definition dst_v128 = bld.def(v1);
910    dst_v128.setFixed(PhysReg(256 + 128));
911 
912    Operand op_v1(bld.tmp(v1));
913    op_v1.setFixed(PhysReg(256 + 1));
914 
915    Operand op_v2(bld.tmp(v1));
916    op_v2.setFixed(PhysReg(256 + 2));
917 
918    Operand op_v129(bld.tmp(v1));
919    op_v129.setFixed(PhysReg(256 + 129));
920 
921    Operand op_v130(bld.tmp(v1));
922    op_v130.setFixed(PhysReg(256 + 130));
923 
       /* Writes the LLVM major version into the test output.
        * NOTE(review): the pattern on the next line appears to capture it as
        * llvm_ver for the scripted check below - confirm with the harness.
        */
924    //! llvm_version: #llvm_ver
925    fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
926 
       /* Baseline with low registers only. When llvm_ver is 16 the expected
        * disassembly line additionally contains an "unknown register 128" error
        * message; the expected encoding is the same either way.
        */
927    //>> BB0:
928    //; if llvm_ver == 16:
929    //;    insert_pattern('v_mul_f16_e32 v0, v1, v2 ; Error: VGPR_32_Lo128: unknown register 128 ; 6a000501')
930    //; else:
931    //;    insert_pattern('v_mul_f16_e32 v0, v1, v2                                    ; 6a000501')
932    bld.vop2(aco_opcode::v_mul_f16, dst_v0, op_v1, op_v2);
933 
       /* One high register at a time: destination, first source, second source. */
934    //! v_mul_f16_e64 v128, v1, v2                                  ; d5350080 00020501
935    bld.vop2(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2);
936 
937    //! v_mul_f16_e64 v0, v129, v2                                  ; d5350000 00020581
938    bld.vop2(aco_opcode::v_mul_f16, dst_v0, op_v129, op_v2);
939 
940    //! v_mul_f16_e64 v0, v1, v130                                  ; d5350000 00030501
941    bld.vop2(aco_opcode::v_mul_f16, dst_v0, op_v1, op_v130);
942 
943    //! v_rcp_f16_e64 v128, v1                                      ; d5d40080 00000101
944    bld.vop1(aco_opcode::v_rcp_f16, dst_v128, op_v1);
945 
946    //! v_cmp_eq_f16_e64 vcc, v129, v2                              ; d402006a 00020581
947    bld.vopc(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2);
948 
       /* Same three placements of the high register with DPP16; dpp_row_rr(1) is
        * expected as "row_ror:1".
        */
949    //! v_mul_f16_e64_dpp v128, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350080 000204fa ff0d2101
950    bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1));
951 
952    //! v_mul_f16_e64_dpp v0, v129, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350000 000204fa ff0d2181
953    bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v0, op_v129, op_v2, dpp_row_rr(1));
954 
955    //! v_mul_f16_e64_dpp v0, v1, v130 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350000 000304fa ff0d2101
956    bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v0, op_v1, op_v130, dpp_row_rr(1));
957 
       /* And again with DPP8; no lane selection is passed, and the expected text
        * shows dpp8:[0,0,0,0,0,0,0,0].
        */
958    //! v_mul_f16_e64_dpp v128, v1, v2 dpp8:[0,0,0,0,0,0,0,0] fi:1  ; d5350080 000204ea 00000001
959    bld.vop2_dpp8(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2);
960 
961    //! v_mul_f16_e64_dpp v0, v129, v2 dpp8:[0,0,0,0,0,0,0,0] fi:1  ; d5350000 000204ea 00000081
962    bld.vop2_dpp8(aco_opcode::v_mul_f16, dst_v0, op_v129, op_v2);
963 
964    //! v_mul_f16_e64_dpp v0, v1, v130 dpp8:[0,0,0,0,0,0,0,0] fi:1  ; d5350000 000304ea 00000001
965    bld.vop2_dpp8(aco_opcode::v_mul_f16, dst_v0, op_v1, op_v130);
966 
       /* v_fmaak_f16 / v_fmamk_f16 with a v128 destination are both expected as
        * v_fma_f16 with the literal 0x60 (96) in the last or the middle source
        * position respectively, followed by an extra literal dword.
        */
967    //! v_fma_f16 v128, v1, v2, 0x60                                ; d6480080 03fe0501 00000060
968    bld.vop2(aco_opcode::v_fmaak_f16, dst_v128, op_v1, op_v2, Operand::literal32(96));
969 
970    //! v_fma_f16 v128, v1, 0x60, v2                                ; d6480080 0409ff01 00000060
971    bld.vop2(aco_opcode::v_fmamk_f16, dst_v128, op_v1, op_v2, Operand::literal32(96));
972 
       /* Input modifiers combined with DPP16: dpp16().neg[0] is expected as a
        * leading "-" and dpp16().abs[0] as "|...|" on the first source.
        */
973    //! v_rcp_f16_e64_dpp v128, -v1 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5d40080 200000fa ff1d2101
974    bld.vop1_dpp(aco_opcode::v_rcp_f16, dst_v128, op_v1, dpp_row_rr(1))->dpp16().neg[0] = true;
975 
976    //! v_rcp_f16_e64_dpp v128, |v1| row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5d40180 000000fa ff2d2101
977    bld.vop1_dpp(aco_opcode::v_rcp_f16, dst_v128, op_v1, dpp_row_rr(1))->dpp16().abs[0] = true;
978 
979    //! v_mul_f16_e64_dpp v128, -v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350080 200204fa ff1d2101
980    bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().neg[0] =
981       true;
982 
983    //! v_mul_f16_e64_dpp v128, |v1|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350180 000204fa ff2d2101
984    bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().abs[0] =
985       true;
986 
987    //! v_cmp_eq_f16_e64_dpp vcc, -v129, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402006a 200204fa ff1d2181
988    bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
989       ->dpp16()
990       .neg[0] = true;
991 
992    //! v_cmp_eq_f16_e64_dpp vcc, |v129|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402016a 000204fa ff2d2181
993    bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
994       ->dpp16()
995       .abs[0] = true;
996 
997    finish_assembler_test();
998 END_TEST
999 
/* Assembler test (set up for GFX11) for VOP3/VOP3P instructions combined with
 * DPP16 and DPP8, including the e64 forms of VOP1/VOP2/VOPC opcodes. It covers
 * the clamp, abs, neg, omod, op_sel and op_sel_hi modifiers. The pattern comment
 * ahead of each builder call holds the expected disassembly and encoding.
 */
1000 BEGIN_TEST(assembler.vop3_dpp)
1001    if (!setup_cs(NULL, GFX11))
1002       return;
1003 
        /* Fixed registers: PhysReg(256 + n) is printed as vN; the s2 definition at
         * PhysReg(4) is printed as s[4:5] and the s1 operand at PhysReg(1) as s1.
         */
1004    Definition dst_v0 = bld.def(v1);
1005    dst_v0.setFixed(PhysReg(256));
1006 
1007    Definition dst_non_vcc = bld.def(s2);
1008    dst_non_vcc.setFixed(PhysReg(4));
1009 
1010    Operand op_v1(bld.tmp(v1));
1011    op_v1.setFixed(PhysReg(256 + 1));
1012 
1013    Operand op_v2(bld.tmp(v1));
1014    op_v2.setFixed(PhysReg(256 + 2));
1015 
1016    Operand op_s1(bld.tmp(s1));
1017    op_s1.setFixed(PhysReg(1));
1018 
1019    //>> BB0:
1020    //! v_fma_f32_e64_dpp v0, v1, v2, s1 clamp row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d6138000 000604fa ff0d2101
1021    bld.vop3_dpp(aco_opcode::v_fma_f32, dst_v0, op_v1, op_v2, op_s1, dpp_row_rr(1))->valu().clamp =
1022       true;
1023 
        /* The builder arguments 0x1 and 0x5 are expected as op_sel:[1,0,0] and
         * op_sel_hi:[1,0,1]; abs = 0x7 puts all three sources in "|...|".
         */
1024    //! v_fma_mix_f32_e64_dpp v0, |v1|, |v2|, |s1| op_sel:[1,0,0] op_sel_hi:[1,0,1] row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; cc204f00 080604fa ffad2101
1025    bld.vop3p_dpp(aco_opcode::v_fma_mix_f32, dst_v0, op_v1, op_v2, op_s1, 0x1, 0x5, dpp_row_rr(1))
1026       ->valu()
1027       .abs = 0x7;
1028 
        /* neg = 0x7 negates all three sources in the expected text. */
1029    //! v_fma_f32_e64_dpp v0, -v1, -v2, -s1 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; d6130000 e00604ea 00000001
1030    bld.vop3_dpp8(aco_opcode::v_fma_f32, dst_v0, op_v1, op_v2, op_s1)->valu().neg = 0x7;
1031 
        /* neg = 0x3 negates only the first two sources; the arguments 0x0 and 0x7
         * are expected as op_sel_hi:[1,1,1] with no op_sel printed.
         */
1032    //! v_fma_mix_f32_e64_dpp v0, -v1, -v2, s1 op_sel_hi:[1,1,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; cc204000 780604ea 00000001
1033    bld.vop3p_dpp8(aco_opcode::v_fma_mix_f32, dst_v0, op_v1, op_v2, op_s1, 0x0, 0x7)->valu().neg =
1034       0x3;
1035 
        /* e64 + DPP16 forms of VOP2, VOP1 and VOPC opcodes. */
1036    //! v_add_f32_e64_dpp v0, v1, v2 clamp row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5038000 000204fa ff0d2101
1037    bld.vop2_e64_dpp(aco_opcode::v_add_f32, dst_v0, op_v1, op_v2, dpp_row_rr(1))->valu().clamp =
1038       true;
1039 
1040    //! v_sqrt_f32_e64_dpp v0, v1 clamp row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5b38000 000000fa ff0d2101
1041    bld.vop1_e64_dpp(aco_opcode::v_sqrt_f32, dst_v0, op_v1, dpp_row_rr(1))->valu().clamp = true;
1042 
1043    //! v_cmp_lt_f32_e64_dpp s[4:5], |v1|, |v2| row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d4110304 000204fa ffad2101
1044    bld.vopc_e64_dpp(aco_opcode::v_cmp_lt_f32, dst_non_vcc, op_v1, op_v2, dpp_row_rr(1))->valu().abs =
1045       0x3;
1046 
        /* e64 + DPP8 forms of the same opcodes; omod = 2 is expected as "mul:4". */
1047    //! v_add_f32_e64_dpp v0, v1, v2 mul:4 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; d5030000 100204ea 00000001
1048    bld.vop2_e64_dpp8(aco_opcode::v_add_f32, dst_v0, op_v1, op_v2)->valu().omod = 2;
1049 
1050    //! v_sqrt_f32_e64_dpp v0, v1 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; d5b38000 000000ea 00000001
1051    bld.vop1_e64_dpp8(aco_opcode::v_sqrt_f32, dst_v0, op_v1)->valu().clamp = true;
1052 
1053    //! v_cmp_lt_f32_e64_dpp s[4:5], |v1|, v2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; d4110104 000204ea 00000001
1054    bld.vopc_e64_dpp8(aco_opcode::v_cmp_lt_f32, dst_non_vcc, op_v1, op_v2)->valu().abs = 0x1;
1055 
1056    finish_assembler_test();
1057 END_TEST
1058 
/* Assembler test (set up for GFX11) for v_dual_* instruction pairs built with
 * bld.vopd. Each call passes the first opcode, both definitions, the operands of
 * both halves and finally the second opcode; the expected text prints the two
 * halves separated by "::". The pattern comment ahead of each builder call holds
 * the expected disassembly and encoding.
 */
1059 BEGIN_TEST(assembler.vopd)
1060    if (!setup_cs(NULL, GFX11))
1061       return;
1062 
        /* Fixed registers: PhysReg(256 + n) is printed as vN and PhysReg(0) as s0.
         * op_vcc is fixed to vcc and used only by the cndmask pair at the end.
         */
1063    Definition dst_v0 = bld.def(v1);
1064    dst_v0.setFixed(PhysReg(256));
1065 
1066    Definition dst_v1 = bld.def(v1);
1067    dst_v1.setFixed(PhysReg(256 + 1));
1068 
1069    Operand op_v0(bld.tmp(v1));
1070    op_v0.setFixed(PhysReg(256 + 0));
1071 
1072    Operand op_v1(bld.tmp(v1));
1073    op_v1.setFixed(PhysReg(256 + 1));
1074 
1075    Operand op_v2(bld.tmp(v1));
1076    op_v2.setFixed(PhysReg(256 + 2));
1077 
1078    Operand op_v3(bld.tmp(v1));
1079    op_v3.setFixed(PhysReg(256 + 3));
1080 
1081    Operand op_s0(bld.tmp(s1));
1082    op_s0.setFixed(PhysReg(0));
1083 
1084    Operand op_vcc(bld.tmp(s1));
1085    op_vcc.setFixed(vcc);
1086 
1087    //>> BB0:
1088    //! v_dual_mov_b32 v0, v0 :: v_dual_mov_b32 v1, v1 ; ca100100 00000101
1089    bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1, aco_opcode::v_dual_mov_b32);
1090 
        /* Operand::c32(96) is expected as the literal 0x60 with an extra trailing
         * dword, once in the first half and once in the second; s0 fills the other.
         */
1091    //! v_dual_mov_b32 v0, 0x60 :: v_dual_mov_b32 v1, s0 ; ca1000ff 00000000 00000060
1092    bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, Operand::c32(96), op_s0,
1093             aco_opcode::v_dual_mov_b32);
1094 
1095    //! v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0x60 ; ca100000 000000ff 00000060
1096    bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_s0, Operand::c32(96),
1097             aco_opcode::v_dual_mov_b32);
1098 
        /* Two operands go to the first half (mul), one to the second (mov). */
1099    //! v_dual_mul_f32 v0, v0, v1 :: v_dual_mov_b32 v1, v2 ; c8d00300 00000102
1100    bld.vopd(aco_opcode::v_dual_mul_f32, dst_v0, dst_v1, op_v0, op_v1, op_v2,
1101             aco_opcode::v_dual_mov_b32);
1102 
        /* fmac receives op_v0 as a third operand; the expected text only lists v1
         * and v2 as sources of the first half.
         */
1103    //! v_dual_fmac_f32 v0, v1, v2 :: v_dual_mov_b32 v1, v3 ; c8100501 00000103
1104    bld.vopd(aco_opcode::v_dual_fmac_f32, dst_v0, dst_v1, op_v1, op_v2, op_v0, op_v3,
1105             aco_opcode::v_dual_mov_b32);
1106 
        /* One operand goes to the first half (mov), two to the second (and). */
1107    //! v_dual_mov_b32 v0, v0 :: v_dual_and_b32 v1, v1, v2 ; ca240100 00000501
1108    bld.vopd(aco_opcode::v_dual_mov_b32, dst_v0, dst_v1, op_v0, op_v1, op_v2,
1109             aco_opcode::v_dual_and_b32);
1110 
        /* Both cndmask halves are given op_vcc as their third operand; vcc does not
         * appear in the expected text.
         */
1111    //! v_dual_cndmask_b32 v0, v0, v1 :: v_dual_cndmask_b32 v1, v2, v3 ; ca520300 00000702
1112    bld.vopd(aco_opcode::v_dual_cndmask_b32, dst_v0, dst_v1, op_v0, op_v1, op_vcc, op_v2, op_v3,
1113             op_vcc, aco_opcode::v_dual_cndmask_b32);
1114 
1115    finish_assembler_test();
1116 END_TEST
1117