• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 #include "helpers.h"
25 
26 using namespace aco;
27 
/*
 * assembler.s_memtime
 *
 * Loops over the gfx levels GFX6 through GFX10. For every level that
 * setup_cs() accepts, one s_memtime SMEM instruction is built whose s2
 * definition is fixed to PhysReg 0 (printed as s[0:1] in the expectations),
 * and finish_assembler_test() is then called.
 *
 * The slash-slash-tilde lines below hold the per-level expectations; they are
 * presumably consumed by the test harness and must stay byte-for-byte as
 * they are (TODO confirm against the harness in helpers.h).
 */
28 BEGIN_TEST(assembler.s_memtime)
29    for (unsigned i = GFX6; i <= GFX10; i++) {
      /* Skip this gfx level when setup_cs() returns false. */
30       if (!setup_cs(NULL, (amd_gfx_level)i))
31          continue;
32 
33       //~gfx[6-7]>> c7800000
34       //~gfx[6-7]!  bf810000
35       //~gfx[8-9]>> s_memtime s[0:1] ; c0900000 00000000
36       //~gfx10>> s_memtime s[0:1] ; f4900000 fa000000
37       bld.smem(aco_opcode::s_memtime, bld.def(s2)).def(0).setFixed(PhysReg{0});
38 
39       finish_assembler_test();
40    }
41 END_TEST
42 
/*
 * assembler.branch_3f
 *
 * GFX10-only test. Block 0 receives an s_branch whose target argument is 1,
 * followed by 0x3f v_nop instructions; an empty block 1 is then created and
 * block 0 is recorded as its linear predecessor.
 *
 * NOTE(review): the expectations show an s_nop directly after the branch and
 * an encoded offset of 0x40 although only 0x3f v_nops are built here.
 * Presumably the assembler inserts the s_nop to avoid a branch offset of
 * exactly 0x3f -- confirm against the assembler implementation.
 */
43 BEGIN_TEST(assembler.branch_3f)
44    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
45       return;
46 
47    //! BB0:
48    //! s_branch BB1                                                ; bf820040
49    //! s_nop 0                                                     ; bf800000
50    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
51 
52    for (unsigned i = 0; i < 0x3f; i++)
53       bld.vop1(aco_opcode::v_nop);
54 
55    bld.reset(program->create_and_insert_block());
56 
57    program->blocks[1].linear_preds.push_back(0u);
58 
59    finish_assembler_test();
60 END_TEST
61 
/*
 * assembler.long_jump.unconditional_forwards
 *
 * GFX10-only test. Block 0 holds a single s_branch whose target argument is
 * 2. Block 1 is filled with INT16_MAX + 1 s_nop instructions and block 2 is
 * left empty. Blocks 0 and 1 are recorded as linear predecessors of block 2.
 *
 * The expectations list an s_getpc_b64 / s_addc_u32 / s_bitcmp1_b32 /
 * s_bitset0_b32 / s_setpc_b64 sequence in place of the s_branch; presumably
 * the jump distance no longer fits a direct branch offset (TODO confirm).
 */
62 BEGIN_TEST(assembler.long_jump.unconditional_forwards)
63    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
64       return;
65 
66    //!BB0:
67    //! s_getpc_b64 s[0:1]                                          ; be801f00
68    //! s_addc_u32 s0, s0, 0x20014                                  ; 8200ff00 00020014
69    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
70    //! s_bitset0_b32 s0, 0                                         ; be801b80
71    //! s_setpc_b64 s[0:1]                                          ; be802000
72    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
73 
74    bld.reset(program->create_and_insert_block());
75 
76    //! s_nop 0                                                     ; bf800000
77    //!(then repeated 32767 times)
78    for (unsigned i = 0; i < INT16_MAX + 1; i++)
79       bld.sopp(aco_opcode::s_nop, -1, 0);
80 
81    //! BB2:
82    //! s_endpgm                                                    ; bf810000
83    bld.reset(program->create_and_insert_block());
84 
85    program->blocks[2].linear_preds.push_back(0u);
86    program->blocks[2].linear_preds.push_back(1u);
87 
88    finish_assembler_test();
89 END_TEST
90 
/*
 * assembler.long_jump.conditional_forwards
 *
 * GFX10-only test. Block 0 holds an s_cbranch_scc0 whose target argument is
 * 2. Block 1 is filled with INT16_MAX + 1 s_nop instructions and block 2 is
 * left empty. Block 0 is recorded as a linear predecessor of blocks 1 and 2,
 * and block 1 as a linear predecessor of block 2.
 *
 * The expectations show the opposite condition (s_cbranch_scc1 to BB1)
 * followed by the s_getpc_b64 ... s_setpc_b64 sequence; presumably the
 * inverted short branch skips over the long jump (TODO confirm).
 */
91 BEGIN_TEST(assembler.long_jump.conditional_forwards)
92    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
93       return;
94 
95    //! BB0:
96    //! s_cbranch_scc1 BB1                                          ; bf850006
97    //! s_getpc_b64 s[0:1]                                          ; be801f00
98    //! s_addc_u32 s0, s0, 0x20014                                  ; 8200ff00 00020014
99    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
100    //! s_bitset0_b32 s0, 0                                         ; be801b80
101    //! s_setpc_b64 s[0:1]                                          ; be802000
102    bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2);
103 
104    bld.reset(program->create_and_insert_block());
105 
106    //! BB1:
107    //! s_nop 0 ; bf800000
108    //!(then repeated 32767 times)
109    for (unsigned i = 0; i < INT16_MAX + 1; i++)
110       bld.sopp(aco_opcode::s_nop, -1, 0);
111 
112    //! BB2:
113    //! s_endpgm                                                    ; bf810000
114    bld.reset(program->create_and_insert_block());
115 
116    program->blocks[1].linear_preds.push_back(0u);
117    program->blocks[2].linear_preds.push_back(0u);
118    program->blocks[2].linear_preds.push_back(1u);
119 
120    finish_assembler_test();
121 END_TEST
122 
/*
 * assembler.long_jump.unconditional_backwards
 *
 * GFX10-only test. Block 0 is filled with INT16_MAX + 1 s_nop instructions
 * and ends with an s_branch whose target argument is 0, i.e. block 0 itself.
 * An empty block 1 follows. Block 0 is recorded as a linear predecessor of
 * both block 0 and block 1.
 *
 * The expectations list the s_getpc_b64 ... s_setpc_b64 sequence with the
 * literal 0xfffdfffc in place of the s_branch; presumably a negative
 * displacement too large for a direct branch (TODO confirm).
 */
123 BEGIN_TEST(assembler.long_jump.unconditional_backwards)
124    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
125       return;
126 
127    //!BB0:
128    //! s_nop 0                                                     ; bf800000
129    //!(then repeated 32767 times)
130    for (unsigned i = 0; i < INT16_MAX + 1; i++)
131       bld.sopp(aco_opcode::s_nop, -1, 0);
132 
133    //! s_getpc_b64 s[0:1]                                          ; be801f00
134    //! s_addc_u32 s0, s0, 0xfffdfffc                               ; 8200ff00 fffdfffc
135    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
136    //! s_bitset0_b32 s0, 0                                         ; be801b80
137    //! s_setpc_b64 s[0:1]                                          ; be802000
138    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 0);
139 
140    //! BB1:
141    //! s_endpgm                                                    ; bf810000
142    bld.reset(program->create_and_insert_block());
143 
144    program->blocks[0].linear_preds.push_back(0u);
145    program->blocks[1].linear_preds.push_back(0u);
146 
147    finish_assembler_test();
148 END_TEST
149 
/*
 * assembler.long_jump.conditional_backwards
 *
 * GFX10-only test. Block 0 is filled with INT16_MAX + 1 s_nop instructions
 * and ends with an s_cbranch_execnz whose target argument is 0, i.e. block 0
 * itself. An empty block 1 follows. Block 0 is recorded as a linear
 * predecessor of both block 0 and block 1.
 *
 * The expectations show the opposite condition (s_cbranch_execz to BB1)
 * followed by the s_getpc_b64 ... s_setpc_b64 sequence with the literal
 * 0xfffdfff8; presumably the inverted short branch skips over the long jump
 * (TODO confirm).
 */
150 BEGIN_TEST(assembler.long_jump.conditional_backwards)
151    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
152       return;
153 
154    //!BB0:
155    //! s_nop 0                                                     ; bf800000
156    //!(then repeated 32767 times)
157    for (unsigned i = 0; i < INT16_MAX + 1; i++)
158       bld.sopp(aco_opcode::s_nop, -1, 0);
159 
160    //! s_cbranch_execz BB1                                         ; bf880006
161    //! s_getpc_b64 s[0:1]                                          ; be801f00
162    //! s_addc_u32 s0, s0, 0xfffdfff8                               ; 8200ff00 fffdfff8
163    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
164    //! s_bitset0_b32 s0, 0                                         ; be801b80
165    //! s_setpc_b64 s[0:1]                                          ; be802000
166    bld.sopp(aco_opcode::s_cbranch_execnz, Definition(PhysReg(0), s2), 0);
167 
168    //! BB1:
169    //! s_endpgm                                                    ; bf810000
170    bld.reset(program->create_and_insert_block());
171 
172    program->blocks[0].linear_preds.push_back(0u);
173    program->blocks[1].linear_preds.push_back(0u);
174 
175    finish_assembler_test();
176 END_TEST
177 
/*
 * assembler.long_jump.3f
 *
 * GFX10-only test combining the two situations exercised separately by
 * assembler.branch_3f and the long_jump tests. Block 0 holds an s_branch
 * with target argument 1, then 0x3f - 6 v_nop instructions, then an s_branch
 * with target argument 2. Block 1 holds INT16_MAX + 1 v_nop instructions and
 * block 2 is left empty.
 *
 * Only the first branch and a following s_nop are listed as expectations;
 * the encoded offset is 0x40, the same as in assembler.branch_3f.
 */
178 BEGIN_TEST(assembler.long_jump.3f)
179    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
180       return;
181 
182    //! BB0:
183    //! s_branch BB1                                                ; bf820040
184    //! s_nop 0                                                     ; bf800000
185    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
186 
187    for (unsigned i = 0; i < 0x3f - 6; i++) // an unconditional long jump is 6 dwords
188       bld.vop1(aco_opcode::v_nop);
189    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
190 
191    bld.reset(program->create_and_insert_block());
192    for (unsigned i = 0; i < INT16_MAX + 1; i++)
193       bld.vop1(aco_opcode::v_nop);
194    bld.reset(program->create_and_insert_block());
195 
196    program->blocks[1].linear_preds.push_back(0u);
197    program->blocks[2].linear_preds.push_back(0u);
198    program->blocks[2].linear_preds.push_back(1u);
199 
200    finish_assembler_test();
201 END_TEST
202 
/*
 * assembler.long_jump.constaddr
 *
 * GFX10-only test. Block 0 holds an s_branch with target argument 2, block 1
 * holds INT16_MAX + 1 s_nop instructions, and block 2 holds a
 * p_constaddr_getpc followed by a p_constaddr_addlo, both built with zero
 * operands and writing PhysReg 0.
 *
 * The expectations look for an s_getpc_b64 twice and then an s_add_u32 with
 * the literal 0xe4. Presumably the first s_getpc_b64 belongs to the expanded
 * branch and the second to p_constaddr_getpc, so the test pins the constant
 * address literal in the presence of a long jump (TODO confirm).
 */
203 BEGIN_TEST(assembler.long_jump.constaddr)
204    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
205       return;
206 
207    //>> s_getpc_b64 s[0:1]                                          ; be801f00
208    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
209 
210    bld.reset(program->create_and_insert_block());
211 
212    for (unsigned i = 0; i < INT16_MAX + 1; i++)
213       bld.sopp(aco_opcode::s_nop, -1, 0);
214 
215    bld.reset(program->create_and_insert_block());
216 
217    //>> s_getpc_b64 s[0:1]                                          ; be801f00
218    //! s_add_u32 s0, s0, 0xe4                                      ; 8000ff00 000000e4
219    bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero());
220    bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
221             Operand(PhysReg(0), s1), Operand::zero(), Operand::zero());
222 
223    program->blocks[2].linear_preds.push_back(0u);
224    program->blocks[2].linear_preds.push_back(1u);
225 
226    finish_assembler_test();
227 END_TEST
228 
/*
 * assembler.v_add3
 *
 * Loops over GFX9 and GFX10. For every level that setup_cs() accepts, a
 * VOP3 v_add3_u32 instruction is created by hand with three zero operands
 * and one v1 definition at PhysReg 0, inserted through the builder, and
 * finish_assembler_test() is called. The expectations give a different
 * first encoding dword per level (d1ff0000 vs. d76d0000).
 */
229 BEGIN_TEST(assembler.v_add3)
230    for (unsigned i = GFX9; i <= GFX10; i++) {
231       if (!setup_cs(NULL, (amd_gfx_level)i))
232          continue;
233 
234       //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
235       //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
       /* The trailing 3 and 1 match the three operands and one definition set below. */
236       aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
237       add3->operands[0] = Operand::zero();
238       add3->operands[1] = Operand::zero();
239       add3->operands[2] = Operand::zero();
240       add3->definitions[0] = Definition(PhysReg(0), v1);
241       bld.insert(std::move(add3));
242 
243       finish_assembler_test();
244    }
245 END_TEST
246 
/*
 * assembler.v_add3_clamp
 *
 * Same construction as assembler.v_add3, except that the instruction's
 * clamp field is set to 1 before insertion. Compared with the unclamped
 * test, the expected first dword differs only in the 0x8000 bit
 * (d1ff8000 / d76d8000).
 *
 * NOTE(review): the expected text is the literal "integer addition + clamp"
 * rather than a mnemonic; presumably that is what the disassembler used by
 * the harness prints for this encoding -- confirm before changing it.
 */
247 BEGIN_TEST(assembler.v_add3_clamp)
248    for (unsigned i = GFX9; i <= GFX10; i++) {
249       if (!setup_cs(NULL, (amd_gfx_level)i))
250          continue;
251 
252       //~gfx9>> integer addition + clamp ; d1ff8000 02010080
253       //~gfx10>> integer addition + clamp ; d76d8000 02010080
254       aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
255       add3->operands[0] = Operand::zero();
256       add3->operands[1] = Operand::zero();
257       add3->operands[2] = Operand::zero();
258       add3->definitions[0] = Definition(PhysReg(0), v1);
259       add3->clamp = 1;
260       bld.insert(std::move(add3));
261 
262       finish_assembler_test();
263    }
264 END_TEST
265 
/*
 * assembler.smem_offset
 *
 * Loops over GFX9 and GFX10. For every level that setup_cs() accepts, three
 * s_load_dword instructions are built with the same destination (s1 at
 * PhysReg 7) and base (s2 at PhysReg 6) but different offset operands:
 *   1. the s1 operand at PhysReg 5 only,
 *   2. the 32-bit constant 0x42 only,
 *   3. the constant 0x42 followed by the s1 operand at PhysReg 5.
 * Each form has its own expected encoding per gfx level.
 */
266 BEGIN_TEST(assembler.smem_offset)
267    for (unsigned i = GFX9; i <= GFX10; i++) {
268       if (!setup_cs(NULL, (amd_gfx_level)i))
269          continue;
270 
271       Definition dst(PhysReg(7), s1);
272       Operand sbase(PhysReg(6), s2);
273       Operand offset(PhysReg(5), s1);
274 
275       //~gfx9>> s_load_dword s7, s[6:7], s5 ; c00001c3 00000005
276       //~gfx10>> s_load_dword s7, s[6:7], s5 ; f40001c3 0a000000
277       bld.smem(aco_opcode::s_load_dword, dst, sbase, offset);
278       //~gfx9! s_load_dword s7, s[6:7], 0x42 ; c00201c3 00000042
279       //~gfx10! s_load_dword s7, s[6:7], 0x42 ; f40001c3 fa000042
280       bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42));
       /* Always true with the current loop bounds, since i starts at GFX9. */
281       if (i >= GFX9) {
282          //~gfx9! s_load_dword s7, s[6:7], s5 offset:0x42 ; c00241c3 0a000042
283          //~gfx10! s_load_dword s7, s[6:7], s5 offset:0x42 ; f40001c3 0a000042
284          bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42), offset);
285       }
286 
287       finish_assembler_test();
288    }
289 END_TEST
290 
/*
 * assembler.p_constaddr
 *
 * GFX9-only test. Two p_constaddr pseudo instructions are built: the first
 * writes an s2 definition fixed to PhysReg 0 with a zero operand, the second
 * writes an s2 definition fixed to PhysReg 2 with the 32-bit constant 32.
 *
 * Each one is expected to appear as an s_getpc_b64 followed by an s_add_u32,
 * with literals 24 and 44 respectively. The two literals differ by 20 while
 * the operands differ by 32; presumably the rest is accounted for by the
 * position of each s_getpc_b64 in the code (TODO confirm).
 */
291 BEGIN_TEST(assembler.p_constaddr)
292    if (!setup_cs(NULL, GFX9))
293       return;
294 
295    Definition dst0 = bld.def(s2);
296    Definition dst1 = bld.def(s2);
297    dst0.setFixed(PhysReg(0));
298    dst1.setFixed(PhysReg(2));
299 
300    //>> s_getpc_b64 s[0:1] ; be801c00
301    //! s_add_u32 s0, s0, 24 ; 8000ff00 00000018
302    bld.pseudo(aco_opcode::p_constaddr, dst0, Operand::zero());
303 
304    //! s_getpc_b64 s[2:3] ; be821c00
305    //! s_add_u32 s2, s2, 44 ; 8002ff02 0000002c
306    bld.pseudo(aco_opcode::p_constaddr, dst1, Operand::c32(32));
307 
    /* Unlike the other tests in this file, lowering is run explicitly before finishing. */
308    aco::lower_to_hw_instr(program.get());
309    finish_assembler_test();
310 END_TEST
311