/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include "helpers.h"

using namespace aco;

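/* Tests for Builder::v_mul_imm(), which multiplies a VGPR by a constant
 * immediate and is expected to lower the multiplication to a copy, a single
 * shift, a short shift+add chain, or a plain v_mul_lo_u32, depending on the
 * constant and the GFX level being compiled for.
 *
 * The "//!" lines are check directives for the ACO unit-test framework: each
 * one is matched against the optimized program that finish_opt_test() emits,
 * and a "//~gfxN!" prefix restricts the check to the corresponding GFX
 * variant of the loop below. */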
BEGIN_TEST(builder.v_mul_imm)
   for (unsigned i = GFX8; i <= GFX10; i++) {
      //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
      if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
         continue;

      /* simple optimizations */

      //! p_unit_test 0, 0
      writeout(0, bld.v_mul_imm(bld.def(v1), inputs[0], 0));

      //! p_unit_test 1, %a
      writeout(1, bld.v_mul_imm(bld.def(v1), inputs[0], 1));

      //! v1: %res2 = v_lshlrev_b32 2, %a
      //! p_unit_test 2, %res2
      writeout(2, bld.v_mul_imm(bld.def(v1), inputs[0], 4));

      //! v1: %res3 = v_lshlrev_b32 31, %a
      //! p_unit_test 3, %res3
      writeout(3, bld.v_mul_imm(bld.def(v1), inputs[0], 2147483648u));

      /* single lshl+add/sub */

      //~gfx8! v1: %res4_tmp = v_lshlrev_b32 3, %a
      //~gfx8! v1: %res4, s2: %_ = v_add_co_u32 %res4_tmp, %a
      //~gfx(9|10)! v1: %res4 = v_lshl_add_u32 %a, 3, %a
      //! p_unit_test 4, %res4
      writeout(4, bld.v_mul_imm(bld.def(v1), inputs[0], 9));

      //~gfx[89]! v1: %res5_tmp = v_lshlrev_b32 3, %a
      //~gfx8! v1: %res5, s2: %_ = v_sub_co_u32 %res5_tmp, %a
      //~gfx9! v1: %res5 = v_sub_u32 %res5_tmp, %a
      //~gfx10! v1: %res5 = v_mul_lo_u32 7, %a
      //! p_unit_test 5, %res5
      writeout(5, bld.v_mul_imm(bld.def(v1), inputs[0], 7));

      /* lshl+add optimization with literal */

      //~gfx8! v1: %res6_tmp0 = v_lshlrev_b32 2, %a
      //~gfx8! v1: %res6_tmp1 = v_lshlrev_b32 6, %a
      //~gfx8! v1: %res6, s2: %_ = v_add_co_u32 %res6_tmp1, %res6_tmp0
      //~gfx9! v1: %res6_tmp = v_lshlrev_b32 2, %a
      //~gfx9! v1: %res6 = v_lshl_add_u32 %a, 6, %res6_tmp
      //~gfx10! v1: %res6 = v_mul_lo_u32 0x44, %a
      //! p_unit_test 6, %res6
      writeout(6, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64));

      //~gfx8! s1: %res7_tmp = p_parallelcopy 0x144
      //~gfx8! v1: %res7 = v_mul_lo_u32 %res7_tmp, %a
      //~gfx9! v1: %res7_tmp0 = v_lshlrev_b32 2, %a
      //~gfx9! v1: %res7_tmp1 = v_lshl_add_u32 %a, 6, %res7_tmp0
      //~gfx9! v1: %res7 = v_lshl_add_u32 %a, 8, %res7_tmp1
      //~gfx10! v1: %res7 = v_mul_lo_u32 0x144, %a
      //! p_unit_test 7, %res7
      writeout(7, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64 | 256));

      //~gfx8! s1: %res8_tmp = p_parallelcopy 0x944
      //~gfx8! v1: %res8 = v_mul_lo_u32 %res8_tmp, %a
      //~gfx9! v1: %res8_tmp0 = v_lshlrev_b32 2, %a
      //~gfx9! v1: %res8_tmp1 = v_lshl_add_u32 %a, 6, %res8_tmp0
      //~gfx9! v1: %res8_tmp2 = v_lshl_add_u32 %a, 8, %res8_tmp1
      //~gfx9! v1: %res8 = v_lshl_add_u32 %a, 11, %res8_tmp2
      //~gfx10! v1: %res8 = v_mul_lo_u32 0x944, %a
      //! p_unit_test 8, %res8
      writeout(8, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64 | 256 | 2048));

      /* lshl+add optimization with inline constant */

      //~gfx8! v1: %res9_tmp0 = v_lshlrev_b32 1, %a
      //~gfx8! v1: %res9_tmp1 = v_lshlrev_b32 2, %a
      //~gfx8! v1: %res9, s2: %_ = v_add_co_u32 %res9_tmp1, %res9_tmp0
      //~gfx9! v1: %res9_tmp0 = v_lshlrev_b32 1, %a
      //~gfx9! v1: %res9 = v_lshl_add_u32 %a, 2, %res9_tmp0
      //~gfx10! v1: %res9 = v_mul_lo_u32 6, %a
      //! p_unit_test 9, %res9
      writeout(9, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4));

      //~gfx(8|10)! v1: %res10 = v_mul_lo_u32 14, %a
      //~gfx9! v1: %res10_tmp0 = v_lshlrev_b32 1, %a
      //~gfx9! v1: %res10_tmp1 = v_lshl_add_u32 %a, 2, %res10_tmp0
      //~gfx9! v1: %res10 = v_lshl_add_u32 %a, 3, %res10_tmp1
      //! p_unit_test 10, %res10
      writeout(10, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4 | 8));

      //! v1: %res11 = v_mul_lo_u32 30, %a
      //! p_unit_test 11, %res11
      writeout(11, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4 | 8 | 16));

      finish_opt_test();
   }
END_TEST