// Copyright 2021, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "test-assembler-aarch64.h"

// Shorthand used to declare every test in this file: forwards `name` to
// TEST_SVE_INNER together with the fixed "SIM" tag.
// NOTE(review): TEST_SVE_INNER is defined outside this file (presumably in
// test-assembler-aarch64.h) - confirm how it uses the tag.
#define TEST_SVE(name) TEST_SVE_INNER("SIM", name)

39 namespace vixl {
40 namespace aarch64 {
41
TEST_SVE(sve_matmul)42 TEST_SVE(sve_matmul) {
43 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
44 CPUFeatures::kSVEI8MM,
45 CPUFeatures::kNEON,
46 CPUFeatures::kCRC32);
47 START();
48
49 SetInitialMachineState(&masm);
50 // state = 0xe2bd2480
51
52 {
53 ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
54 __ dci(0x45179979); // smmla z25.s, z11.b, z23.b
55 // vl128 state = 0xf1ca8a4d
56 __ dci(0x45179b51); // smmla z17.s, z26.b, z23.b
57 // vl128 state = 0x4458ad10
58 __ dci(0x45d79b53); // ummla z19.s, z26.b, z23.b
59 // vl128 state = 0x43d4d064
60 __ dci(0x45d69b17); // ummla z23.s, z24.b, z22.b
61 // vl128 state = 0x601e77c8
62 __ dci(0x45c69b33); // ummla z19.s, z25.b, z6.b
63 // vl128 state = 0x561b4e22
64 __ dci(0x45c49b1b); // ummla z27.s, z24.b, z4.b
65 // vl128 state = 0x89b65d78
66 __ dci(0x45dc9b1a); // ummla z26.s, z24.b, z28.b
67 // vl128 state = 0x85c9e62d
68 __ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b
69 // vl128 state = 0x3fc74134
70 __ dci(0x45d99b19); // ummla z25.s, z24.b, z25.b
71 // vl128 state = 0xa2fa347b
72 __ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b
73 // vl128 state = 0xb9854782
74 __ dci(0x45899b1a); // usmmla z26.s, z24.b, z9.b
75 // vl128 state = 0x7fd376d8
76 __ dci(0x45099b8a); // smmla z10.s, z28.b, z9.b
77 // vl128 state = 0xb41d8433
78 __ dci(0x45019bcb); // smmla z11.s, z30.b, z1.b
79 // vl128 state = 0xc9c0e80d
80 __ dci(0x45019bdb); // smmla z27.s, z30.b, z1.b
81 // vl128 state = 0xf1130e02
82 __ dci(0x45019b6b); // smmla z11.s, z27.b, z1.b
83 // vl128 state = 0x282d3dc7
84 __ dci(0x45019b6f); // smmla z15.s, z27.b, z1.b
85 // vl128 state = 0x34570238
86 __ dci(0x45859b6b); // usmmla z11.s, z27.b, z5.b
87 // vl128 state = 0xc451206a
88 __ dci(0x45919b6a); // usmmla z10.s, z27.b, z17.b
89 // vl128 state = 0xa58e2ea8
90 __ dci(0x45909a62); // usmmla z2.s, z19.b, z16.b
91 // vl128 state = 0x7b5f948d
92 __ dci(0x45809a52); // usmmla z18.s, z18.b, z0.b
93 // vl128 state = 0xf746260d
94 __ dci(0x45889b53); // usmmla z19.s, z26.b, z8.b
95 // vl128 state = 0xc31cc539
96 __ dci(0x45809a57); // usmmla z23.s, z18.b, z0.b
97 // vl128 state = 0x736bb3ee
98 __ dci(0x45809a96); // usmmla z22.s, z20.b, z0.b
99 // vl128 state = 0xbb05fef6
100 __ dci(0x45809a92); // usmmla z18.s, z20.b, z0.b
101 // vl128 state = 0xbc594372
102 __ dci(0x45809a82); // usmmla z2.s, z20.b, z0.b
103 // vl128 state = 0x87c5a584
104 __ dci(0x45829ad2); // usmmla z18.s, z22.b, z2.b
105 // vl128 state = 0xa413f733
106 __ dci(0x45889ad6); // usmmla z22.s, z22.b, z8.b
107 // vl128 state = 0x87ec445d
108 __ dci(0x45c898d2); // ummla z18.s, z6.b, z8.b
109 // vl128 state = 0x3ca8a6e5
110 __ dci(0x450898d0); // smmla z16.s, z6.b, z8.b
111 // vl128 state = 0x4300d87b
112 __ dci(0x45189ad8); // smmla z24.s, z22.b, z24.b
113 // vl128 state = 0x38be2e8a
114 __ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b
115 // vl128 state = 0x8a3e6103
116 __ dci(0x45989bc9); // usmmla z9.s, z30.b, z24.b
117 // vl128 state = 0xc728e586
118 __ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b
119 // vl128 state = 0x4cb44c0e
120 __ dci(0x459c99d1); // usmmla z17.s, z14.b, z28.b
121 // vl128 state = 0x84ebcb36
122 __ dci(0x459c99d5); // usmmla z21.s, z14.b, z28.b
123 // vl128 state = 0x8813d2e2
124 __ dci(0x451c999d); // smmla z29.s, z12.b, z28.b
125 // vl128 state = 0x8f26ee51
126 __ dci(0x451c999f); // smmla z31.s, z12.b, z28.b
127 // vl128 state = 0x5d626fd0
128 __ dci(0x459e998f); // usmmla z15.s, z12.b, z30.b
129 // vl128 state = 0x6b64cc8f
130 __ dci(0x459f991f); // usmmla z31.s, z8.b, z31.b
131 // vl128 state = 0x41648186
132 __ dci(0x4587991e); // usmmla z30.s, z8.b, z7.b
133 // vl128 state = 0x701525ec
134 __ dci(0x45079816); // smmla z22.s, z0.b, z7.b
135 // vl128 state = 0x61a2d024
136 __ dci(0x450f9897); // smmla z23.s, z4.b, z15.b
137 // vl128 state = 0x82ba6bd5
138 __ dci(0x450b98d3); // smmla z19.s, z6.b, z11.b
139 // vl128 state = 0xa842bbde
140 __ dci(0x450b98db); // smmla z27.s, z6.b, z11.b
141 // vl128 state = 0x9977677a
142 __ dci(0x451f98d3); // smmla z19.s, z6.b, z31.b
143 // vl128 state = 0xe6d6c2ef
144 __ dci(0x451b9adb); // smmla z27.s, z22.b, z27.b
145 // vl128 state = 0xa535453f
146 __ dci(0x450b98d9); // smmla z25.s, z6.b, z11.b
147 // vl128 state = 0xeda3f381
148 __ dci(0x458b9adb); // usmmla z27.s, z22.b, z11.b
149 // vl128 state = 0xd72dbdef
150 __ dci(0x45cb98da); // ummla z26.s, z6.b, z11.b
151 // vl128 state = 0xfae4975b
152 __ dci(0x45c999d2); // ummla z18.s, z14.b, z9.b
153 // vl128 state = 0x0aa6e1f6
154 }
155
156 uint32_t state;
157 ComputeMachineStateHash(&masm, &state);
158 __ Mov(x0, reinterpret_cast<uint64_t>(&state));
159 __ Ldr(w0, MemOperand(x0));
160
161 END();
162 if (CAN_RUN()) {
163 RUN();
164 uint32_t expected_hashes[] = {
165 0x0aa6e1f6,
166 0xba2d4547,
167 0x0e72a647,
168 0x15b8fc1b,
169 0x92eddc98,
170 0xe0c72bcf,
171 0x36b4e3ba,
172 0x1041114e,
173 0x4d44ebd4,
174 0xfe0e3cbf,
175 0x81c43455,
176 0x678617c5,
177 0xf72fac1f,
178 0xabdcd4e4,
179 0x108864bd,
180 0x035f6eca,
181 };
182 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
183 }
184 }
185
TEST_SVE(sve_fmatmul_s)186 TEST_SVE(sve_fmatmul_s) {
187 SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
188 CPUFeatures::kSVEF32MM,
189 CPUFeatures::kNEON,
190 CPUFeatures::kCRC32);
191 START();
192
193 SetInitialMachineState(&masm);
194 // state = 0xe2bd2480
195
196 {
197 ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
198 __ dci(0x64a1e6ee); // fmmla z14.s, z23.s, z1.s
199 // vl128 state = 0x9db41bef
200 __ dci(0x64b1e7fe); // fmmla z30.s, z31.s, z17.s
201 // vl128 state = 0xc1535e55
202 __ dci(0x64b9e7d6); // fmmla z22.s, z30.s, z25.s
203 // vl128 state = 0xc65aad35
204 __ dci(0x64bde6c6); // fmmla z6.s, z22.s, z29.s
205 // vl128 state = 0x68387c22
206 __ dci(0x64b9e4c2); // fmmla z2.s, z6.s, z25.s
207 // vl128 state = 0xcf08b3a4
208 __ dci(0x64b9e543); // fmmla z3.s, z10.s, z25.s
209 // vl128 state = 0x969bbe77
210 __ dci(0x64b9e553); // fmmla z19.s, z10.s, z25.s
211 // vl128 state = 0xc3f514e1
212 __ dci(0x64b9e557); // fmmla z23.s, z10.s, z25.s
213 // vl128 state = 0x4b351c29
214 __ dci(0x64b9e773); // fmmla z19.s, z27.s, z25.s
215 // vl128 state = 0x5e026315
216 __ dci(0x64bbe757); // fmmla z23.s, z26.s, z27.s
217 // vl128 state = 0x61684fe6
218 __ dci(0x64bbe755); // fmmla z21.s, z26.s, z27.s
219 // vl128 state = 0x719b4ce0
220 __ dci(0x64bfe554); // fmmla z20.s, z10.s, z31.s
221 // vl128 state = 0xdf3d2a1c
222 __ dci(0x64bfe550); // fmmla z16.s, z10.s, z31.s
223 // vl128 state = 0x3279aab8
224 __ dci(0x64bfe714); // fmmla z20.s, z24.s, z31.s
225 // vl128 state = 0x0b985869
226 __ dci(0x64b7e756); // fmmla z22.s, z26.s, z23.s
227 // vl128 state = 0x14230587
228 __ dci(0x64b7e737); // fmmla z23.s, z25.s, z23.s
229 // vl128 state = 0x2cb88e7f
230 __ dci(0x64bfe767); // fmmla z7.s, z27.s, z31.s
231 // vl128 state = 0xb5ec0c65
232 __ dci(0x64bfe777); // fmmla z23.s, z27.s, z31.s
233 // vl128 state = 0xb5e5eab0
234 __ dci(0x64bfe715); // fmmla z21.s, z24.s, z31.s
235 // vl128 state = 0xd0491fb5
236 __ dci(0x64b7e797); // fmmla z23.s, z28.s, z23.s
237 // vl128 state = 0x98a55a30
238 }
239
240 uint32_t state;
241 ComputeMachineStateHash(&masm, &state);
242 __ Mov(x0, reinterpret_cast<uint64_t>(&state));
243 __ Ldr(w0, MemOperand(x0));
244
245 END();
246 if (CAN_RUN()) {
247 RUN();
248 uint32_t expected_hashes[] = {
249 0x98a55a30,
250 0x590b7715,
251 0x4562ccf3,
252 0x1f8653a6,
253 0x5fe174d5,
254 0xb300dcb8,
255 0x3cefa79e,
256 0xa22484c7,
257 0x380697ec,
258 0xde9e699b,
259 0x99d21870,
260 0x456cb46b,
261 0x207d2615,
262 0xecaf9678,
263 0x0949e2d2,
264 0xa764c43f,
265 };
266 ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
267 }
268 }
269
270 } // namespace aarch64
271 } // namespace vixl
272