• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include "test-runner.h"
28 #include "test-utils.h"
29 #include "aarch64/test-utils-aarch64.h"
30 
31 #include "aarch64/cpu-aarch64.h"
32 #include "aarch64/disasm-aarch64.h"
33 #include "aarch64/macro-assembler-aarch64.h"
34 #include "aarch64/simulator-aarch64.h"
35 #include "test-assembler-aarch64.h"
36 
37 #define TEST_SVE(name) TEST_SVE_INNER("SIM", name)
38 
39 namespace vixl {
40 namespace aarch64 {
41 
TEST_SVE(sve_matmul)42 TEST_SVE(sve_matmul) {
43   SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
44                           CPUFeatures::kSVEI8MM,
45                           CPUFeatures::kNEON,
46                           CPUFeatures::kCRC32);
47   START();
48 
49   SetInitialMachineState(&masm);
50   // state = 0xe2bd2480
51 
52   {
53     ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
54     __ dci(0x45179979);  // smmla z25.s, z11.b, z23.b
55     // vl128 state = 0xf1ca8a4d
56     __ dci(0x45179b51);  // smmla z17.s, z26.b, z23.b
57     // vl128 state = 0x4458ad10
58     __ dci(0x45d79b53);  // ummla z19.s, z26.b, z23.b
59     // vl128 state = 0x43d4d064
60     __ dci(0x45d69b17);  // ummla z23.s, z24.b, z22.b
61     // vl128 state = 0x601e77c8
62     __ dci(0x45c69b33);  // ummla z19.s, z25.b, z6.b
63     // vl128 state = 0x561b4e22
64     __ dci(0x45c49b1b);  // ummla z27.s, z24.b, z4.b
65     // vl128 state = 0x89b65d78
66     __ dci(0x45dc9b1a);  // ummla z26.s, z24.b, z28.b
67     // vl128 state = 0x85c9e62d
68     __ dci(0x45d99b1b);  // ummla z27.s, z24.b, z25.b
69     // vl128 state = 0x3fc74134
70     __ dci(0x45d99b19);  // ummla z25.s, z24.b, z25.b
71     // vl128 state = 0xa2fa347b
72     __ dci(0x45d99b1b);  // ummla z27.s, z24.b, z25.b
73     // vl128 state = 0xb9854782
74     __ dci(0x45899b1a);  // usmmla z26.s, z24.b, z9.b
75     // vl128 state = 0x7fd376d8
76     __ dci(0x45099b8a);  // smmla z10.s, z28.b, z9.b
77     // vl128 state = 0xb41d8433
78     __ dci(0x45019bcb);  // smmla z11.s, z30.b, z1.b
79     // vl128 state = 0xc9c0e80d
80     __ dci(0x45019bdb);  // smmla z27.s, z30.b, z1.b
81     // vl128 state = 0xf1130e02
82     __ dci(0x45019b6b);  // smmla z11.s, z27.b, z1.b
83     // vl128 state = 0x282d3dc7
84     __ dci(0x45019b6f);  // smmla z15.s, z27.b, z1.b
85     // vl128 state = 0x34570238
86     __ dci(0x45859b6b);  // usmmla z11.s, z27.b, z5.b
87     // vl128 state = 0xc451206a
88     __ dci(0x45919b6a);  // usmmla z10.s, z27.b, z17.b
89     // vl128 state = 0xa58e2ea8
90     __ dci(0x45909a62);  // usmmla z2.s, z19.b, z16.b
91     // vl128 state = 0x7b5f948d
92     __ dci(0x45809a52);  // usmmla z18.s, z18.b, z0.b
93     // vl128 state = 0xf746260d
94     __ dci(0x45889b53);  // usmmla z19.s, z26.b, z8.b
95     // vl128 state = 0xc31cc539
96     __ dci(0x45809a57);  // usmmla z23.s, z18.b, z0.b
97     // vl128 state = 0x736bb3ee
98     __ dci(0x45809a96);  // usmmla z22.s, z20.b, z0.b
99     // vl128 state = 0xbb05fef6
100     __ dci(0x45809a92);  // usmmla z18.s, z20.b, z0.b
101     // vl128 state = 0xbc594372
102     __ dci(0x45809a82);  // usmmla z2.s, z20.b, z0.b
103     // vl128 state = 0x87c5a584
104     __ dci(0x45829ad2);  // usmmla z18.s, z22.b, z2.b
105     // vl128 state = 0xa413f733
106     __ dci(0x45889ad6);  // usmmla z22.s, z22.b, z8.b
107     // vl128 state = 0x87ec445d
108     __ dci(0x45c898d2);  // ummla z18.s, z6.b, z8.b
109     // vl128 state = 0x3ca8a6e5
110     __ dci(0x450898d0);  // smmla z16.s, z6.b, z8.b
111     // vl128 state = 0x4300d87b
112     __ dci(0x45189ad8);  // smmla z24.s, z22.b, z24.b
113     // vl128 state = 0x38be2e8a
114     __ dci(0x451c9bd9);  // smmla z25.s, z30.b, z28.b
115     // vl128 state = 0x8a3e6103
116     __ dci(0x45989bc9);  // usmmla z9.s, z30.b, z24.b
117     // vl128 state = 0xc728e586
118     __ dci(0x451c9bd9);  // smmla z25.s, z30.b, z28.b
119     // vl128 state = 0x4cb44c0e
120     __ dci(0x459c99d1);  // usmmla z17.s, z14.b, z28.b
121     // vl128 state = 0x84ebcb36
122     __ dci(0x459c99d5);  // usmmla z21.s, z14.b, z28.b
123     // vl128 state = 0x8813d2e2
124     __ dci(0x451c999d);  // smmla z29.s, z12.b, z28.b
125     // vl128 state = 0x8f26ee51
126     __ dci(0x451c999f);  // smmla z31.s, z12.b, z28.b
127     // vl128 state = 0x5d626fd0
128     __ dci(0x459e998f);  // usmmla z15.s, z12.b, z30.b
129     // vl128 state = 0x6b64cc8f
130     __ dci(0x459f991f);  // usmmla z31.s, z8.b, z31.b
131     // vl128 state = 0x41648186
132     __ dci(0x4587991e);  // usmmla z30.s, z8.b, z7.b
133     // vl128 state = 0x701525ec
134     __ dci(0x45079816);  // smmla z22.s, z0.b, z7.b
135     // vl128 state = 0x61a2d024
136     __ dci(0x450f9897);  // smmla z23.s, z4.b, z15.b
137     // vl128 state = 0x82ba6bd5
138     __ dci(0x450b98d3);  // smmla z19.s, z6.b, z11.b
139     // vl128 state = 0xa842bbde
140     __ dci(0x450b98db);  // smmla z27.s, z6.b, z11.b
141     // vl128 state = 0x9977677a
142     __ dci(0x451f98d3);  // smmla z19.s, z6.b, z31.b
143     // vl128 state = 0xe6d6c2ef
144     __ dci(0x451b9adb);  // smmla z27.s, z22.b, z27.b
145     // vl128 state = 0xa535453f
146     __ dci(0x450b98d9);  // smmla z25.s, z6.b, z11.b
147     // vl128 state = 0xeda3f381
148     __ dci(0x458b9adb);  // usmmla z27.s, z22.b, z11.b
149     // vl128 state = 0xd72dbdef
150     __ dci(0x45cb98da);  // ummla z26.s, z6.b, z11.b
151     // vl128 state = 0xfae4975b
152     __ dci(0x45c999d2);  // ummla z18.s, z14.b, z9.b
153     // vl128 state = 0x0aa6e1f6
154   }
155 
156   uint32_t state;
157   ComputeMachineStateHash(&masm, &state);
158   __ Mov(x0, reinterpret_cast<uint64_t>(&state));
159   __ Ldr(w0, MemOperand(x0));
160 
161   END();
162   if (CAN_RUN()) {
163     RUN();
164     uint32_t expected_hashes[] = {
165         0x0aa6e1f6,
166         0xba2d4547,
167         0x0e72a647,
168         0x15b8fc1b,
169         0x92eddc98,
170         0xe0c72bcf,
171         0x36b4e3ba,
172         0x1041114e,
173         0x4d44ebd4,
174         0xfe0e3cbf,
175         0x81c43455,
176         0x678617c5,
177         0xf72fac1f,
178         0xabdcd4e4,
179         0x108864bd,
180         0x035f6eca,
181     };
182     ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
183   }
184 }
185 
TEST_SVE(sve_fmatmul_s)186 TEST_SVE(sve_fmatmul_s) {
187   SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
188                           CPUFeatures::kSVEF32MM,
189                           CPUFeatures::kNEON,
190                           CPUFeatures::kCRC32);
191   START();
192 
193   SetInitialMachineState(&masm);
194   // state = 0xe2bd2480
195 
196   {
197     ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
198     __ dci(0x64a1e6ee);  // fmmla z14.s, z23.s, z1.s
199     // vl128 state = 0x9db41bef
200     __ dci(0x64b1e7fe);  // fmmla z30.s, z31.s, z17.s
201     // vl128 state = 0xc1535e55
202     __ dci(0x64b9e7d6);  // fmmla z22.s, z30.s, z25.s
203     // vl128 state = 0xc65aad35
204     __ dci(0x64bde6c6);  // fmmla z6.s, z22.s, z29.s
205     // vl128 state = 0x68387c22
206     __ dci(0x64b9e4c2);  // fmmla z2.s, z6.s, z25.s
207     // vl128 state = 0xcf08b3a4
208     __ dci(0x64b9e543);  // fmmla z3.s, z10.s, z25.s
209     // vl128 state = 0x969bbe77
210     __ dci(0x64b9e553);  // fmmla z19.s, z10.s, z25.s
211     // vl128 state = 0xc3f514e1
212     __ dci(0x64b9e557);  // fmmla z23.s, z10.s, z25.s
213     // vl128 state = 0x4b351c29
214     __ dci(0x64b9e773);  // fmmla z19.s, z27.s, z25.s
215     // vl128 state = 0x5e026315
216     __ dci(0x64bbe757);  // fmmla z23.s, z26.s, z27.s
217     // vl128 state = 0x61684fe6
218     __ dci(0x64bbe755);  // fmmla z21.s, z26.s, z27.s
219     // vl128 state = 0x719b4ce0
220     __ dci(0x64bfe554);  // fmmla z20.s, z10.s, z31.s
221     // vl128 state = 0xdf3d2a1c
222     __ dci(0x64bfe550);  // fmmla z16.s, z10.s, z31.s
223     // vl128 state = 0x3279aab8
224     __ dci(0x64bfe714);  // fmmla z20.s, z24.s, z31.s
225     // vl128 state = 0x0b985869
226     __ dci(0x64b7e756);  // fmmla z22.s, z26.s, z23.s
227     // vl128 state = 0x14230587
228     __ dci(0x64b7e737);  // fmmla z23.s, z25.s, z23.s
229     // vl128 state = 0x2cb88e7f
230     __ dci(0x64bfe767);  // fmmla z7.s, z27.s, z31.s
231     // vl128 state = 0xb5ec0c65
232     __ dci(0x64bfe777);  // fmmla z23.s, z27.s, z31.s
233     // vl128 state = 0xb5e5eab0
234     __ dci(0x64bfe715);  // fmmla z21.s, z24.s, z31.s
235     // vl128 state = 0xd0491fb5
236     __ dci(0x64b7e797);  // fmmla z23.s, z28.s, z23.s
237     // vl128 state = 0x98a55a30
238   }
239 
240   uint32_t state;
241   ComputeMachineStateHash(&masm, &state);
242   __ Mov(x0, reinterpret_cast<uint64_t>(&state));
243   __ Ldr(w0, MemOperand(x0));
244 
245   END();
246   if (CAN_RUN()) {
247     RUN();
248     uint32_t expected_hashes[] = {
249         0x98a55a30,
250         0x590b7715,
251         0x4562ccf3,
252         0x1f8653a6,
253         0x5fe174d5,
254         0xb300dcb8,
255         0x3cefa79e,
256         0xa22484c7,
257         0x380697ec,
258         0xde9e699b,
259         0x99d21870,
260         0x456cb46b,
261         0x207d2615,
262         0xecaf9678,
263         0x0949e2d2,
264         0xa764c43f,
265     };
266     ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
267   }
268 }
269 
270 }  // namespace aarch64
271 }  // namespace vixl
272