1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <sys/mman.h>
28
29 #include <cfloat>
30 #include <cmath>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34
35 #include "test-runner.h"
36 #include "test-utils.h"
37 #include "aarch64/test-utils-aarch64.h"
38
39 #include "aarch64/cpu-aarch64.h"
40 #include "aarch64/disasm-aarch64.h"
41 #include "aarch64/macro-assembler-aarch64.h"
42 #include "aarch64/simulator-aarch64.h"
43 #include "test-assembler-aarch64.h"
44
45 namespace vixl {
46 namespace aarch64 {
47
// Check Ldr/Str of B (8-bit) registers with immediate-offset, post-index and
// pre-index addressing.
//
// Every access uses its own base register so that the write-back (or the lack
// of it) of each addressing mode can be asserted independently afterwards.
TEST(load_store_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[3] = {0x12, 0x23, 0x34};
  uint8_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  // Offset load of src[1], then post-index store to dst[0].
  __ Ldr(b0, MemOperand(x17, sizeof(src[0])));
  __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  // Post-index load of src[0], then pre-index store to dst[2].
  __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  // Pre-index load of src[2], then offset store to dst[1].
  __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(b2, MemOperand(x22, sizeof(dst[0])));
  END();

  if (CAN_RUN()) {
    RUN();

    // Only the low byte of each Q register is expected to be non-zero.
    ASSERT_EQUAL_128(0, 0x23, q0);
    ASSERT_EQUAL_64(0x23, dst[0]);
    ASSERT_EQUAL_128(0, 0x12, q1);
    ASSERT_EQUAL_64(0x12, dst[2]);
    ASSERT_EQUAL_128(0, 0x34, q2);
    ASSERT_EQUAL_64(0x34, dst[1]);
    // Offset addressing leaves the base unchanged; post- and pre-index
    // addressing write the updated address back to the base register.
    ASSERT_EQUAL_64(src_base, x17);
    ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
    ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
    ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
    ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
    ASSERT_EQUAL_64(dst_base, x22);
  }
}
88
89
// Check Ldr/Str of H (16-bit) registers with immediate-offset, post-index and
// pre-index addressing.
//
// Every access uses its own base register so that the write-back (or the lack
// of it) of each addressing mode can be asserted independently afterwards.
TEST(load_store_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint16_t src[3] = {0x1234, 0x2345, 0x3456};
  uint16_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  // Offset load of src[1], then post-index store to dst[0].
  __ Ldr(h0, MemOperand(x17, sizeof(src[0])));
  __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  // Post-index load of src[0], then pre-index store to dst[2].
  __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  // Pre-index load of src[2], then offset store to dst[1].
  __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(h2, MemOperand(x22, sizeof(dst[0])));
  END();

  if (CAN_RUN()) {
    RUN();

    // Only the low halfword of each Q register is expected to be non-zero.
    ASSERT_EQUAL_128(0, 0x2345, q0);
    ASSERT_EQUAL_64(0x2345, dst[0]);
    ASSERT_EQUAL_128(0, 0x1234, q1);
    ASSERT_EQUAL_64(0x1234, dst[2]);
    ASSERT_EQUAL_128(0, 0x3456, q2);
    ASSERT_EQUAL_64(0x3456, dst[1]);
    // Offset addressing leaves the base unchanged; post- and pre-index
    // addressing write the updated address back to the base register.
    ASSERT_EQUAL_64(src_base, x17);
    ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
    ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
    ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
    ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
    ASSERT_EQUAL_64(dst_base, x22);
  }
}
130
131
// Check Ldr/Str of Q (128-bit) registers with immediate-offset, post-index and
// pre-index addressing.
//
// The source is 48 bytes (three Q-sized chunks) and the destination is six
// 64-bit words, so each stored Q register is checked as a low/high word pair.
TEST(load_store_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, 0x01, 0x23,
                     0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x21, 0x43, 0x65, 0x87,
                     0xa9, 0xcb, 0xed, 0x0f, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc,
                     0xde, 0xf0, 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
                     0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};

  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  // Offset load of src[16..31], then post-index store to dst[0..1].
  __ Ldr(q0, MemOperand(x17, 16));
  __ Str(q0, MemOperand(x18, 16, PostIndex));
  // Post-index load of src[0..15], then pre-index store to dst[4..5].
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Str(q1, MemOperand(x20, 32, PreIndex));
  // Pre-index load of src[32..47], then offset store to dst[2..3].
  __ Ldr(q2, MemOperand(x21, 32, PreIndex));
  __ Str(q2, MemOperand(x22, 16));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
    ASSERT_EQUAL_64(0x0fedcba987654321, dst[0]);
    ASSERT_EQUAL_64(0xf0debc9a78563412, dst[1]);
    ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
    ASSERT_EQUAL_64(0xfedcba9876543210, dst[4]);
    ASSERT_EQUAL_64(0xefcdab8967452301, dst[5]);
    ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
    ASSERT_EQUAL_64(0x02e0ceac8a684624, dst[2]);
    ASSERT_EQUAL_64(0x200eeccaa8866442, dst[3]);
    // Offset addressing leaves the base unchanged; post- and pre-index
    // addressing write the updated address back to the base register.
    ASSERT_EQUAL_64(src_base, x17);
    ASSERT_EQUAL_64(dst_base + 16, x18);
    ASSERT_EQUAL_64(src_base + 16, x19);
    ASSERT_EQUAL_64(dst_base + 32, x20);
    ASSERT_EQUAL_64(src_base + 32, x21);
    ASSERT_EQUAL_64(dst_base, x22);
  }
}
180
181
// Check Ldr/Str of B, H, S, D and Q registers using register-offset
// addressing: a plain X register offset, and UXTW / SXTW / LSL extended
// offsets with the shift amount that matches the access size.
//
// src[i] == i, so every expected value spells out the addresses that were
// read.
TEST(load_store_v_regoffset) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uint8_t dst[64];
  memset(dst, 0, sizeof(dst));

  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // x17 points into the middle of src so that negative offsets stay in range.
  __ Mov(x17, src_base + 16);
  __ Mov(x18, 1);   // Positive offset.
  __ Mov(w19, -1);  // Negative offset; only meaningful when sign-extended.
  // Bias the store base by -1 so that (x20 + x18) addresses dst[0].
  __ Mov(x20, dst_base - 1);

  __ Ldr(b0, MemOperand(x17, x18));
  __ Ldr(b1, MemOperand(x17, x19, SXTW));

  __ Ldr(h2, MemOperand(x17, x18));
  __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
  __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
  __ Ldr(h5, MemOperand(x17, x18, LSL, 1));

  __ Ldr(s16, MemOperand(x17, x18));
  __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
  __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
  __ Ldr(s19, MemOperand(x17, x18, LSL, 2));

  __ Ldr(d20, MemOperand(x17, x18));
  __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
  __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
  __ Ldr(d23, MemOperand(x17, x18, LSL, 3));

  __ Ldr(q24, MemOperand(x17, x18));
  __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
  __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
  __ Ldr(q27, MemOperand(x17, x18, LSL, 4));

  // Store [bhsdq]27 to adjacent memory locations, then load again to check.
  __ Str(b27, MemOperand(x20, x18));           // dst[0]
  __ Str(h27, MemOperand(x20, x18, UXTW, 1));  // dst[1..2]
  __ Add(x20, x20, 8);
  __ Str(s27, MemOperand(x20, x19, SXTW, 2));  // dst[3..6]
  __ Sub(x20, x20, 8);
  __ Str(d27, MemOperand(x20, x18, LSL, 3));   // dst[7..14]
  __ Add(x20, x20, 32);
  __ Str(q27, MemOperand(x20, x19, SXTW, 4));  // dst[15..30]

  // Restore x20 to (dst_base - 1) and read the stored bytes back.
  __ Sub(x20, x20, 32);
  __ Ldr(q6, MemOperand(x20, x18));          // dst[0..15]
  __ Ldr(q7, MemOperand(x20, x18, LSL, 4));  // dst[15..30]

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x11, q0);
    ASSERT_EQUAL_128(0, 0x0f, q1);
    ASSERT_EQUAL_128(0, 0x1211, q2);
    ASSERT_EQUAL_128(0, 0x1312, q3);
    ASSERT_EQUAL_128(0, 0x0f0e, q4);
    ASSERT_EQUAL_128(0, 0x1312, q5);
    ASSERT_EQUAL_128(0, 0x14131211, q16);
    ASSERT_EQUAL_128(0, 0x17161514, q17);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
    ASSERT_EQUAL_128(0, 0x17161514, q19);
    ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
    ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
    ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
    // q6 holds the B, H, S and D stores of q27 packed back to back, followed
    // by the first byte of the Q store.
    ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);
  }
}
265
// Check Ldp/Stp of Q register pairs with post-index and pre-index addressing.
//
// The pair is loaded as (q31, q0) and stored as (q0, q31), so the two 128-bit
// halves are expected to appear swapped in dst.
TEST(ldp_stp_quad) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint64_t src[4] = {0x0123456789abcdef,
                     0xaaaaaaaa55555555,
                     0xfedcba9876543210,
                     0x55555555aaaaaaaa};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  // Load all of src, then advance x16 past it.
  __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(src[0]), PostIndex));
  // Advance x17 by two words first, so that dst[0..1] stay untouched.
  __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(dst[1]), PreIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
    ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
    ASSERT_EQUAL_64(0, dst[0]);
    ASSERT_EQUAL_64(0, dst[1]);
    ASSERT_EQUAL_64(0xfedcba9876543210, dst[2]);
    ASSERT_EQUAL_64(0x55555555aaaaaaaa, dst[3]);
    ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
    ASSERT_EQUAL_64(0xaaaaaaaa55555555, dst[5]);
    ASSERT_EQUAL_64(src_base + 4 * sizeof(src[0]), x16);
    ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[1]), x17);
  }
}
299
// Check Ld1 (multiple structures) into one to four D-sized (64-bit) vector
// registers, without write-back.
//
// src[i] == i and the base advances by one byte between loads, so every
// register list reads a distinct, recognisable byte sequence. The buffer is
// 32 + 5 bytes because the last four-register load starts at offset 5.
TEST(neon_ld1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 5];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
  __ Ld1(v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps around from v31 to v0.
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The top 64 bits of every destination are expected to be zero, including
    // q2, which held non-zero data in its top half before the Ld1.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
    ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
    ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
    ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
    ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
    ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
    ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
    ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
    ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
    ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
  }
}
348
349
// Check Ld1 (multiple structures) into one to four D-sized (64-bit) vector
// registers with post-index write-back.
//
// The first load post-indexes by a register (x23 == 1); the others post-index
// by an immediate equal to the number of bytes they load. Each load has its
// own base register so that every write-back can be asserted separately.
TEST(neon_ld1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 5];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, src_base + 5);
  __ Mov(x23, 1);
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
  __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld1(v16.V2S(),
         v17.V2S(),
         v18.V2S(),
         v19.V2S(),
         MemOperand(x20, 32, PostIndex));
  // This register list wraps around from v31 to v0.
  __ Ld1(v30.V2S(),
         v31.V2S(),
         v0.V2S(),
         v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  __ Ld1(v20.V1D(),
         v21.V1D(),
         v22.V1D(),
         v23.V1D(),
         MemOperand(x22, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // The top 64 bits of every destination are expected to be zero, including
    // q2, which held non-zero data in its top half before the Ld1.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
    ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
    ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
    ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
    ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
    ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
    ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
    ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
    ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
    ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
    // Each base is expected to have advanced by its post-index amount.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 16, x18);
    ASSERT_EQUAL_64(src_base + 2 + 24, x19);
    ASSERT_EQUAL_64(src_base + 3 + 32, x20);
    ASSERT_EQUAL_64(src_base + 4 + 32, x21);
    ASSERT_EQUAL_64(src_base + 5 + 32, x22);
  }
}
417
418
// Check Ld1 (multiple structures) into one to four Q-sized (128-bit) vector
// registers, without write-back.
//
// src[i] == i and the base advances by one byte between loads. The buffer is
// 64 + 4 bytes because the last four-register load starts at offset 4.
TEST(neon_ld1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld1(v2.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps around from v31 to v0.
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
  }
}
460
461
// Check Ld1 (multiple structures) into one to four Q-sized (128-bit) vector
// registers with post-index write-back.
//
// The first load post-indexes by a register (x22 == 1); the others post-index
// by an immediate equal to the number of bytes they load. Each load has its
// own base register so that every write-back can be asserted separately.
TEST(neon_ld1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld1(v16.V4S(),
         v17.V4S(),
         v18.V4S(),
         v19.V4S(),
         MemOperand(x20, 64, PostIndex));
  // This register list wraps around from v31 to v0.
  __ Ld1(v30.V2D(),
         v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
    // Each base is expected to have advanced by its post-index amount.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 32, x18);
    ASSERT_EQUAL_64(src_base + 2 + 48, x19);
    ASSERT_EQUAL_64(src_base + 3 + 64, x20);
    ASSERT_EQUAL_64(src_base + 4 + 64, x21);
  }
}
517
518
// Check Ld1 (single structure) into individual B, H, S and D lanes, without
// write-back.
//
// The first part fills whole registers one lane at a time; the second part
// overwrites a single lane of an already-initialised register to check that
// the other lanes are preserved.
TEST(neon_ld1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  // Lanes are filled from the highest index down while the address advances
  // by one byte per load, so the highest lane receives the data at src_base.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld1(v0.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld1(v1.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld1(v2.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld1(v3.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  // Each register is first filled with src[0..15]; one lane is then replaced
  // with the element at src_base.
  __ Mov(x17, src_base);
  __ Ldr(q4, MemOperand(x17));
  __ Ld1(v4.B(), 4, MemOperand(x17));
  __ Ldr(q5, MemOperand(x17));
  __ Ld1(v5.H(), 3, MemOperand(x17));
  __ Ldr(q6, MemOperand(x17));
  __ Ld1(v6.S(), 2, MemOperand(x17));
  __ Ldr(q7, MemOperand(x17));
  __ Ld1(v7.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
  }
}
581
// Check Ld2 (multiple structures) into pairs of D-sized (64-bit) vector
// registers, without write-back.
//
// Ld2 de-interleaves memory: the expected values show even-numbered elements
// in the first register and odd-numbered elements in the second. The base
// advances by one byte between loads.
TEST(neon_ld2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps around from v31 to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The top 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);
  }
}
615
// Check Ld2 (multiple structures) into pairs of D-sized (64-bit) vector
// registers with post-index write-back.
//
// The first load post-indexes by a register (x22 == 1); the others post-index
// by an immediate equal to the 16 bytes they load. Every load writes a
// distinct register pair, so that no result is overwritten before it is
// checked.
TEST(neon_ld2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
  // Use v6/v7 here (as neon_ld2_d does) rather than v5/v6: reusing v5 would
  // clobber the second result of the previous load before it is asserted.
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x19, 16, PostIndex));
  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
  // This register list wraps around from v31 to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // The top 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
    ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);

    // Each base is expected to have advanced by its post-index amount.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 16, x18);
    ASSERT_EQUAL_64(src_base + 2 + 16, x19);
    ASSERT_EQUAL_64(src_base + 3 + 16, x20);
    ASSERT_EQUAL_64(src_base + 4 + 16, x21);
  }
}
659
660
// Check Ld2 (multiple structures) into pairs of Q-sized (128-bit) vector
// registers, without write-back.
//
// Ld2 de-interleaves memory: the expected values show even-numbered elements
// in the first register and odd-numbered elements in the second. The base
// advances by one byte between loads.
TEST(neon_ld2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps around from v31 to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
  }
}
698
699
// Check Ld2 (multiple structures) into pairs of Q-sized (128-bit) vector
// registers with post-index write-back.
//
// The first load post-indexes by a register (x22 == 1); the others post-index
// by an immediate equal to the 32 bytes they load. Each load has its own base
// register so that every write-back can be asserted separately.
TEST(neon_ld2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
  // This register list wraps around from v31 to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

    // Each base is expected to have advanced by its post-index amount.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 32, x18);
    ASSERT_EQUAL_64(src_base + 2 + 32, x19);
    ASSERT_EQUAL_64(src_base + 3 + 32, x20);
    ASSERT_EQUAL_64(src_base + 4 + 32, x21);
  }
}
745
746
// Check Ld2 (single structure) into individual B, H, S and D lanes of a
// register pair, without write-back.
//
// The first part fills whole register pairs one lane at a time; the second
// part overwrites a single lane of an already-initialised pair to check that
// the other lanes are preserved.
TEST(neon_ld2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  // Lanes are filled from the highest index down while the address advances
  // by one byte per load, so consecutive loads read overlapping data.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  // Each pair is first filled with src[0..15] and src[16..31] through a
  // scratch pointer (x4-x7), so that x17 keeps pointing at src_base.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
    ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
    ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
  }
}
825
826
// Check Ld2 (single structure) into individual B, H, S and D lanes of a
// register pair with post-index write-back.
//
// The whole-register part post-indexes by an immediate equal to the size of
// one two-element structure (2, 4, 8 or 16 bytes). The single-lane part
// post-indexes by register x25, which is stepped through the values 1 to 4.
TEST(neon_ld2_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One base register per load sequence, so that every write-back can be
  // asserted separately.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  // Test loading whole register by element.
  for (int i = 15; i >= 0; i--) {
    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
  }

  // Test loading a single element into an initialised register.
  // Each pair is first filled with src[0..15] and src[16..31] through a
  // scratch pointer (x4-x7), leaving the post-indexed base untouched until
  // the Ld2 itself.
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
    ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
    ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
    ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
    ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
    ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
    ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);

    // Whole-register loops: each base advanced by 32 bytes in total.
    ASSERT_EQUAL_64(src_base + 32, x17);
    ASSERT_EQUAL_64(src_base + 32, x18);
    ASSERT_EQUAL_64(src_base + 32, x19);
    ASSERT_EQUAL_64(src_base + 32, x20);
    // Single-lane loads: each base advanced by the value x25 held at the time.
    ASSERT_EQUAL_64(src_base + 1, x21);
    ASSERT_EQUAL_64(src_base + 2, x22);
    ASSERT_EQUAL_64(src_base + 3, x23);
    ASSERT_EQUAL_64(src_base + 4, x24);
  }
}
921
922
// LD2R with plain base-register addressing: load one two-element structure
// and replicate each element across every lane of its destination register,
// for every supported arrangement.
TEST(neon_ld2_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset, so the expected
  // register contents can be read straight off the load address.
  uint8_t data[64];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr + 1);
  __ Mov(x18, 1);  // Not read by any of the loads in this test.

  // Bytes, from offset 1 and then offset 3.
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Add(x17, x17, 2);
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Half-words, from offset 4 and then offset 5.
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 4);

  // Words, from offset 9 and then offset 10.
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
  __ Add(x17, x17, 8);

  // Double-words, from offset 18.
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements leave the upper half of the Q register zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
  }
}
969
970
// LD2R with post-index addressing: same loads as the non-post-index test, but
// the base register is advanced by the load itself, alternating between
// immediate and register (x18 == 1) post-index amounts.
TEST(neon_ld2_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t data[64];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr + 1);
  __ Mov(x18, 1);  // Register post-index amount.

  // Bytes: loads at offsets 1 and 3.
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
  // Half-words: loads at offsets 4 and 5.
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
  // Words: loads at offsets 9 and 10.
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
  // Double-words: load at offset 18.
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);

    // Start offset 1 plus post-index amounts 2 + 1 + 1 + 4 + 1 + 8 + 16.
    ASSERT_EQUAL_64(data_addr + 34, x17);
  }
}
1012
1013
// LD3 (multiple structures) into 64-bit (D-sized) registers: three-way
// de-interleaving loads for the 8B, 4H and 2S arrangements, including a
// register list that wraps from v31 to v0.
TEST(neon_ld3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t data[64 + 4];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr);
  // Each load starts one byte further into the buffer than the previous one.
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination must be zero.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);
  }
}
1051
1052
// LD3 (multiple structures) into 64-bit registers with post-index addressing.
// Each load uses its own base register so that the write-back of every one
// can be checked independently.
TEST(neon_ld3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t data[32 + 4];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // Base registers x17..x21 point at offsets 0..4 respectively.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr + 1);
  __ Mov(x19, data_addr + 2);
  __ Mov(x20, data_addr + 3);
  __ Mov(x21, data_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.

  // First load: register post-index. Remaining loads: immediate post-index
  // of 24 bytes.
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
    ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
    ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
    ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);

    // Each base register must have advanced by its post-index amount.
    ASSERT_EQUAL_64(data_addr + 1, x17);
    ASSERT_EQUAL_64(data_addr + 1 + 24, x18);
    ASSERT_EQUAL_64(data_addr + 2 + 24, x19);
    ASSERT_EQUAL_64(data_addr + 3 + 24, x20);
    ASSERT_EQUAL_64(data_addr + 4 + 24, x21);
  }
}
1102
1103
// LD3 (multiple structures) into 128-bit (Q-sized) registers: three-way
// de-interleaving loads for the 16B, 8H, 4S and 2D arrangements, including a
// register list that wraps from v31 to v0.
TEST(neon_ld3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t data[64 + 4];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr);
  // Each load starts one byte further into the buffer than the previous one.
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
  }
}
1146
1147
// LD3 (multiple structures) into 128-bit registers with post-index
// addressing. Each load uses its own base register so that the write-back of
// every one can be checked independently.
TEST(neon_ld3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t data[64 + 4];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // Base registers x17..x21 point at offsets 0..4 respectively.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr + 1);
  __ Mov(x19, data_addr + 2);
  __ Mov(x20, data_addr + 3);
  __ Mov(x21, data_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.

  // First load: register post-index. Remaining loads: immediate post-index
  // of 48 bytes.
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

    // Each base register must have advanced by its post-index amount.
    ASSERT_EQUAL_64(data_addr + 1, x17);
    ASSERT_EQUAL_64(data_addr + 1 + 48, x18);
    ASSERT_EQUAL_64(data_addr + 2 + 48, x19);
    ASSERT_EQUAL_64(data_addr + 3 + 48, x20);
    ASSERT_EQUAL_64(data_addr + 4 + 48, x21);
  }
}
1198
1199
// LD3 (single structure) with plain base-register addressing. The first half
// fills whole registers one lane at a time; the second half loads a single
// lane into registers that already hold data, to check that the other lanes
// are preserved.
TEST(neon_ld3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element. Lanes are filled from the
  // highest index down while the address advances by one byte per load.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register. Each group
  // of three registers is first filled with src[0..47] and then has one lane
  // overwritten from src_base.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Whole registers built lane by lane.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
    ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
    ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
    ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
    ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
    ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
    ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);

    // Single lane replaced in an initialised register: B lane 4, H lane 3.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);

    // S lane 2 and D lane 1. These registers are set up and loaded above, so
    // check them too; the values match the post-index variant of this test
    // because both load from src_base.
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);
  }
}
1284
1285
// LD3 (single structure) with post-index addressing. The first half fills
// whole registers one lane at a time using immediate post-index; the second
// half loads a single lane into initialised registers using register
// post-index with an increasing amount held in x25.
TEST(neon_ld3_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t data[64];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();

  // Every sub-test gets its own base register, all starting at the buffer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Mov(x19, data_addr);
  __ Mov(x20, data_addr);
  __ Mov(x21, data_addr);
  __ Mov(x22, data_addr);
  __ Mov(x23, data_addr);
  __ Mov(x24, data_addr);

  // Test loading whole register by element. The immediate post-index equals
  // the size of one three-element structure, so lanes are filled from
  // consecutive structures, highest lane first.
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld3(v0.B(), v1.B(), v2.B(), lane, MemOperand(x17, 3, PostIndex));
  }

  for (int lane = 7; lane >= 0; --lane) {
    __ Ld3(v3.H(), v4.H(), v5.H(), lane, MemOperand(x18, 6, PostIndex));
  }

  for (int lane = 3; lane >= 0; --lane) {
    __ Ld3(v6.S(), v7.S(), v8.S(), lane, MemOperand(x19, 12, PostIndex));
  }

  for (int lane = 1; lane >= 0; --lane) {
    __ Ld3(v9.D(), v10.D(), v11.D(), lane, MemOperand(x20, 24, PostIndex));
  }

  // Test loading a single element into an initialised register. x25 is the
  // register post-index amount; it is bumped by one before each later group.
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Whole registers built lane by lane.
    ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
    ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
    ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
    ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
    ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
    ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
    ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
    ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
    ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
    ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);

    // Single lane replaced in an initialised register.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);

    // Immediate post-index: each whole-register loop advanced 48 bytes.
    ASSERT_EQUAL_64(data_addr + 48, x17);
    ASSERT_EQUAL_64(data_addr + 48, x18);
    ASSERT_EQUAL_64(data_addr + 48, x19);
    ASSERT_EQUAL_64(data_addr + 48, x20);
    // Register post-index: advanced by the value x25 held at each load.
    ASSERT_EQUAL_64(data_addr + 1, x21);
    ASSERT_EQUAL_64(data_addr + 2, x22);
    ASSERT_EQUAL_64(data_addr + 3, x23);
    ASSERT_EQUAL_64(data_addr + 4, x24);
  }
}
1392
1393
// LD3R with plain base-register addressing: load one three-element structure
// and replicate each element across every lane of its destination register,
// for every supported arrangement.
TEST(neon_ld3_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t data[64];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr + 1);
  __ Mov(x18, 1);  // Not read by any of the loads in this test.

  // Bytes, from offset 1 and then offset 4.
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 3);
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Half-words, from offset 5 and then offset 6.
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);

  // Words, from offset 12 and then offset 13.
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);

  // Double-words, from offset 25.
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements leave the upper half of the Q register zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
  }
}
1447
1448
// LD3R with post-index addressing: load one three-element structure and
// replicate each element across every lane, advancing the base register with
// a mix of immediate and register (x18 == 1) post-index amounts.
TEST(neon_ld3_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // All code generation happens between START() and END(), as in the other
  // tests in this file.
  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);  // Register post-index amount.

  // Bytes: loads at offsets 1 and 4.
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  // Half-words: loads at offsets 5 and 6.
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  // Words: loads at offsets 12 and 13.
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  // Double-words: load at offset 25.
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements leave the upper half of the Q register zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

    // Base write-back: start offset 1 plus post-index amounts
    // 3 + 1 + 1 + 6 + 1 + 12 + 24.
    ASSERT_EQUAL_64(src_base + 49, x17);
  }
}
1498
1499
// LD4 (multiple structures) into 64-bit (D-sized) registers: four-way
// de-interleaving loads for the 8B, 4H and 2S arrangements, including a
// register list that wraps from v31 to v0.
TEST(neon_ld4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset.
  uint8_t data[64 + 4];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr);
  // Each load starts one byte further into the buffer than the previous one.
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination must be zero.
    ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
    ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
    ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
    ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
    ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
    ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);
  }
}
1541
1542
// LD4 (multiple structures) into 64-bit registers with post-index addressing.
// Each load uses its own base register so that the write-back of every one
// can be checked independently.
TEST(neon_ld4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset. The last load reads
  // 32 bytes starting at offset 4, i.e. up to the final byte of the buffer.
  uint8_t data[32 + 4];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // Base registers x17..x21 point at offsets 0..4 respectively.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr + 1);
  __ Mov(x19, data_addr + 2);
  __ Mov(x20, data_addr + 3);
  __ Mov(x21, data_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.

  // First load: register post-index. Remaining loads: immediate post-index
  // of 32 bytes.
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(),
         MemOperand(x18, 32, PostIndex));
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(),
         MemOperand(x19, 32, PostIndex));
  __ Ld4(v14.V2S(), v15.V2S(), v16.V2S(), v17.V2S(),
         MemOperand(x20, 32, PostIndex));
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
    ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
    ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
    ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
    ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
    ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
    ASSERT_EQUAL_128(0, 0x2322212013121110, q1);

    // Each base register must have advanced by its post-index amount.
    ASSERT_EQUAL_64(data_addr + 1, x17);
    ASSERT_EQUAL_64(data_addr + 1 + 32, x18);
    ASSERT_EQUAL_64(data_addr + 2 + 32, x19);
    ASSERT_EQUAL_64(data_addr + 3 + 32, x20);
    ASSERT_EQUAL_64(data_addr + 4 + 32, x21);
  }
}
1618
1619
// LD4 (multiple structures) into 128-bit (Q-sized) registers: four-way
// de-interleaving loads for the 16B, 8H, 4S and 2D arrangements.
TEST(neon_ld4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset. The last load reads
  // 64 bytes starting at offset 4, i.e. up to the final byte of the buffer.
  uint8_t data[64 + 4];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr);
  // Each load starts one byte further into the buffer than the previous one.
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
    ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
    ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
    ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);
  }
}
1667
1668
// LD4 (multiple structures) into 128-bit registers with post-index
// addressing. Each load uses its own base register so that the write-back of
// every one can be checked independently; the last register list wraps from
// v31 to v0.
TEST(neon_ld4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Each byte of the source buffer holds its own offset. The last load reads
  // 64 bytes starting at offset 4, i.e. up to the final byte of the buffer.
  uint8_t data[64 + 4];
  for (unsigned offset = 0; offset < sizeof(data); ++offset) {
    data[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // Base registers x17..x21 point at offsets 0..4 respectively.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr + 1);
  __ Mov(x19, data_addr + 2);
  __ Mov(x20, data_addr + 3);
  __ Mov(x21, data_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.

  // First load: register post-index. Remaining loads: immediate post-index
  // of 64 bytes.
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(),
         MemOperand(x18, 64, PostIndex));
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(),
         MemOperand(x19, 64, PostIndex));
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(),
         MemOperand(x20, 64, PostIndex));
  __ Ld4(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
    ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
    ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);

    // Each base register must have advanced by its post-index amount.
    ASSERT_EQUAL_64(data_addr + 1, x17);
    ASSERT_EQUAL_64(data_addr + 1 + 64, x18);
    ASSERT_EQUAL_64(data_addr + 2 + 64, x19);
    ASSERT_EQUAL_64(data_addr + 3 + 64, x20);
    ASSERT_EQUAL_64(data_addr + 4 + 64, x21);
  }
}
1745
1746
// Checks the lane-indexed form of Ld4 with a plain base-register address, for
// B, H, S and D sized elements.
TEST(neon_ld4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes: each byte holds its own offset (0x00 .. 0x3f).
  uint8_t data[64];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = idx;
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();

  // Fill whole registers one lane at a time, highest lane first. The address
  // is advanced by a single byte between loads, whatever the element size.
  __ Mov(x17, data_addr);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, data_addr);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, data_addr);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, data_addr);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Overwrite a single lane of registers that already hold data. Each group
  // of four registers is first filled with the 64 source bytes through a
  // scratch pointer; x17 keeps pointing at the start of the buffer.
  __ Mov(x17, data_addr);

  // B elements, lane 4.
  __ Mov(x4, x17);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));

  // H elements, lane 3.
  __ Mov(x5, x17);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));

  // S elements, lane 2.
  __ Mov(x6, x17);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));

  // D elements, lane 1.
  __ Mov(x7, x17);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Registers assembled lane by lane (B lanes).
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
    ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
    ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
    // H lanes.
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
    ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
    ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
    ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
    // S lanes.
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
    ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
    ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
    ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
    // D lanes.
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
    ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);

    // Preloaded registers with a single lane replaced: B lane 4.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
    // H lane 3.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
    // S lane 2.
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
    // D lane 1.
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
  }
}
1852
1853
// Checks the lane-indexed form of Ld4 with post-index addressing, using both
// immediate and register post-index amounts.
TEST(neon_ld4_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes: each byte holds its own offset (0x00 .. 0x3f).
  uint8_t data[64];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = idx;
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();

  // Fill whole registers one lane at a time, highest lane first. The
  // immediate post-index equals the size of the structure just loaded, so
  // every loop consumes the full 64-byte buffer.
  __ Mov(x17, data_addr);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld4(v0.B(),
           v1.B(),
           v2.B(),
           v3.B(),
           lane,
           MemOperand(x17, 4, PostIndex));
  }

  __ Mov(x18, data_addr);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld4(v4.H(),
           v5.H(),
           v6.H(),
           v7.H(),
           lane,
           MemOperand(x18, 8, PostIndex));
  }

  __ Mov(x19, data_addr);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld4(v8.S(),
           v9.S(),
           v10.S(),
           v11.S(),
           lane,
           MemOperand(x19, 16, PostIndex));
  }

  __ Mov(x20, data_addr);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld4(v12.D(),
           v13.D(),
           v14.D(),
           v15.D(),
           lane,
           MemOperand(x20, 32, PostIndex));
  }

  // Overwrite a single lane of registers that already hold data, this time
  // with a register post-index. x25 is the post-index amount and is bumped by
  // one after each of the first three loads (1, 2, 3, 4).
  __ Mov(x25, 1);
  __ Mov(x21, data_addr);
  __ Mov(x22, data_addr);
  __ Mov(x23, data_addr);
  __ Mov(x24, data_addr);

  // B elements, lane 4.
  __ Mov(x4, x21);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  // H elements, lane 3.
  __ Mov(x5, x22);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  // S elements, lane 2.
  __ Mov(x6, x23);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  // D elements, lane 1.
  __ Mov(x7, x24);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Registers assembled lane by lane (B lanes).
    ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
    ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
    ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
    ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
    // H lanes.
    ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
    ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
    ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
    ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
    // S lanes.
    ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
    ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
    ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
    ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
    // D lanes.
    ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
    ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);

    // Preloaded registers with a single lane replaced: B lane 4.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
    // H lane 3.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
    // S lane 2.
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
    // D lane 1.
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);

    // Base registers after write-back: the lane loops walked the whole
    // buffer, the single-lane loads advanced by the value held in x25.
    ASSERT_EQUAL_64(data_addr + 64, x17);
    ASSERT_EQUAL_64(data_addr + 64, x18);
    ASSERT_EQUAL_64(data_addr + 64, x19);
    ASSERT_EQUAL_64(data_addr + 64, x20);
    ASSERT_EQUAL_64(data_addr + 1, x21);
    ASSERT_EQUAL_64(data_addr + 2, x22);
    ASSERT_EQUAL_64(data_addr + 3, x23);
    ASSERT_EQUAL_64(data_addr + 4, x24);
  }
}
1997
1998
// Checks Ld4r (load one four-element structure and replicate it to every lane
// of four registers) with a plain base-register address, for each vector
// arrangement from 8B to 2D.
TEST(neon_ld4_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes: each byte holds its own offset (0x00 .. 0x3f).
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // x17 is the only address register needed here; it is advanced manually
  // between loads. (No register offset is used in this test, so no offset
  // register is initialised.)
  __ Mov(x17, src_base + 1);
  __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 4);  // -> src_base + 5
  __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);  // -> src_base + 6
  __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);  // -> src_base + 7
  __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
  __ Add(x17, x17, 8);  // -> src_base + 15
  __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);  // -> src_base + 16
  __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
  __ Add(x17, x17, 16);  // -> src_base + 32
  __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));


  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements (8B, 4H, 2S) leave the upper half of the Q register
    // zero; 128-bit arrangements replicate the element across both halves.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
    ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
    ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
    ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
    ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
    ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
    ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
    ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
    ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
    ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
  }
}
2061
2062
// Checks Ld4r (load one four-element structure and replicate it to every lane
// of four registers) with post-index addressing, alternating between
// immediate and register post-index amounts.
TEST(neon_ld4_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes: each byte holds its own offset (0x00 .. 0x3f).
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // The register set-up is emitted once, inside the START()/END() region,
  // like every other test in this file.
  START();
  __ Mov(x17, src_base + 1);  // Running load address.
  __ Mov(x18, 1);             // Register post-index amount.

  // Address progression of x17 (offsets from src_base):
  //   1 -(+4)-> 5 -(+x18)-> 6 -(+x18)-> 7 -(+8)-> 15 -(+x18)-> 16
  //     -(+16)-> 32 -(+32)-> 64
  __ Ld4r(v0.V8B(),
          v1.V8B(),
          v2.V8B(),
          v3.V8B(),
          MemOperand(x17, 4, PostIndex));
  __ Ld4r(v4.V16B(),
          v5.V16B(),
          v6.V16B(),
          v7.V16B(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v8.V4H(),
          v9.V4H(),
          v10.V4H(),
          v11.V4H(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v12.V8H(),
          v13.V8H(),
          v14.V8H(),
          v15.V8H(),
          MemOperand(x17, 8, PostIndex));
  __ Ld4r(v16.V2S(),
          v17.V2S(),
          v18.V2S(),
          v19.V2S(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v20.V4S(),
          v21.V4S(),
          v22.V4S(),
          v23.V4S(),
          MemOperand(x17, 16, PostIndex));
  __ Ld4r(v24.V2D(),
          v25.V2D(),
          v26.V2D(),
          v27.V2D(),
          MemOperand(x17, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements (8B, 4H, 2S) leave the upper half of the Q register
    // zero; 128-bit arrangements replicate the element across both halves.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
    ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
    ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
    ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
    ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
    ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
    ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
    ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
    ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
    ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
    // Final write-back value of the base register.
    ASSERT_EQUAL_64(src_base + 64, x17);
  }
}
2148
2149
// Checks the lane-indexed form of St1 with a plain base-register address, for
// B, H, S and D sized elements.
TEST(neon_st1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The buffer is both the source of the test pattern and the store target.
  // Each byte initially holds its own offset (0x00 .. 0x3f).
  uint8_t buffer[64];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  START();
  __ Mov(x17, buffer_addr);
  // Offset used to read back the 16 bytes written by each store loop.
  __ Mov(x18, -16);
  // q0 is the pattern stored by every loop below.
  __ Ldr(q0, MemOperand(x17));

  // Each loop stores the lanes of v0 from the highest down to lane 0, moving
  // the address on by one element per store, then reloads what was written.
  for (int lane = 15; lane >= 0; --lane) {
    __ St1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int lane = 7; lane >= 0; --lane) {
    __ St1(v0.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 2);
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int lane = 3; lane >= 0; --lane) {
    __ St1(v0.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 4);
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int lane = 1; lane >= 0; --lane) {
    __ St1(v0.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 8);
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // The element order is reversed relative to q0 at each element size.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2199
2200
// Checks the lane-indexed form of St2 for B, H, S and D sized elements, with
// plain base-register addressing and with post-index addressing.
TEST(neon_st2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[2 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // x17 stays at the start of the buffer (used for the read-back loads);
  // x18 is the running store address.
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18));
    __ Add(x18, x18, 2);
  }
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, 0 * 16));
  __ Ldr(q3, MemOperand(x17, 1 * 16));
  __ Ldr(q4, MemOperand(x17, 2 * 16));
  __ Ldr(q5, MemOperand(x17, 3 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 4);
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q6, MemOperand(x17, 4 * 16));
  __ Ldr(q7, MemOperand(x17, 5 * 16));
  __ Ldr(q16, MemOperand(x17, 6 * 16));
  __ Ldr(q17, MemOperand(x17, 7 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18));
    __ Add(x18, x18, 8);
  }
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
  }
  __ Ldr(q18, MemOperand(x17, 8 * 16));
  __ Ldr(q19, MemOperand(x17, 9 * 16));
  __ Ldr(q20, MemOperand(x17, 10 * 16));
  __ Ldr(q21, MemOperand(x17, 11 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 16);
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 16);
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
  __ Ldr(q22, MemOperand(x17, 12 * 16));
  __ Ldr(q23, MemOperand(x17, 13 * 16));
  __ Ldr(q24, MemOperand(x17, 14 * 16));
  __ Ldr(q25, MemOperand(x17, 15 * 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // For every element size the second pair of registers repeats the first:
    // the same lanes are stored again using the other addressing mode.
    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
    ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
    ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);

    ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
    ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
    ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
    ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);

    ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
    ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
    ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
    ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);

    // q22/q23: base-register and immediate post-index D stores.
    // q24/q25: register post-index D stores of the same lanes (1, then 0).
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
    ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q24);
    ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q25);
  }
}
2292
2293
// Checks the lane-indexed form of St3 for B, H, S and D sized elements, with
// plain base-register addressing and with post-index addressing.
TEST(neon_st3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[3 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // x17 stays at the start of the buffer (used for the read-back loads);
  // x18 is the running store address.
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
    __ Add(x18, x18, 3);
  }
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, 0 * 16));
  __ Ldr(q4, MemOperand(x17, 1 * 16));
  __ Ldr(q5, MemOperand(x17, 2 * 16));
  __ Ldr(q6, MemOperand(x17, 3 * 16));
  __ Ldr(q7, MemOperand(x17, 4 * 16));
  __ Ldr(q16, MemOperand(x17, 5 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 6);
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
    __ Add(x18, x18, 6);
  }
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q17, MemOperand(x17, 6 * 16));
  __ Ldr(q18, MemOperand(x17, 7 * 16));
  __ Ldr(q19, MemOperand(x17, 8 * 16));
  __ Ldr(q20, MemOperand(x17, 9 * 16));
  __ Ldr(q21, MemOperand(x17, 10 * 16));
  __ Ldr(q22, MemOperand(x17, 11 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
    __ Add(x18, x18, 12);
  }
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
  }
  __ Ldr(q23, MemOperand(x17, 12 * 16));
  __ Ldr(q24, MemOperand(x17, 13 * 16));
  __ Ldr(q25, MemOperand(x17, 14 * 16));
  __ Ldr(q26, MemOperand(x17, 15 * 16));
  __ Ldr(q27, MemOperand(x17, 16 * 16));
  __ Ldr(q28, MemOperand(x17, 17 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
  __ Ldr(q29, MemOperand(x17, 18 * 16));
  __ Ldr(q30, MemOperand(x17, 19 * 16));
  __ Ldr(q31, MemOperand(x17, 20 * 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // For the B, H and S element sizes the second group of three registers
    // repeats the first: the same lanes are stored again with post-index
    // addressing.
    ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
    ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
    ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
    ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
    ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
    ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);

    ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
    ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
    ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
    ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
    ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
    ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);

    ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
    ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
    ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
    ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
    ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
    ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);

    // D stores. q29-q31 hold the first 48 bytes of the D region: the lane 1
    // store (base register) followed by the lane 0 store (immediate
    // post-index). The final register post-index store is written after the
    // bytes loaded into q29-q31, so its data is not checked here.
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q29);
    ASSERT_EQUAL_128(0x08090a0b0c0d0e0f, 0x2021222324252627, q30);
    ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x18191a1b1c1d1e1f, q31);
  }
}
2391
2392
// Checks the lane-indexed form of St4. B and S elements are stored with a
// plain base-register address; H and D elements use post-index addressing.
TEST(neon_st4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * element sizes * vector size.
  uint8_t out[4 * 4 * 16];
  memset(out, 0, sizeof(out));
  const uintptr_t out_addr = reinterpret_cast<uintptr_t>(out);

  START();
  // x17 stays at the start of the buffer (used for the read-back loads);
  // x18 is the running store address.
  __ Mov(x17, out_addr);
  __ Mov(x18, out_addr);
  // Note: v3 is given the same pattern as v2; the expected values below
  // reflect that.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
  __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // B elements: base-register addressing, address advanced manually.
  for (int lane = 15; lane >= 0; --lane) {
    __ St4(v0.B(), v1.B(), v2.B(), v3.B(), lane, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  __ Ldr(q4, MemOperand(x17, 0 * 16));
  __ Ldr(q5, MemOperand(x17, 1 * 16));
  __ Ldr(q6, MemOperand(x17, 2 * 16));
  __ Ldr(q7, MemOperand(x17, 3 * 16));

  // H elements: register post-index (x0 holds the structure size).
  __ Mov(x0, 8);
  for (int lane = 7; lane >= 0; --lane) {
    __ St4(v0.H(),
           v1.H(),
           v2.H(),
           v3.H(),
           lane,
           MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q16, MemOperand(x17, 4 * 16));
  __ Ldr(q17, MemOperand(x17, 5 * 16));
  __ Ldr(q18, MemOperand(x17, 6 * 16));
  __ Ldr(q19, MemOperand(x17, 7 * 16));

  // S elements: base-register addressing, address advanced manually.
  for (int lane = 3; lane >= 0; --lane) {
    __ St4(v0.S(), v1.S(), v2.S(), v3.S(), lane, MemOperand(x18));
    __ Add(x18, x18, 16);
  }
  __ Ldr(q20, MemOperand(x17, 8 * 16));
  __ Ldr(q21, MemOperand(x17, 9 * 16));
  __ Ldr(q22, MemOperand(x17, 10 * 16));
  __ Ldr(q23, MemOperand(x17, 11 * 16));

  // D elements: immediate post-index for lane 0, register post-index for
  // lane 1.
  __ Mov(x0, 32);
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));

  __ Ldr(q24, MemOperand(x17, 12 * 16));
  __ Ldr(q25, MemOperand(x17, 13 * 16));
  __ Ldr(q26, MemOperand(x17, 14 * 16));
  __ Ldr(q27, MemOperand(x17, 15 * 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // B stores.
    ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4);
    ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5);
    ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6);
    ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7);

    // H stores.
    ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16);
    ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17);
    ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18);
    ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19);

    // S stores.
    ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20);
    ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21);
    ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22);
    ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);

    // D stores.
    ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
    ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25);
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
    ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27);
  }
}
2474
2475
// Checks the lane-indexed form of Ld1 with post-index addressing, using both
// immediate and register post-index amounts.
TEST(neon_ld1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes: each byte holds its own offset (0x00 .. 0x3f).
  uint8_t data[64];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = idx;
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // One base register per load sequence, all starting at the buffer start,
  // so that each write-back result can be checked independently.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Mov(x19, data_addr);
  __ Mov(x20, data_addr);
  __ Mov(x21, data_addr);
  __ Mov(x22, data_addr);
  __ Mov(x23, data_addr);
  __ Mov(x24, data_addr);

  // Fill whole registers one lane at a time, highest lane first, with an
  // immediate post-index equal to the element size.
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld1(v0.B(), lane, MemOperand(x17, 1, PostIndex));
  }

  for (int lane = 7; lane >= 0; --lane) {
    __ Ld1(v1.H(), lane, MemOperand(x18, 2, PostIndex));
  }

  for (int lane = 3; lane >= 0; --lane) {
    __ Ld1(v2.S(), lane, MemOperand(x19, 4, PostIndex));
  }

  for (int lane = 1; lane >= 0; --lane) {
    __ Ld1(v3.D(), lane, MemOperand(x20, 8, PostIndex));
  }

  // Overwrite a single lane of a register that already holds data, with a
  // register post-index. x25 is the post-index amount and is bumped by one
  // after each of the first three loads (1, 2, 3, 4).
  __ Mov(x25, 1);
  __ Ldr(q4, MemOperand(x21));
  __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q5, MemOperand(x22));
  __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q6, MemOperand(x23));
  __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q7, MemOperand(x24));
  __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Registers assembled lane by lane.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
    // Preloaded registers with a single lane replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
    // Base registers after write-back.
    ASSERT_EQUAL_64(data_addr + 16, x17);
    ASSERT_EQUAL_64(data_addr + 16, x18);
    ASSERT_EQUAL_64(data_addr + 16, x19);
    ASSERT_EQUAL_64(data_addr + 16, x20);
    ASSERT_EQUAL_64(data_addr + 1, x21);
    ASSERT_EQUAL_64(data_addr + 2, x22);
    ASSERT_EQUAL_64(data_addr + 3, x23);
    ASSERT_EQUAL_64(data_addr + 4, x24);
  }
}
2552
2553
// Checks the lane-indexed form of St1 with immediate post-index addressing,
// for B, H, S and D sized elements.
TEST(neon_st1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The buffer is both the source of the test pattern and the store target.
  // Each byte initially holds its own offset (0x00 .. 0x3f).
  uint8_t buffer[64];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  START();
  __ Mov(x17, buffer_addr);
  // Offset used to read back the 16 bytes written by each store loop.
  __ Mov(x18, -16);
  // q0 is the pattern stored by every loop below.
  __ Ldr(q0, MemOperand(x17));

  // Each loop stores the lanes of v0 from the highest down to lane 0; the
  // post-index moves the address on by one element per store. The 16 bytes
  // just written are then reloaded for checking.
  for (int lane = 15; lane >= 0; --lane) {
    __ St1(v0.B(), lane, MemOperand(x17, 1, PostIndex));
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int lane = 7; lane >= 0; --lane) {
    __ St1(v0.H(), lane, MemOperand(x17, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int lane = 3; lane >= 0; --lane) {
    __ St1(v0.S(), lane, MemOperand(x17, 4, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int lane = 1; lane >= 0; --lane) {
    __ St1(v0.D(), lane, MemOperand(x17, 8, PostIndex));
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // The element order is reversed relative to q0 at each element size.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2599
2600
// Checks Ld1r (load one element and replicate it to every lane) with a plain
// base-register address, for each vector arrangement from 8B to 2D.
TEST(neon_ld1_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes: each byte holds its own offset (0x00 .. 0x3f).
  uint8_t data[64];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = idx;
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // The load address starts one byte into the buffer and moves on by a single
  // byte between loads, whatever the element size.
  __ Mov(x17, data_addr + 1);
  __ Ld1r(v0.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v1.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v2.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v3.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v4.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v5.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v6.V1D(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v7.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements (8B, 4H, 2S, 1D) leave the upper half of the Q
    // register zero; 128-bit arrangements fill both halves.
    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
    ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);
  }
}
2642
2643
// Exercises Ld1r with post-indexed addressing, mixing immediate post-index
// amounts with a register post-index (x18 == 1), and checks both the
// replicated values and the final value of the base register.
TEST(neon_ld1_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source memory holding the byte pattern 0x00, 0x01, ..., 0x3f.
  uint8_t pattern[64];
  for (size_t pos = 0; pos < sizeof(pattern); ++pos) {
    pattern[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t pattern_addr = reinterpret_cast<uintptr_t>(pattern);

  // Register post-index: advances x17 by the value held in x18.
  const MemOperand step_by_x18(x17, x18, PostIndex);

  START();
  __ Mov(x17, pattern_addr + 1);
  __ Mov(x18, 1);
  // Running offset of x17 from the buffer start is noted before each load.
  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));  // Offset 1.
  __ Ld1r(v1.V16B(), step_by_x18);                   // Offset 2.
  __ Ld1r(v2.V4H(), step_by_x18);                    // Offset 3.
  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));  // Offset 4.
  __ Ld1r(v4.V2S(), step_by_x18);                    // Offset 6.
  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));  // Offset 7.
  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));  // Offset 11.
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
    ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
    // 1 (start) + 1 + 1 + 1 + 2 + 1 + 4 + 8 bytes of post-indexing.
    ASSERT_EQUAL_64(pattern_addr + 19, x17);
  }
}
2678
2679
// Exercises the multi-register St1 forms on 64-bit (D-sized) arrangements with
// one to four source registers and a plain base-register address. Each store
// is followed by post-indexed loads that read the stored bytes back and move
// x17 on to the next free slot.
TEST(neon_st1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[14 * kDRegSizeInBytes];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand at_cursor(x17);
  const MemOperand take_d(x17, 8, PostIndex);
  const MemOperand take_q(x17, 16, PostIndex);

  START();
  // Capture the first 64 bytes of the pattern in q0-q3, then rewind.
  __ Mov(x17, buffer_addr);
  __ Ldr(q0, take_q);
  __ Ldr(q1, take_q);
  __ Ldr(q2, take_q);
  __ Ldr(q3, take_q);
  __ Mov(x17, buffer_addr);

  // One register.
  __ St1(v0.V8B(), at_cursor);
  __ Ldr(d16, take_d);

  // Two registers.
  __ St1(v0.V8B(), v1.V8B(), at_cursor);
  __ Ldr(q17, take_q);

  // Three registers.
  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), at_cursor);
  __ Ldr(d18, take_d);
  __ Ldr(d19, take_d);
  __ Ldr(d20, take_d);

  // Four registers.
  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), at_cursor);
  __ Ldr(q21, take_q);
  __ Ldr(q22, take_q);

  // Four registers again, this time with the 1D arrangement.
  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), at_cursor);
  __ Ldr(q23, take_q);
  __ Ldr(q24, at_cursor);
  END();

  if (CAN_RUN()) {
    RUN();

    // The source registers themselves.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
    // The data read back after each store.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2735
2736
// Exercises the multi-register St1 forms on 64-bit (D-sized) arrangements with
// immediate post-indexing. After each store, the stored bytes are read back
// using negative register offsets from the already-advanced x17.
TEST(neon_st1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[64 + 14 * kDRegSizeInBytes];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  // x18-x21 hold -8, -16, -24 and -32 respectively (set up below).
  const MemOperand back_8(x17, x18);
  const MemOperand back_16(x17, x19);
  const MemOperand back_24(x17, x20);
  const MemOperand back_32(x17, x21);
  const MemOperand take_q(x17, 16, PostIndex);

  START();
  __ Mov(x17, buffer_addr);
  __ Mov(x18, -8);
  __ Mov(x19, -16);
  __ Mov(x20, -24);
  __ Mov(x21, -32);
  // Capture the first 64 bytes of the pattern in q0-q3, then rewind.
  __ Ldr(q0, take_q);
  __ Ldr(q1, take_q);
  __ Ldr(q2, take_q);
  __ Ldr(q3, take_q);
  __ Mov(x17, buffer_addr);

  // One register, 8 bytes.
  __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
  __ Ldr(d16, back_8);

  // Two registers, 16 bytes.
  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q17, back_16);

  // Three registers, 24 bytes.
  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
  __ Ldr(d18, back_24);
  __ Ldr(d19, back_16);
  __ Ldr(d20, back_8);

  // Four registers, 32 bytes.
  __ St1(v0.V2S(),
         v1.V2S(),
         v2.V2S(),
         v3.V2S(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q21, back_32);
  __ Ldr(q22, back_16);

  // Four registers again, this time with the 1D arrangement.
  __ St1(v0.V1D(),
         v1.V1D(),
         v2.V1D(),
         v3.V1D(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q23, back_32);
  __ Ldr(q24, back_16);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2800
2801
// Exercises the multi-register St1 forms on 128-bit (Q-sized) arrangements
// with one to four source registers and a plain base-register address. The
// stores land in the area that follows the 64 bytes loaded into q0-q3.
TEST(neon_st1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64 bytes of source pattern followed by 160 bytes of store area; every
  // byte initially holds its own index.
  uint8_t buffer[64 + 160];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand at_cursor(x17);
  const MemOperand take_q(x17, 16, PostIndex);

  START();
  __ Mov(x17, buffer_addr);
  __ Ldr(q0, take_q);
  __ Ldr(q1, take_q);
  __ Ldr(q2, take_q);
  __ Ldr(q3, take_q);

  // One register.
  __ St1(v0.V16B(), at_cursor);
  __ Ldr(q16, take_q);

  // Two registers.
  __ St1(v0.V8H(), v1.V8H(), at_cursor);
  __ Ldr(q17, take_q);
  __ Ldr(q18, take_q);

  // Three registers.
  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), at_cursor);
  __ Ldr(q19, take_q);
  __ Ldr(q20, take_q);
  __ Ldr(q21, take_q);

  // Four registers.
  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), at_cursor);
  __ Ldr(q22, take_q);
  __ Ldr(q23, take_q);
  __ Ldr(q24, take_q);
  __ Ldr(q25, at_cursor);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2852
2853
// Exercises the multi-register St1 forms on 128-bit (Q-sized) arrangements
// with immediate post-indexing. After each store, the stored bytes are read
// back using negative register offsets from the already-advanced x17.
TEST(neon_st1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64 bytes of source pattern followed by 160 bytes of store area; every
  // byte initially holds its own index.
  uint8_t buffer[64 + 160];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  // x18-x21 hold -16, -32, -48 and -64 respectively (set up below).
  const MemOperand back_16(x17, x18);
  const MemOperand back_32(x17, x19);
  const MemOperand back_48(x17, x20);
  const MemOperand back_64(x17, x21);
  const MemOperand take_q(x17, 16, PostIndex);

  START();
  __ Mov(x17, buffer_addr);
  __ Mov(x18, -16);
  __ Mov(x19, -32);
  __ Mov(x20, -48);
  __ Mov(x21, -64);
  __ Ldr(q0, take_q);
  __ Ldr(q1, take_q);
  __ Ldr(q2, take_q);
  __ Ldr(q3, take_q);

  // One register, 16 bytes.
  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q16, back_16);

  // Two registers, 32 bytes.
  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q17, back_32);
  __ Ldr(q18, back_16);

  // Three registers, 48 bytes.
  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
  __ Ldr(q19, back_48);
  __ Ldr(q20, back_32);
  __ Ldr(q21, back_16);

  // Four registers, 64 bytes.
  __ St1(v0.V2D(),
         v1.V2D(),
         v2.V2D(),
         v3.V2D(),
         MemOperand(x17, 64, PostIndex));
  __ Ldr(q22, back_64);
  __ Ldr(q23, back_48);
  __ Ldr(q24, back_32);
  __ Ldr(q25, back_16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2913
2914
// Exercises St2 on 64-bit (D-sized) arrangements with a plain base-register
// address. The three stores start at byte offsets 0, 22 and 33 of the buffer,
// and the whole buffer is then reloaded for checking.
TEST(neon_st2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[4 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand store_target(x18);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);

  __ St2(v0.V8B(), v1.V8B(), store_target);
  __ Add(x18, x18, 22);
  __ St2(v0.V4H(), v1.V4H(), store_target);
  __ Add(x18, x18, 11);
  __ St2(v0.V2S(), v1.V2S(), store_target);

  // Reload the whole buffer from its start.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);
  __ Ldr(q3, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
    ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);
  }
}
2953
2954
// Exercises St2 on 64-bit (D-sized) arrangements with post-indexing: first a
// register post-index (x22 == 5), then an immediate post-index of 16, then a
// final store with no write-back. The stores therefore start at byte offsets
// 0, 5 and 21 of the buffer.
TEST(neon_st2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[4 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  // Reload the first 48 bytes of the buffer.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);
  }
}
2991
2992
// Exercises St2 on 128-bit (Q-sized) arrangements with a plain base-register
// address. The four stores start at byte offsets 0, 8, 30 and 32 of the
// buffer, and the first 64 bytes are then reloaded for checking.
TEST(neon_st2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[5 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand store_target(x18);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);

  __ St2(v0.V16B(), v1.V16B(), store_target);
  __ Add(x18, x18, 8);
  __ St2(v0.V8H(), v1.V8H(), store_target);
  __ Add(x18, x18, 22);
  __ St2(v0.V4S(), v1.V4S(), store_target);
  __ Add(x18, x18, 2);
  __ St2(v0.V2D(), v1.V2D(), store_target);

  // Reload the first 64 bytes of the buffer.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);
  __ Ldr(q3, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);
  }
}
3033
3034
// Exercises St2 on 128-bit (Q-sized) arrangements with post-indexing,
// alternating a register post-index (x22 == 5) with an immediate post-index
// of 32; the last store has no write-back. The stores therefore start at byte
// offsets 0, 5, 37 and 42 of the buffer.
TEST(neon_st2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[5 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Reload the whole buffer from its start.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);
  __ Ldr(q3, read_back);
  __ Ldr(q4, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
    ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);
  }
}
3075
3076
// Exercises St3 on 64-bit (D-sized) arrangements with a plain base-register
// address. The three stores start at byte offsets 0, 3 and 5 of the buffer,
// and the first 32 bytes are then reloaded for checking.
TEST(neon_st3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[3 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand store_target(x18);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);
  __ Ldr(q2, load_source);

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), store_target);
  __ Add(x18, x18, 3);
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), store_target);
  __ Add(x18, x18, 2);
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), store_target);

  // Reload the first 32 bytes of the buffer.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
    ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);
  }
}
3113
3114
// Exercises St3 on 64-bit (D-sized) arrangements with post-indexing: first a
// register post-index (x22 == 5), then an immediate post-index of 24, then a
// final store with no write-back. The stores therefore start at byte offsets
// 0, 5 and 29 of the buffer.
TEST(neon_st3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[4 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);
  __ Ldr(q2, load_source);

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));

  // Reload the whole buffer from its start.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);
  __ Ldr(q3, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);
  }
}
3154
3155
// Exercises St3 on 128-bit (Q-sized) arrangements with a plain base-register
// address. The four stores start at byte offsets 0, 5, 17 and 39 of the
// buffer, and the whole buffer is then reloaded for checking.
TEST(neon_st3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[6 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand store_target(x18);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);
  __ Ldr(q2, load_source);

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), store_target);
  __ Add(x18, x18, 5);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), store_target);
  __ Add(x18, x18, 12);
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), store_target);
  __ Add(x18, x18, 22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), store_target);

  // Reload the whole buffer from its start.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);
  __ Ldr(q3, read_back);
  __ Ldr(q4, read_back);
  __ Ldr(q5, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
    ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);
  }
}
3201
3202
// Exercises St3 on 128-bit (Q-sized) arrangements with post-indexing,
// alternating a register post-index (x22 == 5) with an immediate post-index
// of 48; the last store has no write-back. The stores therefore start at byte
// offsets 0, 5, 53 and 58 of the buffer.
TEST(neon_st3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[7 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);
  __ Ldr(q2, load_source);

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Reload the whole buffer from its start.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);
  __ Ldr(q3, read_back);
  __ Ldr(q4, read_back);
  __ Ldr(q5, read_back);
  __ Ldr(q6, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
    ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
    ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
  }
}
3248
3249
// Exercises St4 on 64-bit (D-sized) arrangements with a plain base-register
// address. The three stores start at byte offsets 0, 12 and 27 of the buffer,
// and the whole buffer is then reloaded for checking.
TEST(neon_st4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[4 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand store_target(x18);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);
  __ Ldr(q2, load_source);
  __ Ldr(q3, load_source);

  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), store_target);
  __ Add(x18, x18, 12);
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), store_target);
  __ Add(x18, x18, 15);
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), store_target);

  // Reload the whole buffer from its start.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);
  __ Ldr(q3, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1110010032221202, 0x3121110130201000, q0);
    ASSERT_EQUAL_128(0x1003020100322322, 0x1312030231302120, q1);
    ASSERT_EQUAL_128(0x1407060504333231, 0x3023222120131211, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);
  }
}
3291
3292
// Exercises St4 on 64-bit (D-sized) arrangements with post-indexing: first a
// register post-index (x22 == 5), then an immediate post-index of 32, then a
// final store with no write-back. The stores therefore start at byte offsets
// 0, 5 and 37 of the buffer.
TEST(neon_st4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[5 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);
  __ Ldr(q2, load_source);
  __ Ldr(q3, load_source);

  __ St4(v0.V8B(),
         v1.V8B(),
         v2.V8B(),
         v3.V8B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V4H(),
         v1.V4H(),
         v2.V4H(),
         v3.V4H(),
         MemOperand(x18, 32, PostIndex));
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));

  // Reload the whole buffer from its start.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);
  __ Ldr(q3, read_back);
  __ Ldr(q4, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
    ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
    ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);
  }
}
3343
3344
// Exercises St4 on 128-bit (Q-sized) arrangements with a plain base-register
// address. The four stores start at byte offsets 0, 5, 17 and 39 of the
// buffer, and the whole buffer is then reloaded for checking.
TEST(neon_st4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t src[7 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  // Capture the first 64 bytes of the pattern as the store data.
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // x18 is the store address; it is only advanced between stores, since
  // nothing reads it after the final St4.
  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));

  // Reload the whole buffer from its start.
  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
    ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
    ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);
  }
}
3394
3395
// Exercises St4 on 128-bit (Q-sized) arrangements with post-indexing,
// alternating a register post-index (x22 == 5) with an immediate post-index
// of 64; the last store has no write-back. The stores therefore start at byte
// offsets 0, 5, 69 and 74 of the buffer.
TEST(neon_st4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer pre-filled so that each byte holds its own index.
  uint8_t buffer[9 * 16];
  for (size_t pos = 0; pos < sizeof(buffer); ++pos) {
    buffer[pos] = static_cast<uint8_t>(pos);
  }
  const uintptr_t buffer_addr = reinterpret_cast<uintptr_t>(buffer);

  const MemOperand load_source(x17, 16, PostIndex);
  const MemOperand read_back(x19, 16, PostIndex);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, buffer_addr);
  __ Mov(x18, buffer_addr);
  __ Ldr(q0, load_source);
  __ Ldr(q1, load_source);
  __ Ldr(q2, load_source);
  __ Ldr(q3, load_source);

  __ St4(v0.V16B(),
         v1.V16B(),
         v2.V16B(),
         v3.V16B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V8H(),
         v1.V8H(),
         v2.V8H(),
         v3.V8H(),
         MemOperand(x18, 64, PostIndex));
  __ St4(v0.V4S(),
         v1.V4S(),
         v2.V4S(),
         v3.V4S(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));

  // Reload the whole buffer from its start.
  __ Mov(x19, buffer_addr);
  __ Ldr(q0, read_back);
  __ Ldr(q1, read_back);
  __ Ldr(q2, read_back);
  __ Ldr(q3, read_back);
  __ Ldr(q4, read_back);
  __ Ldr(q5, read_back);
  __ Ldr(q6, read_back);
  __ Ldr(q7, read_back);
  __ Ldr(q8, read_back);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
    ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
    ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
    ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
    ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
    ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
    ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);
  }
}
3458
3459
// Checks the pairwise min/max macros (Sminp, Smaxp, Uminp, Umaxp) when the
// destination register is also a source. For each instruction four variants
// are emitted: distinct destination, destination == first source,
// destination == second source, and destination == both sources.
TEST(neon_destructive_minmaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 2S views of the two input registers.
  const VRegister lhs = v0.V2S();
  const VRegister rhs = v1.V2S();

  START();
  __ Movi(v0.V2D(), 0, 0x2222222233333333);
  __ Movi(v1.V2D(), 0, 0x0000000011111111);

  // Signed pairwise minimum.
  __ Sminp(v16.V2S(), lhs, rhs);
  __ Mov(v17, v0);
  __ Sminp(v17.V2S(), v17.V2S(), rhs);
  __ Mov(v18, v1);
  __ Sminp(v18.V2S(), lhs, v18.V2S());
  __ Mov(v19, v0);
  __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());

  // Signed pairwise maximum.
  __ Smaxp(v20.V2S(), lhs, rhs);
  __ Mov(v21, v0);
  __ Smaxp(v21.V2S(), v21.V2S(), rhs);
  __ Mov(v22, v1);
  __ Smaxp(v22.V2S(), lhs, v22.V2S());
  __ Mov(v23, v0);
  __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());

  // Unsigned pairwise minimum.
  __ Uminp(v24.V2S(), lhs, rhs);
  __ Mov(v25, v0);
  __ Uminp(v25.V2S(), v25.V2S(), rhs);
  __ Mov(v26, v1);
  __ Uminp(v26.V2S(), lhs, v26.V2S());
  __ Mov(v27, v0);
  __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());

  // Unsigned pairwise maximum.
  __ Umaxp(v28.V2S(), lhs, rhs);
  __ Mov(v29, v0);
  __ Umaxp(v29.V2S(), v29.V2S(), rhs);
  __ Mov(v30, v1);
  __ Umaxp(v30.V2S(), lhs, v30.V2S());
  __ Mov(v31, v0);
  __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The first three variants of each group must agree; the fourth uses v0
    // for both sources and so has its own expected value.
    ASSERT_EQUAL_128(0, 0x0000000022222222, q16);
    ASSERT_EQUAL_128(0, 0x0000000022222222, q17);
    ASSERT_EQUAL_128(0, 0x0000000022222222, q18);
    ASSERT_EQUAL_128(0, 0x2222222222222222, q19);

    ASSERT_EQUAL_128(0, 0x1111111133333333, q20);
    ASSERT_EQUAL_128(0, 0x1111111133333333, q21);
    ASSERT_EQUAL_128(0, 0x1111111133333333, q22);
    ASSERT_EQUAL_128(0, 0x3333333333333333, q23);

    ASSERT_EQUAL_128(0, 0x0000000022222222, q24);
    ASSERT_EQUAL_128(0, 0x0000000022222222, q25);
    ASSERT_EQUAL_128(0, 0x0000000022222222, q26);
    ASSERT_EQUAL_128(0, 0x2222222222222222, q27);

    ASSERT_EQUAL_128(0, 0x1111111133333333, q28);
    ASSERT_EQUAL_128(0, 0x1111111133333333, q29);
    ASSERT_EQUAL_128(0, 0x1111111133333333, q30);
    ASSERT_EQUAL_128(0, 0x3333333333333333, q31);
  }
}
3524
3525
// Checks the Tbl macro, in its one-table and four-table forms, when the
// destination register aliases the index register, a table register, or both.
TEST(neon_destructive_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Value written to a destination before it is overwritten by Tbl.
  const uint64_t filler = 0x5555555555555555;

  // 16B views of the index register (v0) and the table registers (v1-v4).
  const VRegister indices = v0.V16B();
  const VRegister table_a = v1.V16B();
  const VRegister table_b = v2.V16B();
  const VRegister table_c = v3.V16B();
  const VRegister table_d = v4.V16B();

  START();
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // One-table form: no aliasing, then the destination aliasing the indices,
  // the table, and finally both.
  __ Movi(v16.V2D(), filler, filler);
  __ Tbl(v16.V16B(), table_a, indices);
  __ Mov(v17, v0);
  __ Tbl(v17.V16B(), table_a, v17.V16B());
  __ Mov(v18, v1);
  __ Tbl(v18.V16B(), v18.V16B(), indices);
  __ Mov(v19, v0);
  __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-table form: no aliasing, then the destination aliasing the indices.
  __ Movi(v20.V2D(), filler, filler);
  __ Tbl(v20.V16B(), table_a, table_b, table_c, table_d, indices);
  __ Mov(v21, v0);
  __ Tbl(v21.V16B(), table_a, table_b, table_c, table_d, v21.V16B());
  // Destination aliasing the first table register (tables copied to v22-v25).
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), indices);
  // Destination aliasing both the first table register and the indices; here
  // v26-v29 hold copies of v0-v3.
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbl(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q16);
    ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q17);
    ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q18);
    ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19);

    ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
    ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
    ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q22);
    ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3580
3581
// Check TBX when the destination register is also used as the index operand
// and/or as a table register, for the one-register and four-register table
// forms.
TEST(neon_destructive_tbx) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // v0 holds the byte indices; v1-v4 hold the table data.
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // One-register table. v16 is the non-aliased case; v17 aliases the indices,
  // v18 aliases the table and v19 aliases both (so its table is v0).
  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-register table. v20 is the non-aliased case; v21 aliases the
  // indices, v22 aliases the first table register and v26 aliases both the
  // first table register and the indices (its table is {v0, v1, v2, v3}).
  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbx(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Out-of-range indices leave the destination byte untouched, so each
    // result keeps bytes from whatever the destination held beforehand (the
    // 0x55 pre-fill, the copy of v0, or the copy of v1).
    ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16);
    ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17);
    ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18);
    ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19);

    ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
    ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
    ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22);
    ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3636
3637
// Check FCVTL/FCVTL2 when the destination register is the same as the source
// register. Each aliased result must match the non-aliased one.
TEST(neon_destructive_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single to double precision: v16/v17 are the non-aliased references and
  // v18/v19 convert in place.
  __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
  __ Fcvtl(v16.V2D(), v0.V2S());
  __ Fcvtl2(v17.V2D(), v0.V4S());
  __ Mov(v18, v0);
  __ Mov(v19, v0);
  __ Fcvtl(v18.V2D(), v18.V2S());
  __ Fcvtl2(v19.V2D(), v19.V4S());

  // Half to single precision: v20/v21 are the non-aliased references and
  // v22/v23 convert in place.
  __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
  __ Fcvtl(v20.V4S(), v1.V4H());
  __ Fcvtl2(v21.V4S(), v1.V8H());
  __ Mov(v22, v1);
  __ Mov(v23, v1);
  __ Fcvtl(v22.V4S(), v22.V4H());
  __ Fcvtl2(v23.V4S(), v23.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q16);
    ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q17);
    ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q18);
    ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q19);

    ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q20);
    ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q21);
    ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q22);
    ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q23);
  }
}
3674
// Check half-precision vector FADD in the 4H and 8H arrangements, covering
// exact and rounded sums, an infinite operand and NaN operands.
TEST(fadd_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V8H(), kFP16PositiveInfinity);
  // NOTE(review): v5 is loaded but not read by any instruction in this test.
  __ Fmov(v5.V8H(), kFP16NegativeInfinity);
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c2f));  // Signalling NaN.
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe0f));  // Quiet NaN.

  __ Fadd(v8.V4H(), v1.V4H(), v0.V4H());
  __ Fadd(v9.V8H(), v3.V8H(), v2.V8H());
  __ Fadd(v10.V4H(), v4.V4H(), v3.V4H());

  __ Fadd(v11.V4H(), v6.V4H(), v1.V4H());
  __ Fadd(v12.V4H(), v7.V4H(), v7.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // 1024.0 + 24.0 = 1048.0; the 4H form leaves the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x6418641864186418, q8);
    // 2053.5 is unrepresentable in FP16.
    ASSERT_EQUAL_128(0x6803680368036803, 0x6803680368036803, q9);

    // +Infinity + 2048.0 is +Infinity (0x7c00).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q10);

    // Note: we test NaNs here as vectors aren't covered by process_nans_half
    // and we don't have traces for half-precision enabled hardware.
    // Quiet NaN from Signalling.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e2f7e2f7e2f7e2f, q11);
    // Quiet NaN.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfe0ffe0ffe0ffe0f, q12);
  }
}
3716
// Check half-precision vector FSUB in the 4H and 8H arrangements, covering
// an in-place subtraction, a rounded result, an infinite operand and NaN
// operands.
TEST(fsub_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  // NOTE(review): v5 is loaded but not read by any instruction in this test.
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c22));  // Signalling NaN.
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe02));  // Quiet NaN.

  // v0 is overwritten here (1024.0 - 24.0) and then reused as the minuend of
  // the v10 subtraction below.
  __ Fsub(v0.V4H(), v1.V4H(), v0.V4H());
  __ Fsub(v8.V8H(), v3.V8H(), v2.V8H());
  __ Fsub(v9.V4H(), v4.V4H(), v3.V4H());
  __ Fsub(v10.V4H(), v0.V4H(), v1.V4H());

  __ Fsub(v11.V4H(), v6.V4H(), v2.V4H());
  __ Fsub(v12.V4H(), v7.V4H(), v7.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 1024.0 - 24.0 = 1000.0.
    ASSERT_EQUAL_128(0x0000000000000000, 0x63d063d063d063d0, q0);
    // 2042.5 is unrepresentable in FP16:
    ASSERT_EQUAL_128(0x67fa67fa67fa67fa, 0x67fa67fa67fa67fa, q8);

    // +Infinity - 2048.0 is +Infinity (0x7c00).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q9);
    // 1000.0 - 1024.0 = -24.0.
    ASSERT_EQUAL_128(0x0000000000000000, 0xce00ce00ce00ce00, q10);

    // Note: we test NaNs here as vectors aren't covered by process_nans_half
    // and we don't have traces for half-precision enabled hardware.
    // Quiet NaN from Signalling.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e227e227e227e22, q11);
    // Quiet NaN.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfe02fe02fe02fe02, q12);
  }
}
3759
// Check half-precision vector FMUL in the 4H and 8H arrangements, with
// finite and infinite operands.
TEST(fmul_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  // v6 and v9 multiply the same operands in swapped order.
  __ Fmul(v6.V4H(), v1.V4H(), v0.V4H());
  __ Fmul(v7.V8H(), v3.V8H(), v2.V8H());
  __ Fmul(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fmul(v9.V4H(), v0.V4H(), v1.V4H());
  __ Fmul(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // -2.0 * 24.0 = -48.0.
    ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q6);
    // 0.5 * 5.5 = 2.75.
    ASSERT_EQUAL_128(0x4180418041804180, 0x4180418041804180, q7);
    // +Infinity * 0.5 = +Infinity.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q9);
    // -Infinity * 24.0 = -Infinity.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
  }
}
3790
// Check half-precision vector FDIV in the 4H and 8H arrangements, with
// exact, rounded and infinite results.
TEST(fdiv_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fdiv(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fdiv(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fdiv(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fdiv(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fdiv(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 24.0 / -2.0 = -12.0.
    ASSERT_EQUAL_128(0x0000000000000000, 0xca00ca00ca00ca00, q6);
    // 5.5 / 0.5 = 11.0.
    ASSERT_EQUAL_128(0x4980498049804980, 0x4980498049804980, q7);
    // +Infinity / 0.5 = +Infinity.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
    // -0.083333... is unrepresentable in FP16:
    ASSERT_EQUAL_128(0x0000000000000000, 0xad55ad55ad55ad55, q9);
    // -Infinity / 24.0 = -Infinity.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
  }
}
3822
// Check FCVTL/FCVTL2 (widening conversions) from half to single precision and
// from single to double precision, using raw bit patterns as inputs.
TEST(neon_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v0 and v1 are read as half-precision lanes; v2-v4 as single-precision
  // lanes.
  __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
  __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
  __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  // Fcvtl converts the lower half of the source, Fcvtl2 the upper half.
  __ Fcvtl(v16.V4S(), v0.V4H());
  __ Fcvtl2(v17.V4S(), v0.V8H());
  __ Fcvtl(v18.V4S(), v1.V4H());
  __ Fcvtl2(v19.V4S(), v1.V8H());

  __ Fcvtl(v20.V2D(), v2.V2S());
  __ Fcvtl2(v21.V2D(), v2.V4S());
  __ Fcvtl(v22.V2D(), v3.V2S());
  __ Fcvtl2(v23.V2D(), v3.V4S());
  __ Fcvtl(v24.V2D(), v4.V2S());
  __ Fcvtl2(v25.V2D(), v4.V4S());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
    ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
    ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
    ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
    ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
    ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
  }
}
3861
3862
// Check FCVTN/FCVTN2 (narrowing conversions) from single to half precision
// and from double to single precision, using raw bit patterns as inputs.
TEST(neon_fcvtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v0-v2 are read as single-precision lanes; v3-v8 as double-precision
  // lanes.
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);

  // Each Fcvtn/Fcvtn2 pair fills the lower then the upper half of one
  // destination register. v17 only gets the Fcvtn half.
  __ Fcvtn(v16.V4H(), v0.V4S());
  __ Fcvtn2(v16.V8H(), v1.V4S());
  __ Fcvtn(v17.V4H(), v2.V4S());
  __ Fcvtn(v18.V2S(), v3.V2D());
  __ Fcvtn2(v18.V4S(), v4.V2D());
  __ Fcvtn(v19.V2S(), v5.V2D());
  __ Fcvtn2(v19.V4S(), v6.V2D());
  __ Fcvtn(v20.V2S(), v7.V2D());
  __ Fcvtn2(v20.V4S(), v8.V2D());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
    // Only the lower 64 bits of v17 are checked.
    ASSERT_EQUAL_64(0x7e7ffe7f00008000, d17);
    ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
  }
}
3898
// Regression test for FCVTN and FCVTXN when the destination register is the
// same as the source register. The in-place results must match the
// non-aliased ones, with the upper 64 bits of the destination clear.
TEST(neon_fcvtn_fcvtxn_regression_test) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(v0.V2D(), 0x3ff0000000000000, 0xbff0000000000000);
  __ Movi(v1.V2D(), 0x3f800000bf800000, 0x40000000c0000000);
  __ Movi(v2.V2D(), 0x3ff0000000000000, 0xbff0000000000000);

  // Non-aliased references.
  __ Fcvtn(v16.V2S(), v0.V2D());
  __ Fcvtn(v17.V4H(), v1.V4S());
  // In-place conversions.
  __ Fcvtn(v0.V2S(), v0.V2D());
  __ Fcvtn(v1.V4H(), v1.V4S());
  __ Fcvtxn(v2.V2S(), v2.V2D());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q2);
  }
}
3923
// Check FCVTXN/FCVTXN2 (double to single precision narrowing) in the vector
// forms and in the scalar form, using raw bit patterns as inputs.
TEST(neon_fcvtxn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // All inputs are read as double-precision lanes.
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
  __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
  // Each Fcvtxn/Fcvtxn2 pair fills the lower then the upper half of one
  // destination register.
  __ Fcvtxn(v16.V2S(), v0.V2D());
  __ Fcvtxn2(v16.V4S(), v1.V2D());
  __ Fcvtxn(v17.V2S(), v2.V2D());
  __ Fcvtxn2(v17.V4S(), v3.V2D());
  __ Fcvtxn(v18.V2S(), v4.V2D());
  __ Fcvtxn2(v18.V4S(), v5.V2D());
  __ Fcvtxn(v19.V2S(), v6.V2D());
  __ Fcvtxn2(v19.V4S(), v7.V2D());
  __ Fcvtxn(v20.V2S(), v8.V2D());
  __ Fcvtxn2(v20.V4S(), v9.V2D());
  // Scalar form.
  __ Fcvtxn(s21, d0);
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
    ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
    ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
  }
}
3961
// Check the three-register ADDP (pairwise add) instruction on byte lanes.
TEST(neon_3same_addp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Addp(v16.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // The lower half holds the pair sums from v0 and the upper half those
    // from v1.
    ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
  }
}
3978
// Check the three-register forms of SQDMULH and SQRDMULH, in vector (4H, 4S)
// and scalar (H, S) variants, including inputs that saturate.
TEST(neon_3same_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 supply the 16-bit operands and v2/v3 the 32-bit operands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqdmulh(h18, h0, h1);
  __ Sqdmulh(s19, s2, s3);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmulh(h22, h0, h1);
  __ Sqrdmulh(s23, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    // Lanes where both inputs are the most negative value saturate to the
    // most positive value (0x7fff / 0x7fffffff).
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0x7fffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  }
}
4013
// Check the by-element forms of SQDMULH and SQRDMULH, in vector (4H, 4S) and
// scalar (H, S) variants.
TEST(neon_byelement_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v2 supply the vector operands; v1/v3 supply the indexed element.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // The vector forms use element 1 and the scalar forms use element 0.
  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqdmulh(h18, h0, v1.H(), 0);
  __ Sqdmulh(s19, s2, v3.S(), 0);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmulh(h22, h0, v1.H(), 0);
  __ Sqrdmulh(s23, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
    ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0x7fffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  }
}
4048
// Check the three-register forms of SQRDMLAH (requires the RDM feature), in
// vector (4H, 4S) and scalar (H, S) variants.
TEST(neon_3same_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // Multiplicands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlah(h18, h0, h1);
  __ Sqrdmlah(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x0000040104010000, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000217fffffff, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4079
// Check the by-element forms of SQRDMLAH (requires the RDM feature), in
// vector (4H, 4S) and scalar (H, S) variants.
TEST(neon_byelement_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0/v2 supply the vector operands; v1/v3 supply the indexed element.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  // The vector forms use element 1 and the scalar forms use element 0.
  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlah(h18, h0, v1.H(), 0);
  __ Sqrdmlah(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x0000040104018000, q16);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x0000002100107ff0, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4110
// Check the three-register forms of SQRDMLSH (requires the RDM feature), in
// vector (4H, 4S) and scalar (H, S) variants.
TEST(neon_3same_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // Multiplicands.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004000500);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000100080);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlsh(h18, h0, h1);
  __ Sqrdmlsh(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x40003fff40003ffb, q16);
    ASSERT_EQUAL_128(0x40003fffc0004000, 0x40004000c0004000, q17);
    ASSERT_EQUAL_128(0, 0x3ffb, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4141
// Check the by-element forms of SQRDMLSH (requires the RDM feature), in
// vector (4H, 4S) and scalar (H, S) variants.
TEST(neon_byelement_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0/v2 supply the vector operands; v1/v3 supply the indexed element.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  // The vector forms use element 1 and the scalar forms use element 0.
  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlsh(h18, h0, v1.H(), 0);
  __ Sqrdmlsh(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x4000400040004010, q16);
    ASSERT_EQUAL_128(0x4000400040004010, 0x4000400040004010, q17);
    ASSERT_EQUAL_128(0, 0xc000, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4172
// Check the three-register forms of SDOT and UDOT (requires the DotProduct
// feature) in the 4S/16B and 2S/8B arrangements.
TEST(neon_3same_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte operands.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  // The same operands are used for the signed and unsigned variants, so the
  // bytes with the top bit set in v1 give different results.
  __ Sdot(v16.V4S(), v0.V16B(), v1.V16B());
  __ Sdot(v17.V2S(), v1.V8B(), v2.V8B());

  __ Udot(v18.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v19.V2S(), v1.V8B(), v2.V8B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000037d8000045f8, 0x000037d8000045f8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e00004000, q17);
    ASSERT_EQUAL_128(0x000119d8000127f8, 0x000119d8000127f8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e00004000, q19);
  }
}
4203
// Check the by-element forms of SDOT and UDOT (requires the DotProduct
// feature) in the 4S/16B and 2S/8B arrangements.
TEST(neon_byelement_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte operands.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  // Every lane is multiplied against the four-byte group at index 1 of the
  // second source operand.
  __ Sdot(v16.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Sdot(v17.V2S(), v1.V8B(), v2.S4B(), 1);

  __ Udot(v18.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Udot(v19.V2S(), v1.V8B(), v2.S4B(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000037d8000037d8, 0x000037d8000037d8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e0000587e, q17);
    ASSERT_EQUAL_128(0x000119d8000119d8, 0x000119d8000119d8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e0000ca7e, q19);
  }
}
4234
4235
// Check SADDLP (signed add long pairwise) for every source lane size, in both
// the 128-bit and 64-bit arrangements.
TEST(neon_2regmisc_saddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Saddlp(v16.V8H(), v0.V16B());
  __ Saddlp(v17.V4H(), v0.V8B());

  __ Saddlp(v18.V4S(), v0.V8H());
  __ Saddlp(v19.V2S(), v0.V4H());

  __ Saddlp(v20.V2D(), v0.V4S());
  __ Saddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form must match the lower half of the 128-bit form and
    // leave the upper half clear.
    ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
    ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
    ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
  }
}
4264
// Check UADDLP (unsigned add long pairwise) for every source lane size, in
// both the 128-bit and 64-bit arrangements.
TEST(neon_2regmisc_uaddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Uaddlp(v16.V8H(), v0.V16B());
  __ Uaddlp(v17.V4H(), v0.V8B());

  __ Uaddlp(v18.V4S(), v0.V8H());
  __ Uaddlp(v19.V2S(), v0.V4H());

  __ Uaddlp(v20.V2D(), v0.V4S());
  __ Uaddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form must match the lower half of the 128-bit form and
    // leave the upper half clear.
    ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
    ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
    ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
  }
}
4293
// Check SADALP (signed add and accumulate long pairwise) for every source
// lane size, in both the 128-bit and 64-bit arrangements.
TEST(neon_2regmisc_sadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is first loaded with its initial accumulator value, then
  // accumulated into.
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Sadalp(v16.V8H(), v0.V16B());
  __ Sadalp(v17.V4H(), v0.V8B());

  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Sadalp(v18.V4S(), v1.V8H());
  __ Sadalp(v19.V2S(), v1.V4H());

  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Sadalp(v20.V2D(), v2.V4S());
  __ Sadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit forms leave the upper half of the destination clear, even
    // though the accumulator copies had non-zero upper halves.
    ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
    ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4332
// Check UADALP (unsigned add and accumulate long pairwise) for every source
// lane size, in both the 128-bit and 64-bit arrangements.
TEST(neon_2regmisc_uadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is first loaded with its initial accumulator value, then
  // accumulated into.
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Uadalp(v16.V8H(), v0.V16B());
  __ Uadalp(v17.V4H(), v0.V8B());

  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Uadalp(v18.V4S(), v1.V8H());
  __ Uadalp(v19.V2S(), v1.V4H());

  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Uadalp(v20.V2D(), v2.V4S());
  __ Uadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit forms leave the upper half of the destination clear, even
    // though the accumulator copies had non-zero upper halves.
    ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
    ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
    ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4371
// Check the three-register forms of MLA, MLS and MUL on byte lanes, all using
// the same pair of multiplicands.
TEST(neon_3same_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // v16 and v17 start from the same accumulator value.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
  __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
  __ Mul(v18.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
    ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
    ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
  }
}
4395
4396
// Check the three-register absolute-difference instructions on byte lanes:
// SABA/UABA (accumulating) and SABD/UABD (non-accumulating).
TEST(neon_3same_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // Initial accumulator values for the accumulating forms.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
  __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
  __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
  __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
    ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
    ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
    ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
  }
}
4422
4423
// Check the by-element forms of MUL, MLA and MLS for the 4H, 8H, 2S and 4S
// arrangements.
TEST(neon_byelement_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 supplies the vector operand and v1 the indexed element.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  // The 64-bit forms use the lowest element index and the 128-bit forms the
  // highest one.
  __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
  __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
  __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
  __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);

  // Mla: accumulate the same products onto small non-zero values.
  __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
  __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
  __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
  __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
  __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
  __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
  __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);

  // Mls: v24-v27 are preloaded with the values expected from the Mul cases
  // above (q16-q19), so subtracting the same products must give zero.
  __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
  __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
  __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
  __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
  __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
  __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
    ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);

    ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
    ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
    ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);

    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4478
4479
// Checks the by-element widening multiplies (H sources, S results): the
// signed and unsigned Smull/Umull, Smlal/Umlal and Smlsl/Umlsl forms and their
// "2" variants.
TEST(neon_byelement_mull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  // Plain widening products. The same values are reused below as the
  // multiply-subtract accumulators.
  __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
  __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);

  // Multiply-accumulate onto small seeds: expected result is product + seed.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);

  // Multiply-subtract: accumulators hold the values expected in q16-q19, so
  // every lane must end up zero.
  __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
  __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
  __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
  __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);

  __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Smull/Smull2/Umull/Umull2.
    ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
    ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
    ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
    ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);

    // Smlal/Smlal2/Umlal/Umlal2.
    ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
    ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
    ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
    ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);

    // Smlsl/Smlsl2/Umlsl/Umlsl2.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4535
4536
// Checks the by-element Sqdmull, Sqdmlal and Sqdmlsl instructions in their
// vector, "2" and scalar (S <- H) forms.
TEST(neon_byelement_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  // Plain doubling products. The same values are reused below as the Sqdmlsl
  // accumulators.
  __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmull(s18, h0, v1.H(), 7);

  // Accumulate onto small seeds: expected result is product + seed.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlal(s22, h0, v1.H(), 7);

  // Subtract from accumulators that hold the values expected in q16-q18, so
  // the results must be zero.
  __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
  __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
  __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);

  __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlsl(s26, h0, v1.H(), 7);

  END();

  if (CAN_RUN()) {
    RUN();

    // Sqdmull.
    ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
    ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
    ASSERT_EQUAL_128(0, 0x0000ab54, q18);

    // Sqdmlal. The scalar form is expected to clear the seed bits above S.
    ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
    ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
    ASSERT_EQUAL_128(0, 0x0000ab55, q22);

    // Sqdmlsl.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x00000000, q26);
  }
}
4583
4584
// Checks Sabal, Uabal, Sabal2 and Uabal2 with byte sources accumulating into
// halfword lanes.
TEST(neon_3diff_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // All four accumulators start from the same pattern.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
    ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
    ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
  }
}
4612
4613
// Checks the three-register Sqdmull/Sqdmull2 forms (H->S and S->D, vector and
// scalar). The inputs are built only from the extreme signed values, so the
// minimum * minimum lanes are expected to saturate (e.g. 0x8000 * 0x8000
// yields 0x7fffffff).
TEST(neon_3diff_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1: halfword lanes of 0x7fff and 0x8000.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  // v2/v3: word lanes of 0x7fffffff and 0x80000000.
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmull(s20, h0, h1);
  __ Sqdmull(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
    ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4643
4644
// Checks the three-register Sqdmlal/Sqdmlal2 forms (H->S and S->D, vector and
// scalar) with inputs built from the extreme signed values.
TEST(neon_3diff_sqdmlal) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1: halfword lanes of 0x7fff and 0x8000.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  // v2/v3: word lanes of 0x7fffffff and 0x80000000.
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Accumulator seeds; several sit at or next to the signed limits.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
  __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlal(s20, h0, h1);
  __ Sqdmlal(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
    ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4681
4682
// Checks the three-register Sqdmlsl/Sqdmlsl2 forms (H->S and S->D, vector and
// scalar) with inputs built from the extreme signed values.
TEST(neon_3diff_sqdmlsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1: halfword lanes of 0x7fff and 0x8000.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  // v2/v3: word lanes of 0x7fffffff and 0x80000000.
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Accumulator seeds; several sit at or next to the signed limits.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
  __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlsl(s20, h0, h1);
  __ Sqdmlsl(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
    ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
    ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x80000002, q20);
    ASSERT_EQUAL_128(0, 0x8000000000000002, q21);
  }
}
4719
4720
// Checks Smlal, Umlal, Smlal2 and Umlal2 with byte sources accumulating into
// halfword lanes.
TEST(neon_3diff_mla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // All four accumulators start from the same pattern.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
    ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
    ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
    ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
  }
}
4748
4749
// Checks Smlsl, Umlsl, Smlsl2 and Umlsl2 with byte sources subtracting from
// halfword lanes.
TEST(neon_3diff_mls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  // All four accumulators start from the same pattern.
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
    ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
    ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
    ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
  }
}
4777
4778
// Checks the register-register integer compares (Cmeq, Cmge, Cmgt, Cmhi,
// Cmhs) on byte lanes. Each instruction is run twice: v0 against itself and
// v0 against v1.
TEST(neon_3same_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
  __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
  __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
  __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
  __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
  __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
  __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
  __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
  __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
  __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // A lane is 0xff where the comparison holds and 0x00 where it does not.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
    ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
  }
}
4814
4815
// Checks the scalar (D register) forms of Cmeq, Cmge, Cmgt, Cmhi and Cmhs,
// comparing d0 and d1 with themselves and with each other.
TEST(neon_3same_scalar_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits (d0 = 0xff00aa5500ff55aa, d1 = 0xaa55ff55555500ff)
  // are read by the scalar compares below.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(d16, d0, d0);
  __ Cmeq(d17, d0, d1);
  __ Cmeq(d18, d1, d0);
  __ Cmge(d19, d0, d0);
  __ Cmge(d20, d0, d1);
  __ Cmge(d21, d1, d0);
  __ Cmgt(d22, d0, d0);
  __ Cmgt(d23, d0, d1);
  __ Cmhi(d24, d0, d0);
  __ Cmhi(d25, d0, d1);
  __ Cmhs(d26, d0, d0);
  __ Cmhs(d27, d0, d1);
  __ Cmhs(d28, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The D lane is all-ones where the comparison holds; the upper 64 bits of
    // every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q28);
  }
}
4858
// Checks the half-precision vector Fcmeq (8H and 4H): 0, NaN, -1.0 and 1.0 are
// each compared against 0.
TEST(neon_fcmeq_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmeq(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmeq(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmeq(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmeq(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmeq(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmeq(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmeq(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmeq(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only 0 == 0 holds. The 4H forms are expected to leave the upper 64 bits
    // clear.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v11);
  }
}
4895
// Checks the half-precision scalar Fcmeq: 0, a value with raw bits 0xffff,
// -1.0 and 1.0 are each compared against 0.
TEST(neon_fcmeq_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmeq(h4, h0, h0);
  __ Fcmeq(h5, h1, h0);
  __ Fcmeq(h6, h2, h0);
  __ Fcmeq(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask, so it is checked through its raw bits:
    // 0xffff where the comparison holds, 0x0000 otherwise.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h7);
  }
}
4924
// Checks the half-precision vector Fcmge (8H and 4H): 0, NaN, -1.0 and 1.0 are
// each compared against 0.
TEST(neon_fcmge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0 >= 0 and 1.0 >= 0 hold; NaN and -1.0 do not. The 4H forms are expected
    // to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
4961
// Checks the half-precision scalar Fcmge: 0, a value with raw bits 0xffff,
// -1.0 and 1.0 are each compared against 0.
TEST(neon_fcmge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmge(h4, h0, h0);
  __ Fcmge(h5, h1, h0);
  __ Fcmge(h6, h2, h0);
  __ Fcmge(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask, so it is checked through its raw bits:
    // 0xffff where the comparison holds, 0x0000 otherwise.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
4990
// Checks the half-precision vector Fcmgt (8H and 4H): 0, NaN, -1.0 and 1.0 are
// each compared against 0.
TEST(neon_fcmgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only 1.0 > 0 holds. The 4H forms are expected to leave the upper 64 bits
    // clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5027
// Checks the half-precision scalar Fcmgt: 0, a value with raw bits 0xffff,
// -1.0 and 1.0 are each compared against 0.
TEST(neon_fcmgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmgt(h4, h0, h0);
  __ Fcmgt(h5, h1, h0);
  __ Fcmgt(h6, h2, h0);
  __ Fcmgt(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask, so it is checked through its raw bits:
    // 0xffff where the comparison holds, 0x0000 otherwise.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5056
// Checks the half-precision vector Facge (8H and 4H): 0, NaN, -1.0 and 1.0 are
// each compared against 0.
TEST(neon_facge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Facge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Everything except NaN is expected to pass, including -1.0. The 4H forms
    // are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5093
// Checks the half-precision scalar Facge: 0, a value with raw bits 0xffff,
// -1.0 and 1.0 are each compared against 0.
TEST(neon_facge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Facge(h4, h0, h0);
  __ Facge(h5, h1, h0);
  __ Facge(h6, h2, h0);
  __ Facge(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask, so it is checked through its raw bits:
    // 0xffff where the comparison holds, 0x0000 otherwise.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5122
// Checks the half-precision vector Facgt (8H and 4H): 0, NaN, -1.0 and 1.0 are
// each compared against 0.
TEST(neon_facgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Facgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Both -1.0 and 1.0 are expected to pass; 0 and NaN are not. The 4H forms
    // are expected to leave the upper 64 bits clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5159
// Checks the half-precision scalar Facgt: 0, a value with raw bits 0xffff,
// -1.0 and 1.0 are each compared against 0.
TEST(neon_facgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Facgt(h4, h0, h0);
  __ Facgt(h5, h1, h0);
  __ Facgt(h6, h2, h0);
  __ Facgt(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is a bit mask, so it is checked through its raw bits:
    // 0xffff where the comparison holds, 0x0000 otherwise.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5188
// Checks Fcmeq against the immediate 0.0 in scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // Note: v3 is initialised but not read by any comparison in this test.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmeq(s16, s0, 0.0);
  __ Fcmeq(s17, s1, 0.0);
  __ Fcmeq(s18, s2, 0.0);
  __ Fcmeq(d19, d0, 0.0);
  __ Fcmeq(d20, d1, 0.0);
  __ Fcmeq(d21, d2, 0.0);
  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Only the zero input is expected to compare equal.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  }
}
5226
// Checks Fcmge against the immediate 0.0 in scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmge(s16, s0, 0.0);
  __ Fcmge(s17, s1, 0.0);
  __ Fcmge(s18, s2, 0.0);
  __ Fcmge(d19, d0, 0.0);
  __ Fcmge(d20, d1, 0.0);
  __ Fcmge(d21, d3, 0.0);
  __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmge(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Zero and positive inputs are expected to pass; NaN and negative inputs
    // are not.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5264
5265
// Checks Fcmgt against the immediate 0.0 in scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmgt(s16, s0, 0.0);
  __ Fcmgt(s17, s1, 0.0);
  __ Fcmgt(s18, s2, 0.0);
  __ Fcmgt(d19, d0, 0.0);
  __ Fcmgt(d20, d1, 0.0);
  __ Fcmgt(d21, d3, 0.0);
  __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Only the positive input is expected to pass.
    ASSERT_EQUAL_128(0, 0x00000000, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5303
// Checks Fcmle against the immediate 0.0 in scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmle(s16, s0, 0.0);
  __ Fcmle(s17, s1, 0.0);
  __ Fcmle(s18, s3, 0.0);
  __ Fcmle(d19, d0, 0.0);
  __ Fcmle(d20, d1, 0.0);
  __ Fcmle(d21, d2, 0.0);
  __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmle(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Zero and negative inputs are expected to pass; NaN and positive inputs
    // are not.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5341
5342
// Checks Fcmlt against the immediate 0.0 in scalar (S, D) and vector (2S, 4S,
// 2D) forms.
TEST(neon_2regmisc_fcmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmlt(s16, s0, 0.0);
  __ Fcmlt(s17, s1, 0.0);
  __ Fcmlt(s18, s3, 0.0);
  __ Fcmlt(d19, d0, 0.0);
  __ Fcmlt(d20, d1, 0.0);
  __ Fcmlt(d21, d2, 0.0);
  __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Only the negative input is expected to pass.
    ASSERT_EQUAL_128(0, 0x00000000, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5380
// Checks Cmeq against immediate zero for every integer vector arrangement and
// the D scalar form. A result lane is all ones when the source lane equals
// zero and all zeros otherwise; 64-bit forms leave the upper half clear.
TEST(neon_2regmisc_cmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmeq(v16.V8B(), v1.V8B(), 0);
  __ Cmeq(v17.V16B(), v1.V16B(), 0);
  __ Cmeq(v18.V4H(), v1.V4H(), 0);
  __ Cmeq(v19.V8H(), v1.V8H(), 0);
  __ Cmeq(v20.V2S(), v0.V2S(), 0);
  __ Cmeq(v21.V4S(), v0.V4S(), 0);
  __ Cmeq(d22, d0, 0);
  __ Cmeq(d23, d1, 0);
  __ Cmeq(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
    ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
    ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5414
5415
// Checks Cmge against immediate zero (signed "greater than or equal") for
// every integer vector arrangement and the D scalar form. A result lane is
// all ones when the source lane, read as a signed value, is >= 0.
TEST(neon_2regmisc_cmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmge(v16.V8B(), v1.V8B(), 0);
  __ Cmge(v17.V16B(), v1.V16B(), 0);
  __ Cmge(v18.V4H(), v1.V4H(), 0);
  __ Cmge(v19.V8H(), v1.V8H(), 0);
  __ Cmge(v20.V2S(), v0.V2S(), 0);
  __ Cmge(v21.V4S(), v0.V4S(), 0);
  __ Cmge(d22, d0, 0);
  __ Cmge(d23, d1, 0);
  __ Cmge(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
    ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5449
5450
// Checks Cmlt against immediate zero (signed "less than") for every integer
// vector arrangement and the D scalar form. A result lane is all ones when
// the source lane, read as a signed value, is negative.
TEST(neon_2regmisc_cmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmlt(v16.V8B(), v1.V8B(), 0);
  __ Cmlt(v17.V16B(), v1.V16B(), 0);
  __ Cmlt(v18.V4H(), v1.V4H(), 0);
  __ Cmlt(v19.V8H(), v1.V8H(), 0);
  __ Cmlt(v20.V2S(), v1.V2S(), 0);
  __ Cmlt(v21.V4S(), v1.V4S(), 0);
  __ Cmlt(d22, d0, 0);
  __ Cmlt(d23, d1, 0);
  __ Cmlt(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
    ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5484
5485
// Checks Cmle against immediate zero (signed "less than or equal") for every
// integer vector arrangement and the D scalar form. A result lane is all ones
// when the source lane, read as a signed value, is <= 0.
TEST(neon_2regmisc_cmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmle(v16.V8B(), v1.V8B(), 0);
  __ Cmle(v17.V16B(), v1.V16B(), 0);
  __ Cmle(v18.V4H(), v1.V4H(), 0);
  __ Cmle(v19.V8H(), v1.V8H(), 0);
  __ Cmle(v20.V2S(), v1.V2S(), 0);
  __ Cmle(v21.V4S(), v1.V4S(), 0);
  __ Cmle(d22, d0, 0);
  __ Cmle(d23, d1, 0);
  __ Cmle(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
    ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
    ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5519
5520
// Checks Cmgt against immediate zero (signed "greater than") for every
// integer vector arrangement and the D scalar form. A result lane is all ones
// when the source lane, read as a signed value, is strictly positive.
TEST(neon_2regmisc_cmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmgt(v16.V8B(), v1.V8B(), 0);
  __ Cmgt(v17.V16B(), v1.V16B(), 0);
  __ Cmgt(v18.V4H(), v1.V4H(), 0);
  __ Cmgt(v19.V8H(), v1.V8H(), 0);
  __ Cmgt(v20.V2S(), v0.V2S(), 0);
  __ Cmgt(v21.V4S(), v0.V4S(), 0);
  __ Cmgt(d22, d0, 0);
  __ Cmgt(d23, d1, 0);
  __ Cmgt(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
    ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
  }
}
5554
5555
// Checks Neg (two's complement negate) for every integer vector arrangement
// and the D scalar form. The inputs include the most negative value of each
// lane size, which is expected to negate to itself (no saturation).
TEST(neon_2regmisc_neg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One input register per lane size: v0 bytes, v1 halfwords, v2 words,
  // v3/v4 doublewords. Movi arguments are (register, high, low).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Neg(v16.V8B(), v0.V8B());
  __ Neg(v17.V16B(), v0.V16B());
  __ Neg(v18.V4H(), v1.V4H());
  __ Neg(v19.V8H(), v1.V8H());
  __ Neg(v20.V2S(), v2.V2S());
  __ Neg(v21.V4S(), v2.V4S());
  __ Neg(d22, d3);
  __ Neg(v23.V2D(), v3.V2D());
  __ Neg(v24.V2D(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
    ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
    ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
    ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
    ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
  }
}
5592
5593
// Checks Sqneg (signed saturating negate) for every integer vector
// arrangement and the B/H/S/D scalar forms. Unlike Neg, the most negative
// value of each lane size is expected to saturate to the most positive one
// (e.g. 0x80 -> 0x7f).
TEST(neon_2regmisc_sqneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One input register per lane size: v0 bytes, v1 halfwords, v2 words,
  // v3/v4 doublewords. Movi arguments are (register, high, low).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqneg(v16.V8B(), v0.V8B());
  __ Sqneg(v17.V16B(), v0.V16B());
  __ Sqneg(v18.V4H(), v1.V4H());
  __ Sqneg(v19.V8H(), v1.V8H());
  __ Sqneg(v20.V2S(), v2.V2S());
  __ Sqneg(v21.V4S(), v2.V4S());
  __ Sqneg(v22.V2D(), v3.V2D());
  __ Sqneg(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each input.
  __ Sqneg(b24, b0);
  __ Sqneg(h25, h1);
  __ Sqneg(s26, s2);
  __ Sqneg(d27, d3);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
    ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
    ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
    ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

    ASSERT_EQUAL_128(0, 0x7f, q24);
    ASSERT_EQUAL_128(0, 0x8001, q25);
    ASSERT_EQUAL_128(0, 0x80000001, q26);
    ASSERT_EQUAL_128(0, 0x8000000000000001, q27);
  }
}
5638
5639
// Checks Abs (absolute value) for every integer vector arrangement and the D
// scalar form. The inputs include the most negative value of each lane size,
// which is expected to be left unchanged (no saturation).
TEST(neon_2regmisc_abs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One input register per lane size: v0 bytes, v1 halfwords, v2 words,
  // v3/v4 doublewords. Movi arguments are (register, high, low).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Abs(v16.V8B(), v0.V8B());
  __ Abs(v17.V16B(), v0.V16B());
  __ Abs(v18.V4H(), v1.V4H());
  __ Abs(v19.V8H(), v1.V8H());
  __ Abs(v20.V2S(), v2.V2S());
  __ Abs(v21.V4S(), v2.V4S());
  __ Abs(d22, d3);
  __ Abs(v23.V2D(), v3.V2D());
  __ Abs(v24.V2D(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
    ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
    ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
    ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
  }
}
5676
5677
// Checks Sqabs (signed saturating absolute value) for every integer vector
// arrangement and the B/H/S/D scalar forms. Unlike Abs, the most negative
// value of each lane size is expected to saturate to the most positive one
// (e.g. 0x80 -> 0x7f).
TEST(neon_2regmisc_sqabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One input register per lane size: v0 bytes, v1 halfwords, v2 words,
  // v3/v4 doublewords. Movi arguments are (register, high, low).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqabs(v16.V8B(), v0.V8B());
  __ Sqabs(v17.V16B(), v0.V16B());
  __ Sqabs(v18.V4H(), v1.V4H());
  __ Sqabs(v19.V8H(), v1.V8H());
  __ Sqabs(v20.V2S(), v2.V2S());
  __ Sqabs(v21.V4S(), v2.V4S());
  __ Sqabs(v22.V2D(), v3.V2D());
  __ Sqabs(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each input.
  __ Sqabs(b24, b0);
  __ Sqabs(h25, h1);
  __ Sqabs(s26, s2);
  __ Sqabs(d27, d3);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
    ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
    ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

    ASSERT_EQUAL_128(0, 0x7f, q24);
    ASSERT_EQUAL_128(0, 0x7fff, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
5722
// Checks Suqadd for every integer vector arrangement and the B/H/S/D scalar
// forms. The instruction accumulates into its destination: the source lanes
// are added as unsigned values to the signed lanes already held in the
// destination, and the expected results saturate at the signed maximum of the
// lane size (e.g. 0x7f for bytes).
TEST(neon_2regmisc_suqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each pair is (initial accumulator, addend) for one lane size:
  // v0/v1 bytes, v2/v3 halfwords, v4/v5 words, v6/v7 doublewords.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);

  __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);

  __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);

  // Seed the destinations of the vector forms with the accumulator values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  // Seed the destinations of the scalar forms.
  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Suqadd(v16.V8B(), v1.V8B());
  __ Suqadd(v17.V16B(), v1.V16B());
  __ Suqadd(v18.V4H(), v3.V4H());
  __ Suqadd(v19.V8H(), v3.V8H());
  __ Suqadd(v20.V2S(), v5.V2S());
  __ Suqadd(v21.V4S(), v5.V4S());
  __ Suqadd(v22.V2D(), v7.V2D());

  __ Suqadd(b23, b1);
  __ Suqadd(h24, h3);
  __ Suqadd(s25, s5);
  __ Suqadd(d26, d7);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    // The 64-bit and scalar forms are expected to clear the unused upper bits
    // even though the destinations were seeded with full 128-bit values.
    ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
    ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
    ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);

    ASSERT_EQUAL_128(0, 0x7f, q23);
    ASSERT_EQUAL_128(0, 0x7ffe, q24);
    ASSERT_EQUAL_128(0, 0x7fffffff, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
  }
}
5784
// Checks Usqadd for every integer vector arrangement and the B/H/S/D scalar
// forms. The instruction accumulates into its destination: the source lanes
// are added as signed values to the unsigned lanes already held in the
// destination, and the expected results saturate at both ends of the unsigned
// range (0 and the all-ones value of the lane size).
TEST(neon_2regmisc_usqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each pair is (initial accumulator, addend) for one lane size:
  // v0/v1 bytes, v2/v3 halfwords, v4/v5 words, v6/v7 doublewords.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);

  __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);

  __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);

  // Seed the destinations of the vector forms with the accumulator values.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  // Seed the destinations of the scalar forms.
  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Usqadd(v16.V8B(), v1.V8B());
  __ Usqadd(v17.V16B(), v1.V16B());
  __ Usqadd(v18.V4H(), v3.V4H());
  __ Usqadd(v19.V8H(), v3.V8H());
  __ Usqadd(v20.V2S(), v5.V2S());
  __ Usqadd(v21.V4S(), v5.V4S());
  __ Usqadd(v22.V2D(), v7.V2D());

  __ Usqadd(b23, b1);
  __ Usqadd(h24, h3);
  __ Usqadd(s25, s5);
  __ Usqadd(d26, d7);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    // The 64-bit and scalar forms are expected to clear the unused upper bits
    // even though the destinations were seeded with full 128-bit values.
    ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
    ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
    ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);

    ASSERT_EQUAL_128(0, 0xff, q23);
    ASSERT_EQUAL_128(0, 0x7ffe, q24);
    ASSERT_EQUAL_128(0, 0xffffffff, q25);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  }
}
5846
// Checks Xtn/Xtn2 (narrow by truncation) for the 8H->B, 4S->H and 2D->S
// conversions. Each result lane is the low half of the matching source lane;
// Xtn fills the low 64 bits of the destination and Xtn2 then fills the high
// 64 bits of the same register.
TEST(neon_2regmisc_xtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each Xtn must precede its Xtn2: the pair builds one 128-bit result.
  __ Xtn(v16.V8B(), v0.V8H());
  __ Xtn2(v16.V16B(), v1.V8H());
  __ Xtn(v17.V4H(), v1.V4S());
  __ Xtn2(v17.V8H(), v2.V4S());
  __ Xtn(v18.V2S(), v3.V2D());
  __ Xtn2(v18.V4S(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
  }
}
5874
5875
// Checks Sqxtn/Sqxtn2 (signed saturating narrow) for the vector 8H->B, 4S->H
// and 2D->S conversions and the scalar H->B, S->H and D->S forms. Source lanes
// that do not fit the narrower signed type are expected to saturate to its
// minimum or maximum (e.g. 0x80 / 0x7f for bytes).
TEST(neon_2regmisc_sqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each Sqxtn must precede its Sqxtn2: the pair builds one 128-bit result.
  __ Sqxtn(v16.V8B(), v0.V8H());
  __ Sqxtn2(v16.V16B(), v1.V8H());
  __ Sqxtn(v17.V4H(), v1.V4S());
  __ Sqxtn2(v17.V8H(), v2.V4S());
  __ Sqxtn(v18.V2S(), v3.V2D());
  __ Sqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the low lanes of v0.
  __ Sqxtn(b19, h0);
  __ Sqxtn(h20, s0);
  __ Sqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
5909
5910
// Checks Uqxtn/Uqxtn2 (unsigned saturating narrow) for the vector 8H->B,
// 4S->H and 2D->S conversions and the scalar H->B, S->H and D->S forms.
// Source lanes too large for the narrower unsigned type are expected to
// saturate to its all-ones maximum.
TEST(neon_2regmisc_uqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each Uqxtn must precede its Uqxtn2: the pair builds one 128-bit result.
  __ Uqxtn(v16.V8B(), v0.V8H());
  __ Uqxtn2(v16.V16B(), v1.V8H());
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the low lanes of v0.
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  }
}
5944
5945
// Checks Sqxtun/Sqxtun2 (signed-to-unsigned saturating narrow) for the vector
// 8H->B, 4S->H and 2D->S conversions and the scalar H->B, S->H and D->S forms.
// Negative source lanes are expected to saturate to zero and lanes too large
// for the narrower unsigned type to its all-ones maximum.
TEST(neon_2regmisc_sqxtun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each Sqxtun must precede its Sqxtun2: the pair builds one 128-bit result.
  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  // Scalar forms, all reading the low lanes of v0.
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  }
}
5979
// Checks the vector And (bitwise AND) in its 16B and 8B forms, first with
// both operands being the same register and then with two operands whose
// bytes pair up the 0x00/0x55/0xaa/0xff patterns.
TEST(neon_3same_and) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ And(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ And(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ And(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ And(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
    ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
  }
}
6002
// Checks the vector Bic (bit clear: first operand AND NOT second operand) in
// its 16B and 8B forms, first with both operands being the same register
// (result all zeros) and then with two different bit patterns.
TEST(neon_3same_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
  }
}
6025
// Checks the vector Orr (bitwise OR) in its 16B and 8B forms, first with both
// operands being the same register and then with two operands whose bytes
// pair up the 0x00/0x55/0xaa/0xff patterns.
TEST(neon_3same_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
    ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
  }
}
6048
// Checks the vector register-to-register Mov for every arrangement. The
// 128-bit arrangements are expected to copy the whole register; the 64-bit
// arrangements copy the low 64 bits and leave the upper 64 bits zero.
TEST(neon_3same_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);

  // 128-bit arrangements.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  if (CAN_RUN()) {
    RUN();

    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);

    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);
  }
}
6079
// Checks the vector Orn (first operand OR NOT second operand) in its 16B and
// 8B forms, first with both operands being the same register (result all
// ones) and then with two different bit patterns.
TEST(neon_3same_orn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
    ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
  }
}
6102
// Checks the vector Eor (bitwise exclusive OR) in its 16B and 8B forms, first
// with both operands being the same register (result all zeros) and then
// with two different bit patterns.
TEST(neon_3same_eor) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
  }
}
6125
// Checks Bif (bitwise insert if false) in its 16B and 8B forms. The
// destination is also an input: each result bit is taken from the first
// source where the corresponding bit of the second source (the mask) is 0,
// and keeps the destination's previous value where the mask bit is 1.
TEST(neon_3same_bif) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is (initial destination, first source, mask).
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
    ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  }
}
6156
// Checks Bit (bitwise insert if true) in its 16B and 8B forms. The
// destination is also an input: each result bit is taken from the first
// source where the corresponding bit of the second source (the mask) is 1,
// and keeps the destination's previous value where the mask bit is 0.
TEST(neon_3same_bit) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is (initial destination, first source, mask).
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
    ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  }
}
6187
// Checks Bsl (bitwise select) in its 16B and 8B forms. The destination's
// initial value acts as the selector: each result bit is taken from the
// first source where the selector bit is 1 and from the second source where
// it is 0.
TEST(neon_3same_bsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is (selector / destination, first source, second source).
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
    ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
    ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  }
}
6218
6219
// Checks Smax (lane-wise signed maximum) for the B, H and S lane sizes in
// both 64-bit and 128-bit arrangements. The same two inputs are reused for
// every lane size, so the expected values differ only where the lane
// boundaries change which operand is the larger signed value.
TEST(neon_3same_smax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements (even destination registers).
  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destination registers).
  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
    ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  }
}
6248
6249
// Checks Smaxp (pairwise signed maximum) for the B, H and S lane sizes in
// both 64-bit and 128-bit arrangements. The maxima of adjacent lane pairs of
// the first source fill the lower half of the result, and those of the second
// source fill the upper half.
TEST(neon_3same_smaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements (even destination registers).
  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destination registers).
  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
    ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
    ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
    ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  }
}
6278
6279
// Checks the scalar form of Addp, which adds the two 64-bit lanes of a 2D
// source into a D register. The expected sums wrap modulo 2^64 (see q18) and
// the upper 64 bits of the destination are zero.
TEST(neon_addp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Movi arguments are (register, high 64 bits, low 64 bits).
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // ASSERT_EQUAL_128 arguments are (expected high, expected low, register).
    ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
    ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
    ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
  }
}
6303
// ADDV: integer add across all lanes of the source; the expected sums are the
// full sums truncated to the lane width.
TEST(neon_acrosslanes_addv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte reductions read v0.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());

  // Halfword reductions read v1.
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());

  // The word reduction reads v2.
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
  __ Addv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0xc7, q16);        // V8B
    ASSERT_EQUAL_128(0, 0x99, q17);        // V16B
    ASSERT_EQUAL_128(0, 0x55a9, q18);      // V4H
    ASSERT_EQUAL_128(0, 0x55fc, q19);      // V8H
    ASSERT_EQUAL_128(0, 0x1100a9fe, q20);  // V4S
  }
}
6331
6332
// SADDLV: signed add long across lanes; each sum is written to a scalar that
// is twice as wide as the source lanes.
TEST(neon_acrosslanes_saddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte lanes summed into a halfword.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());

  // Halfword lanes summed into a word.
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());

  // Word lanes summed into a doubleword.
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
  __ Saddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // The byte sums are negative, so they appear sign-extended to 16 bits.
    ASSERT_EQUAL_128(0, 0xffc7, q16);              // V8B
    ASSERT_EQUAL_128(0, 0xff99, q17);              // V16B
    ASSERT_EQUAL_128(0, 0x000055a9, q18);          // V4H
    ASSERT_EQUAL_128(0, 0x000055fc, q19);          // V8H
    ASSERT_EQUAL_128(0, 0x000000001100a9fe, q20);  // V4S
  }
}
6360
6361
// UADDLV: unsigned add long across lanes; each sum is written to a scalar that
// is twice as wide as the source lanes.
TEST(neon_acrosslanes_uaddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte lanes summed into a halfword.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());

  // Halfword lanes summed into a word.
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());

  // Word lanes summed into a doubleword.
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
  __ Uaddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // The carries out of the source lane width are kept in the wider result.
    ASSERT_EQUAL_128(0, 0x02c7, q16);              // V8B
    ASSERT_EQUAL_128(0, 0x0599, q17);              // V16B
    ASSERT_EQUAL_128(0, 0x000155a9, q18);          // V4H
    ASSERT_EQUAL_128(0, 0x000355fc, q19);          // V8H
    ASSERT_EQUAL_128(0, 0x000000021100a9fe, q20);  // V4S
  }
}
6389
6390
// SMAXV: signed maximum across all lanes of the source.
TEST(neon_acrosslanes_smaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte lanes.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());

  // Halfword lanes.
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());

  // Word lanes.
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
  __ Smaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Lanes with the top bit set are negative and never win a signed maximum
    // against the positive lanes present in each input.
    ASSERT_EQUAL_128(0, 0x33, q16);        // V8B
    ASSERT_EQUAL_128(0, 0x44, q17);        // V16B
    ASSERT_EQUAL_128(0, 0x55ff, q18);      // V4H
    ASSERT_EQUAL_128(0, 0x55ff, q19);      // V8H
    ASSERT_EQUAL_128(0, 0x66555555, q20);  // V4S
  }
}
6418
6419
// SMINV: signed minimum across all lanes of the source.
TEST(neon_acrosslanes_sminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte lanes.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());

  // Halfword lanes.
  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());

  // Word lanes.
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
  __ Sminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // The expected values are the most negative lanes of each input.
    ASSERT_EQUAL_128(0, 0xaa, q16);        // V8B
    ASSERT_EQUAL_128(0, 0x80, q17);        // V16B
    ASSERT_EQUAL_128(0, 0xffaa, q18);      // V4H
    ASSERT_EQUAL_128(0, 0xaaaa, q19);      // V8H
    ASSERT_EQUAL_128(0, 0xaaaaaaaa, q20);  // V4S
  }
}
6447
// UMAXV: unsigned maximum across all lanes of the source.
TEST(neon_acrosslanes_umaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte lanes.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());

  // Halfword lanes.
  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());

  // Word lanes.
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
  __ Umaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // For B and H lanes the 128-bit forms find a larger lane in the upper
    // half of the input than the 64-bit forms find in the lower half.
    ASSERT_EQUAL_128(0, 0xfc, q16);        // V8B
    ASSERT_EQUAL_128(0, 0xfe, q17);        // V16B
    ASSERT_EQUAL_128(0, 0xffaa, q18);      // V4H
    ASSERT_EQUAL_128(0, 0xffab, q19);      // V8H
    ASSERT_EQUAL_128(0, 0xffffffff, q20);  // V4S
  }
}
6475
6476
// UMINV: unsigned minimum across all lanes of the source.
TEST(neon_acrosslanes_uminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte lanes. The lower half has no zero byte; the upper half has one.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());

  // Halfword lanes. The lower half has no zero lane; the upper half has one.
  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());

  // Word lanes.
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
  __ Uminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x01, q16);        // V8B
    ASSERT_EQUAL_128(0, 0x00, q17);        // V16B
    ASSERT_EQUAL_128(0, 0x0001, q18);      // V4H
    ASSERT_EQUAL_128(0, 0x0000, q19);      // V8H
    ASSERT_EQUAL_128(0, 0x0000aa00, q20);  // V4S
  }
}
6504
6505
// SMIN (vector): signed lane-wise minimum, checked for the B, H and S lane
// sizes in both the 64-bit and the 128-bit forms.
TEST(neon_3same_smin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // Byte lanes.
  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());

  // Halfword lanes.
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());

  // Word lanes.
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Each 128-bit result shares its low half with the matching 64-bit result.
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffaaaaff, q16);  // V8B
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);  // V16B
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffaaaa55, q18);  // V4H
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);  // V8H
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffaa55ff, q20);  // V2S
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);  // V4S
  }
}
6534
6535
// UMAX (vector): unsigned lane-wise maximum, checked for the B, H and S lane
// sizes in both the 64-bit and the 128-bit forms.
TEST(neon_3same_umax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // Byte lanes.
  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());

  // Halfword lanes.
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());

  // Word lanes.
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Each 128-bit result shares its low half with the matching 64-bit result.
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffaaaaff, q16);  // V8B
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);  // V16B
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffaaaa55, q18);  // V4H
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);  // V8H
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffaa55ff, q20);  // V2S
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);  // V4S
  }
}
6564
6565
// UMIN (vector): unsigned lane-wise minimum, checked for the B, H and S lane
// sizes in both the 64-bit and the 128-bit forms.
TEST(neon_3same_umin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // Byte lanes.
  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());

  // Halfword lanes.
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());

  // Word lanes.
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Each 128-bit result shares its low half with the matching 64-bit result.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000005555, q16);  // V8B
    ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);  // V16B
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000055ff, q18);  // V4H
    ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);  // V8H
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000aa55, q20);  // V2S
    ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);  // V4S
  }
}
6594
6595
// FCADD: floating-point complex add where the second operand is rotated by 90
// or 270 degrees, checked for the 2D, 2S and 4S arrangements.
TEST(neon_3same_extra_fcadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // Complex values below are written "re + im*i". In a register, the real
  // part occupies the lower lane of each pair and the imaginary part the
  // upper lane.

  // Double-precision inputs (one complex number per register).
  __ Movi(v0.V2D(), 0x0, 0x4014000000000000);                 // 5 + 0i
  __ Movi(v1.V2D(), 0x4014000000000000, 0x0);                 // 0 + 5i
  __ Movi(v2.V2D(), 0x4024000000000000, 0x4024000000000000);  // 10 + 10i

  // Single-precision inputs (two complex numbers per register).
  // v3: upper pair 5 + 5i, lower pair 5 + 5i.
  __ Movi(v3.V2D(), 0x40a0000040a00000, 0x40a0000040a00000);
  // v4: upper pair 5 + 5i, lower pair 0 + 0i.
  __ Movi(v4.V2D(), 0x40a0000040a00000, 0x0);
  // v5: upper pair 0 + 0i, lower pair 16000 + 324567i.
  __ Movi(v5.V2D(), 0x0, 0x489e7ae0467a0000);

  // 2D: two chained operations that together compute
  // (10 + 10i) - (5 + 5i) == 5 + 5i.
  __ Fcadd(v31.V2D(), v2.V2D(), v1.V2D(), 90);
  __ Fcadd(v31.V2D(), v31.V2D(), v0.V2D(), 270);

  // 2D: two chained operations that together compute
  // (10 + 10i) + (5 + 5i) == 15 + 15i.
  __ Fcadd(v30.V2D(), v2.V2D(), v1.V2D(), 270);
  __ Fcadd(v30.V2D(), v30.V2D(), v0.V2D(), 90);

  // 2S: only the lower complex pair takes part.
  __ Fcadd(v29.V2S(), v4.V2S(), v5.V2S(), 90);
  __ Fcadd(v28.V2S(), v4.V2S(), v5.V2S(), 270);

  // 4S: both complex pairs take part.
  __ Fcadd(v27.V4S(), v3.V4S(), v4.V4S(), 90);
  __ Fcadd(v26.V4S(), v3.V4S(), v4.V4S(), 270);

  END();

  if (CAN_RUN()) {
    RUN();

    // 5 + 5i
    ASSERT_EQUAL_128(0x4014000000000000, 0x4014000000000000, q31);
    // 15 + 15i
    ASSERT_EQUAL_128(0x402e000000000000, 0x402e000000000000, q30);
    // -324567 + 16000i
    ASSERT_EQUAL_128(0x0, 0x467a0000c89e7ae0, q29);
    // 324567 - 16000i
    ASSERT_EQUAL_128(0x0, 0xc67a0000489e7ae0, q28);
    // Upper pair 0 + 10i, lower pair 5 + 5i.
    ASSERT_EQUAL_128(0x4120000000000000, 0x40a0000040a00000, q27);
    // Upper pair 10 + 0i, lower pair 5 + 5i.
    ASSERT_EQUAL_128(0x0000000041200000, 0x40a0000040a00000, q26);
  }
}
6642
6643
// FCMLA (vector): floating-point complex multiply-accumulate with rotation,
// checked for the 2S, 4S and 2D arrangements.
TEST(neon_3same_extra_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // Complex values below are written "re + im*i". In a register, the real
  // part occupies the lower lane of each pair and the imaginary part the
  // upper lane.

  // Single-precision operands, one complex number in the low 64 bits.
  __ Movi(v1.V2D(), 0x0, 0x40a0000040400000);  // 3 + 5i
  __ Movi(v2.V2D(), 0x0, 0x4040000040a00000);  // 5 + 3i

  __ Movi(v3.V2D(), 0x0, 0x4000000040400000);  // 3 + 2i
  __ Movi(v4.V2D(), 0x0, 0x40e000003f800000);  // 1 + 7i

  __ Movi(v5.V2D(), 0x0, 0x4000000040400000);  // 3 + 2i
  __ Movi(v6.V2D(), 0x0, 0x408000003f800000);  // 1 + 4i

  // Single-precision operands, two complex numbers per register.
  // v7: upper pair 2.5 + 1.5i, lower pair 1024 + 31.5i.
  __ Movi(v7.V2D(), 0x3fc0000040200000, 0x41fc000044800000);
  // v8: upper pair 412.75 + 2048i, lower pair 0 + 3645i.
  __ Movi(v8.V2D(), 0x4500000043ce6000, 0x4563d00000000000);

  // Double-precision operands, one complex number per register.
  // v9: 450000 + 2000i.
  __ Movi(v9.V2D(), 0x409f400000000000, 0x411b774000000000);
  // v10: 1250 + 30000i.
  __ Movi(v10.V2D(), 0x40dd4c0000000000, 0x4093880000000000);

  // FCMLA accumulates into its destination, so every destination starts at
  // zero.
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Complete complex products: a rotation-0 and a rotation-90 step are
  // accumulated into the same destination.
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 90);
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 0);

  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 0);
  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 90);

  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 90);
  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 0);

  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 0);
  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 90);

  // Single steps, each into its own zeroed destination.
  __ Fcmla(v27.V2S(), v1.V2S(), v2.V2S(), 0);
  __ Fcmla(v26.V2S(), v2.V2S(), v1.V2S(), 0);

  __ Fcmla(v25.V4S(), v7.V4S(), v8.V4S(), 270);
  __ Fcmla(v24.V4S(), v7.V4S(), v8.V4S(), 180);

  END();

  if (CAN_RUN()) {
    RUN();

    // (3 + 5i) * (5 + 3i) == 0 + 34i
    ASSERT_EQUAL_128(0x0, 0x4208000000000000, q31);
    // (3 + 2i) * (1 + 7i) == -11 + 23i
    ASSERT_EQUAL_128(0x0, 0x41b80000c1300000, q30);
    // (3 + 2i) * (1 + 4i) == -5 + 14i
    ASSERT_EQUAL_128(0x0, 0x41600000c0a00000, q29);

    // (450000 + 2000i) * (1250 + 30000i) == 502500000 + 13502500000i
    ASSERT_EQUAL_128(0x4209267e65000000, 0x41bdf38aa0000000, q28);

    // Rotation 0 only, v1 by v2: 15 + 9i.
    ASSERT_EQUAL_128(0x0, 0x4110000041700000, q27);
    // Rotation 0 only, v2 by v1: 15 + 25i.
    ASSERT_EQUAL_128(0x0, 0x41c8000041700000, q26);

    // Rotation 270 only: upper pair 3072 - 619.125i, lower pair 114817.5 + 0i.
    ASSERT_EQUAL_128(0xc41ac80045400000, 0x0000000047e040c0, q25);
    // Rotation 180 only: upper pair -1031.875 - 5120i, lower pair
    // 0 - 3732480i.
    ASSERT_EQUAL_128(0xc5a00000c480fc00, 0xca63d00000000000, q24);
  }
}
6716
6717
// FCMLA (by element): floating-point complex multiply-accumulate in which the
// second operand is a single complex element selected by index.
TEST(neon_byelement_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // Complex values below are written "re + im*i". In a register, the real
  // part occupies the lower lane of each pair and the imaginary part the
  // upper lane. All operands are single precision, two complex numbers per
  // register.

  // v1: upper pair 3 + 5i, lower pair 3 + 5i.
  __ Movi(v1.V2D(), 0x40a0000040400000, 0x40a0000040400000);
  // v2: upper pair 5 + 3i, lower pair 5 + 3i.
  __ Movi(v2.V2D(), 0x4040000040a00000, 0x4040000040a00000);
  // v3: upper pair 1 + 7i, lower pair 3 + 5i.
  __ Movi(v3.V2D(), 0x40e000003f800000, 0x40a0000040400000);
  // v4: upper pair 1 + 4i, lower pair 5 + 3i.
  __ Movi(v4.V2D(), 0x408000003f800000, 0x4040000040a00000);
  // v5: upper pair 1 + 4i, lower pair 1 + 7i.
  __ Movi(v5.V2D(), 0x408000003f800000, 0x40e000003f800000);
  // v6: upper pair 3 + 2i, lower pair 0 + 0i.
  __ Movi(v6.V2D(), 0x4000000040400000, 0x0);

  // FCMLA accumulates into its destination, so every destination starts at
  // zero.
  __ Movi(v22.V2D(), 0x0, 0x0);
  __ Movi(v23.V2D(), 0x0, 0x0);
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Complete complex products: a rotation-90 and a rotation-0 step are
  // accumulated into the same destination.
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 90);
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 0);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 90);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 0);

  // Every rotation on its own, multiplying v3 by element 1 of v4 (1 + 4i).
  __ Fcmla(v29.V4S(), v3.V4S(), v4.S(), 1, 0);
  __ Fcmla(v28.V4S(), v3.V4S(), v4.S(), 1, 90);
  __ Fcmla(v27.V4S(), v3.V4S(), v4.S(), 1, 180);
  __ Fcmla(v26.V4S(), v3.V4S(), v4.S(), 1, 270);

  // Every rotation on its own, multiplying v3 by element 0 of v4 (5 + 3i).
  __ Fcmla(v22.V4S(), v3.V4S(), v4.S(), 0, 0);
  __ Fcmla(v23.V4S(), v3.V4S(), v4.S(), 0, 90);
  __ Fcmla(v24.V4S(), v3.V4S(), v4.S(), 0, 180);
  __ Fcmla(v25.V4S(), v3.V4S(), v4.S(), 0, 270);

  END();

  if (CAN_RUN()) {
    RUN();

    // Full products.
    // Upper pair 0 + 34i, lower pair 0 + 34i.
    ASSERT_EQUAL_128(0x4208000000000000, 0x4208000000000000, q31);
    // Upper pair -5 + 14i, lower pair -11 + 23i.
    ASSERT_EQUAL_128(0x41600000c0a00000, 0x41b80000c1300000, q30);

    // Element 1, rotations 0, 90, 180 and 270.
    // Upper pair 1 + 4i, lower pair 3 + 12i.
    ASSERT_EQUAL_128(0x408000003f800000, 0x4140000040400000, q29);
    // Upper pair -28 + 7i, lower pair -20 + 5i.
    ASSERT_EQUAL_128(0x40e00000c1e00000, 0x40a00000c1a00000, q28);
    // Upper pair -1 - 4i, lower pair -3 - 12i.
    ASSERT_EQUAL_128(0xc0800000bf800000, 0xc1400000c0400000, q27);
    // Upper pair 28 - 7i, lower pair 20 - 5i.
    ASSERT_EQUAL_128(0xc0e0000041e00000, 0xc0a0000041a00000, q26);

    // Element 0, rotations 0, 90, 180 and 270.
    // Upper pair 5 + 3i, lower pair 15 + 9i.
    ASSERT_EQUAL_128(0x4040000040a00000, 0x4110000041700000, q22);
    // Upper pair -21 + 35i, lower pair -15 + 25i.
    ASSERT_EQUAL_128(0x420c0000c1a80000, 0x41c80000c1700000, q23);
    // Upper pair -5 - 3i, lower pair -15 - 9i.
    ASSERT_EQUAL_128(0xc0400000c0a00000, 0xc1100000c1700000, q24);
    // Upper pair 21 - 35i, lower pair 15 - 25i.
    ASSERT_EQUAL_128(0xc20c000041a80000, 0xc1c8000041700000, q25);
  }
}
6790
6791
// MVN (vector): bitwise NOT, requested through every arrangement the macro is
// called with here. The expected bits do not depend on the lane size.
TEST(neon_2regmisc_mvn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  // 64-bit forms.
  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  // 128-bit forms.
  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // 128-bit forms: every bit of the input is inverted.
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);  // V16B
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);  // V8H
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);  // V4S
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);  // V2D

    // 64-bit forms: the upper half of the destination is expected clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0xaa55aa55aa55aa55, q24);  // V8B
    ASSERT_EQUAL_128(0x0000000000000000, 0xaa55aa55aa55aa55, q25);  // V4H
    ASSERT_EQUAL_128(0x0000000000000000, 0xaa55aa55aa55aa55, q26);  // V2S
  }
}
6823
6824
// NOT (vector): bitwise NOT in the 16B and 8B arrangements.
TEST(neon_2regmisc_not) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // 128-bit form.
  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
  __ Not(v16.V16B(), v0.V16B());

  // 64-bit form.
  __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);
  __ Not(v17.V8B(), v1.V8B());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
    // The 8B form is expected to leave the upper half of q17 clear rather
    // than inverting it.
    ASSERT_EQUAL_128(0x0000000000000000, 0xff0000ffff0000ff, q17);
  }
}
6844
6845
// CLS, CLZ and CNT (vector): count leading sign bits, count leading zero bits
// and per-byte population count.
TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 holds small values with several leading zeros in every byte.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  // v1 holds the nibble sequences 0123...ef and fedc...10.
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // CLS of v1 for every B, H and S arrangement.
  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  // CLZ of v0 for every B, H and S arrangement.
  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  // CNT is only exercised on byte lanes: 8B reads v0, 16B reads v1.
  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // CLS.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);  // V8B
    ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);  // V16B
    ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);  // V4H
    ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);  // V8H
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);  // V2S
    ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);  // V4S

    // CLZ.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);  // V8B
    ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);  // V16B
    ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);  // V4H
    ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);  // V8H
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);  // V2S
    ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);  // V4S

    // CNT.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);  // V8B
    ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);  // V16B
  }
}
6894
// REV16, REV32, REV64 and RBIT (vector): element reversal inside 16-, 32- and
// 64-bit containers, and bit reversal inside each byte.
TEST(neon_2regmisc_rev) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 numbers its bytes 0x00..0x0f so that every move is visible.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  // v1 is only read by RBIT.
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Bytes reversed within each halfword.
  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  // Bytes, then halfwords, reversed within each word.
  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  // Bytes, halfwords, then words reversed within each doubleword.
  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  // Bits reversed within each byte.
  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // REV16.
    ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);  // V8B
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);  // V16B

    // REV32.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);  // V8B
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);  // V16B
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);  // V4H
    ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);  // V8H

    // REV64.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);  // V8B
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);  // V16B
    ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);  // V4H
    ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);  // V8H
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);  // V2S
    ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);  // V4S

    // RBIT.
    ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);  // V8B
    ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);  // V16B
  }
}
6945
6946
// SLI: shift left and insert. Each destination is pre-loaded with v0 so that
// the bits the instruction keeps from the destination can be observed.
TEST(neon_sli) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);  // Old dst.
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);  // Source.

  // Byte lanes.
  __ Mov(v16.V2D(), v0.V2D());
  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Mov(v17.V2D(), v0.V2D());
  __ Sli(v17.V16B(), v1.V16B(), 7);

  // Halfword lanes.
  __ Mov(v18.V2D(), v0.V2D());
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Mov(v19.V2D(), v0.V2D());
  __ Sli(v19.V8H(), v1.V8H(), 15);

  // Word lanes. A shift of zero replaces the destination lanes entirely.
  __ Mov(v20.V2D(), v0.V2D());
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Mov(v21.V2D(), v0.V2D());
  __ Sli(v21.V4S(), v1.V4S(), 31);

  // Doubleword lanes: vector form, then scalar form.
  __ Mov(v22.V2D(), v0.V2D());
  __ Sli(v22.V2D(), v1.V2D(), 48);
  __ Mov(v23.V2D(), v0.V2D());
  __ Sli(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit and scalar forms are expected to clear the upper half, even
    // though it held a copy of v0 beforehand.
    ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);  // V8B
    ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);  // V16B
    ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);  // V4H
    ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);  // V8H
    ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);  // V2S
    ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);  // V4S
    ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);  // V2D

    ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);  // D
  }
}
6990
6991
// SRI: shift right and insert. Each destination is pre-loaded with v0 so that
// the bits the instruction keeps from the destination can be observed.
TEST(neon_sri) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);  // Old dst.
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);  // Source.

  // Byte lanes.
  __ Mov(v16.V2D(), v0.V2D());
  __ Sri(v16.V8B(), v1.V8B(), 4);
  __ Mov(v17.V2D(), v0.V2D());
  __ Sri(v17.V16B(), v1.V16B(), 7);

  // Halfword lanes.
  __ Mov(v18.V2D(), v0.V2D());
  __ Sri(v18.V4H(), v1.V4H(), 8);
  __ Mov(v19.V2D(), v0.V2D());
  __ Sri(v19.V8H(), v1.V8H(), 15);

  // Word lanes.
  __ Mov(v20.V2D(), v0.V2D());
  __ Sri(v20.V2S(), v1.V2S(), 1);
  __ Mov(v21.V2D(), v0.V2D());
  __ Sri(v21.V4S(), v1.V4S(), 31);

  // Doubleword lanes: vector form, then scalar form.
  __ Mov(v22.V2D(), v0.V2D());
  __ Sri(v22.V2D(), v1.V2D(), 48);
  __ Mov(v23.V2D(), v0.V2D());
  __ Sri(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit and scalar forms are expected to clear the upper half, even
    // though it held a copy of v0 beforehand.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);  // V8B
    ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);  // V16B
    ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);  // V4H
    ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);  // V8H
    ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);  // V2S
    ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);  // V4S
    ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);  // V2D

    ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);  // D
  }
}
7035
7036
// SHRN/SHRN2: shift right narrow. The plain form writes the lower half of the
// destination and the "2" form then fills the upper half.
TEST(neon_shrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources built from values at or next to the signed and unsigned limits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is loaded but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // 16-bit lanes narrowed to 8 bits.
  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);

  // 32-bit lanes narrowed to 16 bits.
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);

  // 64-bit lanes narrowed to 32 bits.
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Upper half: the shift by one. Lower half: the shift by the full
    // narrow width.
    ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);  // B
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);  // H
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);  // S
  }
}
7064
7065
// RSHRN/RSHRN2: rounding shift right narrow. The plain form writes the lower
// half of the destination and the "2" form then fills the upper half.
TEST(neon_rshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources built from values at or next to the signed and unsigned limits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is loaded but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // 16-bit lanes narrowed to 8 bits.
  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);

  // 32-bit lanes narrowed to 16 bits.
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);

  // 64-bit lanes narrowed to 32 bits.
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Upper half: the shift by one. Lower half: the shift by the full
    // narrow width.
    ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);  // B
    ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);  // H
    ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);  // S
  }
}
7093
7094
// UQSHRN/UQSHRN2: unsigned saturating shift right narrow, in vector and scalar
// forms.
TEST(neon_uqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources built from values at or next to the signed and unsigned limits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is loaded but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // 16-bit lanes narrowed to 8 bits: vector pair, then scalar.
  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(b19, h0, 8);

  // 32-bit lanes narrowed to 16 bits: vector pair, then scalar.
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(h20, s1, 16);

  // 64-bit lanes narrowed to 32 bits: vector pair, then scalar.
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);
  __ Uqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results. In the upper halves (shift by one), lanes that do not
    // fit the narrow type saturate to all ones.
    ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);  // B
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);  // H
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);  // S

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);  // B
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);  // H
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);  // S
  }
}
7129
7130
// UQRSHRN/UQRSHRN2: unsigned saturating rounding shift right narrow, in vector
// and scalar forms.
TEST(neon_uqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources built from values at or next to the signed and unsigned limits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is loaded but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // 16-bit lanes narrowed to 8 bits: vector pair, then scalar.
  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(b19, h0, 8);

  // 32-bit lanes narrowed to 16 bits: vector pair, then scalar.
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(h20, s1, 16);

  // 64-bit lanes narrowed to 32 bits: vector pair, then scalar.
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);
  __ Uqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results. In the upper halves (shift by one), lanes that do not
    // fit the narrow type saturate to all ones.
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);  // B
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);  // H
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);  // S

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);  // B
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);  // H
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);  // S
  }
}
7165
7166
// SQSHRN/SQSHRN2: signed saturating shift right narrow, in vector and scalar
// forms.
TEST(neon_sqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources built from values at or next to the signed and unsigned limits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is loaded but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // 16-bit lanes narrowed to 8 bits: vector pair, then scalar.
  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(b19, h0, 8);

  // 32-bit lanes narrowed to 16 bits: vector pair, then scalar.
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(h20, s1, 16);

  // 64-bit lanes narrowed to 32 bits: vector pair, then scalar.
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);
  __ Sqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results. In the upper halves (shift by one), lanes that do not
    // fit the narrow type saturate to its signed minimum or maximum.
    ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);  // B
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);  // H
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);  // S

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);  // B
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);  // H
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);  // S
  }
}
7201
7202
// SQRSHRN/SQRSHRN2: signed saturating rounding shift right narrow, in vector
// and scalar forms.
TEST(neon_sqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources built from values at or next to the signed and unsigned limits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is loaded but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // 16-bit lanes narrowed to 8 bits: vector pair, then scalar.
  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(b19, h0, 8);

  // 32-bit lanes narrowed to 16 bits: vector pair, then scalar.
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(h20, s1, 16);

  // 64-bit lanes narrowed to 32 bits: vector pair, then scalar.
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);
  __ Sqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results. In the upper halves (shift by one), lanes that do not
    // fit the narrow type saturate to its signed minimum or maximum.
    ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);  // B
    ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);  // H
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);  // S

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);  // B
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);  // H
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);  // S
  }
}
7237
7238
// SQSHRUN/SQSHRUN2: signed saturating shift right unsigned narrow, in vector
// and scalar forms.
TEST(neon_sqshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources built from values at or next to the signed and unsigned limits.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is loaded but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // 16-bit lanes narrowed to 8 bits: vector pair, then scalar.
  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(b19, h0, 8);

  // 32-bit lanes narrowed to 16 bits: vector pair, then scalar.
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(h20, s1, 16);

  // 64-bit lanes narrowed to 32 bits: vector pair, then scalar.
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);
  __ Sqshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results. Negative source lanes saturate to zero; in the upper
    // halves (shift by one), positive lanes that do not fit saturate to all
    // ones.
    ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);  // B
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);  // H
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);  // S

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);  // B
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);  // H
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);  // S
  }
}
7273
7274
// Checks Sqrshrun/Sqrshrun2 in their vector forms and Sqrshrun in its scalar
// forms against fixed expected register contents.
TEST(neon_sqrshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Input operands.
  // NOTE(review): v4 is initialised here but not read by any instruction
  // in this test.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: each destination is written by a Sqrshrun followed by a
  // Sqrshrun2, one pair per lane size.
  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results.
    ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7309
// Checks the immediate form of Bic for the H and S lane arrangements, with
// each shift amount used below, starting from one common register pattern.
TEST(neon_modimm_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every destination register starts from this 128-bit pattern.
  const uint64_t kInitHigh = 0x00aaff55ff0055aa;
  const uint64_t kInitLow = 0x5555ffff0000aaaa;

  START();

  __ Movi(v16.V2D(), kInitHigh, kInitLow);
  __ Movi(v17.V2D(), kInitHigh, kInitLow);
  __ Movi(v18.V2D(), kInitHigh, kInitLow);
  __ Movi(v19.V2D(), kInitHigh, kInitLow);
  __ Movi(v20.V2D(), kInitHigh, kInitLow);
  __ Movi(v21.V2D(), kInitHigh, kInitLow);
  __ Movi(v22.V2D(), kInitHigh, kInitLow);
  __ Movi(v23.V2D(), kInitHigh, kInitLow);
  __ Movi(v24.V2D(), kInitHigh, kInitLow);
  __ Movi(v25.V2D(), kInitHigh, kInitLow);
  __ Movi(v26.V2D(), kInitHigh, kInitLow);
  __ Movi(v27.V2D(), kInitHigh, kInitLow);

  // 16-bit lanes.
  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  // 32-bit lanes, 64-bit vectors.
  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  // 32-bit lanes, 128-bit vectors.
  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A zero immediate is expected to leave the written lanes unchanged; the
    // 64-bit forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, kInitLow, q16);
    ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
    ASSERT_EQUAL_128(kInitHigh, kInitLow, q18);
    ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

    ASSERT_EQUAL_128(0x0, kInitLow, q20);
    ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
    ASSERT_EQUAL_128(0x0, kInitLow, q22);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

    ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
    ASSERT_EQUAL_128(kInitHigh, kInitLow, q25);
    ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
    ASSERT_EQUAL_128(kInitHigh, kInitLow, q27);
  }
}
7364
7365
// Checks the MacroAssembler's Movi with a range of 16-bit immediates on the
// 4H and 8H arrangements.
TEST(neon_modimm_movi_16bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // 64-bit destinations.
  __ Movi(v0.V4H(), 0xabab);
  __ Movi(v1.V4H(), 0xab00);
  __ Movi(v2.V4H(), 0xabff);

  // 128-bit destinations.
  __ Movi(v3.V8H(), 0x00ab);
  __ Movi(v4.V8H(), 0xffab);
  __ Movi(v5.V8H(), 0xabcd);

  END();

  if (CAN_RUN()) {
    RUN();

    // The immediate is expected in every 16-bit lane that was written.
    ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
    ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
    ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);

    ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
    ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);
  }
}
7391
7392
// Checks the MacroAssembler's Movi with a range of 32-bit immediates on the
// 2S and 4S arrangements.
TEST(neon_modimm_movi_32bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // A single non-zero byte, all other bytes zero.
  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  // A single non-0xff byte, all other bytes 0xff.
  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  // One byte of interest with a mix of 0x00 and 0xff around it.
  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  // Repeating-byte, repeating-halfword and other patterns.
  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  if (CAN_RUN()) {
    RUN();

    // The immediate is expected in every 32-bit lane that was written.
    ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
    ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
    ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
    ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

    ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
    ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
    ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
    ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

    ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
    ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
    ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
    ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
    ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  }
}
7444
7445
// Checks the MacroAssembler's Movi with a range of 64-bit immediates on the
// 1D and 2D arrangements.
TEST(neon_modimm_movi_64bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V1D(), 0x00ffff0000ffffff);
  __ Movi(v1.V2D(), 0xabababababababab);
  __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
  __ Movi(v3.V2D(), 0xabcdef01abcdef01);
  // The same immediate on both arrangements.
  __ Movi(v4.V1D(), 0xabcdef0123456789);
  __ Movi(v5.V2D(), 0xabcdef0123456789);

  END();

  if (CAN_RUN()) {
    RUN();

    // 1D destinations are checked through their D view, 2D destinations
    // through the full Q register.
    ASSERT_EQUAL_64(0x00ffff0000ffffff, d0);
    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
    ASSERT_EQUAL_64(0xabcdef0123456789, d4);
    ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);
  }
}
7471
7472
// Checks Movi with byte immediates, 64-bit immediates, and the explicit
// LSL / MSL shifted forms.
TEST(neon_modimm_movi) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte lanes.
  __ Movi(v0.V8B(), 0xaa);
  __ Movi(v1.V16B(), 0x55);

  // 64-bit immediate, scalar and 2D destinations.
  __ Movi(d2, 0x00ffff0000ffffff);
  __ Movi(v3.V2D(), 0x00ffff0000ffffff);

  // LSL on 16-bit lanes.
  __ Movi(v16.V4H(), 0x00, LSL, 0);
  __ Movi(v17.V4H(), 0xff, LSL, 8);
  __ Movi(v18.V8H(), 0x00, LSL, 0);
  __ Movi(v19.V8H(), 0xff, LSL, 8);

  // LSL on 32-bit lanes, 64-bit vectors.
  __ Movi(v20.V2S(), 0x00, LSL, 0);
  __ Movi(v21.V2S(), 0xff, LSL, 8);
  __ Movi(v22.V2S(), 0x00, LSL, 16);
  __ Movi(v23.V2S(), 0xff, LSL, 24);

  // LSL on 32-bit lanes, 128-bit vectors.
  __ Movi(v24.V4S(), 0xff, LSL, 0);
  __ Movi(v25.V4S(), 0x00, LSL, 8);
  __ Movi(v26.V4S(), 0xff, LSL, 16);
  __ Movi(v27.V4S(), 0x00, LSL, 24);

  // MSL on 32-bit lanes.
  __ Movi(v28.V2S(), 0xaa, MSL, 8);
  __ Movi(v29.V2S(), 0x55, MSL, 16);
  __ Movi(v30.V4S(), 0xff, MSL, 8);
  __ Movi(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // Byte lanes.
    ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
    ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);

    // 64-bit immediates.
    ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
    ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);

    // LSL, 16-bit lanes.
    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
    ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);

    // LSL, 32-bit lanes, 64-bit vectors.
    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);

    // LSL, 32-bit lanes, 128-bit vectors.
    ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

    // MSL: the bits below the shifted immediate are expected to be ones.
    ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);
  }
}
7536
7537
// Checks Mvni with the explicit LSL / MSL shifted forms on 16-bit and 32-bit
// lanes.
TEST(neon_modimm_mvni) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // LSL on 16-bit lanes.
  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  // LSL on 32-bit lanes, 64-bit vectors.
  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  // LSL on 32-bit lanes, 128-bit vectors.
  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  // MSL on 32-bit lanes.
  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // LSL, 16-bit lanes.
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

    // LSL, 32-bit lanes, 64-bit vectors.
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

    // LSL, 32-bit lanes, 128-bit vectors.
    ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

    // MSL.
    ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
    ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);
  }
}
7589
7590
// Checks the immediate form of Orr for the H and S lane arrangements, with
// each shift amount used below, starting from one common register pattern.
TEST(neon_modimm_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every destination register starts from this 128-bit pattern.
  const uint64_t kInitHigh = 0x00aaff55ff0055aa;
  const uint64_t kInitLow = 0x5555ffff0000aaaa;

  START();

  __ Movi(v16.V2D(), kInitHigh, kInitLow);
  __ Movi(v17.V2D(), kInitHigh, kInitLow);
  __ Movi(v18.V2D(), kInitHigh, kInitLow);
  __ Movi(v19.V2D(), kInitHigh, kInitLow);
  __ Movi(v20.V2D(), kInitHigh, kInitLow);
  __ Movi(v21.V2D(), kInitHigh, kInitLow);
  __ Movi(v22.V2D(), kInitHigh, kInitLow);
  __ Movi(v23.V2D(), kInitHigh, kInitLow);
  __ Movi(v24.V2D(), kInitHigh, kInitLow);
  __ Movi(v25.V2D(), kInitHigh, kInitLow);
  __ Movi(v26.V2D(), kInitHigh, kInitLow);
  __ Movi(v27.V2D(), kInitHigh, kInitLow);

  // 16-bit lanes.
  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  // 32-bit lanes, 64-bit vectors.
  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  // 32-bit lanes, 128-bit vectors.
  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A zero immediate is expected to leave the written lanes unchanged; the
    // 64-bit forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, kInitLow, q16);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
    ASSERT_EQUAL_128(kInitHigh, kInitLow, q18);
    ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

    ASSERT_EQUAL_128(0x0, kInitLow, q20);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
    ASSERT_EQUAL_128(0x0, kInitLow, q22);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

    ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
    ASSERT_EQUAL_128(kInitHigh, kInitLow, q25);
    ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
    ASSERT_EQUAL_128(kInitHigh, kInitLow, q27);
  }
}
7645
// Loads every (high, low) combination of kHalfValues into a Q register via a
// literal load and checks that both 64-bit halves match the reference values.
TEST(ldr_literal_values_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  static const uint64_t kHalfValues[] = {0x8000000000000000,
                                         0x7fffffffffffffff,
                                         0x0000000000000000,
                                         0xffffffffffffffff,
                                         0x00ff00ff00ff00ff,
                                         0x1234567890abcdef};
  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
  const Register& ref_low64 = x1;
  const Register& ref_high64 = x2;
  const Register& loaded_low64 = x3;
  const Register& loaded_high64 = x4;
  const VRegister& tgt = q0;

  START();
  // x0 counts the loads whose value differs from the reference.
  __ Mov(x0, 0);

  for (int i = 0; i < card; i++) {
    __ Mov(ref_low64, kHalfValues[i]);
    for (int j = 0; j < card; j++) {
      __ Mov(ref_high64, kHalfValues[j]);
      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
      __ Mov(loaded_low64, tgt.V2D(), 0);
      __ Mov(loaded_high64, tgt.V2D(), 1);
      // `ne` holds afterwards if either half differs: when the low halves
      // differ, Ccmp installs NoFlag (Z clear) instead of comparing.
      __ Cmp(loaded_low64, ref_low64);
      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
      // Accumulate rather than overwrite, so that a mismatch in any
      // iteration (not only the last one) is still visible in x0.
      __ Cinc(x0, x0, ne);
    }
  }
  END();

  if (CAN_RUN()) {
    RUN();

    // If one of the values differs, the trace can be used to identify which
    // one.
    ASSERT_EQUAL_64(0, x0);
  }
}
7687
// Checks vector Fmov with floating-point immediates on single-precision and
// half-precision lane arrangements.
TEST(fmov_vec_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  // Single-precision lanes.
  __ Fmov(v0.V2S(), 20.0);
  __ Fmov(v1.V4S(), 1024.0);

  // Half-precision lanes, with the immediate given as raw bits.
  __ Fmov(v2.V4H(), RawbitsToFloat16(0xC500U));
  __ Fmov(v3.V8H(), RawbitsToFloat16(0x4A80U));

  END();
  if (CAN_RUN()) {
    RUN();

    // Each written lane is expected to hold the immediate's raw encoding.
    ASSERT_EQUAL_64(0x41A0000041A00000, d0);
    ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1);
    ASSERT_EQUAL_64(0xC500C500C500C500, d2);
    ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3);
  }
}
7711
7712 // TODO: add arbitrary values once load literal to Q registers is supported.
TEST(neon_modimm_fmov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Immediates which can be encoded in the instructions.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Immediates which cannot be encoded in the instructions.
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  // Encodable immediates.
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  // Non-encodable immediates.
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  // Zero.
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  // Positive infinity.
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  if (CAN_RUN()) {
    RUN();

    // Raw encodings of the immediates. The "2S" values hold the
    // single-precision encoding duplicated into both halves of 64 bits.
    const uint64_t kOne1S = FloatToRawbits(kOne);
    const uint64_t kOne2S = (kOne1S << 32) | kOne1S;
    const uint64_t kPointFive1S = FloatToRawbits(kPointFive);
    const uint64_t kPointFive2S = (kPointFive1S << 32) | kPointFive1S;
    const uint64_t kMinusThirteen1D = DoubleToRawbits(kMinusThirteen);
    const uint64_t kNonImmFP321S = FloatToRawbits(kNonImmFP32);
    const uint64_t kNonImmFP322S = (kNonImmFP321S << 32) | kNonImmFP321S;
    const uint64_t kNonImmFP641D = DoubleToRawbits(kNonImmFP64);
    const uint64_t kFP32Inf1S = FloatToRawbits(kFP32PositiveInfinity);
    const uint64_t kFP32Inf2S = (kFP32Inf1S << 32) | kFP32Inf1S;
    const uint64_t kFP64Inf1D = DoubleToRawbits(kFP64PositiveInfinity);

    // Encodable immediates.
    ASSERT_EQUAL_128(0x0, kOne2S, q11);
    ASSERT_EQUAL_128(kPointFive2S, kPointFive2S, q12);
    ASSERT_EQUAL_128(kMinusThirteen1D, kMinusThirteen1D, q22);
    // Non-encodable immediates.
    ASSERT_EQUAL_128(0x0, kNonImmFP322S, q13);
    ASSERT_EQUAL_128(kNonImmFP322S, kNonImmFP322S, q14);
    ASSERT_EQUAL_128(kNonImmFP641D, kNonImmFP641D, q23);
    // Zero.
    ASSERT_EQUAL_128(0x0, 0x0, q1);
    ASSERT_EQUAL_128(0x0, 0x0, q2);
    ASSERT_EQUAL_128(0x0, 0x0, q3);
    // Positive infinity.
    ASSERT_EQUAL_128(0x0, kFP32Inf2S, q4);
    ASSERT_EQUAL_128(kFP32Inf2S, kFP32Inf2S, q5);
    ASSERT_EQUAL_128(kFP64Inf1D, kFP64Inf1D, q6);
  }
}
7768
7769
// Checks the permute instructions Trn1/Trn2, Zip1/Zip2 and Uzp1/Uzp2 on the
// 16B arrangement.
TEST(neon_perm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each input byte has a distinct value, so the expected results show
  // where every byte came from.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Trn1 / Trn2.
    ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
    ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
    // Zip1 / Zip2.
    ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
    ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
    // Uzp1 / Uzp2.
    ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
    ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);
  }
}
7798
7799
// Checks Dup from a vector element into 128-bit, 64-bit and scalar
// destinations, including cases where source and destination coincide.
TEST(neon_copy_dup_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are pure sources; v2-v5 are both source and destination below.
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
  __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);

  // 128-bit destinations.
  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  // 64-bit destinations.
  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  // Scalar destinations.
  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  // The destination register is also the source register.
  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // 128-bit destinations.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
    ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

    // 64-bit destinations.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
    ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

    // Scalar destinations.
    ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
    ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
    ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
    ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);

    // Source and destination coincide.
    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
    ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
    ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
  }
}
7856
7857
// Checks Dup from a general-purpose register, including the zero registers,
// into 128-bit and 64-bit vector destinations.
TEST(neon_copy_dup_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source value; the W-sized forms below read it through w0.
  __ Mov(x0, 0x0011223344556677);

  // 128-bit destinations.
  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  // 64-bit destinations.
  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  // Zero register as the source.
  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  if (CAN_RUN()) {
    RUN();

    // 128-bit destinations.
    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
    ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

    // 64-bit destinations.
    ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
    ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
    ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

    // Zero register sources.
    ASSERT_EQUAL_128(0, 0, q2);
    ASSERT_EQUAL_128(0, 0, q3);
    ASSERT_EQUAL_128(0, 0, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7899
7900
// Checks Ins from one vector element to another, both between different
// registers and within a single register.
TEST(neon_copy_ins_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources (v0, v1) and initial destination contents (v16-v19).
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  // Registers used as both source and destination.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  // Insert between different registers.
  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  // Insert within the same register.
  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Different source and destination.
    ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
    ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

    // Same source and destination.
    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7944
7945
// Checks the element-to-element form of Mov, both between different
// registers and within a single register.
TEST(neon_copy_mov_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources (v0, v1) and initial destination contents (v16-v19).
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  // Registers used as both source and destination.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  // Move between different registers.
  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
  __ Mov(v17.V8H(), 0, v1.V8H(), 7);
  __ Mov(v18.V4S(), 3, v1.V4S(), 0);
  __ Mov(v19.V2D(), 1, v0.V2D(), 0);

  // Move within the same register.
  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
  __ Mov(v3.V8H(), 0, v3.V8H(), 7);
  __ Mov(v4.V4S(), 3, v4.V4S(), 0);
  __ Mov(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Different source and destination.
    ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
    ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

    // Same source and destination.
    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7989
7990
// Checks Smov from B, H and S elements into W and X registers.
TEST(neon_copy_smov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  // W destinations, byte elements.
  __ Smov(w0, v0.B(), 7);
  __ Smov(w1, v0.B(), 15);

  // W destinations, halfword elements.
  __ Smov(w2, v0.H(), 0);
  __ Smov(w3, v0.H(), 3);

  // X destinations, byte elements.
  __ Smov(x4, v0.B(), 7);
  __ Smov(x5, v0.B(), 15);

  // X destinations, halfword elements.
  __ Smov(x6, v0.H(), 0);
  __ Smov(x7, v0.H(), 3);

  // X destinations, word elements.
  __ Smov(x16, v0.S(), 0);
  __ Smov(x17, v0.S(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Elements with their top bit set are expected sign-extended.
    ASSERT_EQUAL_32(0xfffffffe, w0);
    ASSERT_EQUAL_32(0x00000001, w1);
    ASSERT_EQUAL_32(0x00003210, w2);
    ASSERT_EQUAL_32(0xfffffedc, w3);

    ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
    ASSERT_EQUAL_64(0x0000000000000001, x5);
    ASSERT_EQUAL_64(0x0000000000003210, x6);
    ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
    ASSERT_EQUAL_64(0x0000000076543210, x16);
    ASSERT_EQUAL_64(0xfffffffffedcba98, x17);
  }
}
8030
8031
// Checks Umov, and the vector-element-to-general-register form of Mov, for
// the element sizes used below.
TEST(neon_copy_umov_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  // Umov, one case per element size.
  __ Umov(w0, v0.B(), 15);
  __ Umov(w1, v0.H(), 0);
  __ Umov(w2, v0.S(), 3);
  __ Umov(x3, v0.D(), 1);

  // Mov with the same S and D operands as the Umov cases above.
  __ Mov(w4, v0.S(), 3);
  __ Mov(x5, v0.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_32(0x00000001, w0);
    ASSERT_EQUAL_32(0x00003210, w1);
    ASSERT_EQUAL_32(0x01234567, w2);
    ASSERT_EQUAL_64(0x0123456789abcdef, x3);

    // Expected to match the corresponding Umov results.
    ASSERT_EQUAL_32(0x01234567, w4);
    ASSERT_EQUAL_64(0x0123456789abcdef, x5);
  }
}
8060
8061
// Checks Ins from a general-purpose register into a vector element for each
// lane size.
TEST(neon_copy_ins_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source value; the W-sized forms below read it through w0.
  __ Mov(x0, 0x0011223344556677);

  // Initial destination contents with both halves populated.
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  // Initial destination contents with a zero upper half.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the addressed lane is expected to change.
    ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
    ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
    ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
  }
}
8104
8105
// Checks Ext on the 16B and 8B arrangements, including cases where the
// destination aliases one or both sources.
TEST(neon_extract_ext) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Inputs for the 16B cases.
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);

  // Inputs for the 8B cases.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);

  // 16B arrangement. The two aliasing cases come last because they
  // overwrite v1 and v0.
  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Dest is same as one Src
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All reg are the same

  // 8B arrangement, same structure as above.
  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // Dest is same as one Src
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // All reg are the same

  END();

  if (CAN_RUN()) {
    RUN();

    // 16B arrangement.
    ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
    ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
    ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);

    // 8B arrangement.
    ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
    ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
    ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
    ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
  }
}
8143
8144
// Checks Uaddl for the 8B->8H, 4H->4S and 2S->2D arrangements.
TEST(neon_3different_uaddl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // 8B -> 8H: destination v0, operands v1 and v2.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
  __ Movi(v2.V2D(), 0, 0x0101010101010101);

  // 4H -> 4S: destinations v3 and v4, operands v5, v6 and v7.
  __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v5.V2D(), 0, 0x0000000180008001);
  __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
  __ Movi(v7.V2D(), 0, 0x0001000100010001);

  // 2S -> 2D: destinations v16 and v17, operands v18, v19 and v20.
  __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v18.V2D(), 0, 0x0000000000000001);
  __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
  __ Movi(v20.V2D(), 0, 0x0000000100000001);

  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());

  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());

  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Sums are expected at double the source lane width, so carries out of
    // the narrow lanes are kept (e.g. 0xff + 0x01 gives 0x0100 in q0).
    ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
    ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
    ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
    ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
    ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
  }
}
8187
8188
// Checks Addhn, Raddhn, Subhn and Rsubhn, together with their "2" variants,
// on the 8H -> 8B/16B arrangement.
TEST(neon_3different_addhn_subhn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Input operands.
  // NOTE(review): v4 is initialised here but not read by any instruction
  // in this test.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is written by the base instruction (from v0, v1) and
  // then by its "2" variant (from v2, v3).
  __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
  __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
  __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
  __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);  // Addhn
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);  // Raddhn
    ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);  // Subhn
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);  // Rsubhn
  }
}
8220
// Checks scalar NEON operations on D registers: Add, Sub, register-controlled
// shifts (Ushl, Sshl) and immediate shifts (Ushr, Sshr, Shl).
TEST(neon_d_only_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Operands are written as (upper 64 bits, lower 64 bits). Only the lower
  // 64 bits are read by the D-register instructions below; the upper halves
  // hold filler patterns.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
  // Shift amounts for Ushl/Sshl: +2 (v3) and -2 (v4).
  __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
  __ Movi(v4.V2D(), 0xffffffffffffffff, -2);

  __ Add(d16, d0, d0);
  __ Add(d17, d1, d1);
  __ Add(d18, d2, d2);
  __ Sub(d19, d0, d0);
  __ Sub(d20, d0, d1);
  __ Sub(d21, d1, d0);
  // With a positive amount the value moves left; with a negative amount it
  // moves right (logical for Ushl, arithmetic for Sshl), as the expected
  // values for d22-d25 below show.
  __ Ushl(d22, d0, d3);
  __ Ushl(d23, d0, d4);
  __ Sshl(d24, d0, d3);
  __ Sshl(d25, d0, d4);
  __ Ushr(d26, d0, 1);
  __ Sshr(d27, d0, 3);
  __ Shl(d28, d0, 0);
  __ Shl(d29, d0, 16);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Every result is checked as a full Q register: the upper 64 bits are
    // expected to be zero after a scalar D-register write.
    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
    ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
    ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
    ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
    ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
    ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
    ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);
  }
}
8268
8269
// Checks scalar Sqshl (signed saturating shift left) by an immediate of 1 for
// B, H, S and D element sizes.
TEST(neon_sqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // For each size, v0/v1/v2 are reloaded with: the largest positive value,
  // the most negative value, and 1. The first two are expected to saturate
  // (result equals the input); the third is expected to become 2.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshl(b16, b0, 1);
  __ Sqshl(b17, b1, 1);
  __ Sqshl(b18, b2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshl(h19, h0, 1);
  __ Sqshl(h20, h1, 1);
  __ Sqshl(h21, h2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshl(s22, s0, 1);
  __ Sqshl(s23, s1, 1);
  __ Sqshl(s24, s2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshl(d25, d0, 1);
  __ Sqshl(d26, d1, 1);
  __ Sqshl(d27, d2, 1);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0x7f, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    ASSERT_EQUAL_128(0, 0x7fff, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    ASSERT_EQUAL_128(0, 0x7fffffff, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8325
8326
// Checks scalar Uqshl (unsigned saturating shift left) by an immediate of 1
// for B, H, S and D element sizes.
TEST(neon_uqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // For each size, v0/v1/v2 are reloaded with: a value whose top bit is clear
  // (doubles without overflow), a value with only the top bit set (expected
  // to saturate to all ones), and 1 (expected to become 2).
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Uqshl(b16, b0, 1);
  __ Uqshl(b17, b1, 1);
  __ Uqshl(b18, b2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Uqshl(h19, h0, 1);
  __ Uqshl(h20, h1, 1);
  __ Uqshl(h21, h2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Uqshl(s22, s0, 1);
  __ Uqshl(s23, s1, 1);
  __ Uqshl(s24, s2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Uqshl(d25, d0, 1);
  __ Uqshl(d26, d1, 1);
  __ Uqshl(d27, d2, 1);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0xfe, q16);
    ASSERT_EQUAL_128(0, 0xff, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    ASSERT_EQUAL_128(0, 0xfffe, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    ASSERT_EQUAL_128(0, 0xfffffffe, q22);
    ASSERT_EQUAL_128(0, 0xffffffff, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8382
8383
// Checks scalar Sqshlu (signed input, unsigned saturating shift left) by an
// immediate of 2 for B, H, S and D element sizes.
TEST(neon_sqshlu_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // For each size, v0/v1/v2 are reloaded with: the largest positive signed
  // value (expected to saturate to all ones), the most negative signed value
  // (expected to saturate to zero), and 1 (expected to become 4).
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshlu(b16, b0, 2);
  __ Sqshlu(b17, b1, 2);
  __ Sqshlu(b18, b2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshlu(h19, h0, 2);
  __ Sqshlu(h20, h1, 2);
  __ Sqshlu(h21, h2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshlu(s22, s0, 2);
  __ Sqshlu(s23, s1, 2);
  __ Sqshlu(s24, s2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshlu(d25, d0, 2);
  __ Sqshlu(d26, d1, 2);
  __ Sqshlu(d27, d2, 2);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x00, q17);
    ASSERT_EQUAL_128(0, 0x04, q18);

    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0x0000, q20);
    ASSERT_EQUAL_128(0, 0x0004, q21);

    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0x00000000, q23);
    ASSERT_EQUAL_128(0, 0x00000004, q24);

    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000004, q27);
  }
}
8439
8440
// Checks Sshll/Sshll2: each source lane is sign-extended to twice its width
// and shifted left by an immediate, for B->H, H->S and S->D.
TEST(neon_sshll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands, written as (upper 64 bits, lower 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form reads the lower half of the source; the "2" form reads
  // the upper half. Shift amounts are half the source lane width.
  __ Sshll(v16.V8H(), v0.V8B(), 4);
  __ Sshll2(v17.V8H(), v0.V16B(), 4);

  __ Sshll(v18.V4S(), v1.V4H(), 8);
  __ Sshll2(v19.V4S(), v1.V8H(), 8);

  __ Sshll(v20.V2D(), v2.V2S(), 16);
  __ Sshll2(v21.V2D(), v2.V4S(), 16);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits).
    ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
    ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
    ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
  }
}
8472
// Checks Shll/Shll2: each source lane is widened to twice its width and
// shifted left by the source lane width (8, 16 or 32).
TEST(neon_shll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands, written as (upper 64 bits, lower 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form reads the lower half of the source; the "2" form reads
  // the upper half. Each source lane ends up in the top half of its
  // destination lane with zeros below it.
  __ Shll(v16.V8H(), v0.V8B(), 8);
  __ Shll2(v17.V8H(), v0.V16B(), 8);

  __ Shll(v18.V4S(), v1.V4H(), 16);
  __ Shll2(v19.V4S(), v1.V8H(), 16);

  __ Shll(v20.V2D(), v2.V2S(), 32);
  __ Shll2(v21.V2D(), v2.V4S(), 32);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits).
    ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
    ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
    ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
    ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
    ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
  }
}
8504
// Checks Ushll/Ushll2: each source lane is zero-extended to twice its width
// and shifted left by an immediate, for B->H, H->S and S->D.
TEST(neon_ushll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands, written as (upper 64 bits, lower 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form reads the lower half of the source; the "2" form reads
  // the upper half. Shift amounts are half the source lane width.
  __ Ushll(v16.V8H(), v0.V8B(), 4);
  __ Ushll2(v17.V8H(), v0.V16B(), 4);

  __ Ushll(v18.V4S(), v1.V4H(), 8);
  __ Ushll2(v19.V4S(), v1.V8H(), 8);

  __ Ushll(v20.V2D(), v2.V2S(), 16);
  __ Ushll2(v21.V2D(), v2.V4S(), 16);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits).
    ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
    ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
    ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
  }
}
8536
8537
// Checks Sxtl/Sxtl2: each source lane is sign-extended to twice its width,
// for B->H, H->S and S->D.
TEST(neon_sxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands, written as (upper 64 bits, lower 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form reads the lower half of the source; the "2" form reads
  // the upper half.
  __ Sxtl(v16.V8H(), v0.V8B());
  __ Sxtl2(v17.V8H(), v0.V16B());

  __ Sxtl(v18.V4S(), v1.V4H());
  __ Sxtl2(v19.V4S(), v1.V8H());

  __ Sxtl(v20.V2D(), v2.V2S());
  __ Sxtl2(v21.V2D(), v2.V4S());

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits).
    ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
    ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
  }
}
8569
8570
// Checks Uxtl/Uxtl2: each source lane is zero-extended to twice its width,
// for B->H, H->S and S->D.
TEST(neon_uxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands, written as (upper 64 bits, lower 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // The plain form reads the lower half of the source; the "2" form reads
  // the upper half.
  __ Uxtl(v16.V8H(), v0.V8B());
  __ Uxtl2(v17.V8H(), v0.V16B());

  __ Uxtl(v18.V4S(), v1.V4H());
  __ Uxtl2(v19.V4S(), v1.V8H());

  __ Uxtl(v20.V2D(), v2.V2S());
  __ Uxtl2(v21.V2D(), v2.V4S());

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits).
    ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
    ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
  }
}
8602
8603
// Checks Ssra (signed shift right and accumulate) for every vector
// arrangement and for the scalar D form.
TEST(neon_ssra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands, written as (upper 64 bits, lower 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Seed each accumulator with a copy of the source it is paired with below,
  // so every lane is expected to end up as src + (src >> shift).
  // Note: v25 is seeded here but is neither operated on nor checked.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Each pair exercises the 64-bit and 128-bit arrangement of one lane size.
  __ Ssra(v16.V8B(), v0.V8B(), 4);
  __ Ssra(v17.V16B(), v0.V16B(), 4);

  __ Ssra(v18.V4H(), v1.V4H(), 8);
  __ Ssra(v19.V8H(), v1.V8H(), 8);

  __ Ssra(v20.V2S(), v2.V2S(), 16);
  __ Ssra(v21.V4S(), v2.V4S(), 16);

  __ Ssra(v22.V2D(), v3.V2D(), 32);
  __ Ssra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Ssra(d24, d3, 48);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits). The 64-bit
    // arrangements and the scalar form are expected to leave the upper half
    // of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
    ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
    ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8656
// Checks Srsra (signed rounding shift right and accumulate) for every vector
// arrangement and for the scalar D form.
TEST(neon_srsra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands, written as (upper 64 bits, lower 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Seed each accumulator with a copy of the source it is paired with below,
  // so every lane is expected to end up as src + rounded(src >> shift).
  // Note: v25 is seeded here but is neither operated on nor checked.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Each pair exercises the 64-bit and 128-bit arrangement of one lane size.
  __ Srsra(v16.V8B(), v0.V8B(), 4);
  __ Srsra(v17.V16B(), v0.V16B(), 4);

  __ Srsra(v18.V4H(), v1.V4H(), 8);
  __ Srsra(v19.V8H(), v1.V8H(), 8);

  __ Srsra(v20.V2S(), v2.V2S(), 16);
  __ Srsra(v21.V4S(), v2.V4S(), 16);

  __ Srsra(v22.V2D(), v3.V2D(), 32);
  __ Srsra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Srsra(d24, d3, 48);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits). The 64-bit
    // arrangements and the scalar form are expected to leave the upper half
    // of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
    ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
    ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8709
// Checks Usra (unsigned shift right and accumulate) for every vector
// arrangement and for the scalar D form.
TEST(neon_usra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands, written as (upper 64 bits, lower 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Seed each accumulator with a copy of the source it is paired with below,
  // so every lane is expected to end up as src + (src >> shift), with a
  // logical (zero-filling) shift.
  // Note: v25 is seeded here but is neither operated on nor checked.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Each pair exercises the 64-bit and 128-bit arrangement of one lane size.
  __ Usra(v16.V8B(), v0.V8B(), 4);
  __ Usra(v17.V16B(), v0.V16B(), 4);

  __ Usra(v18.V4H(), v1.V4H(), 8);
  __ Usra(v19.V8H(), v1.V8H(), 8);

  __ Usra(v20.V2S(), v2.V2S(), 16);
  __ Usra(v21.V4S(), v2.V4S(), 16);

  __ Usra(v22.V2D(), v3.V2D(), 32);
  __ Usra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Usra(d24, d3, 48);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits). The 64-bit
    // arrangements and the scalar form are expected to leave the upper half
    // of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
    ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
    ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8762
// Checks Ursra (unsigned rounding shift right and accumulate) for every
// vector arrangement and for the scalar D form.
TEST(neon_ursra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands, written as (upper 64 bits, lower 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Seed each accumulator with a copy of the source it is paired with below,
  // so every lane is expected to end up as src + rounded(src >> shift), with
  // a logical (zero-filling) shift.
  // Note: v25 is seeded here but is neither operated on nor checked.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Each pair exercises the 64-bit and 128-bit arrangement of one lane size.
  __ Ursra(v16.V8B(), v0.V8B(), 4);
  __ Ursra(v17.V16B(), v0.V16B(), 4);

  __ Ursra(v18.V4H(), v1.V4H(), 8);
  __ Ursra(v19.V8H(), v1.V8H(), 8);

  __ Ursra(v20.V2S(), v2.V2S(), 16);
  __ Ursra(v21.V4S(), v2.V4S(), 16);

  __ Ursra(v22.V2D(), v3.V2D(), 32);
  __ Ursra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Ursra(d24, d3, 48);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits). The 64-bit
    // arrangements and the scalar form are expected to leave the upper half
    // of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
    ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
    ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8815
8816
// Checks scalar Uqshl (unsigned saturating shift left) with the shift amount
// taken from a register, for B, H, S and D element sizes.
TEST(neon_uqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 are the values to shift; the upper 64 bits hold filler patterns
  // that the scalar instructions below do not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  // Shift amounts: v2 is +1 (shift left by one); v3 is all ones, i.e. -1 at
  // every element size (shift right by one).
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  // For each size: v0 << 1 (expected to saturate), v0 >> 1, v1 << 1 (fits),
  // v1 >> 1.
  __ Uqshl(b16, b0, b2);
  __ Uqshl(b17, b0, b3);
  __ Uqshl(b18, b1, b2);
  __ Uqshl(b19, b1, b3);
  __ Uqshl(h20, h0, h2);
  __ Uqshl(h21, h0, h3);
  __ Uqshl(h22, h1, h2);
  __ Uqshl(h23, h1, h3);
  __ Uqshl(s24, s0, s2);
  __ Uqshl(s25, s0, s3);
  __ Uqshl(s26, s1, s2);
  __ Uqshl(s27, s1, s3);
  __ Uqshl(d28, d0, d2);
  __ Uqshl(d29, d0, d3);
  __ Uqshl(d30, d1, d2);
  __ Uqshl(d31, d1, d3);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x3f, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fbf, q23);
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);
  }
}
8867
8868
// Checks scalar Sqshl (signed saturating shift left) with the shift amount
// taken from a register, for B, H, S and D element sizes.
TEST(neon_sqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 holds negative values and v1 positive values at every element size;
  // the upper 64 bits hold filler patterns that the scalar instructions
  // below do not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  // Shift amounts: v2 is +1 (shift left by one); v3 is all ones, i.e. -1 at
  // every element size (shift right by one).
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  // For each size: v0 << 1 (expected to saturate to the minimum), v0 >> 1,
  // v1 << 1 (expected to saturate to the maximum), v1 >> 1.
  __ Sqshl(b16, b0, b2);
  __ Sqshl(b17, b0, b3);
  __ Sqshl(b18, b1, b2);
  __ Sqshl(b19, b1, b3);
  __ Sqshl(h20, h0, h2);
  __ Sqshl(h21, h0, h3);
  __ Sqshl(h22, h1, h2);
  __ Sqshl(h23, h1, h3);
  __ Sqshl(s24, s0, s2);
  __ Sqshl(s25, s0, s3);
  __ Sqshl(s26, s1, s2);
  __ Sqshl(s27, s1, s3);
  __ Sqshl(d28, d0, d2);
  __ Sqshl(d29, d0, d3);
  __ Sqshl(d30, d1, d2);
  __ Sqshl(d31, d1, d3);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xdf, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfdf, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8919
8920
// Checks scalar Urshl (unsigned rounding shift left by register) on D
// registers.
TEST(neon_urshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 are the values to shift; the upper 64 bits hold filler patterns
  // that the D-register instructions below do not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  // Shift amounts: v2 is +1 (shift left by one); v3 is -1 (shift right by
  // one, with rounding).
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Urshl(d28, d0, d2);
  __ Urshl(d29, d0, d3);
  __ Urshl(d30, d1, d2);
  __ Urshl(d31, d1, d3);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // The left shift of d0 does not saturate: the top bit is simply lost
    // (q28). The right shift of d1 rounds up in the lowest byte (q31).
    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
8947
8948
// Checks scalar Srshl (signed rounding shift left by register) on D
// registers.
TEST(neon_srshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 holds a negative value and v1 a positive value in the low 64 bits;
  // the upper 64 bits hold filler patterns that the D-register instructions
  // below do not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  // Shift amounts: v2 is +1 (shift left by one); v3 is -1 (shift right by
  // one, with rounding).
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Srshl(d28, d0, d2);
  __ Srshl(d29, d0, d3);
  __ Srshl(d30, d1, d2);
  __ Srshl(d31, d1, d3);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // The left shifts do not saturate: the sign of the result flips for both
    // inputs (q28, q30). The right shift of d0 rounds up in the lowest byte
    // (q29).
    ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8975
8976
// Checks scalar Uqrshl (unsigned saturating rounding shift left by register)
// for B, H, S and D element sizes.
TEST(neon_uqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 are the values to shift; the upper 64 bits hold filler patterns
  // that the scalar instructions below do not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  // Shift amounts: v2 is +1 (shift left by one); v3 is all ones, i.e. -1 at
  // every element size (shift right by one, with rounding).
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  // For each size: v0 << 1 (expected to saturate), v0 >> 1, v1 << 1 (fits),
  // v1 >> 1 (expected to round up).
  __ Uqrshl(b16, b0, b2);
  __ Uqrshl(b17, b0, b3);
  __ Uqrshl(b18, b1, b2);
  __ Uqrshl(b19, b1, b3);
  __ Uqrshl(h20, h0, h2);
  __ Uqrshl(h21, h0, h3);
  __ Uqrshl(h22, h1, h2);
  __ Uqrshl(h23, h1, h3);
  __ Uqrshl(s24, s0, s2);
  __ Uqrshl(s25, s0, s3);
  __ Uqrshl(s26, s1, s2);
  __ Uqrshl(s27, s1, s3);
  __ Uqrshl(d28, d0, d2);
  __ Uqrshl(d29, d0, d3);
  __ Uqrshl(d30, d1, d2);
  __ Uqrshl(d31, d1, d3);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x40, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fc0, q23);
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
9027
9028
// Checks scalar Sqrshl (signed saturating rounding shift left by register)
// for B, H, S and D element sizes.
TEST(neon_sqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 holds negative values and v1 positive values at every element size;
  // the upper 64 bits hold filler patterns that the scalar instructions
  // below do not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  // Shift amounts: v2 is +1 (shift left by one); v3 is all ones, i.e. -1 at
  // every element size (shift right by one, with rounding).
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  // For each size: v0 << 1 (expected to saturate to the minimum), v0 >> 1
  // (expected to round up), v1 << 1 (expected to saturate to the maximum),
  // v1 >> 1.
  __ Sqrshl(b16, b0, b2);
  __ Sqrshl(b17, b0, b3);
  __ Sqrshl(b18, b1, b2);
  __ Sqrshl(b19, b1, b3);
  __ Sqrshl(h20, h0, h2);
  __ Sqrshl(h21, h0, h3);
  __ Sqrshl(h22, h1, h2);
  __ Sqrshl(h23, h1, h3);
  __ Sqrshl(s24, s0, s2);
  __ Sqrshl(s25, s0, s3);
  __ Sqrshl(s26, s1, s2);
  __ Sqrshl(s27, s1, s3);
  __ Sqrshl(d28, d0, d2);
  __ Sqrshl(d29, d0, d3);
  __ Sqrshl(d30, d1, d2);
  __ Sqrshl(d31, d1, d3);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xe0, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfe0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
9079
9080
// Checks scalar Uqadd (unsigned saturating add) for B, H, S and D element
// sizes by adding each operand to itself.
TEST(neon_uqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Low 64 bits: v0 doubles past the unsigned maximum at every size
  // (saturates), v1 doubles to just below it, v2 doubles without getting
  // close. The upper 64 bits hold filler patterns that are not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Uqadd(b16, b0, b0);
  __ Uqadd(b17, b1, b1);
  __ Uqadd(b18, b2, b2);
  __ Uqadd(h19, h0, h0);
  __ Uqadd(h20, h1, h1);
  __ Uqadd(h21, h2, h2);
  __ Uqadd(s22, s0, s0);
  __ Uqadd(s23, s1, s1);
  __ Uqadd(s24, s2, s2);
  __ Uqadd(d25, d0, d0);
  __ Uqadd(d26, d1, d1);
  __ Uqadd(d27, d2, d2);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0xfe, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);
    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0xfefe, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);
    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0xfffefefe, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9122
9123
// Checks scalar Sqadd (signed saturating add) for B, H, S and D element
// sizes by adding each operand to itself.
TEST(neon_sqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Low 64 bits: v0 is strongly negative at every size (doubling saturates
  // to the minimum), v1 is the signed maximum (doubling saturates to the
  // maximum), v2 doubles without overflow. The upper 64 bits hold filler
  // patterns that are not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Sqadd(b16, b0, b0);
  __ Sqadd(b17, b1, b1);
  __ Sqadd(b18, b2, b2);
  __ Sqadd(h19, h0, h0);
  __ Sqadd(h20, h1, h1);
  __ Sqadd(h21, h2, h2);
  __ Sqadd(s22, s0, s0);
  __ Sqadd(s23, s1, s1);
  __ Sqadd(s24, s2, s2);
  __ Sqadd(d25, d0, d0);
  __ Sqadd(d26, d1, d1);
  __ Sqadd(d27, d2, d2);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0x7f, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);
    ASSERT_EQUAL_128(0, 0x8000, q19);
    ASSERT_EQUAL_128(0, 0x7fff, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);
    ASSERT_EQUAL_128(0, 0x80000000, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9165
9166
// Checks scalar Uqsub (unsigned saturating subtract) for B, H, S and D
// element sizes.
TEST(neon_uqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // As unsigned values, v0 is larger than v1 at every element size. The
  // upper 64 bits hold filler patterns that are not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);

  // For each size: x - x (zero), larger - smaller (exact difference),
  // smaller - larger (expected to saturate to zero).
  __ Uqsub(b16, b0, b0);
  __ Uqsub(b17, b0, b1);
  __ Uqsub(b18, b1, b0);
  __ Uqsub(h19, h0, h0);
  __ Uqsub(h20, h0, h1);
  __ Uqsub(h21, h1, h0);
  __ Uqsub(s22, s0, s0);
  __ Uqsub(s23, s0, s1);
  __ Uqsub(s24, s1, s0);
  __ Uqsub(d25, d0, d0);
  __ Uqsub(d26, d0, d1);
  __ Uqsub(d27, d1, d0);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x71, q17);
    ASSERT_EQUAL_128(0, 0, q18);

    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7171, q20);
    ASSERT_EQUAL_128(0, 0, q21);

    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x70017171, q23);
    ASSERT_EQUAL_128(0, 0, q24);

    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
    ASSERT_EQUAL_128(0, 0, q27);
  }
}
9210
9211
// Checks scalar Sqsub (signed saturating subtract) for B, H, S and D element
// sizes.
TEST(neon_sqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // As signed values, v0 is negative and v1 is a large positive number at
  // every element size, so their difference overflows in either direction.
  // The upper 64 bits hold filler patterns that are not read.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);

  // For each size: x - x (zero), negative - positive (expected to saturate
  // to the minimum), positive - negative (expected to saturate to the
  // maximum).
  __ Sqsub(b16, b0, b0);
  __ Sqsub(b17, b0, b1);
  __ Sqsub(b18, b1, b0);
  __ Sqsub(h19, h0, h0);
  __ Sqsub(h20, h0, h1);
  __ Sqsub(h21, h1, h0);
  __ Sqsub(s22, s0, s0);
  __ Sqsub(s23, s0, s1);
  __ Sqsub(s24, s1, s0);
  __ Sqsub(d25, d0, d0);
  __ Sqsub(d26, d0, d1);
  __ Sqsub(d27, d1, d0);

  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Checked as full Q registers; all bits above the element are expected
    // to be zero.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);

    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x7fff, q21);

    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x7fffffff, q24);

    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
9255
9256
// Checks vector Fmla and Fmls (floating-point multiply-accumulate and
// multiply-subtract) in the V2S, V4S and V2D arrangements.
TEST(neon_fmla_fmls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // v0 is the initial accumulator value; v1 and v2 are the multiplicands.
  // Values are written as (upper 64 bits, lower 64 bits). Read as
  // single-precision lanes, the low half of v2 holds +infinity and -infinity
  // and the low half of v1 holds 0.0 and 1.0, so the low lanes cover both an
  // infinite product and a 0 * infinity product.
  __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
  __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
  __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);
  // Every destination starts as a copy of v0, because Fmla/Fmls accumulate
  // into their destination.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V16B(), v0.V16B());
  __ Mov(v18.V16B(), v0.V16B());
  __ Mov(v19.V16B(), v0.V16B());
  __ Mov(v20.V16B(), v0.V16B());
  __ Mov(v21.V16B(), v0.V16B());

  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
  END();

  // The generated code is executed and checked only when CAN_RUN() is true.
  if (CAN_RUN()) {
    RUN();

    // Expected values are (upper 64 bits, lower 64 bits). The V2S forms are
    // expected to leave the upper half of the destination zero. In the low
    // single-precision lanes, 0x7fc00000 is the NaN from the 0 * infinity
    // product, and the infinite lane has opposite signs for Fmla and Fmls.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
    ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
    ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
    ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
    ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);
  }
}
9290
9291
// Checks half-precision Fmla in the 8H and 4H arrangements, covering a finite
// result, an infinite product and NaN accumulators.
TEST(neon_fmla_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision bit patterns, each repeated in the four H lanes of one
  // 64-bit half of a vector register.
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t forty_five_x4 = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  const uint64_t quiet_nan_x4 = 0x7e007e007e007e00;   // Quiet NaN.
  const uint64_t signal_nan_x4 = 0x7c017c017c017c01;  // Signalling NaN.
  const uint64_t zero_x4 = 0x0000000000000000;        // +0.0
  // Expected results that are not also inputs.
  const uint64_t ninety_two_x4 = 0x55c055c055c055c0;   // 92.0
  const uint64_t quieted_nan_x4 = 0x7e017e017e017e01;  // signal_nan_x4, quieted.

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), forty_five_x4, forty_five_x4);
  __ Movi(v2.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v3.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v4.V2D(), quiet_nan_x4, quiet_nan_x4);
  __ Movi(v5.V2D(), signal_nan_x4, signal_nan_x4);
  __ Movi(v6.V2D(), zero_x4, zero_x4);
  // Accumulators: 2.0, 2.0, quiet NaN, signalling NaN (once for 8H, once for
  // 4H).
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmla(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmla(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmla(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmla(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmla(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmla(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmla(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmla(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H forms.
    // 2.0 + (2.0 * 45.0) = 92.0
    ASSERT_EQUAL_128(ninety_two_x4, ninety_two_x4, v16);
    // 2.0 + (+Inf * -Inf) = -Inf
    ASSERT_EQUAL_128(neg_inf_x4, neg_inf_x4, v17);
    // NaN accumulators with an (Inf * 0.0) product.
    ASSERT_EQUAL_128(quiet_nan_x4, quiet_nan_x4, v18);
    ASSERT_EQUAL_128(quieted_nan_x4, quieted_nan_x4, v19);
    // 4H forms: same values in the low half, upper half clear.
    ASSERT_EQUAL_128(0, ninety_two_x4, v20);
    ASSERT_EQUAL_128(0, neg_inf_x4, v21);
    ASSERT_EQUAL_128(0, quiet_nan_x4, v22);
    ASSERT_EQUAL_128(0, quieted_nan_x4, v23);
  }
}
9337
9338
// Checks half-precision Fmls in the 8H and 4H arrangements, covering a finite
// result, an infinite product and NaN accumulators.
TEST(neon_fmls_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision bit patterns, each repeated in the four H lanes of one
  // 64-bit half of a vector register.
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t forty_five_x4 = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  const uint64_t quiet_nan_x4 = 0x7e007e007e007e00;   // Quiet NaN.
  const uint64_t signal_nan_x4 = 0x7c017c017c017c01;  // Signalling NaN.
  const uint64_t zero_x4 = 0x0000000000000000;        // +0.0
  // Expected results that are not also inputs.
  const uint64_t minus_88_x4 = 0xd580d580d580d580;     // -88.0
  const uint64_t quieted_nan_x4 = 0x7e017e017e017e01;  // signal_nan_x4, quieted.

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), forty_five_x4, forty_five_x4);
  __ Movi(v2.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v3.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v4.V2D(), quiet_nan_x4, quiet_nan_x4);
  __ Movi(v5.V2D(), signal_nan_x4, signal_nan_x4);
  __ Movi(v6.V2D(), zero_x4, zero_x4);
  // Accumulators: 2.0, 2.0, quiet NaN, signalling NaN (once for 8H, once for
  // 4H).
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmls(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmls(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmls(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmls(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmls(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmls(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmls(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmls(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H forms.
    // 2.0 - (2.0 * 45.0) = -88.0
    ASSERT_EQUAL_128(minus_88_x4, minus_88_x4, v16);
    // 2.0 - (+Inf * -Inf) = +Inf
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v17);
    // NaN accumulators with an (Inf * 0.0) product.
    ASSERT_EQUAL_128(quiet_nan_x4, quiet_nan_x4, v18);
    ASSERT_EQUAL_128(quieted_nan_x4, quieted_nan_x4, v19);
    // 4H forms: same values in the low half, upper half clear.
    ASSERT_EQUAL_128(0, minus_88_x4, v20);
    ASSERT_EQUAL_128(0, pos_inf_x4, v21);
    ASSERT_EQUAL_128(0, quiet_nan_x4, v22);
    ASSERT_EQUAL_128(0, quieted_nan_x4, v23);
  }
}
9384
9385
TEST(neon_fhm) {
  // Test basic operation of fmlal{2} and fmlsl{2}. The simulator tests have
  // more comprehensive input sets.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kNEON,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFHM);

  START();
  // Test multiplications:
  //        v30                         v31
  //  [0]   65504 (max normal)      *   65504 (max normal)
  //  [1]   -1                      *   0
  //  [2]   2^-24 (min subnormal)   *   2^-24 (min subnormal)
  //  [3]   -2^-24 (min subnormal)  *   65504 (max normal)
  //  [4]   6.10e-5 (min normal)    *   0.99...
  //  [5]   0                       *   -0
  //  [6]   -0                      *   0
  //  [7]   -Inf                    *   -Inf
  // Lane [0] is the least-significant 16 bits of the second Movi immediate;
  // lane [7] is the most-significant 16 bits of the first.
  __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
  __ Movi(v31.V8H(), 0xfc00000080003bff, 0x7bff000100007bff);

  // Accumulators for use with Fmlal{2}:
  // v0.S[0] = 384
  // v0.S[1] = -0
  __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x8000000043c00000);
  // v1.S[0] = -(2^-48 + 2^-71)
  // v1.S[1] = 0
  __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7800001);
  // v2.S[0] = 128
  // v2.S[1] = 0
  // v2.S[2] = 1
  // v2.S[3] = 1
  __ Movi(v2.V4S(), 0x3f8000003f800000, 0x0000000043000000);
  // v3.S[0] = 0
  // v3.S[1] = -0
  // v3.S[2] = -0
  // v3.S[3] = 0
  __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
  // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
  // results are just the negation of the Fmlal{2} results.
  __ Fneg(v4.V4S(), v0.V4S());
  __ Fneg(v5.V4S(), v1.V4S());
  __ Fneg(v6.V4S(), v2.V4S());
  __ Fneg(v7.V4S(), v3.V4S());

  __ Fmlal(v0.V2S(), v30.V2H(), v31.V2H());
  __ Fmlal2(v1.V2S(), v30.V2H(), v31.V2H());
  __ Fmlal(v2.V4S(), v30.V4H(), v31.V4H());
  __ Fmlal2(v3.V4S(), v30.V4H(), v31.V4H());

  __ Fmlsl(v4.V2S(), v30.V2H(), v31.V2H());
  __ Fmlsl2(v5.V2S(), v30.V2H(), v31.V2H());
  __ Fmlsl(v6.V4S(), v30.V4H(), v31.V4H());
  __ Fmlsl2(v7.V4S(), v30.V4H(), v31.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Fmlal(2S)
    // v0.S[0] = 384 + (65504 * 65504) = 4290774528 (rounded from 4290774400)
    // v0.S[1] = -0 + (-1 * 0) = -0
    // The 0xdeadbeef filler in the upper 64 bits is expected to be cleared by
    // the 2S forms.
    ASSERT_EQUAL_128(0x0000000000000000, 0x800000004f7fc006, v0);
    // Fmlal2(2S)
    // v1.S[0] = -(2^-48 + 2^-71) + (2^-24 * 2^-24) = -2^-71
    // v1.S[1] = 0 + (-2^-24 * 65504) = -0.003904...
    ASSERT_EQUAL_128(0x0000000000000000, 0xbb7fe0009c000000, v1);
    // Fmlal(4S)
    // v2.S[0] = 128 + (65504 * 65504) = 4290774016 (rounded from 4290774144)
    // v2.S[1] = 0 + (-1 * 0) = 0
    // v2.S[2] = 1 + (2^-24 * 2^-24) = 1 (rounded)
    // v2.S[3] = 1 + (-2^-24 * 65504) = 0.996...
    ASSERT_EQUAL_128(0x3f7f00203f800000, 0x000000004f7fc004, v2);
    // Fmlal2(4S)
    // v3.S[0] = 0 + (6.103516e-5 * 0.99...) = 6.100535e-5
    // v3.S[1] = -0 + (0 * -0) = -0
    // v3.S[2] = -0 + (-0 * 0) = -0
    // v3.S[3] = 0 + (-Inf * -Inf) = Inf
    ASSERT_EQUAL_128(0x7f80000080000000, 0x80000000387fe000, v3);

    // Fmlsl results are mostly the same, but negated.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000cf7fc006, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3b7fe0001c000000, v5);
    // In this case: v6.S[1] = -0 - (-1 * 0) = 0 (not the negation of v2.S[1])
    ASSERT_EQUAL_128(0xbf7f0020bf800000, 0x00000000cf7fc004, v6);
    ASSERT_EQUAL_128(0xff80000000000000, 0x00000000b87fe000, v7);
  }
}
9475
9476
TEST(neon_byelement_fhm) {
  // Test basic operation of fmlal{2} and fmlsl{2} (by element). The simulator
  // tests have more comprehensive input sets.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kNEON,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFHM);

  START();
  // Set up multiplication inputs.
  //
  // v30.H[0] = 65504 (max normal)
  // v30.H[1] = -1
  // v30.H[2] = 2^-24 (min subnormal)
  // v30.H[3] = -2^-24 (min subnormal)
  // v30.H[4] = 6.10e-5 (min normal)
  // v30.H[5] = 0
  // v30.H[6] = -0
  // v30.H[7] = -Inf
  __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);

  // Each test instruction should only use one lane of vm, so set up unique
  // registers with poison values in other lanes. The poison NaN avoids the
  // default NaN (so it shouldn't be encountered accidentally), but is otherwise
  // arbitrary.
  VRegister poison = v29;
  __ Movi(v29.V8H(), 0x7f417f417f417f41, 0x7f417f417f417f41);
  // v31.H[0,2,4,...]: 1.000977 (the value just above 1)
  // v31.H[1,3,5,...]: 0.9995117 (the value just below 1)
  __ Movi(v31.V8H(), 0x3bff3c013bff3c01, 0x3bff3c013bff3c01);
  // Set up [v8,v15] as vm inputs.
  // Register v(8 + i) is all poison except for lane i, which is copied from
  // v31.H[i].
  for (int i = 0; i <= 7; i++) {
    VRegister vm(i + 8);
    __ Mov(vm, poison);
    __ Ins(vm.V8H(), i, v31.V8H(), i);
  }

  // Accumulators for use with Fmlal{2}:
  // v0.S[0] = 2^-8
  // v0.S[1] = 1
  __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x3f8000003b800000);
  // v1.S[0] = -1.5 * 2^-49
  // v1.S[1] = 0
  __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7400000);
  // v2.S[0] = 0
  // v2.S[1] = -2^14
  // v2.S[2] = 1.5 * 2^-48
  // v2.S[3] = Inf
  __ Movi(v2.V4S(), 0x7f80000027c00000, 0xc680000000000000);
  // v3.S[0] = 0
  // v3.S[1] = -0
  // v3.S[2] = -0
  // v3.S[3] = 0
  __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
  // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
  // results are just the negation of the Fmlal{2} results.
  __ Fneg(v4.V4S(), v0.V4S());
  __ Fneg(v5.V4S(), v1.V4S());
  __ Fneg(v6.V4S(), v2.V4S());
  __ Fneg(v7.V4S(), v3.V4S());

  // Even element indices select 1.000977; odd indices select 0.9995117.
  __ Fmlal(v0.V2S(), v30.V2H(), v8.H(), 0);
  __ Fmlal2(v1.V2S(), v30.V2H(), v9.H(), 1);
  __ Fmlal(v2.V4S(), v30.V4H(), v10.H(), 2);
  __ Fmlal2(v3.V4S(), v30.V4H(), v11.H(), 3);

  __ Fmlsl(v4.V2S(), v30.V2H(), v12.H(), 4);
  __ Fmlsl2(v5.V2S(), v30.V2H(), v13.H(), 5);
  __ Fmlsl(v6.V4S(), v30.V4H(), v14.H(), 6);
  __ Fmlsl2(v7.V4S(), v30.V4H(), v15.H(), 7);
  END();

  if (CAN_RUN()) {
    RUN();

    // Fmlal(2S)
    // v0.S[0] = 2^-8 + (65504 * 1.000977) = 65567.96875 (rounded)
    // v0.S[1] = 1 + (-1 * 1.000977) = -0.000976...
    ASSERT_EQUAL_128(0x0000000000000000, 0xba80000047800ffc, v0);
    // Fmlal2(2S)
    // v1.S[0] = (-1.5 * 2^-49) + (2^-24 * 0.9995117) = 5.958e-8 (rounded)
    // v1.S[1] = 0 + (-2^-24 * 0.9995117) = -5.958e-8
    ASSERT_EQUAL_128(0x0000000000000000, 0xb37fe000337fdfff, v1);
    // Fmlal(4S)
    // v2.S[0] = 0 + (65504 * 1.000977) = 65567.96875
    // v2.S[1] = -2^14 + (-1 * 1.000977) = -16385 (rounded from -16385.000977)
    // v2.S[2] = (1.5 * 2^-48) + (2^-24 * 1.000977) = 5.966e-8 (rounded up)
    // v2.S[3] = Inf + (-2^-24 * 1.000977) = Inf
    ASSERT_EQUAL_128(0x7f80000033802001, 0xc680020047800ffc, v2);
    // Fmlal2(4S)
    // v3.S[0] = 0 + (6.103516e-5 * 0.9995117) = 6.100535e-5
    // v3.S[1] = -0 + (0 * 0.9995117) = 0
    // v3.S[2] = -0 + (-0 * 0.9995117) = -0
    // v3.S[3] = 0 + (-Inf * 0.9995117) = -Inf
    ASSERT_EQUAL_128(0xff80000080000000, 0x00000000387fe000, v3);

    // Fmlsl results are mostly the same, but negated.
    ASSERT_EQUAL_128(0x0000000000000000, 0x3a800000c7800ffc, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x337fe000b37fdfff, v5);
    ASSERT_EQUAL_128(0xff800000b3802001, 0x46800200c7800ffc, v6);
    // In this case: v7.S[1] = 0 - (0 * 0.9995117) = 0 (not the negation of
    // v3.S[1], which would be -0)
    ASSERT_EQUAL_128(0x7f80000000000000, 0x00000000b87fe000, v7);
  }
}
9581
9582
// Checks scalar Fmulx on S and D registers: one ordinary product, then every
// sign combination of zero multiplied by infinity. Per the expected values
// below, those special cases produce +/-2.0.
TEST(neon_fmulx_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision inputs and operations.
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmulx(s16, s0, s1);
  __ Fmulx(s17, s2, s4);
  __ Fmulx(s18, s2, s5);
  __ Fmulx(s19, s3, s4);
  __ Fmulx(s20, s3, s5);

  // Double-precision inputs and operations, mirroring the sequence above.
  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fmulx(d27, d21, d22);
  __ Fmulx(d28, d23, d25);
  __ Fmulx(d29, d23, d26);
  __ Fmulx(d30, d24, d25);
  __ Fmulx(d31, d24, d26);
  END();

  if (CAN_RUN()) {
    RUN();

    // 2.0 * 0.5, 0 * Inf, 0 * -Inf, -0 * Inf, -0 * -Inf.
    ASSERT_EQUAL_FP32(1.0, s16);
    ASSERT_EQUAL_FP32(2.0, s17);
    ASSERT_EQUAL_FP32(-2.0, s18);
    ASSERT_EQUAL_FP32(-2.0, s19);
    ASSERT_EQUAL_FP32(2.0, s20);
    ASSERT_EQUAL_FP64(1.0, d27);
    ASSERT_EQUAL_FP64(2.0, d28);
    ASSERT_EQUAL_FP64(-2.0, d29);
    ASSERT_EQUAL_FP64(-2.0, d30);
    ASSERT_EQUAL_FP64(2.0, d31);
  }
}
9627
9628
// Checks half-precision Fmulx in the 8H and 4H arrangements: one ordinary
// product, then every sign combination of zero multiplied by infinity.
TEST(neon_fmulx_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision bit patterns, each repeated in the four H lanes of one
  // 64-bit half of a vector register.
  const uint64_t two_x4 = 0x4000400040004000;       // 2.0
  const uint64_t half_x4 = 0x3800380038003800;      // 0.5
  const uint64_t zero_x4 = 0x0000000000000000;      // +0.0
  const uint64_t neg_zero_x4 = 0x8000800080008000;  // -0.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;   // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;   // -Inf
  // Expected results that are not also inputs.
  const uint64_t one_x4 = 0x3c003c003c003c00;      // 1.0
  const uint64_t neg_two_x4 = 0xc000c000c000c000;  // -2.0

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), half_x4, half_x4);
  __ Movi(v2.V2D(), zero_x4, zero_x4);
  __ Movi(v3.V2D(), neg_zero_x4, neg_zero_x4);
  __ Movi(v4.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v5.V2D(), neg_inf_x4, neg_inf_x4);
  __ Fmulx(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmulx(v7.V8H(), v2.V8H(), v4.V8H());
  __ Fmulx(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fmulx(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fmulx(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fmulx(v11.V4H(), v0.V4H(), v1.V4H());
  __ Fmulx(v12.V4H(), v2.V4H(), v4.V4H());
  __ Fmulx(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fmulx(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fmulx(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();
    // 8H forms: 2.0 * 0.5, 0 * Inf, 0 * -Inf, -0 * Inf, -0 * -Inf.
    ASSERT_EQUAL_128(one_x4, one_x4, v6);
    ASSERT_EQUAL_128(two_x4, two_x4, v7);
    ASSERT_EQUAL_128(neg_two_x4, neg_two_x4, v8);
    ASSERT_EQUAL_128(neg_two_x4, neg_two_x4, v9);
    ASSERT_EQUAL_128(two_x4, two_x4, v10);
    // 4H forms: same values in the low half, upper half clear.
    ASSERT_EQUAL_128(0, one_x4, v11);
    ASSERT_EQUAL_128(0, two_x4, v12);
    ASSERT_EQUAL_128(0, neg_two_x4, v13);
    ASSERT_EQUAL_128(0, neg_two_x4, v14);
    ASSERT_EQUAL_128(0, two_x4, v15);
  }
}
9667
9668
// Checks scalar half-precision Fmulx: one ordinary product, then every sign
// combination of zero multiplied by infinity. Per the expected values below,
// those special cases produce +/-2.0.
TEST(neon_fmulx_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fmulx(h6, h0, h1);
  __ Fmulx(h7, h2, h4);
  __ Fmulx(h8, h2, h5);
  __ Fmulx(h9, h3, h4);
  __ Fmulx(h10, h3, h5);
  END();

  if (CAN_RUN()) {
    RUN();
    // 2.0 * 0.5, 0 * Inf, 0 * -Inf, -0 * Inf, -0 * -Inf.
    ASSERT_EQUAL_FP16(Float16(1.0), h6);
    ASSERT_EQUAL_FP16(Float16(2.0), h7);
    ASSERT_EQUAL_FP16(Float16(-2.0), h8);
    ASSERT_EQUAL_FP16(Float16(-2.0), h9);
    ASSERT_EQUAL_FP16(Float16(2.0), h10);
  }
}
9698
// Checks half-precision Fabd (absolute difference) in the 8H and 4H
// arrangements, using finite, signed-zero and infinite operands.
TEST(neon_fabd_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision bit patterns, each repeated in the four H lanes of one
  // 64-bit half of a vector register.
  const uint64_t two_x4 = 0x4000400040004000;       // 2.0
  const uint64_t half_x4 = 0x3800380038003800;      // 0.5
  const uint64_t zero_x4 = 0x0000000000000000;      // +0.0
  const uint64_t neg_zero_x4 = 0x8000800080008000;  // -0.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;   // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;   // -Inf
  // Expected result that is not also an input.
  const uint64_t one_and_half_x4 = 0x3e003e003e003e00;  // 1.5

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), half_x4, half_x4);
  __ Movi(v2.V2D(), zero_x4, zero_x4);
  __ Movi(v3.V2D(), neg_zero_x4, neg_zero_x4);
  __ Movi(v4.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v5.V2D(), neg_inf_x4, neg_inf_x4);

  __ Fabd(v6.V8H(), v1.V8H(), v0.V8H());
  __ Fabd(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fabd(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fabd(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fabd(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fabd(v11.V4H(), v1.V4H(), v0.V4H());
  __ Fabd(v12.V4H(), v2.V4H(), v3.V4H());
  __ Fabd(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fabd(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fabd(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H forms: |0.5 - 2.0|, |0 - -0|, |0 - -Inf|, |-0 - Inf|, |-0 - -Inf|.
    ASSERT_EQUAL_128(one_and_half_x4, one_and_half_x4, v6);
    ASSERT_EQUAL_128(zero_x4, zero_x4, v7);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v8);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v9);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v10);
    // 4H forms: same values in the low half, upper half clear.
    ASSERT_EQUAL_128(0, one_and_half_x4, v11);
    ASSERT_EQUAL_128(0, zero_x4, v12);
    ASSERT_EQUAL_128(0, pos_inf_x4, v13);
    ASSERT_EQUAL_128(0, pos_inf_x4, v14);
    ASSERT_EQUAL_128(0, pos_inf_x4, v15);
  }
}
9739
9740
// Checks scalar half-precision Fabd (absolute difference) with finite,
// signed-zero and infinite operands.
TEST(neon_fabd_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fabd(h16, h1, h0);
  __ Fabd(h17, h2, h3);
  __ Fabd(h18, h2, h5);
  __ Fabd(h19, h3, h4);
  __ Fabd(h20, h3, h5);
  END();

  if (CAN_RUN()) {
    RUN();
    // |0.5 - 2.0|, |0 - -0|, |0 - -Inf|, |-0 - Inf|, |-0 - -Inf|.
    ASSERT_EQUAL_FP16(Float16(1.5), h16);
    ASSERT_EQUAL_FP16(Float16(0.0), h17);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h18);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h19);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h20);
  }
}
9770
9771
// Checks scalar Fabd (absolute difference) on S and D registers with finite,
// signed-zero and infinite operands.
TEST(neon_fabd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision inputs and operations.
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fabd(s16, s1, s0);
  __ Fabd(s17, s2, s3);
  __ Fabd(s18, s2, s5);
  __ Fabd(s19, s3, s4);
  __ Fabd(s20, s3, s5);

  // Double-precision inputs and operations. Note that the first operation
  // takes its operands in the opposite order to the single-precision one
  // (2.0 - 0.5 rather than 0.5 - 2.0); the expected result is the same.
  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fabd(d27, d21, d22);
  __ Fabd(d28, d23, d24);
  __ Fabd(d29, d23, d26);
  __ Fabd(d30, d24, d25);
  __ Fabd(d31, d24, d26);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(1.5, s16);
    ASSERT_EQUAL_FP32(0.0, s17);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
    ASSERT_EQUAL_FP64(1.5, d27);
    ASSERT_EQUAL_FP64(0.0, d28);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);
  }
}
9816
9817
// Checks half-precision Frecps in the 8H and 4H arrangements. The expected
// values correspond to 2.0 - (a * b) for each pair of operands.
TEST(neon_frecps_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision bit patterns, each repeated in the four H lanes of one
  // 64-bit half of a vector register.
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t minus_one_x4 = 0xbc00bc00bc00bc00;   // -1.0
  const uint64_t forty_five_x4 = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  // Expected results that are not also inputs.
  const uint64_t minus_88_x4 = 0xd580d580d580d580;    // -88.0
  const uint64_t forty_seven_x4 = 0x51e051e051e051e0;  // 47.0

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), minus_one_x4, minus_one_x4);
  __ Movi(v2.V2D(), forty_five_x4, forty_five_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), neg_inf_x4, neg_inf_x4);

  __ Frecps(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frecps(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frecps(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frecps(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frecps(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frecps(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frecps(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frecps(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H forms.
    // 2.0 - (2.0 * 45.0) = -88.0
    ASSERT_EQUAL_128(minus_88_x4, minus_88_x4, v5);
    // 2.0 - (-1.0 * 45.0) = 47.0
    ASSERT_EQUAL_128(forty_seven_x4, forty_seven_x4, v6);
    // 2.0 - (2.0 * +Inf) = -Inf
    ASSERT_EQUAL_128(neg_inf_x4, neg_inf_x4, v7);
    // 2.0 - (2.0 * -Inf) = +Inf
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v8);
    // 4H forms: same values in the low half, upper half clear.
    ASSERT_EQUAL_128(0, minus_88_x4, v9);
    ASSERT_EQUAL_128(0, forty_seven_x4, v10);
    ASSERT_EQUAL_128(0, neg_inf_x4, v11);
    ASSERT_EQUAL_128(0, pos_inf_x4, v12);
  }
}
9853
9854
// Checks scalar half-precision Frecps. The expected values correspond to
// 2.0 - (a * b) for each pair of operands.
TEST(neon_frecps_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frecps(h5, h0, h2);
  __ Frecps(h6, h1, h2);
  __ Frecps(h7, h0, h3);
  __ Frecps(h8, h0, h4);
  END();

  if (CAN_RUN()) {
    RUN();

    // 2.0 - (2.0 * 45.0) = -88.0
    ASSERT_EQUAL_FP16(Float16(-88.0), h5);
    // 2.0 - (-1.0 * 45.0) = 47.0
    ASSERT_EQUAL_FP16(Float16(47.0), h6);
    // Infinite operands give an infinity of the opposite sign.
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9883
9884
// Checks half-precision Frsqrts in the 8H and 4H arrangements. The expected
// values correspond to (3.0 - (a * b)) / 2.0 for each pair of operands.
TEST(neon_frsqrts_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision bit patterns, each repeated in the four H lanes of one
  // 64-bit half of a vector register.
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t minus_one_x4 = 0xbc00bc00bc00bc00;   // -1.0
  const uint64_t forty_five_x4 = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  // Expected results that are not also inputs.
  const uint64_t minus_43_5_x4 = 0xd170d170d170d170;   // -43.5
  const uint64_t twenty_four_x4 = 0x4e004e004e004e00;  // 24.0

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), minus_one_x4, minus_one_x4);
  __ Movi(v2.V2D(), forty_five_x4, forty_five_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), neg_inf_x4, neg_inf_x4);

  __ Frsqrts(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frsqrts(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frsqrts(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frsqrts(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frsqrts(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frsqrts(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frsqrts(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frsqrts(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H forms.
    // (3.0 - (2.0 * 45.0)) / 2.0 = -43.5
    ASSERT_EQUAL_128(minus_43_5_x4, minus_43_5_x4, v5);
    // (3.0 - (-1.0 * 45.0)) / 2.0 = 24.0
    ASSERT_EQUAL_128(twenty_four_x4, twenty_four_x4, v6);
    // Infinite operands give an infinity of the opposite sign.
    ASSERT_EQUAL_128(neg_inf_x4, neg_inf_x4, v7);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v8);
    // 4H forms: same values in the low half, upper half clear.
    ASSERT_EQUAL_128(0, minus_43_5_x4, v9);
    ASSERT_EQUAL_128(0, twenty_four_x4, v10);
    ASSERT_EQUAL_128(0, neg_inf_x4, v11);
    ASSERT_EQUAL_128(0, pos_inf_x4, v12);
  }
}
9920
9921
// Checks scalar half-precision Frsqrts. The expected values correspond to
// (3.0 - (a * b)) / 2.0 for each pair of operands.
TEST(neon_frsqrts_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frsqrts(h5, h0, h2);
  __ Frsqrts(h6, h1, h2);
  __ Frsqrts(h7, h0, h3);
  __ Frsqrts(h8, h0, h4);
  END();

  if (CAN_RUN()) {
    RUN();

    // (3.0 - (2.0 * 45.0)) / 2.0 = -43.5
    ASSERT_EQUAL_FP16(Float16(-43.5), h5);
    // (3.0 - (-1.0 * 45.0)) / 2.0 = 24.0
    ASSERT_EQUAL_FP16(Float16(24.0), h6);
    // Infinite operands give an infinity of the opposite sign.
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9950
9951
// Checks half-precision pairwise Faddp in the 8H and 4H arrangements. Each
// input holds the same pair of values in every pair of adjacent H lanes.
TEST(neon_faddp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision lane pairs, written as (odd lane, even lane) and repeated
  // twice per 64-bit half of a vector register.
  const uint64_t one_two_pairs = 0x3c0040003c004000;      // (1.0, 2.0)
  const uint64_t inf_pairs = 0xfc007c00fc007c00;          // (-Inf, +Inf)
  const uint64_t zero_pairs = 0x0000800000008000;         // (+0.0, -0.0)
  const uint64_t nan_pairs = 0x7e007c017e007c01;          // (quiet, signalling)
  // Expected per-lane sums, repeated in four H lanes.
  const uint64_t three_x4 = 0x4200420042004200;        // 1.0 + 2.0
  const uint64_t quiet_nan_x4 = 0x7e007e007e007e00;    // -Inf + +Inf
  const uint64_t zero_x4 = 0x0000000000000000;         // +0.0 + -0.0
  const uint64_t quieted_nan_x4 = 0x7e017e017e017e01;  // Signalling NaN, quieted.

  START();
  __ Movi(v0.V2D(), one_two_pairs, one_two_pairs);
  __ Movi(v1.V2D(), inf_pairs, inf_pairs);
  __ Movi(v2.V2D(), zero_pairs, zero_pairs);
  __ Movi(v3.V2D(), nan_pairs, nan_pairs);

  __ Faddp(v4.V8H(), v1.V8H(), v0.V8H());
  __ Faddp(v5.V8H(), v3.V8H(), v2.V8H());
  __ Faddp(v6.V4H(), v1.V4H(), v0.V4H());
  __ Faddp(v7.V4H(), v3.V4H(), v2.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Sums of the first source's pairs land in the low lanes of the result;
    // sums of the second source's pairs land in the high lanes.
    ASSERT_EQUAL_128(three_x4, quiet_nan_x4, v4);
    ASSERT_EQUAL_128(zero_x4, quieted_nan_x4, v5);
    // 4H forms: two sums from each source, upper 64 bits clear.
    ASSERT_EQUAL_128(0, 0x420042007e007e00, v6);
    ASSERT_EQUAL_128(0, 0x000000007e017e01, v7);
  }
}
9978
9979
// Checks the scalar (pairwise reduction) form of Faddp for 2S and 2D sources.
// Each destination overwrites the low part of its own source register.
TEST(neon_faddp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs, as (S[1], S[0]):
  //   v0: (1.0, 2.0)
  //   v1: (-Inf, +Inf)
  //   v2: (+0.0, -0.0)
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x0000000080000000);
  __ Faddp(s0, v0.V2S());
  __ Faddp(s1, v1.V2S());
  __ Faddp(s2, v2.V2S());

  // Double-precision pairs, as (D[1], D[0]):
  //   v3: (-2.0, 2.0)
  //   v4: (NaN with the sign bit set, NaN)
  //   v5: (+0.0, -0.0)
  __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Faddp(d3, v3.V2D());
  __ Faddp(d4, v4.V2D());
  __ Faddp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(3.0, s0);
    // -Inf + +Inf is expected to give the default NaN.
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
    ASSERT_EQUAL_FP32(0.0, s2);
    ASSERT_EQUAL_FP64(0.0, d3);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
    ASSERT_EQUAL_FP64(0.0, d5);
  }
}
10010
10011
// Checks the scalar (pairwise reduction) form of Faddp for a 2H source. Each
// destination overwrites the low part of its own source register.
TEST(neon_faddp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs, as (H[1], H[0]):
  //   v0: (1.0, 2.0)
  //   v1: (-Inf, +Inf)
  //   v2: (+0.0, -0.0)
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x00008000);
  __ Faddp(h0, v0.V2H());
  __ Faddp(h1, v1.V2H());
  __ Faddp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(3.0), h0);
    // -Inf + +Inf is expected to give the default NaN.
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h1);
    ASSERT_EQUAL_FP16(Float16(0.0), h2);
  }
}
10034
10035
// Checks the scalar (pairwise reduction) form of Fmaxp for 2S and 2D sources,
// including a pair that contains a NaN. Each destination overwrites the low
// part of its own source register.
TEST(neon_fmaxp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs, as (S[1], S[0]):
  //   v0: (1.0, 2.0)
  //   v1: (-Inf, +Inf)
  //   v2: (NaN, -Inf)
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxp(s0, v0.V2S());
  __ Fmaxp(s1, v1.V2S());
  __ Fmaxp(s2, v2.V2S());

  // Double-precision pairs, as (D[1], D[0]):
  //   v3: (1.0, 2.0)
  //   v4: (-Inf, +Inf)
  //   v5: (+Inf, NaN)
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fmaxp(d3, v3.V2D());
  __ Fmaxp(d4, v4.V2D());
  __ Fmaxp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    // A NaN in the pair is expected to produce the default NaN.
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10066
10067
// Checks the scalar (pairwise reduction) form of Fmaxp for a 2H source,
// including a pair that contains a NaN. Each destination overwrites the low
// part of its own source register.
TEST(neon_fmaxp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Half-precision pairs, as (H[1], H[0]):
  //   v0: (1.0, 2.0)
  //   v1: (-Inf, +Inf)
  //   v2: (NaN, -Inf)
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxp(h0, v0.V2H());
  __ Fmaxp(h1, v1.V2H());
  __ Fmaxp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    // A NaN in the pair is expected to produce the default NaN.
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10090
10091
// Checks half-precision Fmax in the 8H and 4H arrangements, with finite,
// infinite and NaN operands.
TEST(neon_fmax_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision bit patterns, each repeated in the four H lanes of one
  // 64-bit half of a vector register.
  const uint64_t one_x4 = 0x3c003c003c003c00;         // 1.0
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t quiet_nan_x4 = 0x7e007e007e007e00;   // Quiet NaN.
  const uint64_t signal_nan_x4 = 0x7c017c017c017c01;  // Signalling NaN.
  // Expected result that is not also an input.
  const uint64_t quieted_nan_x4 = 0x7e017e017e017e01;  // signal_nan_x4, quieted.

  START();
  __ Movi(v0.V2D(), one_x4, one_x4);
  __ Movi(v1.V2D(), two_x4, two_x4);
  __ Movi(v2.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), quiet_nan_x4, quiet_nan_x4);
  __ Movi(v5.V2D(), signal_nan_x4, signal_nan_x4);

  __ Fmax(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmax(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmax(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmax(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmax(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmax(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H forms: max(1.0, 2.0), max(-Inf, +Inf), then a quiet and a signalling
    // NaN operand, both of which are expected to yield a NaN.
    ASSERT_EQUAL_128(two_x4, two_x4, v6);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v7);
    ASSERT_EQUAL_128(quiet_nan_x4, quiet_nan_x4, v8);
    ASSERT_EQUAL_128(quieted_nan_x4, quieted_nan_x4, v9);
    // 4H forms: same values in the low half, upper half clear.
    ASSERT_EQUAL_128(0, two_x4, v10);
    ASSERT_EQUAL_128(0, pos_inf_x4, v11);
    ASSERT_EQUAL_128(0, quiet_nan_x4, v12);
    ASSERT_EQUAL_128(0, quieted_nan_x4, v13);
  }
}
10128
10129
// Checks half-precision pairwise Fmaxp in the 8H and 4H arrangements. Each
// input holds the same pair of values in every pair of adjacent H lanes.
TEST(neon_fmaxp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision lane pairs, written as (odd lane, even lane) and repeated
  // twice per 64-bit half of a vector register.
  const uint64_t one_two_pairs = 0x3c0040003c004000;   // (1.0, 2.0)
  const uint64_t inf_pairs = 0xfc007c00fc007c00;       // (-Inf, +Inf)
  const uint64_t qnan_one_pairs = 0x7e003c007e003c00;  // (quiet NaN, 1.0)
  const uint64_t snan_two_pairs = 0x7c0140007c014000;  // (signalling NaN, 2.0)
  // Expected per-lane maxima, repeated in four H lanes.
  const uint64_t two_x4 = 0x4000400040004000;          // 2.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;      // +Inf
  const uint64_t quiet_nan_x4 = 0x7e007e007e007e00;    // Quiet NaN.
  const uint64_t quieted_nan_x4 = 0x7e017e017e017e01;  // Signalling NaN, quieted.

  START();
  __ Movi(v0.V2D(), one_two_pairs, one_two_pairs);
  __ Movi(v1.V2D(), inf_pairs, inf_pairs);
  __ Movi(v2.V2D(), qnan_one_pairs, qnan_one_pairs);
  __ Movi(v3.V2D(), snan_two_pairs, snan_two_pairs);

  __ Fmaxp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Results for the first source's pairs land in the low lanes; results for
    // the second source's pairs land in the high lanes. Pairs containing a NaN
    // are expected to yield a NaN.
    ASSERT_EQUAL_128(pos_inf_x4, two_x4, v6);
    ASSERT_EQUAL_128(quieted_nan_x4, quiet_nan_x4, v7);
    // 4H forms: two results from each source, upper 64 bits clear.
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10156
10157
// Checks half-precision Fmaxnm in the 8H and 4H arrangements, with finite,
// infinite and NaN operands.
TEST(neon_fmaxnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision bit patterns, each repeated in the four H lanes of one
  // 64-bit half of a vector register.
  const uint64_t one_x4 = 0x3c003c003c003c00;         // 1.0
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t quiet_nan_x4 = 0x7e007e007e007e00;   // Quiet NaN.
  const uint64_t signal_nan_x4 = 0x7c017c017c017c01;  // Signalling NaN.
  // Expected result that is not also an input.
  const uint64_t quieted_nan_x4 = 0x7e017e017e017e01;  // signal_nan_x4, quieted.

  START();
  __ Movi(v0.V2D(), one_x4, one_x4);
  __ Movi(v1.V2D(), two_x4, two_x4);
  __ Movi(v2.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), quiet_nan_x4, quiet_nan_x4);
  __ Movi(v5.V2D(), signal_nan_x4, signal_nan_x4);

  __ Fmaxnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmaxnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmaxnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmaxnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmaxnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H forms: max(1.0, 2.0) and max(-Inf, +Inf), then the NaN cases. With a
    // quiet NaN operand the numeric operand (1.0) is expected; with a
    // signalling NaN operand a NaN is expected.
    ASSERT_EQUAL_128(two_x4, two_x4, v6);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v7);
    ASSERT_EQUAL_128(one_x4, one_x4, v8);
    ASSERT_EQUAL_128(quieted_nan_x4, quieted_nan_x4, v9);
    // 4H forms: same values in the low half, upper half clear.
    ASSERT_EQUAL_128(0, two_x4, v10);
    ASSERT_EQUAL_128(0, pos_inf_x4, v11);
    ASSERT_EQUAL_128(0, one_x4, v12);
    ASSERT_EQUAL_128(0, quieted_nan_x4, v13);
  }
}
10194
10195
// Vector FP16 pairwise Fmaxnmp on the 8H and 4H arrangements.
TEST(neon_fmaxnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input repeats one pair of adjacent H lanes (shown high, low):
  //   v0: 0x3c00 (1.0), 0x4000 (2.0)
  //   v1: 0xfc00 (-infinity), 0x7c00 (+infinity)
  //   v2: 0x7e00 (quiet NaN), 0x3c00 (1.0)
  //   v3: 0x7c01 (signalling NaN), 0x4000 (2.0)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result holds the pair results of the first source
    // and the high half those of the second source. Expected pair results:
    // v0 -> 2.0, v1 -> +infinity, v2 -> 1.0 (quiet NaN ignored),
    // v3 -> 0x7e01 (quietened signalling NaN).
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10222
10223
// Scalar pairwise Fmaxnmp: reduces the two S lanes (or two D lanes) of the
// source vector to a single scalar result.
TEST(neon_fmaxnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (high lane, low lane):
  //   d0: 1.0, 2.0    d1: -infinity, +infinity    d2: quiet NaN, -infinity
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxnmp(s0, v0.V2S());
  __ Fmaxnmp(s1, v1.V2S());
  __ Fmaxnmp(s2, v2.V2S());

  // Double-precision pairs with the same values as above.
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fmaxnmp(d3, v3.V2D());
  __ Fmaxnmp(d4, v4.V2D());
  __ Fmaxnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // In the third case of each group the quiet NaN is ignored, so the other
    // lane (-infinity) is the expected result.
    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10254
10255
// Scalar pairwise Fmaxnmp on FP16: reduces the two H lanes of the source to a
// single H result.
TEST(neon_fmaxnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // FP16 pairs (high lane, low lane):
  //   s0: 1.0, 2.0    s1: -infinity, +infinity    s2: quiet NaN, -infinity
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxnmp(h0, v0.V2H());
  __ Fmaxnmp(h1, v1.V2H());
  __ Fmaxnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // For h2 the quiet NaN is ignored, leaving -infinity.
    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10278
10279
// Scalar pairwise Fminp: reduces the two S lanes (or two D lanes) of the
// source vector to a single scalar result.
TEST(neon_fminp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (high lane, low lane):
  //   d0: 1.0, 2.0    d1: -infinity, +infinity    d2: quiet NaN, -infinity
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminp(s0, v0.V2S());
  __ Fminp(s1, v1.V2S());
  __ Fminp(s2, v2.V2S());

  // Double-precision pairs (high lane, low lane):
  //   v3: 1.0, 2.0    v4: -infinity, +infinity    v5: +infinity, quiet NaN
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fminp(d3, v3.V2D());
  __ Fminp(d4, v4.V2D());
  __ Fminp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // A quiet NaN in either lane is expected to give a NaN result.
    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10310
10311
// Scalar pairwise Fminp on FP16: reduces the two H lanes of the source to a
// single H result.
TEST(neon_fminp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // FP16 pairs (high lane, low lane):
  //   s0: 1.0, 2.0    s1: -infinity, +infinity    s2: quiet NaN, -infinity
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminp(h0, v0.V2H());
  __ Fminp(h1, v1.V2H());
  __ Fminp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // For h2 the quiet NaN lane is expected to give a NaN result.
    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10334
10335
// Vector FP16 Fmin on the 8H and 4H arrangements, covering ordinary values,
// infinities, a quiet NaN operand and a signalling NaN operand.
TEST(neon_fmin_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every H lane of an input register holds the same FP16 bit pattern:
  //   v0: 0x3c00 (1.0)         v1: 0x4000 (2.0)
  //   v2: 0xfc00 (-infinity)   v3: 0x7c00 (+infinity)
  //   v4: 0x7e00 (quiet NaN)   v5: 0x7c01 (signalling NaN)
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  // The same four operand pairings are used for the 8H and then the 4H form.
  __ Fmin(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmin(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmin(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmin(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmin(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmin(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmin(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected per lane: 1.0; -infinity; 0x7e00 (the quiet NaN operand is the
    // result); 0x7e01 (the signalling NaN comes back with its quiet bit set).
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // The 4H forms give the same lanes, with the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10372
10373
// Vector FP16 pairwise Fminp on the 8H and 4H arrangements.
TEST(neon_fminp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input repeats one pair of adjacent H lanes (shown high, low):
  //   v0: 0x3c00 (1.0), 0x4000 (2.0)
  //   v1: 0xfc00 (-infinity), 0x7c00 (+infinity)
  //   v2: 0x7e00 (quiet NaN), 0x3c00 (1.0)
  //   v3: 0x7c01 (signalling NaN), 0x4000 (2.0)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result holds the pair results of the first source
    // and the high half those of the second source. Expected pair results:
    // v0 -> 1.0, v1 -> -infinity, v2 -> 0x7e00 (quiet NaN),
    // v3 -> 0x7e01 (quietened signalling NaN).
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10400
10401
// Vector FP16 Fminnm on the 8H and 4H arrangements, covering ordinary values,
// infinities, a quiet NaN operand and a signalling NaN operand.
TEST(neon_fminnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every H lane of an input register holds the same FP16 bit pattern:
  //   v0: 0x3c00 (1.0)         v1: 0x4000 (2.0)
  //   v2: 0xfc00 (-infinity)   v3: 0x7c00 (+infinity)
  //   v4: 0x7e00 (quiet NaN)   v5: 0x7c01 (signalling NaN)
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  // The same four operand pairings are used for the 8H and then the 4H form.
  __ Fminnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fminnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fminnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fminnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fminnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected per lane: 1.0; -infinity; 1.0 (the quiet NaN operand is
    // ignored); 0x7e01 (the signalling NaN comes back with its quiet bit set).
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // The 4H forms give the same lanes, with the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10438
10439
// Vector FP16 pairwise Fminnmp on the 8H and 4H arrangements.
TEST(neon_fminnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each input repeats one pair of adjacent H lanes (shown high, low):
  //   v0: 0x3c00 (1.0), 0x4000 (2.0)
  //   v1: 0xfc00 (-infinity), 0x7c00 (+infinity)
  //   v2: 0x7e00 (quiet NaN), 0x3c00 (1.0)
  //   v3: 0x7c01 (signalling NaN), 0x4000 (2.0)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result holds the pair results of the first source
    // and the high half those of the second source. Expected pair results:
    // v0 -> 1.0, v1 -> -infinity, v2 -> 1.0 (quiet NaN ignored),
    // v3 -> 0x7e01 (quietened signalling NaN).
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10466
10467
// Scalar pairwise Fminnmp: reduces the two S lanes (or two D lanes) of the
// source vector to a single scalar result.
TEST(neon_fminnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (high lane, low lane):
  //   d0: 1.0, 2.0    d1: -infinity, +infinity    d2: quiet NaN, -infinity
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminnmp(s0, v0.V2S());
  __ Fminnmp(s1, v1.V2S());
  __ Fminnmp(s2, v2.V2S());

  // Double-precision pairs with the same values as above.
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fminnmp(d3, v3.V2D());
  __ Fminnmp(d4, v4.V2D());
  __ Fminnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // In the third case of each group the quiet NaN is ignored, so the other
    // lane (-infinity) is the expected result.
    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10498
10499
// Scalar pairwise Fminnmp on FP16: reduces the two H lanes of the source to a
// single H result.
TEST(neon_fminnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // FP16 pairs (high lane, low lane):
  //   s0: 1.0, 2.0    s1: -infinity, +infinity    s2: quiet NaN, -infinity
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminnmp(h0, v0.V2H());
  __ Fminnmp(h1, v1.V2H());
  __ Fminnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // For h2 the quiet NaN is ignored, leaving -infinity.
    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10522
Float16ToV4H(Float16 f)10523 static uint64_t Float16ToV4H(Float16 f) {
10524 uint64_t bits = static_cast<uint64_t>(Float16ToRawbits(f));
10525 return (bits << 48) | (bits << 32) | (bits << 16) | bits;
10526 }
10527
10528
// Runs Fmin, Fmax, Fminnm and Fmaxnm on the FP16 operands `n` and `m`, each in
// three forms (scalar H, vector 4H and vector 8H), and checks every result
// against the expectation supplied by the caller:
//   min, max:      expected results of Fmin and Fmax.
//   minnm, maxnm:  expected results of Fminnm and Fmaxnm.
static void FminFmaxFloat16Helper(Float16 n,
                                  Float16 m,
                                  Float16 min,
                                  Float16 max,
                                  Float16 minnm,
                                  Float16 maxnm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  // Load the operands: first as scalars, then as 8H vectors written to the
  // same registers (v0 for `n`, v1 for `m`).
  __ Fmov(h0, n);
  __ Fmov(h1, m);
  __ Fmov(v0.V8H(), n);
  __ Fmov(v1.V8H(), m);
  // For each operation: scalar result in h28-h31, then the 4H and 8H results
  // in consecutive registers from v2 to v9.
  __ Fmin(h28, h0, h1);
  __ Fmin(v2.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v3.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(h29, h0, h1);
  __ Fmax(v4.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v5.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(h30, h0, h1);
  __ Fminnm(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v7.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(h31, h0, h1);
  __ Fmaxnm(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v9.V8H(), v0.V8H(), v1.V8H());
  END();

  // Expected vector images: the expected scalar replicated into four H lanes.
  uint64_t min_vec = Float16ToV4H(min);
  uint64_t max_vec = Float16ToV4H(max);
  uint64_t minnm_vec = Float16ToV4H(minnm);
  uint64_t maxnm_vec = Float16ToV4H(maxnm);

  if (CAN_RUN()) {
    RUN();

    // Scalar forms.
    ASSERT_EQUAL_FP16(min, h28);
    ASSERT_EQUAL_FP16(max, h29);
    ASSERT_EQUAL_FP16(minnm, h30);
    ASSERT_EQUAL_FP16(maxnm, h31);


    // Vector forms: a 4H result occupies the low 64 bits with the high 64
    // bits zero; an 8H result fills both halves.
    ASSERT_EQUAL_128(0, min_vec, v2);
    ASSERT_EQUAL_128(min_vec, min_vec, v3);
    ASSERT_EQUAL_128(0, max_vec, v4);
    ASSERT_EQUAL_128(max_vec, max_vec, v5);
    ASSERT_EQUAL_128(0, minnm_vec, v6);
    ASSERT_EQUAL_128(minnm_vec, minnm_vec, v7);
    ASSERT_EQUAL_128(0, maxnm_vec, v8);
    ASSERT_EQUAL_128(maxnm_vec, maxnm_vec, v9);
  }
}
10583
MinMaxHelper(Float16 n,Float16 m,bool min,Float16 quiet_nan_substitute=Float16 (0.0))10584 static Float16 MinMaxHelper(Float16 n,
10585 Float16 m,
10586 bool min,
10587 Float16 quiet_nan_substitute = Float16(0.0)) {
10588 const uint64_t kFP16QuietNaNMask = 0x0200;
10589 uint16_t raw_n = Float16ToRawbits(n);
10590 uint16_t raw_m = Float16ToRawbits(m);
10591
10592 if (IsSignallingNaN(n)) {
10593 // n is signalling NaN.
10594 return RawbitsToFloat16(raw_n | kFP16QuietNaNMask);
10595 } else if (IsSignallingNaN(m)) {
10596 // m is signalling NaN.
10597 return RawbitsToFloat16(raw_m | kFP16QuietNaNMask);
10598 } else if (IsZero(quiet_nan_substitute)) {
10599 if (IsNaN(n)) {
10600 // n is quiet NaN.
10601 return n;
10602 } else if (IsNaN(m)) {
10603 // m is quiet NaN.
10604 return m;
10605 }
10606 } else {
10607 // Substitute n or m if one is quiet, but not both.
10608 if (IsNaN(n) && !IsNaN(m)) {
10609 // n is quiet NaN: replace with substitute.
10610 n = quiet_nan_substitute;
10611 } else if (!IsNaN(n) && IsNaN(m)) {
10612 // m is quiet NaN: replace with substitute.
10613 m = quiet_nan_substitute;
10614 }
10615 }
10616
10617 uint16_t sign_mask = 0x8000;
10618 if (IsZero(n) && IsZero(m) && ((raw_n & sign_mask) != (raw_m & sign_mask))) {
10619 return min ? Float16(-0.0) : Float16(0.0);
10620 }
10621
10622 if (FPToDouble(n, kIgnoreDefaultNaN) < FPToDouble(m, kIgnoreDefaultNaN)) {
10623 return min ? n : m;
10624 }
10625 return min ? m : n;
10626 }
10627
// Checks FP16 Fmin, Fmax, Fminnm and Fmaxnm (scalar and vector forms, via
// FminFmaxFloat16Helper): first against a table of hand-written expectations,
// then for every ordered pair taken from a list of interesting inputs, with
// MinMaxHelper providing the expected results.
TEST(fmax_fmin_h) {
  // NaNs with non-default payloads, so that payload preservation is checked.
  Float16 snan = RawbitsToFloat16(0x7c12);
  Float16 qnan = RawbitsToFloat16(0x7e34);

  // What each NaN should look like when it is the result: the signalling NaN
  // gains the quiet bit (0x0200), the quiet NaN is unchanged.
  Float16 snan_processed = RawbitsToFloat16(0x7e12);
  Float16 qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests: operands followed by the expected result of each
  // operation. Any signalling NaN operand dominates all four operations; a
  // lone quiet NaN is the result of min/max but is ignored by minnm/maxnm.
  struct BootstrapCase {
    Float16 n;
    Float16 m;
    Float16 min;
    Float16 max;
    Float16 minnm;
    Float16 maxnm;
  };
  const BootstrapCase bootstrap_cases[] = {
      {Float16(0), Float16(0), Float16(0), Float16(0), Float16(0), Float16(0)},
      {Float16(0), Float16(1), Float16(0), Float16(1), Float16(0), Float16(1)},
      {kFP16PositiveInfinity,
       kFP16NegativeInfinity,
       kFP16NegativeInfinity,
       kFP16PositiveInfinity,
       kFP16NegativeInfinity,
       kFP16PositiveInfinity},
      {snan,
       Float16(0),
       snan_processed,
       snan_processed,
       snan_processed,
       snan_processed},
      {Float16(0),
       snan,
       snan_processed,
       snan_processed,
       snan_processed,
       snan_processed},
      {qnan,
       Float16(0),
       qnan_processed,
       qnan_processed,
       Float16(0),
       Float16(0)},
      {Float16(0),
       qnan,
       qnan_processed,
       qnan_processed,
       Float16(0),
       Float16(0)},
      {qnan,
       snan,
       snan_processed,
       snan_processed,
       snan_processed,
       snan_processed},
      {snan,
       qnan,
       snan_processed,
       snan_processed,
       snan_processed,
       snan_processed},
  };
  for (const BootstrapCase& c : bootstrap_cases) {
    FminFmaxFloat16Helper(c.n, c.m, c.min, c.max, c.minnm, c.maxnm);
  }

  // Iterate over all combinations of inputs.
  Float16 inputs[] = {RawbitsToFloat16(0x7bff),
                      RawbitsToFloat16(0x0400),
                      Float16(1.0),
                      Float16(0.0),
                      RawbitsToFloat16(0xfbff),
                      RawbitsToFloat16(0x8400),
                      Float16(-1.0),
                      Float16(-0.0),
                      kFP16PositiveInfinity,
                      kFP16NegativeInfinity,
                      kFP16QuietNaN,
                      kFP16SignallingNaN};

  for (Float16 n : inputs) {
    for (Float16 m : inputs) {
      // The NaN-ignoring forms are modelled by substituting the identity of
      // the operation (+infinity for min, -infinity for max) for a lone
      // quiet NaN.
      Float16 expected_min = MinMaxHelper(n, m, true);
      Float16 expected_max = MinMaxHelper(n, m, false);
      Float16 expected_minnm = MinMaxHelper(n, m, true, kFP16PositiveInfinity);
      Float16 expected_maxnm = MinMaxHelper(n, m, false, kFP16NegativeInfinity);
      FminFmaxFloat16Helper(n,
                            m,
                            expected_min,
                            expected_max,
                            expected_minnm,
                            expected_maxnm);
    }
  }
}
10726
// Vector Frint32x, Frint64x, Frint32z and Frint64z on the 2S, 4S and 2D
// arrangements, using in-range fractional values and infinities.
TEST(neon_frint_saturating) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kFrintToFixedSizedInt);

  START();

  // Inputs, highest lane first:
  //   v0 (S lanes): 1.0, ~1.1, 1.5, ~1.9 (the 2S forms only read the low two)
  //   v1 (S lanes): 0.15625, -0.15625, +infinity, -infinity
  //   v2 (D lanes): -infinity, +infinity
  __ Movi(v0.V2D(), 0x3f8000003f8ccccd, 0x3fc000003ff33333);
  __ Movi(v1.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v2.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Frint32x(v16.V2S(), v0.V2S());
  __ Frint32x(v17.V4S(), v1.V4S());
  __ Frint32x(v18.V2D(), v2.V2D());
  __ Frint64x(v19.V2S(), v0.V2S());
  __ Frint64x(v20.V4S(), v1.V4S());
  __ Frint64x(v21.V2D(), v2.V2D());
  __ Frint32z(v22.V2S(), v0.V2S());
  __ Frint32z(v23.V4S(), v1.V4S());
  __ Frint32z(v24.V2D(), v2.V2D());
  __ Frint64z(v25.V2S(), v0.V2S());
  __ Frint64z(v26.V4S(), v1.V4S());
  __ Frint64z(v27.V2D(), v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // Expectations encoded below:
    //  - 1.5 and ~1.9 become 2.0 (0x40000000) with the x forms and 1.0
    //    (0x3f800000) with the z forms.
    //  - +/-0.15625 become +0.0 and -0.0 (sign preserved).
    //  - Infinities of either sign become -2^31 for the 32-bit forms
    //    (0xcf000000 / 0xc1e0000000000000) and -2^63 for the 64-bit forms
    //    (0xdf000000 / 0xc3e0000000000000).
    ASSERT_EQUAL_128(0x0000000000000000, 0x4000000040000000, q16);
    ASSERT_EQUAL_128(0x0000000080000000, 0xcf000000cf000000, q17);
    ASSERT_EQUAL_128(0xc1e0000000000000, 0xc1e0000000000000, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x4000000040000000, q19);
    ASSERT_EQUAL_128(0x0000000080000000, 0xdf000000df000000, q20);
    ASSERT_EQUAL_128(0xc3e0000000000000, 0xc3e0000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f8000003f800000, q22);
    ASSERT_EQUAL_128(0x0000000080000000, 0xcf000000cf000000, q23);
    ASSERT_EQUAL_128(0xc1e0000000000000, 0xc1e0000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f8000003f800000, q25);
    ASSERT_EQUAL_128(0x0000000080000000, 0xdf000000df000000, q26);
    ASSERT_EQUAL_128(0xc3e0000000000000, 0xc3e0000000000000, q27);
  }
}
10769
10770
// Table lookups with Tbl and Tbx, using tables of one to four registers and
// both the 16B and 8B index/destination arrangements.
TEST(neon_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // Table registers. The multi-register tables below are built from the
  // consecutive (wrapping) sequence v30, v31, v0, v1.
  __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
  __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
  __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
  __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);

  // Index vectors, one per table size (v4 for one register, up to v7 for
  // four). Each mixes in-range and out-of-range byte indices.
  __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
  __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
  __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
  __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);

  // Pre-fill the Tbl destinations with arbitrary values.
  __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
  __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
  __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());

  // Pre-fill the Tbx destinations with the same values as were used for Tbl,
  // so that the two sets of results can be compared directly.
  __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
  __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
  __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Tbl: bytes selected by an out-of-range index read as zero. The 8B forms
    // also leave the upper 64 bits of the destination zero.
    ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
    ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
    ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
    ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);

    // Tbx: identical to the Tbl results except that bytes selected by an
    // out-of-range index keep the value the destination held beforehand. The
    // upper 64 bits are still zero for the 8B forms.
    ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
    ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
    ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
    ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);
  }
}
10844
// Vector Usdot (first source unsigned, second source signed), checked against
// Udot and Sdot on inputs where the results must coincide, and against a
// hand-built case whose dot product is zero.
TEST(neon_usdot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kDotProduct,
                      CPUFeatures::kI8MM);

  START();
  __ Movi(v0.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);
  __ Movi(v1.V2D(), 0x7f7f7f7f7f7f7f7f, 0x7f7f7f7f7f7f7f7f);
  __ Movi(v2.V2D(), 0x8080808080808080, 0x8080808080808080);
  // Zero the accumulators v3-v11.
  __ Movi(v3.V2D(), 0, 0);
  __ Mov(q4, q3);
  __ Mov(q5, q3);
  __ Mov(q6, q3);
  __ Mov(q7, q3);
  __ Mov(q8, q3);
  __ Mov(q9, q3);
  __ Mov(q10, q3);
  __ Mov(q11, q3);

  // Test Usdot against Udot/Sdot over the range of inputs where they should be
  // equal.
  // Each Cmeq overwrites the Usdot result with all-ones in every S lane that
  // matched the reference result.
  // The second source (0x7f bytes) has the same value signed or unsigned, so
  // Usdot must match Udot.
  __ Usdot(v3.V2S(), v0.V8B(), v1.V8B());
  __ Udot(v4.V2S(), v0.V8B(), v1.V8B());
  __ Cmeq(v3.V4S(), v3.V4S(), v4.V4S());
  __ Usdot(v5.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v6.V4S(), v0.V16B(), v1.V16B());
  __ Cmeq(v5.V4S(), v5.V4S(), v6.V4S());

  // The first source (0x7f bytes) has the same value signed or unsigned, so
  // Usdot must match Sdot.
  __ Usdot(v7.V2S(), v1.V8B(), v2.V8B());
  __ Sdot(v8.V2S(), v1.V8B(), v2.V8B());
  __ Cmeq(v7.V4S(), v7.V4S(), v8.V4S());
  __ Usdot(v9.V4S(), v1.V16B(), v2.V16B());
  __ Sdot(v10.V4S(), v1.V16B(), v2.V16B());
  __ Cmeq(v9.V4S(), v9.V4S(), v10.V4S());

  // Construct values which, when interpreted correctly as signed/unsigned,
  // should give a zero result for dot product.
  // Per S lane: (2 * -127) + (254 * 1) + (128 * -1) + (2 * 64) = 0.
  __ Mov(w0, 0x8101ff40);  // [-127, 1, -1, 64] as signed bytes.
  __ Mov(w1, 0x02fe8002);  // [2, 254, 128, 2] as unsigned bytes.
  __ Dup(v0.V4S(), w0);
  __ Dup(v1.V4S(), w1);
  __ Usdot(v11.V4S(), v1.V16B(), v0.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // All comparison lanes must have matched, and the constructed dot product
    // must have left its zeroed accumulator at zero.
    ASSERT_EQUAL_128(-1, -1, q3);
    ASSERT_EQUAL_128(-1, -1, q5);
    ASSERT_EQUAL_128(-1, -1, q7);
    ASSERT_EQUAL_128(-1, -1, q9);
    ASSERT_EQUAL_128(0, 0, q11);
  }
}
10900
// By-element Usdot and Sudot, checked against the vector form of Usdot applied
// to a register holding the selected element duplicated into every S lane.
TEST(neon_usdot_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kI8MM);

  START();
  __ Movi(v0.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v1.V2D(), 0x4242424242424242, 0x5555aaaaaaaa5555);

  // Test element Usdot against vector variant.
  // v2, v3 and v4 hold S elements 0, 1 and 3 of v1, each replicated into all
  // four S lanes.
  __ Dup(v2.V4S(), v1.V4S(), 0);
  __ Dup(v3.V4S(), v1.V4S(), 1);
  __ Dup(v4.V4S(), v1.V4S(), 3);

  // Every accumulator starts as a copy of q1, so that both forms of each
  // comparison accumulate onto the same non-zero value. Cmeq then overwrites
  // the by-element result with all-ones in each matching S lane.
  __ Mov(q10, q1);
  __ Usdot(v10.V2S(), v0.V8B(), v2.V8B());
  __ Mov(q11, q1);
  __ Usdot(v11.V2S(), v0.V8B(), v1.S4B(), 0);
  __ Cmeq(v11.V4S(), v11.V4S(), v10.V4S());

  __ Mov(q12, q1);
  __ Usdot(v12.V4S(), v0.V16B(), v3.V16B());
  __ Mov(q13, q1);
  __ Usdot(v13.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Cmeq(v13.V4S(), v13.V4S(), v12.V4S());

  // By-element Sudot is compared with vector Usdot whose source operands are
  // swapped.
  __ Mov(q14, q1);
  __ Usdot(v14.V4S(), v4.V16B(), v0.V16B());
  __ Mov(q15, q1);
  __ Sudot(v15.V4S(), v0.V16B(), v1.S4B(), 3);
  __ Cmeq(v15.V4S(), v15.V4S(), v14.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // Every lane of every comparison must have matched.
    ASSERT_EQUAL_128(-1, -1, q11);
    ASSERT_EQUAL_128(-1, -1, q13);
    ASSERT_EQUAL_128(-1, -1, q15);
  }
}
10940
// Checks that scalar (SISD) instructions writing a B-sized result clear the
// rest of the destination vector register.
//
// For each instruction, q9 is first filled with 0x55 bytes, the instruction
// (emitted from its raw encoding, with b9 as destination) is executed, and q9
// is then OR-ed into the accumulator q30. Any byte above the B result that
// was not cleared therefore leaves non-zero bits in q30.
TEST(zero_high_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kRDM);
  START();

  // x0: source of input byte patterns; x1: loop counter (four iterations);
  // q30: accumulator of leftover destination bits.
  __ Mov(x0, 0x55aa42ffaa42ff55);
  __ Mov(x1, 4);
  __ Movi(q30.V16B(), 0);

  // Iterate over the SISD instructions using different input values on each
  // loop.
  Label loop;
  __ Bind(&loop);

  // Fill q0, q1 and q2 each with one byte of x0, rotating x0 by a byte after
  // each so that every register, and every iteration, sees a different value.
  __ Dup(q0.V16B(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q1.V16B(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q2.V16B(), w0);
  __ Ror(x0, x0, 8);

  {
    // 27 instructions under test, three emitted instructions for each.
    ExactAssemblyScope scope(&masm, 81 * kInstructionSize);
    __ movi(q9.V16B(), 0x55);
    __ dci(0x5e010409);  // mov b9, v0.b[0]
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5e207809);  // sqabs b9, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5e200c29);  // sqadd b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e207809);  // sqneg b9, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e008429);  // sqrdmlah b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e008c29);  // sqrdmlsh b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5e205c29);  // sqrshl b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5f089c09);  // sqrshrn b9, h0, #8
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7f088c09);  // sqrshrun b9, h0, #8
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5e204c29);  // sqshl b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5f087409);  // sqshl b9, b0, #0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7f086409);  // sqshlu b9, b0, #0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5f089409);  // sqshrn b9, h0, #8
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7f088409);  // sqshrun b9, h0, #8
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5e202c29);  // sqsub b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5e214809);  // sqxtn b9, h0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e212809);  // sqxtun b9, h0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x5e203809);  // suqadd b9, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e200c29);  // uqadd b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e205c29);  // uqrshl b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7f089c09);  // uqrshrn b9, h0, #8
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e204c29);  // uqshl b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7f087409);  // uqshl b9, b0, #0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7f089409);  // uqshrn b9, h0, #8
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e202c29);  // uqsub b9, b1, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e214809);  // uqxtn b9, h0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());

    __ movi(q9.V16B(), 0x55);
    __ dci(0x7e203809);  // usqadd b9, b0
    __ orr(q30.V16B(), q30.V16B(), q9.V16B());
  }
  __ Sub(x1, x1, 1);
  __ Cbnz(x1, &loop);

  // Byte lane 0 holds the accumulated B results themselves, which are not
  // under test here: clear it so that only bits above the result remain.
  __ Ins(q30.V16B(), 0, wzr);

  END();
  if (CAN_RUN()) {
    RUN();
    // No instruction may have left anything above its B-sized result.
    ASSERT_EQUAL_128(0, 0, q30);
  }
}
11082
// Check that scalar instructions writing an H-sized destination leave every
// bit of the vector register above the low 16-bit lane clear.
//
// For each encoding, q9 is first filled with 0x55 bytes, the instruction under
// test writes h9, and the whole of q9 is ORed into the accumulator q30. Once
// all iterations are done, lane 0 of q30 (the only lane allowed to hold a
// result) is cleared, so q30 must read as zero.
TEST(zero_high_h) {
  // Raw encodings of the instructions under test. Every one targets h9.
  static const uint32_t kEncodings[] = {
      0x5e020409,  // mov h9, v0.h[0]
      0x7ec01429,  // fabd h9, h1, h0
      0x7e402c29,  // facge h9, h1, h0
      0x7ec02c29,  // facgt h9, h1, h0
      0x5e30d809,  // faddp h9, v0.2h
      0x5ef8d809,  // fcmeq h9, h0, #0.0
      0x5e402429,  // fcmeq h9, h1, h0
      0x7ef8c809,  // fcmge h9, h0, #0.0
      0x7e402429,  // fcmge h9, h1, h0
      0x5ef8c809,  // fcmgt h9, h0, #0.0
      0x7ec02429,  // fcmgt h9, h1, h0
      0x7ef8d809,  // fcmle h9, h0, #0.0
      0x5ef8e809,  // fcmlt h9, h0, #0.0
      0x5e79c809,  // fcvtas h9, h0
      0x7e79c809,  // fcvtau h9, h0
      0x5e79b809,  // fcvtms h9, h0
      0x7e79b809,  // fcvtmu h9, h0
      0x5e79a809,  // fcvtns h9, h0
      0x7e79a809,  // fcvtnu h9, h0
      0x5ef9a809,  // fcvtps h9, h0
      0x7ef9a809,  // fcvtpu h9, h0
      0x5ef9b809,  // fcvtzs h9, h0
      0x5f10fc09,  // fcvtzs h9, h0, #16
      0x7ef9b809,  // fcvtzu h9, h0
      0x7f10fc09,  // fcvtzu h9, h0, #16
      0x5e30c809,  // fmaxnmp h9, v0.2h
      0x5e30f809,  // fmaxp h9, v0.2h
      0x5eb0c809,  // fminnmp h9, v0.2h
      0x5eb0f809,  // fminp h9, v0.2h
      0x5f001029,  // fmla h9, h1, v0.h[0]
      0x5f005029,  // fmls h9, h1, v0.h[0]
      0x5f009029,  // fmul h9, h1, v0.h[0]
      0x7f009029,  // fmulx h9, h1, v0.h[0]
      0x5e401c29,  // fmulx h9, h1, h0
      0x5ef9d809,  // frecpe h9, h0
      0x5e403c29,  // frecps h9, h1, h0
      0x5ef9f809,  // frecpx h9, h0
      0x7ef9d809,  // frsqrte h9, h0
      0x5ec03c29,  // frsqrts h9, h1, h0
      0x5e79d809,  // scvtf h9, h0
      0x5f10e409,  // scvtf h9, h0, #16
      0x5e607809,  // sqabs h9, h0
      0x5e600c29,  // sqadd h9, h1, h0
      0x5f40c029,  // sqdmulh h9, h1, v0.h[0]
      0x5e60b429,  // sqdmulh h9, h1, h0
      0x7e607809,  // sqneg h9, h0
      0x7f40d029,  // sqrdmlah h9, h1, v0.h[0]
      0x7e408429,  // sqrdmlah h9, h1, h0
      0x7f40f029,  // sqrdmlsh h9, h1, v0.h[0]
      0x7e408c29,  // sqrdmlsh h9, h1, h0
      0x5f40d029,  // sqrdmulh h9, h1, v0.h[0]
      0x7e60b429,  // sqrdmulh h9, h1, h0
      0x5e605c29,  // sqrshl h9, h1, h0
      0x5f109c09,  // sqrshrn h9, s0, #16
      0x7f108c09,  // sqrshrun h9, s0, #16
      0x5e604c29,  // sqshl h9, h1, h0
      0x5f107409,  // sqshl h9, h0, #0
      0x7f106409,  // sqshlu h9, h0, #0
      0x5f109409,  // sqshrn h9, s0, #16
      0x7f108409,  // sqshrun h9, s0, #16
      0x5e602c29,  // sqsub h9, h1, h0
      0x5e614809,  // sqxtn h9, s0
      0x7e612809,  // sqxtun h9, s0
      0x5e603809,  // suqadd h9, h0
      0x7e79d809,  // ucvtf h9, h0
      0x7f10e409,  // ucvtf h9, h0, #16
      0x7e600c29,  // uqadd h9, h1, h0
      0x7e605c29,  // uqrshl h9, h1, h0
      0x7f109c09,  // uqrshrn h9, s0, #16
      0x7e604c29,  // uqshl h9, h1, h0
      0x7f107409,  // uqshl h9, h0, #0
      0x7f109409,  // uqshrn h9, s0, #16
      0x7e602c29,  // uqsub h9, h1, h0
      0x7e614809,  // uqxtn h9, s0
      0x7e603809,  // usqadd h9, h0
  };
  const size_t encoding_count = sizeof(kEncodings) / sizeof(kEncodings[0]);
  // Each encoding is emitted between a movi (fill q9) and an orr (accumulate).
  const size_t instructions_per_check = 3;

  SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                      CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kRDM);
  START();

  __ Mov(x0, 0x55aa42ffaa42ff55);  // Source pattern for the input registers.
  __ Mov(x1, 4);                   // Number of passes over the table.
  __ Movi(q30.V16B(), 0);          // Accumulator for stray high bits.

  // Run the whole instruction list once per pass, feeding it different input
  // values each time.
  Label next_pass;
  __ Bind(&next_pass);

  // Refresh the inputs, rotating the pattern by a byte between registers.
  __ Dup(q0.V8H(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q1.V8H(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q2.V8H(), w0);
  __ Ror(x0, x0, 8);

  {
    ExactAssemblyScope guard(&masm,
                             instructions_per_check * encoding_count *
                                 kInstructionSize);
    for (size_t i = 0; i < encoding_count; ++i) {
      __ movi(q9.V16B(), 0x55);
      __ dci(kEncodings[i]);
      __ orr(q30.V16B(), q30.V16B(), q9.V16B());
    }
  }
  __ Sub(x1, x1, 1);
  __ Cbnz(x1, &next_pass);

  // Discard the lane that legitimately holds results; the rest must be zero.
  __ Ins(q30.V8H(), 0, wzr);

  END();
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0, q30);
  }
}
11420
// Check that scalar instructions writing an S-sized destination leave every
// bit of the vector register above the low 32-bit lane clear.
//
// For each encoding, q9 is first filled with 0x55 bytes, the instruction under
// test writes s9, and the whole of q9 is ORed into the accumulator q30. Once
// all iterations are done, lane 0 of q30 (the only lane allowed to hold a
// result) is cleared, so q30 must read as zero.
TEST(zero_high_s) {
  // Raw encodings of the instructions under test. Every one targets s9.
  static const uint32_t kEncodings[] = {
      0x5e040409,  // mov s9, v0.s[0]
      0x7ea0d429,  // fabd s9, s1, s0
      0x7e20ec29,  // facge s9, s1, s0
      0x7ea0ec29,  // facgt s9, s1, s0
      0x7e30d809,  // faddp s9, v0.2s
      0x5ea0d809,  // fcmeq s9, s0, #0.0
      0x5e20e429,  // fcmeq s9, s1, s0
      0x7ea0c809,  // fcmge s9, s0, #0.0
      0x7e20e429,  // fcmge s9, s1, s0
      0x5ea0c809,  // fcmgt s9, s0, #0.0
      0x7ea0e429,  // fcmgt s9, s1, s0
      0x7ea0d809,  // fcmle s9, s0, #0.0
      0x5ea0e809,  // fcmlt s9, s0, #0.0
      0x5e21c809,  // fcvtas s9, s0
      0x7e21c809,  // fcvtau s9, s0
      0x5e21b809,  // fcvtms s9, s0
      0x7e21b809,  // fcvtmu s9, s0
      0x5e21a809,  // fcvtns s9, s0
      0x7e21a809,  // fcvtnu s9, s0
      0x5ea1a809,  // fcvtps s9, s0
      0x7ea1a809,  // fcvtpu s9, s0
      0x7e616809,  // fcvtxn s9, d0
      0x5ea1b809,  // fcvtzs s9, s0
      0x5f20fc09,  // fcvtzs s9, s0, #32
      0x7ea1b809,  // fcvtzu s9, s0
      0x7f20fc09,  // fcvtzu s9, s0, #32
      0x7e30c809,  // fmaxnmp s9, v0.2s
      0x7e30f809,  // fmaxp s9, v0.2s
      0x7eb0c809,  // fminnmp s9, v0.2s
      0x7eb0f809,  // fminp s9, v0.2s
      0x5f801029,  // fmla s9, s1, v0.s[0]
      0x5f805029,  // fmls s9, s1, v0.s[0]
      0x5f809029,  // fmul s9, s1, v0.s[0]
      0x7f809029,  // fmulx s9, s1, v0.s[0]
      0x5e20dc29,  // fmulx s9, s1, s0
      0x5ea1d809,  // frecpe s9, s0
      0x5e20fc29,  // frecps s9, s1, s0
      0x5ea1f809,  // frecpx s9, s0
      0x7ea1d809,  // frsqrte s9, s0
      0x5ea0fc29,  // frsqrts s9, s1, s0
      0x5e21d809,  // scvtf s9, s0
      0x5f20e409,  // scvtf s9, s0, #32
      0x5ea07809,  // sqabs s9, s0
      0x5ea00c29,  // sqadd s9, s1, s0
      0x5e609029,  // sqdmlal s9, h1, h0
      0x5f403029,  // sqdmlal s9, h1, v0.h[0]
      0x5e60b029,  // sqdmlsl s9, h1, h0
      0x5f407029,  // sqdmlsl s9, h1, v0.h[0]
      0x5f80c029,  // sqdmulh s9, s1, v0.s[0]
      0x5ea0b429,  // sqdmulh s9, s1, s0
      0x5e60d029,  // sqdmull s9, h1, h0
      0x5f40b029,  // sqdmull s9, h1, v0.h[0]
      0x7ea07809,  // sqneg s9, s0
      0x7f80d029,  // sqrdmlah s9, s1, v0.s[0]
      0x7e808429,  // sqrdmlah s9, s1, s0
      0x7f80f029,  // sqrdmlsh s9, s1, v0.s[0]
      0x7e808c29,  // sqrdmlsh s9, s1, s0
      0x5f80d029,  // sqrdmulh s9, s1, v0.s[0]
      0x7ea0b429,  // sqrdmulh s9, s1, s0
      0x5ea05c29,  // sqrshl s9, s1, s0
      0x5f209c09,  // sqrshrn s9, d0, #32
      0x7f208c09,  // sqrshrun s9, d0, #32
      0x5ea04c29,  // sqshl s9, s1, s0
      0x5f207409,  // sqshl s9, s0, #0
      0x7f206409,  // sqshlu s9, s0, #0
      0x5f209409,  // sqshrn s9, d0, #32
      0x7f208409,  // sqshrun s9, d0, #32
      0x5ea02c29,  // sqsub s9, s1, s0
      0x5ea14809,  // sqxtn s9, d0
      0x7ea12809,  // sqxtun s9, d0
      0x5ea03809,  // suqadd s9, s0
      0x7e21d809,  // ucvtf s9, s0
      0x7f20e409,  // ucvtf s9, s0, #32
      0x7ea00c29,  // uqadd s9, s1, s0
      0x7ea05c29,  // uqrshl s9, s1, s0
      0x7f209c09,  // uqrshrn s9, d0, #32
      0x7ea04c29,  // uqshl s9, s1, s0
      0x7f207409,  // uqshl s9, s0, #0
      0x7f209409,  // uqshrn s9, d0, #32
      0x7ea02c29,  // uqsub s9, s1, s0
      0x7ea14809,  // uqxtn s9, d0
      0x7ea03809,  // usqadd s9, s0
  };
  const size_t encoding_count = sizeof(kEncodings) / sizeof(kEncodings[0]);
  // Each encoding is emitted between a movi (fill q9) and an orr (accumulate).
  const size_t instructions_per_check = 3;

  SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                      CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kRDM);
  START();

  __ Mov(x0, 0x55aa42ffaa42ff55);  // Source pattern for the input registers.
  __ Mov(x1, 4);                   // Number of passes over the table.
  __ Movi(q30.V16B(), 0);          // Accumulator for stray high bits.

  // Run the whole instruction list once per pass, feeding it different input
  // values each time.
  Label next_pass;
  __ Bind(&next_pass);

  // Refresh the inputs, rotating the pattern by a byte between registers.
  __ Dup(q0.V4S(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q1.V4S(), w0);
  __ Ror(x0, x0, 8);
  __ Dup(q2.V4S(), w0);
  __ Ror(x0, x0, 8);

  {
    ExactAssemblyScope guard(&masm,
                             instructions_per_check * encoding_count *
                                 kInstructionSize);
    for (size_t i = 0; i < encoding_count; ++i) {
      __ movi(q9.V16B(), 0x55);
      __ dci(kEncodings[i]);
      __ orr(q30.V16B(), q30.V16B(), q9.V16B());
    }
  }
  __ Sub(x1, x1, 1);
  __ Cbnz(x1, &next_pass);

  // Discard the lane that legitimately holds results; the rest must be zero.
  __ Ins(q30.V4S(), 0, wzr);

  END();
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0, q30);
  }
}
11785
TEST(zero_high_d)11786 TEST(zero_high_d) {
11787 SETUP_WITH_FEATURES(CPUFeatures::kSVE,
11788 CPUFeatures::kNEON,
11789 CPUFeatures::kFP,
11790 CPUFeatures::kRDM);
11791 START();
11792
11793 __ Mov(x0, 0x55aa42ffaa42ff55);
11794 __ Mov(x1, 4);
11795 __ Movi(q30.V16B(), 0);
11796
11797 // Iterate over the SISD instructions using different input values on each
11798 // loop.
11799 Label loop;
11800 __ Bind(&loop);
11801
11802 __ Dup(q0.V2D(), x0);
11803 __ Ror(x0, x0, 8);
11804 __ Dup(q1.V2D(), x0);
11805 __ Ror(x0, x0, 8);
11806 __ Dup(q2.V2D(), x0);
11807 __ Ror(x0, x0, 8);
11808
11809 {
11810 ExactAssemblyScope scope(&masm, 291 * kInstructionSize);
11811 __ movi(q9.V16B(), 0x55);
11812 __ dci(0x5ee0b809); // abs d9, d0
11813 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11814
11815 __ movi(q9.V16B(), 0x55);
11816 __ dci(0x5ee08429); // add d9, d1, d0
11817 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11818
11819 __ movi(q9.V16B(), 0x55);
11820 __ dci(0x5ef1b809); // addp d9, v0.2d
11821 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11822
11823 __ movi(q9.V16B(), 0x55);
11824 __ dci(0x5ee09809); // cmeq d9, d0, #0
11825 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11826
11827 __ movi(q9.V16B(), 0x55);
11828 __ dci(0x7ee08c29); // cmeq d9, d1, d0
11829 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11830
11831 __ movi(q9.V16B(), 0x55);
11832 __ dci(0x7ee08809); // cmge d9, d0, #0
11833 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11834
11835 __ movi(q9.V16B(), 0x55);
11836 __ dci(0x5ee03c29); // cmge d9, d1, d0
11837 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11838
11839 __ movi(q9.V16B(), 0x55);
11840 __ dci(0x5ee08809); // cmgt d9, d0, #0
11841 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11842
11843 __ movi(q9.V16B(), 0x55);
11844 __ dci(0x5ee03429); // cmgt d9, d1, d0
11845 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11846
11847 __ movi(q9.V16B(), 0x55);
11848 __ dci(0x7ee03429); // cmhi d9, d1, d0
11849 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11850
11851 __ movi(q9.V16B(), 0x55);
11852 __ dci(0x7ee03c29); // cmhs d9, d1, d0
11853 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11854
11855 __ movi(q9.V16B(), 0x55);
11856 __ dci(0x7ee09809); // cmle d9, d0, #0
11857 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11858
11859 __ movi(q9.V16B(), 0x55);
11860 __ dci(0x5ee0a809); // cmlt d9, d0, #0
11861 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11862
11863 __ movi(q9.V16B(), 0x55);
11864 __ dci(0x5ee08c29); // cmtst d9, d1, d0
11865 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11866
11867 __ movi(q9.V16B(), 0x55);
11868 __ dci(0x5e080409); // mov d9, v0.d[0]
11869 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11870
11871 __ movi(q9.V16B(), 0x55);
11872 __ dci(0x7ee0d429); // fabd d9, d1, d0
11873 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11874
11875 __ movi(q9.V16B(), 0x55);
11876 __ dci(0x7e60ec29); // facge d9, d1, d0
11877 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11878
11879 __ movi(q9.V16B(), 0x55);
11880 __ dci(0x7ee0ec29); // facgt d9, d1, d0
11881 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11882
11883 __ movi(q9.V16B(), 0x55);
11884 __ dci(0x7e70d809); // faddp d9, v0.2d
11885 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11886
11887 __ movi(q9.V16B(), 0x55);
11888 __ dci(0x5ee0d809); // fcmeq d9, d0, #0.0
11889 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11890
11891 __ movi(q9.V16B(), 0x55);
11892 __ dci(0x5e60e429); // fcmeq d9, d1, d0
11893 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11894
11895 __ movi(q9.V16B(), 0x55);
11896 __ dci(0x7ee0c809); // fcmge d9, d0, #0.0
11897 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11898
11899 __ movi(q9.V16B(), 0x55);
11900 __ dci(0x7e60e429); // fcmge d9, d1, d0
11901 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11902
11903 __ movi(q9.V16B(), 0x55);
11904 __ dci(0x5ee0c809); // fcmgt d9, d0, #0.0
11905 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11906
11907 __ movi(q9.V16B(), 0x55);
11908 __ dci(0x7ee0e429); // fcmgt d9, d1, d0
11909 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11910
11911 __ movi(q9.V16B(), 0x55);
11912 __ dci(0x7ee0d809); // fcmle d9, d0, #0.0
11913 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11914
11915 __ movi(q9.V16B(), 0x55);
11916 __ dci(0x5ee0e809); // fcmlt d9, d0, #0.0
11917 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11918
11919 __ movi(q9.V16B(), 0x55);
11920 __ dci(0x5e61c809); // fcvtas d9, d0
11921 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11922
11923 __ movi(q9.V16B(), 0x55);
11924 __ dci(0x7e61c809); // fcvtau d9, d0
11925 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11926
11927 __ movi(q9.V16B(), 0x55);
11928 __ dci(0x5e61b809); // fcvtms d9, d0
11929 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11930
11931 __ movi(q9.V16B(), 0x55);
11932 __ dci(0x7e61b809); // fcvtmu d9, d0
11933 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11934
11935 __ movi(q9.V16B(), 0x55);
11936 __ dci(0x5e61a809); // fcvtns d9, d0
11937 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11938
11939 __ movi(q9.V16B(), 0x55);
11940 __ dci(0x7e61a809); // fcvtnu d9, d0
11941 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11942
11943 __ movi(q9.V16B(), 0x55);
11944 __ dci(0x5ee1a809); // fcvtps d9, d0
11945 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11946
11947 __ movi(q9.V16B(), 0x55);
11948 __ dci(0x7ee1a809); // fcvtpu d9, d0
11949 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11950
11951 __ movi(q9.V16B(), 0x55);
11952 __ dci(0x5ee1b809); // fcvtzs d9, d0
11953 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11954
11955 __ movi(q9.V16B(), 0x55);
11956 __ dci(0x5f40fc09); // fcvtzs d9, d0, #64
11957 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11958
11959 __ movi(q9.V16B(), 0x55);
11960 __ dci(0x7ee1b809); // fcvtzu d9, d0
11961 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11962
11963 __ movi(q9.V16B(), 0x55);
11964 __ dci(0x7f40fc09); // fcvtzu d9, d0, #64
11965 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11966
11967 __ movi(q9.V16B(), 0x55);
11968 __ dci(0x7e70c809); // fmaxnmp d9, v0.2d
11969 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11970
11971 __ movi(q9.V16B(), 0x55);
11972 __ dci(0x7e70f809); // fmaxp d9, v0.2d
11973 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11974
11975 __ movi(q9.V16B(), 0x55);
11976 __ dci(0x7ef0c809); // fminnmp d9, v0.2d
11977 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11978
11979 __ movi(q9.V16B(), 0x55);
11980 __ dci(0x7ef0f809); // fminp d9, v0.2d
11981 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11982
11983 __ movi(q9.V16B(), 0x55);
11984 __ dci(0x5fc01029); // fmla d9, d1, v0.d[0]
11985 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11986
11987 __ movi(q9.V16B(), 0x55);
11988 __ dci(0x5fc05029); // fmls d9, d1, v0.d[0]
11989 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11990
11991 __ movi(q9.V16B(), 0x55);
11992 __ dci(0x5fc09029); // fmul d9, d1, v0.d[0]
11993 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11994
11995 __ movi(q9.V16B(), 0x55);
11996 __ dci(0x7fc09029); // fmulx d9, d1, v0.d[0]
11997 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11998
11999 __ movi(q9.V16B(), 0x55);
12000 __ dci(0x5e60dc29); // fmulx d9, d1, d0
12001 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12002
12003 __ movi(q9.V16B(), 0x55);
12004 __ dci(0x5ee1d809); // frecpe d9, d0
12005 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12006
12007 __ movi(q9.V16B(), 0x55);
12008 __ dci(0x5e60fc29); // frecps d9, d1, d0
12009 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12010
12011 __ movi(q9.V16B(), 0x55);
12012 __ dci(0x5ee1f809); // frecpx d9, d0
12013 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12014
12015 __ movi(q9.V16B(), 0x55);
12016 __ dci(0x7ee1d809); // frsqrte d9, d0
12017 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12018
12019 __ movi(q9.V16B(), 0x55);
12020 __ dci(0x5ee0fc29); // frsqrts d9, d1, d0
12021 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12022
12023 __ movi(q9.V16B(), 0x55);
12024 __ dci(0x7ee0b809); // neg d9, d0
12025 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12026
12027 __ movi(q9.V16B(), 0x55);
12028 __ dci(0x5e61d809); // scvtf d9, d0
12029 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12030
12031 __ movi(q9.V16B(), 0x55);
12032 __ dci(0x5f40e409); // scvtf d9, d0, #64
12033 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12034
12035 __ movi(q9.V16B(), 0x55);
12036 __ dci(0x5f405409); // shl d9, d0, #0
12037 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12038
12039 __ movi(q9.V16B(), 0x55);
12040 __ dci(0x7f405409); // sli d9, d0, #0
12041 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12042
12043 __ movi(q9.V16B(), 0x55);
12044 __ dci(0x5ee07809); // sqabs d9, d0
12045 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12046
12047 __ movi(q9.V16B(), 0x55);
12048 __ dci(0x5ee00c29); // sqadd d9, d1, d0
12049 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12050
12051 __ movi(q9.V16B(), 0x55);
12052 __ dci(0x5ea09029); // sqdmlal d9, s1, s0
12053 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12054
12055 __ movi(q9.V16B(), 0x55);
12056 __ dci(0x5f803029); // sqdmlal d9, s1, v0.s[0]
12057 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12058
12059 __ movi(q9.V16B(), 0x55);
12060 __ dci(0x5ea0b029); // sqdmlsl d9, s1, s0
12061 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12062
12063 __ movi(q9.V16B(), 0x55);
12064 __ dci(0x5f807029); // sqdmlsl d9, s1, v0.s[0]
12065 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12066
12067 __ movi(q9.V16B(), 0x55);
12068 __ dci(0x5ea0d029); // sqdmull d9, s1, s0
12069 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12070
12071 __ movi(q9.V16B(), 0x55);
12072 __ dci(0x5f80b029); // sqdmull d9, s1, v0.s[0]
12073 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12074
12075 __ movi(q9.V16B(), 0x55);
12076 __ dci(0x7ee07809); // sqneg d9, d0
12077 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12078
12079 __ movi(q9.V16B(), 0x55);
12080 __ dci(0x7ec08429); // sqrdmlah d9, d1, d0
12081 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12082
12083 __ movi(q9.V16B(), 0x55);
12084 __ dci(0x7ec08c29); // sqrdmlsh d9, d1, d0
12085 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12086
12087 __ movi(q9.V16B(), 0x55);
12088 __ dci(0x5ee05c29); // sqrshl d9, d1, d0
12089 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12090
12091 __ movi(q9.V16B(), 0x55);
12092 __ dci(0x5ee04c29); // sqshl d9, d1, d0
12093 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12094
12095 __ movi(q9.V16B(), 0x55);
12096 __ dci(0x5f407409); // sqshl d9, d0, #0
12097 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12098
12099 __ movi(q9.V16B(), 0x55);
12100 __ dci(0x7f406409); // sqshlu d9, d0, #0
12101 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12102
12103 __ movi(q9.V16B(), 0x55);
12104 __ dci(0x5ee02c29); // sqsub d9, d1, d0
12105 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12106
12107 __ movi(q9.V16B(), 0x55);
12108 __ dci(0x7f404409); // sri d9, d0, #64
12109 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12110
12111 __ movi(q9.V16B(), 0x55);
12112 __ dci(0x5ee05429); // srshl d9, d1, d0
12113 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12114
12115 __ movi(q9.V16B(), 0x55);
12116 __ dci(0x5f402409); // srshr d9, d0, #64
12117 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12118
12119 __ movi(q9.V16B(), 0x55);
12120 __ dci(0x5f403409); // srsra d9, d0, #64
12121 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12122
12123 __ movi(q9.V16B(), 0x55);
12124 __ dci(0x5ee04429); // sshl d9, d1, d0
12125 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12126
12127 __ movi(q9.V16B(), 0x55);
12128 __ dci(0x5f400409); // sshr d9, d0, #64
12129 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12130
12131 __ movi(q9.V16B(), 0x55);
12132 __ dci(0x5f401409); // ssra d9, d0, #64
12133 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12134
12135 __ movi(q9.V16B(), 0x55);
12136 __ dci(0x7ee08429); // sub d9, d1, d0
12137 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12138
12139 __ movi(q9.V16B(), 0x55);
12140 __ dci(0x5ee03809); // suqadd d9, d0
12141 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12142
12143 __ movi(q9.V16B(), 0x55);
12144 __ dci(0x7e61d809); // ucvtf d9, d0
12145 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12146
12147 __ movi(q9.V16B(), 0x55);
12148 __ dci(0x7f40e409); // ucvtf d9, d0, #64
12149 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12150
12151 __ movi(q9.V16B(), 0x55);
12152 __ dci(0x7ee00c29); // uqadd d9, d1, d0
12153 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12154
12155 __ movi(q9.V16B(), 0x55);
12156 __ dci(0x7ee05c29); // uqrshl d9, d1, d0
12157 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12158
12159 __ movi(q9.V16B(), 0x55);
12160 __ dci(0x7ee04c29); // uqshl d9, d1, d0
12161 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12162
12163 __ movi(q9.V16B(), 0x55);
12164 __ dci(0x7f407409); // uqshl d9, d0, #0
12165 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12166
12167 __ movi(q9.V16B(), 0x55);
12168 __ dci(0x7ee02c29); // uqsub d9, d1, d0
12169 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12170
12171 __ movi(q9.V16B(), 0x55);
12172 __ dci(0x7ee05429); // urshl d9, d1, d0
12173 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12174
12175 __ movi(q9.V16B(), 0x55);
12176 __ dci(0x7f402409); // urshr d9, d0, #64
12177 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12178
12179 __ movi(q9.V16B(), 0x55);
12180 __ dci(0x7f403409); // ursra d9, d0, #64
12181 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12182
12183 __ movi(q9.V16B(), 0x55);
12184 __ dci(0x7ee04429); // ushl d9, d1, d0
12185 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12186
12187 __ movi(q9.V16B(), 0x55);
12188 __ dci(0x7f400409); // ushr d9, d0, #64
12189 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12190
12191 __ movi(q9.V16B(), 0x55);
12192 __ dci(0x7ee03809); // usqadd d9, d0
12193 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12194
12195 __ movi(q9.V16B(), 0x55);
12196 __ dci(0x7f401409); // usra d9, d0, #64
12197 __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12198 }
12199 __ Sub(x1, x1, 1);
12200 __ Cbnz(x1, &loop);
12201
12202 __ Ins(q30.V2D(), 0, xzr);
12203
12204 END();
12205 if (CAN_RUN()) {
12206 RUN();
12207 ASSERT_EQUAL_128(0, 0, q30);
12208 }
12209 }
12210
12211 } // namespace aarch64
12212 } // namespace vixl
12213