1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <sys/mman.h>
28
29 #include <cfloat>
30 #include <cmath>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34
35 #include "test-runner.h"
36 #include "test-utils.h"
37 #include "aarch64/test-utils-aarch64.h"
38
39 #include "aarch64/cpu-aarch64.h"
40 #include "aarch64/disasm-aarch64.h"
41 #include "aarch64/macro-assembler-aarch64.h"
42 #include "aarch64/simulator-aarch64.h"
43 #include "test-assembler-aarch64.h"
44
45 namespace vixl {
46 namespace aarch64 {
47
// Exercises Ldr/Str on B (8-bit) registers with the three immediate addressing
// modes: plain offset, post-index and pre-index. Both the transferred data and
// the base-register writeback are verified.
TEST(load_store_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t input[3] = {0x12, 0x23, 0x34};
  uint8_t output[3] = {0, 0, 0};
  const size_t elem_bytes = sizeof(input[0]);
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // Each load/store pair gets its own base registers so that writeback from
  // one access cannot influence another.
  __ Mov(x17, input_addr);
  __ Mov(x18, output_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, output_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, output_addr);
  // Offset load of input[1], post-index store to output[0].
  __ Ldr(b0, MemOperand(x17, elem_bytes));
  __ Str(b0, MemOperand(x18, elem_bytes, PostIndex));
  // Post-index load of input[0], pre-index store to output[2].
  __ Ldr(b1, MemOperand(x19, elem_bytes, PostIndex));
  __ Str(b1, MemOperand(x20, 2 * elem_bytes, PreIndex));
  // Pre-index load of input[2], offset store to output[1].
  __ Ldr(b2, MemOperand(x21, 2 * elem_bytes, PreIndex));
  __ Str(b2, MemOperand(x22, elem_bytes));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values (upper bits of the Q register must be zero) and the
    // memory locations they were stored to.
    ASSERT_EQUAL_128(0, 0x23, q0);
    ASSERT_EQUAL_64(0x23, output[0]);
    ASSERT_EQUAL_128(0, 0x12, q1);
    ASSERT_EQUAL_64(0x12, output[2]);
    ASSERT_EQUAL_128(0, 0x34, q2);
    ASSERT_EQUAL_64(0x34, output[1]);
    // Base registers: unchanged for plain offsets, updated for pre/post-index.
    ASSERT_EQUAL_64(input_addr, x17);
    ASSERT_EQUAL_64(output_addr + elem_bytes, x18);
    ASSERT_EQUAL_64(input_addr + elem_bytes, x19);
    ASSERT_EQUAL_64(output_addr + 2 * elem_bytes, x20);
    ASSERT_EQUAL_64(input_addr + 2 * elem_bytes, x21);
    ASSERT_EQUAL_64(output_addr, x22);
  }
}
88
89
// Exercises Ldr/Str on H (16-bit) registers with plain-offset, post-index and
// pre-index immediate addressing, checking data and base-register writeback.
TEST(load_store_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint16_t input[3] = {0x1234, 0x2345, 0x3456};
  uint16_t output[3] = {0, 0, 0};
  const size_t elem_bytes = sizeof(input[0]);
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  // Separate base registers per access keep the writebacks independent.
  __ Mov(x17, input_addr);
  __ Mov(x18, output_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, output_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, output_addr);
  // Offset load of input[1], post-index store to output[0].
  __ Ldr(h0, MemOperand(x17, elem_bytes));
  __ Str(h0, MemOperand(x18, elem_bytes, PostIndex));
  // Post-index load of input[0], pre-index store to output[2].
  __ Ldr(h1, MemOperand(x19, elem_bytes, PostIndex));
  __ Str(h1, MemOperand(x20, 2 * elem_bytes, PreIndex));
  // Pre-index load of input[2], offset store to output[1].
  __ Ldr(h2, MemOperand(x21, 2 * elem_bytes, PreIndex));
  __ Str(h2, MemOperand(x22, elem_bytes));
  END();

  if (CAN_RUN()) {
    RUN();

    // Loaded values (zero-extended into the Q register) and store targets.
    ASSERT_EQUAL_128(0, 0x2345, q0);
    ASSERT_EQUAL_64(0x2345, output[0]);
    ASSERT_EQUAL_128(0, 0x1234, q1);
    ASSERT_EQUAL_64(0x1234, output[2]);
    ASSERT_EQUAL_128(0, 0x3456, q2);
    ASSERT_EQUAL_64(0x3456, output[1]);
    // Base registers after the accesses.
    ASSERT_EQUAL_64(input_addr, x17);
    ASSERT_EQUAL_64(output_addr + elem_bytes, x18);
    ASSERT_EQUAL_64(input_addr + elem_bytes, x19);
    ASSERT_EQUAL_64(output_addr + 2 * elem_bytes, x20);
    ASSERT_EQUAL_64(input_addr + 2 * elem_bytes, x21);
    ASSERT_EQUAL_64(output_addr, x22);
  }
}
130
131
// Exercises Ldr/Str on Q (128-bit) registers with plain-offset, post-index and
// pre-index immediate addressing, checking data and base-register writeback.
TEST(load_store_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Three 16-byte source vectors.
  uint8_t input[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe,
                       0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
                       0x21, 0x43, 0x65, 0x87, 0xa9, 0xcb, 0xed, 0x0f,
                       0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
                       0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
                       0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};

  // Three 16-byte destination slots, inspected as pairs of 64-bit words.
  uint64_t output[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  __ Mov(x17, input_addr);
  __ Mov(x18, output_addr);
  __ Mov(x19, input_addr);
  __ Mov(x20, output_addr);
  __ Mov(x21, input_addr);
  __ Mov(x22, output_addr);
  // Offset load of the second vector, post-index store to the first slot.
  __ Ldr(q0, MemOperand(x17, 16));
  __ Str(q0, MemOperand(x18, 16, PostIndex));
  // Post-index load of the first vector, pre-index store to the third slot.
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Str(q1, MemOperand(x20, 32, PreIndex));
  // Pre-index load of the third vector, offset store to the second slot.
  __ Ldr(q2, MemOperand(x21, 32, PreIndex));
  __ Str(q2, MemOperand(x22, 16));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
    ASSERT_EQUAL_64(0x0fedcba987654321, output[0]);
    ASSERT_EQUAL_64(0xf0debc9a78563412, output[1]);
    ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
    ASSERT_EQUAL_64(0xfedcba9876543210, output[4]);
    ASSERT_EQUAL_64(0xefcdab8967452301, output[5]);
    ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
    ASSERT_EQUAL_64(0x02e0ceac8a684624, output[2]);
    ASSERT_EQUAL_64(0x200eeccaa8866442, output[3]);
    // Base registers after the accesses.
    ASSERT_EQUAL_64(input_addr, x17);
    ASSERT_EQUAL_64(output_addr + 16, x18);
    ASSERT_EQUAL_64(input_addr + 16, x19);
    ASSERT_EQUAL_64(output_addr + 32, x20);
    ASSERT_EQUAL_64(input_addr + 32, x21);
    ASSERT_EQUAL_64(output_addr, x22);
  }
}
180
181
// Exercises register-offset addressing (plain, UXTW, SXTW and LSL, with and
// without scaling) for Ldr/Str on B, H, S, D and Q registers.
TEST(load_store_v_regoffset) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index, so a loaded value reveals its address.
  uint8_t pattern[64];
  for (size_t idx = 0; idx < sizeof(pattern); ++idx) {
    pattern[idx] = idx;
  }
  uint8_t scratch[64] = {};

  uintptr_t pattern_addr = reinterpret_cast<uintptr_t>(pattern);
  uintptr_t scratch_addr = reinterpret_cast<uintptr_t>(scratch);

  START();
  __ Mov(x17, pattern_addr + 16);  // Load base: pattern[16].
  __ Mov(x18, 1);                  // Positive index.
  __ Mov(w19, -1);                 // Negative 32-bit index, used with SXTW.
  __ Mov(x20, scratch_addr - 1);   // Store base: one byte before scratch.

  // Byte loads: pattern[17] and pattern[15].
  __ Ldr(b0, MemOperand(x17, x18));
  __ Ldr(b1, MemOperand(x17, x19, SXTW));

  // Halfword loads: unscaled index, then index scaled by 2.
  __ Ldr(h2, MemOperand(x17, x18));
  __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
  __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
  __ Ldr(h5, MemOperand(x17, x18, LSL, 1));

  // Word loads: unscaled index, then index scaled by 4.
  __ Ldr(s16, MemOperand(x17, x18));
  __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
  __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
  __ Ldr(s19, MemOperand(x17, x18, LSL, 2));

  // Doubleword loads: unscaled index, then index scaled by 8.
  __ Ldr(d20, MemOperand(x17, x18));
  __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
  __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
  __ Ldr(d23, MemOperand(x17, x18, LSL, 3));

  // Quadword loads: unscaled index, then index scaled by 16.
  __ Ldr(q24, MemOperand(x17, x18));
  __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
  __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
  __ Ldr(q27, MemOperand(x17, x18, LSL, 4));

  // Store [bhsdq]27 to adjacent memory locations, then load again to check.
  // The base is adjusted between stores so the pieces land back to back at
  // scratch + 0 (B), + 1 (H), + 3 (S), + 7 (D) and + 15 (Q).
  __ Str(b27, MemOperand(x20, x18));
  __ Str(h27, MemOperand(x20, x18, UXTW, 1));
  __ Add(x20, x20, 8);
  __ Str(s27, MemOperand(x20, x19, SXTW, 2));
  __ Sub(x20, x20, 8);
  __ Str(d27, MemOperand(x20, x18, LSL, 3));
  __ Add(x20, x20, 32);
  __ Str(q27, MemOperand(x20, x19, SXTW, 4));

  // Read back scratch[0..15] and scratch[15..30].
  __ Sub(x20, x20, 32);
  __ Ldr(q6, MemOperand(x20, x18));
  __ Ldr(q7, MemOperand(x20, x18, LSL, 4));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x11, q0);
    ASSERT_EQUAL_128(0, 0x0f, q1);
    ASSERT_EQUAL_128(0, 0x1211, q2);
    ASSERT_EQUAL_128(0, 0x1312, q3);
    ASSERT_EQUAL_128(0, 0x0f0e, q4);
    ASSERT_EQUAL_128(0, 0x1312, q5);
    ASSERT_EQUAL_128(0, 0x14131211, q16);
    ASSERT_EQUAL_128(0, 0x17161514, q17);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
    ASSERT_EQUAL_128(0, 0x17161514, q19);
    ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
    ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
    ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
    ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);
  }
}
265
// Exercises Ldp/Stp on pairs of Q registers: a post-index pair load followed
// by a pre-index pair store with the two registers swapped.
TEST(ldp_stp_quad) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint64_t input[4] = {0x0123456789abcdef,
                       0xaaaaaaaa55555555,
                       0xfedcba9876543210,
                       0x55555555aaaaaaaa};
  uint64_t output[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t input_addr = reinterpret_cast<uintptr_t>(input);
  uintptr_t output_addr = reinterpret_cast<uintptr_t>(output);

  START();
  __ Mov(x16, input_addr);
  __ Mov(x17, output_addr);
  // Load all four words, then advance x16 past them.
  __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(input[0]), PostIndex));
  // Advance x17 by two words first, then store q0 followed by q31.
  __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(output[1]), PreIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
    ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
    // The first two words are skipped by the pre-index adjustment.
    ASSERT_EQUAL_64(0, output[0]);
    ASSERT_EQUAL_64(0, output[1]);
    ASSERT_EQUAL_64(0xfedcba9876543210, output[2]);
    ASSERT_EQUAL_64(0x55555555aaaaaaaa, output[3]);
    ASSERT_EQUAL_64(0x0123456789abcdef, output[4]);
    ASSERT_EQUAL_64(0xaaaaaaaa55555555, output[5]);
    ASSERT_EQUAL_64(input_addr + 4 * sizeof(input[0]), x16);
    ASSERT_EQUAL_64(output_addr + 2 * sizeof(output[1]), x17);
  }
}
299
// Exercises Ld1 (multiple structures) into one to four D-sized registers,
// using each 64-bit arrangement and a base address that moves by one byte
// between loads.
TEST(neon_ld1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index. The largest access reads 32 bytes
  // starting at offset 5.
  uint8_t bytes[32 + 5];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr);
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
  __ Ld1(v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps around from v31 to v0.
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination must read as zero.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
    ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
    ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
    ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
    ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
    ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
    ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
    ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
    ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
    ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
  }
}
348
349
// Exercises post-indexed Ld1 (multiple structures) into one to four D-sized
// registers, with both register and immediate post-index offsets, and checks
// the updated base registers.
TEST(neon_ld1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t bytes[32 + 5];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, bytes_addr);
  __ Mov(x18, bytes_addr + 1);
  __ Mov(x19, bytes_addr + 2);
  __ Mov(x20, bytes_addr + 3);
  __ Mov(x21, bytes_addr + 4);
  __ Mov(x22, bytes_addr + 5);
  __ Mov(x23, 1);  // Register post-index amount.
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
  __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(),
         MemOperand(x20, 32, PostIndex));
  // Register list that wraps around from v31 to v0.
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(),
         MemOperand(x22, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
    ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
    ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
    ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
    ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
    ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
    ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
    ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
    ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
    ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
    // Each base is its start address plus the post-index amount.
    ASSERT_EQUAL_64(bytes_addr + 1, x17);
    ASSERT_EQUAL_64(bytes_addr + 1 + 16, x18);
    ASSERT_EQUAL_64(bytes_addr + 2 + 24, x19);
    ASSERT_EQUAL_64(bytes_addr + 3 + 32, x20);
    ASSERT_EQUAL_64(bytes_addr + 4 + 32, x21);
    ASSERT_EQUAL_64(bytes_addr + 5 + 32, x22);
  }
}
417
418
// Exercises Ld1 (multiple structures) into one to four Q-sized registers,
// using each 128-bit arrangement and a base address that moves by one byte
// between loads.
TEST(neon_ld1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index. The largest access reads 64 bytes
  // starting at offset 4.
  uint8_t bytes[64 + 4];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr);
  __ Ld1(v2.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register list that wraps around from v31 to v0.
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
  }
}
460
461
// Exercises post-indexed Ld1 (multiple structures) into one to four Q-sized
// registers, with both register and immediate post-index offsets, and checks
// the updated base registers.
TEST(neon_ld1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t bytes[64 + 4];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, bytes_addr);
  __ Mov(x18, bytes_addr + 1);
  __ Mov(x19, bytes_addr + 2);
  __ Mov(x20, bytes_addr + 3);
  __ Mov(x21, bytes_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.
  __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(),
         MemOperand(x20, 64, PostIndex));
  // Register list that wraps around from v31 to v0.
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
    // Each base is its start address plus the post-index amount.
    ASSERT_EQUAL_64(bytes_addr + 1, x17);
    ASSERT_EQUAL_64(bytes_addr + 1 + 32, x18);
    ASSERT_EQUAL_64(bytes_addr + 2 + 48, x19);
    ASSERT_EQUAL_64(bytes_addr + 3 + 64, x20);
    ASSERT_EQUAL_64(bytes_addr + 4 + 64, x21);
  }
}
517
518
// Exercises Ld1 (single structure) into individual B, H, S and D lanes: first
// filling whole registers lane by lane, then replacing one lane of a register
// that already holds data.
TEST(neon_ld1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t bytes[64];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();

  // Test loading whole register by element. Lanes are filled from the highest
  // down while the address advances by one byte per load.
  __ Mov(x17, bytes_addr);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, bytes_addr);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld1(v1.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, bytes_addr);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld1(v2.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, bytes_addr);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld1(v3.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register. Only the
  // selected lane should change.
  __ Mov(x17, bytes_addr);
  __ Ldr(q4, MemOperand(x17));
  __ Ld1(v4.B(), 4, MemOperand(x17));
  __ Ldr(q5, MemOperand(x17));
  __ Ld1(v5.H(), 3, MemOperand(x17));
  __ Ldr(q6, MemOperand(x17));
  __ Ld1(v6.S(), 2, MemOperand(x17));
  __ Ldr(q7, MemOperand(x17));
  __ Ld1(v7.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
  }
}
581
// Exercises Ld2 (multiple structures) into pairs of D-sized registers: the
// loaded elements are de-interleaved, even elements to the first register and
// odd elements to the second.
TEST(neon_ld2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t bytes[64 + 4];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register pair that wraps around from v31 to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination must read as zero.
    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);
  }
}
615
// Exercises post-indexed Ld2 (multiple structures) into pairs of D-sized
// registers, with both register and immediate post-index offsets, and checks
// the de-interleaved data as well as the updated base registers.
TEST(neon_ld2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);  // Register post-index amount.
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
  // Use v6/v7 here (as neon_ld2_d does) so that v5 from the load above is not
  // overwritten before it is checked.
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x19, 16, PostIndex));
  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
  // Register pair that wraps around from v31 to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination must read as zero.
    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
    ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);

    // Each base is its start address plus the post-index amount.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 16, x18);
    ASSERT_EQUAL_64(src_base + 2 + 16, x19);
    ASSERT_EQUAL_64(src_base + 3 + 16, x20);
    ASSERT_EQUAL_64(src_base + 4 + 16, x21);
  }
}
659
660
// Exercises Ld2 (multiple structures) into pairs of Q-sized registers for
// every 128-bit arrangement, with the base address moving by one byte between
// loads.
TEST(neon_ld2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t bytes[64 + 4];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  __ Mov(x17, bytes_addr);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // Register pair that wraps around from v31 to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
  }
}
698
699
// Exercises post-indexed Ld2 (multiple structures) into pairs of Q-sized
// registers, with both register and immediate post-index offsets, and checks
// the updated base registers.
TEST(neon_ld2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t bytes[64 + 4];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  // One base register per load, each starting one byte further in.
  __ Mov(x17, bytes_addr);
  __ Mov(x18, bytes_addr + 1);
  __ Mov(x19, bytes_addr + 2);
  __ Mov(x20, bytes_addr + 3);
  __ Mov(x21, bytes_addr + 4);
  __ Mov(x22, 1);  // Register post-index amount.
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
  // Register pair that wraps around from v31 to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

    // Each base is its start address plus the post-index amount.
    ASSERT_EQUAL_64(bytes_addr + 1, x17);
    ASSERT_EQUAL_64(bytes_addr + 1 + 32, x18);
    ASSERT_EQUAL_64(bytes_addr + 2 + 32, x19);
    ASSERT_EQUAL_64(bytes_addr + 3 + 32, x20);
    ASSERT_EQUAL_64(bytes_addr + 4 + 32, x21);
  }
}
745
746
// Exercises Ld2 (single structure) into individual B, H, S and D lanes of a
// register pair: first filling whole registers lane by lane, then replacing
// one lane of registers that already hold data.
TEST(neon_ld2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t bytes[64];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();

  // Test loading whole register by element. Lanes are filled from the highest
  // down while the address advances by one byte per load.
  __ Mov(x17, bytes_addr);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld2(v0.B(), v1.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, bytes_addr);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld2(v2.H(), v3.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, bytes_addr);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld2(v4.S(), v5.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, bytes_addr);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld2(v6.D(), v7.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register. Each pair is
  // first seeded with bytes [0, 16) and [16, 32) through a scratch pointer.
  __ Mov(x17, bytes_addr);
  __ Mov(x4, x17);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
    ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
    ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
  }
}
825
826
// Exercises post-indexed Ld2 (single structure) into individual lanes of a
// register pair, using immediate post-index offsets for the whole-register
// fills and a register post-index offset for the single-lane replacements.
TEST(neon_ld2_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source bytes hold their own index.
  uint8_t bytes[64];
  for (size_t idx = 0; idx < sizeof(bytes); ++idx) {
    bytes[idx] = idx;
  }
  uintptr_t bytes_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  // One base register per group of loads so each writeback can be checked.
  __ Mov(x17, bytes_addr);
  __ Mov(x18, bytes_addr);
  __ Mov(x19, bytes_addr);
  __ Mov(x20, bytes_addr);
  __ Mov(x21, bytes_addr);
  __ Mov(x22, bytes_addr);
  __ Mov(x23, bytes_addr);
  __ Mov(x24, bytes_addr);

  // Test loading whole register by element. The immediate post-index amount
  // equals the number of bytes each load transfers.
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld2(v0.B(), v1.B(), lane, MemOperand(x17, 2, PostIndex));
  }

  for (int lane = 7; lane >= 0; --lane) {
    __ Ld2(v2.H(), v3.H(), lane, MemOperand(x18, 4, PostIndex));
  }

  for (int lane = 3; lane >= 0; --lane) {
    __ Ld2(v4.S(), v5.S(), lane, MemOperand(x19, 8, PostIndex));
  }

  for (int lane = 1; lane >= 0; --lane) {
    __ Ld2(v6.D(), v7.D(), lane, MemOperand(x20, 16, PostIndex));
  }

  // Test loading a single element into an initialised register. x25 is the
  // register post-index amount and takes the values 1, 2, 3 and 4 in turn.
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
    ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
    ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
    ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
    ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
    ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
    ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);

    // Whole-register fills advance their base by 32 bytes in total; the
    // single-lane loads advance theirs by the value held in x25.
    ASSERT_EQUAL_64(bytes_addr + 32, x17);
    ASSERT_EQUAL_64(bytes_addr + 32, x18);
    ASSERT_EQUAL_64(bytes_addr + 32, x19);
    ASSERT_EQUAL_64(bytes_addr + 32, x20);
    ASSERT_EQUAL_64(bytes_addr + 1, x21);
    ASSERT_EQUAL_64(bytes_addr + 2, x22);
    ASSERT_EQUAL_64(bytes_addr + 3, x23);
    ASSERT_EQUAL_64(bytes_addr + 4, x24);
  }
}
921
922
// Ld2r with plain base-register addressing, once per vector arrangement. The
// expected values show the two consecutive elements found at the address
// copied into every lane of the first and second destination respectively.
TEST(neon_ld2_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n), so each
  // expected value below spells out the offsets that were loaded.
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  // x18 is initialised here, but none of the loads below consume it.
  __ Mov(x18, 1);
  // x17 == src_base + 1.
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Add(x17, x17, 2);
  // x17 == src_base + 3.
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // x17 == src_base + 4.
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // x17 == src_base + 5.
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 4);
  // x17 == src_base + 9.
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // x17 == src_base + 10.
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
  __ Add(x17, x17, 8);
  // x17 == src_base + 18.
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Byte arrangements: the 64-bit form leaves the upper half clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    // Half-word arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    // Word arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    // Double-word arrangement.
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
  }
}
969
970
// Ld2r with post-index addressing. The immediate and register (x18 == 1)
// increments reproduce the address sequence of neon_ld2_alllanes, so the
// loaded values are the same; the final base register is checked as well.
TEST(neon_ld2_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  // Increment used by the register post-index forms.
  __ Mov(x18, 1);
  // Loads start at src_base + 1, 3, 4, 5, 9, 10 and 18 in turn.
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Byte arrangements: the 64-bit form leaves the upper half clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    // Half-word arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    // Word arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    // Double-word arrangement.
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
    // Accumulated write-back: 1 + 2 + 1 + 1 + 4 + 1 + 8 + 16.
    ASSERT_EQUAL_64(src_base + 34, x17);
  }
}
1012
1013
// Ld3 (whole structures) into 64-bit vectors with plain base-register
// addressing. The expected values show consecutive memory elements dealt out
// in turn to the three destinations, with the upper 64 bits left at zero.
TEST(neon_ld3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64 + 4];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // The base address moves on by one byte between loads.
  __ Mov(x17, src_base);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps from v31 round to v0.
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes, starting at offset 0.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    // Bytes, starting at offset 1.
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    // Half-words, starting at offset 2.
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    // Words, starting at offset 3.
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);
  }
}
1051
1052
// Ld3 (whole structures) into 64-bit vectors with post-index addressing. Each
// load has its own base register so that both the loaded data and the
// write-back of every base can be verified independently.
TEST(neon_ld3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[32 + 4];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One base register per load, at offsets 0 to 4.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  // Increment used by the register post-index form.
  __ Mov(x22, 1);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
  // This register list wraps from v31 round to v0.
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes, starting at offset 0.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    // Bytes, starting at offset 1.
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    // Half-words, starting at offset 2.
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    // Words, starting at offset 3.
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
    // Words, starting at offset 4.
    ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
    ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
    ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);

    // Base registers after write-back: x17 moved by x22, the rest by 24.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 24, x18);
    ASSERT_EQUAL_64(src_base + 2 + 24, x19);
    ASSERT_EQUAL_64(src_base + 3 + 24, x20);
    ASSERT_EQUAL_64(src_base + 4 + 24, x21);
  }
}
1102
1103
// Ld3 (whole structures) into 128-bit vectors with plain base-register
// addressing, covering the 16B, 8H, 4S and 2D arrangements.
TEST(neon_ld3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64 + 4];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // The base address moves on by one byte between loads.
  __ Mov(x17, src_base);
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps from v31 round to v0.
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes, starting at offset 0.
    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    // Bytes, starting at offset 1.
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    // Half-words, starting at offset 2.
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    // Words, starting at offset 3.
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    // Double-words, starting at offset 4.
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
  }
}
1146
1147
// Ld3 (whole structures) into 128-bit vectors with post-index addressing.
// Each load has its own base register so that the loaded data and every
// write-back can be verified independently.
TEST(neon_ld3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64 + 4];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One base register per load, at offsets 0 to 4.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  // Increment used by the register post-index form.
  __ Mov(x22, 1);

  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
  // This register list wraps from v31 round to v0.
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes, starting at offset 0.
    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    // Bytes, starting at offset 1.
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    // Half-words, starting at offset 2.
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    // Words, starting at offset 3.
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    // Double-words, starting at offset 4.
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

    // Base registers after write-back: x17 moved by x22, the rest by 48.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 48, x18);
    ASSERT_EQUAL_64(src_base + 2 + 48, x19);
    ASSERT_EQUAL_64(src_base + 3 + 48, x20);
    ASSERT_EQUAL_64(src_base + 4 + 48, x21);
  }
}
1198
1199
// Ld3 (single structure to one lane) with plain base-register addressing.
//
// The first half fills whole registers one lane at a time; the second half
// overwrites a single lane of registers that already hold data, checking
// that the remaining lanes are preserved.
TEST(neon_ld3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element. Lanes are filled from the top
  // down while the address advances by one byte per load.
  __ Mov(x17, src_base);
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld3(v0.B(), v1.B(), v2.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int lane = 7; lane >= 0; --lane) {
    __ Ld3(v3.H(), v4.H(), v5.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int lane = 3; lane >= 0; --lane) {
    __ Ld3(v6.S(), v7.S(), v8.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int lane = 1; lane >= 0; --lane) {
    __ Ld3(v9.D(), v10.D(), v11.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register. Each register
  // triple is first filled with src[0..47] via Ldr, then one lane of each is
  // replaced by Ld3 reading from src_base.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Whole-register fills: B, H, S and D lanes.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
    ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
    ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
    ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
    ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
    ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
    ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);
    // Single-lane replacement: B lane 4.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
    // Single-lane replacement: H lane 3.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
    // Single-lane replacement: S lane 2. These registers were loaded above
    // but previously went unchecked.
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
    // Single-lane replacement: D lane 1.
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);
  }
}
1284
1285
// Ld3 (single structure to one lane) with post-index addressing.
//
// The first half fills whole registers one lane at a time using immediate
// post-index; the second half overwrites a single lane of already-initialised
// registers using register post-index. Base write-back is verified for all.
TEST(neon_ld3_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element. Every sub-test below gets its own
  // base register, all starting at src_base.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);
  // Lanes are filled from the top down; the immediate equals the size of one
  // three-element structure for the lane width in use.
  for (int lane = 15; lane >= 0; --lane) {
    __ Ld3(v0.B(), v1.B(), v2.B(), lane, MemOperand(x17, 3, PostIndex));
  }

  for (int lane = 7; lane >= 0; --lane) {
    __ Ld3(v3.H(), v4.H(), v5.H(), lane, MemOperand(x18, 6, PostIndex));
  }

  for (int lane = 3; lane >= 0; --lane) {
    __ Ld3(v6.S(), v7.S(), v8.S(), lane, MemOperand(x19, 12, PostIndex));
  }

  for (int lane = 1; lane >= 0; --lane) {
    __ Ld3(v9.D(), v10.D(), v11.D(), lane, MemOperand(x20, 24, PostIndex));
  }


  // Test loading a single element into an initialised register. x25 supplies
  // the register post-index amount and grows by one before each later load
  // (1, 2, 3, 4).
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Whole-register fills: B, H, S and D lanes.
    ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
    ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
    ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
    ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
    ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
    ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
    ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
    ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
    ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
    ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);
    // Single-lane replacement: B lane 4.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
    // Single-lane replacement: H lane 3.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
    // Single-lane replacement: S lane 2.
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
    // Single-lane replacement: D lane 1.
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);

    // Base registers after write-back: the looped loads each covered 48
    // bytes in total; the single loads moved by x25 (1, 2, 3, 4).
    ASSERT_EQUAL_64(src_base + 48, x17);
    ASSERT_EQUAL_64(src_base + 48, x18);
    ASSERT_EQUAL_64(src_base + 48, x19);
    ASSERT_EQUAL_64(src_base + 48, x20);
    ASSERT_EQUAL_64(src_base + 1, x21);
    ASSERT_EQUAL_64(src_base + 2, x22);
    ASSERT_EQUAL_64(src_base + 3, x23);
    ASSERT_EQUAL_64(src_base + 4, x24);
  }
}
1392
1393
// Ld3r with plain base-register addressing, once per vector arrangement. The
// expected values show the three consecutive elements found at the address
// copied into every lane of the first, second and third destination.
TEST(neon_ld3_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  // x18 is initialised here, but none of the loads below consume it.
  __ Mov(x18, 1);
  // x17 == src_base + 1.
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 3);
  // x17 == src_base + 4.
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // x17 == src_base + 5.
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // x17 == src_base + 6.
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);
  // x17 == src_base + 12.
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // x17 == src_base + 13.
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);
  // x17 == src_base + 25.
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Byte arrangements: the 64-bit form leaves the upper half clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    // Half-word arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    // Word arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    // Double-word arrangement.
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
  }
}
1447
1448
// Ld3r with post-index addressing. The immediate and register (x18 == 1)
// increments reproduce the address sequence of neon_ld3_alllanes, so the
// loaded values are the same; the final base register is checked as well.
TEST(neon_ld3_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // All code generation, including the x17/x18 setup, stays between START()
  // and END() as in the other tests here; the duplicate Mov pair that used to
  // precede START() has been dropped.
  START();
  __ Mov(x17, src_base + 1);
  // Increment used by the register post-index forms.
  __ Mov(x18, 1);
  // Loads start at src_base + 1, 4, 5, 6, 12, 13 and 25 in turn.
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Byte arrangements: the 64-bit form leaves the upper half clear.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    // Half-word arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    // Word arrangements.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    // Double-word arrangement.
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
    // Accumulated write-back: 1 + 3 + 1 + 1 + 6 + 1 + 12 + 24.
    ASSERT_EQUAL_64(src_base + 49, x17);
  }
}
1498
1499
// Ld4 (whole structures) into 64-bit vectors with plain base-register
// addressing. The expected values show consecutive memory elements dealt out
// in turn to the four destinations, with the upper 64 bits left at zero.
TEST(neon_ld4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64 + 4];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // The base address moves on by one byte between loads.
  __ Mov(x17, src_base);
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps from v31 round to v0.
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes, starting at offset 0.
    ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
    // Bytes, starting at offset 1.
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
    // Half-words, starting at offset 2.
    ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
    // Words, starting at offset 3.
    ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
    ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
    ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
    ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);
  }
}
1541
1542
// Ld4 (whole structures) into 64-bit vectors with post-index addressing. Each
// load has its own base register so that the loaded data and every
// write-back can be verified independently.
TEST(neon_ld4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[32 + 4];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One base register per load, at offsets 0 to 4.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  // Increment used by the register post-index form.
  __ Mov(x22, 1);
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(),
         MemOperand(x18, 32, PostIndex));
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(),
         MemOperand(x19, 32, PostIndex));
  __ Ld4(v14.V2S(), v15.V2S(), v16.V2S(), v17.V2S(),
         MemOperand(x20, 32, PostIndex));
  // This register list wraps from v31 round to v0.
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes, starting at offset 0.
    ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
    // Bytes, starting at offset 1.
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
    // Half-words, starting at offset 2.
    ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
    // Words, starting at offset 3.
    ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
    // Words, starting at offset 4.
    ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
    ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
    ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
    ASSERT_EQUAL_128(0, 0x2322212013121110, q1);


    // Base registers after write-back: x17 moved by x22, the rest by 32.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 32, x18);
    ASSERT_EQUAL_64(src_base + 2 + 32, x19);
    ASSERT_EQUAL_64(src_base + 3 + 32, x20);
    ASSERT_EQUAL_64(src_base + 4 + 32, x21);
  }
}
1618
1619
// Ld4 (whole structures) into 128-bit vectors with plain base-register
// addressing, covering the 16B, 8H, 4S and 2D arrangements.
TEST(neon_ld4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64 + 4];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // The base address moves on by one byte between loads.
  __ Mov(x17, src_base);
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes, starting at offset 0.
    ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
    // Bytes, starting at offset 1.
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
    // Half-words, starting at offset 2.
    ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
    // Words, starting at offset 3.
    ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
    // Double-words, starting at offset 4.
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
    ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);
  }
}
1667
1668
// Ld4 (whole structures) into 128-bit vectors with post-index addressing.
// Each load has its own base register so that the loaded data and every
// write-back can be verified independently.
TEST(neon_ld4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Every byte of the buffer holds its own offset (src[n] == n).
  uint8_t src[64 + 4];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One base register per load, at offsets 0 to 4.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  // Increment used by the register post-index form.
  __ Mov(x22, 1);

  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(),
         MemOperand(x18, 64, PostIndex));
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(),
         MemOperand(x19, 64, PostIndex));
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(),
         MemOperand(x20, 64, PostIndex));
  // This register list wraps from v31 round to v0.
  __ Ld4(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // Bytes, starting at offset 0.
    ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
    // Bytes, starting at offset 1.
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
    // Half-words, starting at offset 2.
    ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
    // Words, starting at offset 3.
    ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
    // Double-words, starting at offset 4.
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);


    // Base registers after write-back: x17 moved by x22, the rest by 64.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 64, x18);
    ASSERT_EQUAL_64(src_base + 2 + 64, x19);
    ASSERT_EQUAL_64(src_base + 3 + 64, x20);
    ASSERT_EQUAL_64(src_base + 4 + 64, x21);
  }
}
1745
1746
// Exercises the single-lane form of Ld4 with a plain base-register address,
// for B, H, S and D lanes.
TEST(neon_ld4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which each byte holds its own offset (0x00 to 0x3f).
  uint8_t bytes[64];
  for (size_t offset = 0; offset < sizeof(bytes); ++offset) {
    bytes[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t base_addr = reinterpret_cast<uintptr_t>(bytes);

  START();

  // Part 1: populate every lane of a register group, from the highest lane
  // down to lane 0. The base address moves on by a single byte after each
  // load, whatever the lane size, so consecutive loads overlap in memory.
  __ Mov(x17, base_addr);
  for (int step = 0; step < 16; ++step) {
    const int lane = 15 - step;
    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, base_addr);
  for (int step = 0; step < 8; ++step) {
    const int lane = 7 - step;
    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, base_addr);
  for (int step = 0; step < 4; ++step) {
    const int lane = 3 - step;
    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, base_addr);
  for (int step = 0; step < 2; ++step) {
    const int lane = 1 - step;
    __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Part 2: replace one lane of registers that already hold data. Each group
  // of four Q registers is first filled with the 64 bytes of the buffer (via a
  // scratch pointer), then a single Ld4 from the start of the buffer
  // overwrites one lane in each of them.
  __ Mov(x17, base_addr);

  // B lanes: overwrite lane 4.
  __ Mov(x4, x17);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));

  // H lanes: overwrite lane 3.
  __ Mov(x5, x17);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));

  // S lanes: overwrite lane 2.
  __ Mov(x6, x17);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));

  // D lanes: overwrite lane 1.
  __ Mov(x7, x17);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // Part 1, B lanes.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
    ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
    ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
    // Part 1, H lanes.
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
    ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
    ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
    ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
    // Part 1, S lanes.
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
    ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
    ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
    ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
    // Part 1, D lanes.
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
    ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);
    // Part 2, B lane 4 replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
    // Part 2, H lane 3 replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
    // Part 2, S lane 2 replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
    // Part 2, D lane 1 replaced.
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
  }
}
1852
1853
// Exercises the single-lane form of Ld4 with post-index addressing, using
// both immediate and register offsets, for B, H, S and D lanes.
TEST(neon_ld4_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which each byte holds its own offset (0x00 to 0x3f).
  uint8_t bytes[64];
  for (size_t offset = 0; offset < sizeof(bytes); ++offset) {
    bytes[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t base_addr = reinterpret_cast<uintptr_t>(bytes);

  START();

  // Part 1: populate every lane of a register group, from the highest lane
  // down to lane 0. The immediate post-index equals the size of one
  // four-element structure, so each base register walks the whole buffer.
  __ Mov(x17, base_addr);
  for (int step = 0; step < 16; ++step) {
    const int lane = 15 - step;
    __ Ld4(v0.B(),
           v1.B(),
           v2.B(),
           v3.B(),
           lane,
           MemOperand(x17, 4, PostIndex));
  }

  __ Mov(x18, base_addr);
  for (int step = 0; step < 8; ++step) {
    const int lane = 7 - step;
    __ Ld4(v4.H(),
           v5.H(),
           v6.H(),
           v7.H(),
           lane,
           MemOperand(x18, 8, PostIndex));
  }

  __ Mov(x19, base_addr);
  for (int step = 0; step < 4; ++step) {
    const int lane = 3 - step;
    __ Ld4(v8.S(),
           v9.S(),
           v10.S(),
           v11.S(),
           lane,
           MemOperand(x19, 16, PostIndex));
  }

  __ Mov(x20, base_addr);
  for (int step = 0; step < 2; ++step) {
    const int lane = 1 - step;
    __ Ld4(v12.D(),
           v13.D(),
           v14.D(),
           v15.D(),
           lane,
           MemOperand(x20, 32, PostIndex));
  }

  // Part 2: replace one lane of registers that already hold data, using a
  // register post-index. x25 supplies the offset and is bumped after each
  // load, so x21..x24 end up advanced by 1, 2, 3 and 4 bytes respectively.
  __ Mov(x25, 1);
  __ Mov(x21, base_addr);
  __ Mov(x22, base_addr);
  __ Mov(x23, base_addr);
  __ Mov(x24, base_addr);

  // B lanes: preload q16-q19 with the buffer, then overwrite lane 4.
  __ Mov(x4, x21);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(),
         v17.B(),
         v18.B(),
         v19.B(),
         4,
         MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  // H lanes: preload q20-q23 with the buffer, then overwrite lane 3.
  __ Mov(x5, x22);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(),
         v21.H(),
         v22.H(),
         v23.H(),
         3,
         MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  // S lanes: preload q24-q27 with the buffer, then overwrite lane 2.
  __ Mov(x6, x23);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(),
         v25.S(),
         v26.S(),
         v27.S(),
         2,
         MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  // D lanes: preload q28-q31 with the buffer, then overwrite lane 1.
  __ Mov(x7, x24);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(),
         v29.D(),
         v30.D(),
         v31.D(),
         1,
         MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Part 1, B lanes.
    ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
    ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
    ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
    ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
    // Part 1, H lanes.
    ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
    ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
    ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
    ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
    // Part 1, S lanes.
    ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
    ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
    ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
    ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
    // Part 1, D lanes.
    ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
    ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);
    // Part 2, B lane 4 replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
    // Part 2, H lane 3 replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
    // Part 2, S lane 2 replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
    ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
    ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
    // Part 2, D lane 1 replaced.
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
    ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);

    // Base registers after write-back.
    ASSERT_EQUAL_64(base_addr + 64, x17);
    ASSERT_EQUAL_64(base_addr + 64, x18);
    ASSERT_EQUAL_64(base_addr + 64, x19);
    ASSERT_EQUAL_64(base_addr + 64, x20);
    ASSERT_EQUAL_64(base_addr + 1, x21);
    ASSERT_EQUAL_64(base_addr + 2, x22);
    ASSERT_EQUAL_64(base_addr + 3, x23);
    ASSERT_EQUAL_64(base_addr + 4, x24);
  }
}
1997
1998
// Exercises Ld4r (load one four-element structure and replicate each element
// across all lanes) with a plain base-register address, for every vector
// arrangement from 8B to 2D.
TEST(neon_ld4_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which each byte holds its own offset (0x00 to 0x3f).
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Every load below uses a plain base address and x17 is advanced with
  // explicit Add instructions, so no offset register is set up.
  __ Mov(x17, src_base + 1);
  __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
  __ Add(x17, x17, 16);
  // The final load reads the last 32 bytes of the buffer (offsets 32 to 63).
  __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements (8B, 4H, 2S) leave the upper half of the Q register
    // zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
    ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
    ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
    ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
    ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
    ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
    ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
    ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
    ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
    ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
  }
}
2061
2062
// Exercises Ld4r (load one four-element structure and replicate each element
// across all lanes) with post-index addressing, alternating between immediate
// and register offsets, for every vector arrangement from 8B to 2D.
TEST(neon_ld4_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which each byte holds its own offset (0x00 to 0x3f).
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // All code generation happens between START() and END(); the base and
  // offset registers are initialised here and nowhere else.
  START();
  __ Mov(x17, src_base + 1);
  // x18 is the register post-index offset (one byte).
  __ Mov(x18, 1);
  __ Ld4r(v0.V8B(),
          v1.V8B(),
          v2.V8B(),
          v3.V8B(),
          MemOperand(x17, 4, PostIndex));
  __ Ld4r(v4.V16B(),
          v5.V16B(),
          v6.V16B(),
          v7.V16B(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v8.V4H(),
          v9.V4H(),
          v10.V4H(),
          v11.V4H(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v12.V8H(),
          v13.V8H(),
          v14.V8H(),
          v15.V8H(),
          MemOperand(x17, 8, PostIndex));
  __ Ld4r(v16.V2S(),
          v17.V2S(),
          v18.V2S(),
          v19.V2S(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v20.V4S(),
          v21.V4S(),
          v22.V4S(),
          v23.V4S(),
          MemOperand(x17, 16, PostIndex));
  __ Ld4r(v24.V2D(),
          v25.V2D(),
          v26.V2D(),
          v27.V2D(),
          MemOperand(x17, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements (8B, 4H, 2S) leave the upper half of the Q register
    // zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
    ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
    ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
    ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
    ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
    ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
    ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
    ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
    ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
    ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
    // 1 + 4 + 1 + 1 + 8 + 1 + 16 + 32 bytes of accumulated write-back.
    ASSERT_EQUAL_64(src_base + 64, x17);
  }
}
2148
2149
// Exercises the single-lane form of St1 with a plain base-register address,
// for B, H, S and D lanes.
TEST(neon_st1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer in which each byte initially holds its own offset (0x00 to 0x3f).
  // The test both reads its input from it and stores its results into it.
  uint8_t buffer[64];
  for (size_t offset = 0; offset < sizeof(buffer); ++offset) {
    buffer[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  __ Mov(x17, buffer_base);
  // x18 = -16: used to read back the 16 bytes that were just stored.
  __ Mov(x18, -16);
  // q0 is the value being stored: the first 16 bytes of the buffer.
  __ Ldr(q0, MemOperand(x17));

  // Store q0 lane by lane, highest lane first, so that each 16-byte region
  // ends up holding q0 with its lanes in reverse order.
  for (int step = 0; step < 16; ++step) {
    const int lane = 15 - step;
    __ St1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int step = 0; step < 8; ++step) {
    const int lane = 7 - step;
    __ St1(v0.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 2);
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int step = 0; step < 4; ++step) {
    const int lane = 3 - step;
    __ St1(v0.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 4);
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int step = 0; step < 2; ++step) {
    const int lane = 1 - step;
    __ St1(v0.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 8);
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // B, H, S and D lanes of q0 in reversed order, respectively.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2199
2200
// Exercises the single-lane form of St2 for B, H, S and D lanes, each with
// both a plain base-register address and post-index addressing.
TEST(neon_st2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[2 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // x17 stays at the start of the buffer and is used to read results back;
  // x18 is the moving store pointer.
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18));
    __ Add(x18, x18, 2);
  }
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, 0 * 16));
  __ Ldr(q3, MemOperand(x17, 1 * 16));
  __ Ldr(q4, MemOperand(x17, 2 * 16));
  __ Ldr(q5, MemOperand(x17, 3 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 4);
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q6, MemOperand(x17, 4 * 16));
  __ Ldr(q7, MemOperand(x17, 5 * 16));
  __ Ldr(q16, MemOperand(x17, 6 * 16));
  __ Ldr(q17, MemOperand(x17, 7 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18));
    __ Add(x18, x18, 8);
  }
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
  }
  __ Ldr(q18, MemOperand(x17, 8 * 16));
  __ Ldr(q19, MemOperand(x17, 9 * 16));
  __ Ldr(q20, MemOperand(x17, 10 * 16));
  __ Ldr(q21, MemOperand(x17, 11 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 16);
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 16);
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
  __ Ldr(q22, MemOperand(x17, 12 * 16));
  __ Ldr(q23, MemOperand(x17, 13 * 16));
  __ Ldr(q24, MemOperand(x17, 14 * 16));
  __ Ldr(q25, MemOperand(x17, 15 * 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // For each element size, the second pair of registers repeats the first:
    // the same lanes are stored again through the other addressing mode.
    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
    ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
    ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);

    ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
    ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
    ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
    ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);

    ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
    ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
    ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
    ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);

    // q22/q23 hold the first two D stores; q24/q25 hold the two D stores made
    // with a register post-index and must be checked separately.
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
    ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q24);
    ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q25);
  }
}
2292
2293
// Exercises the single-lane form of St3 for B, H, S and D lanes, each with
// both a plain base-register address and post-index addressing.
TEST(neon_st3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[3 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // x17 stays at the start of the buffer and is used to read results back;
  // x18 is the moving store pointer.
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
    __ Add(x18, x18, 3);
  }
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, 0 * 16));
  __ Ldr(q4, MemOperand(x17, 1 * 16));
  __ Ldr(q5, MemOperand(x17, 2 * 16));
  __ Ldr(q6, MemOperand(x17, 3 * 16));
  __ Ldr(q7, MemOperand(x17, 4 * 16));
  __ Ldr(q16, MemOperand(x17, 5 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 6);
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
    __ Add(x18, x18, 6);
  }
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q17, MemOperand(x17, 6 * 16));
  __ Ldr(q18, MemOperand(x17, 7 * 16));
  __ Ldr(q19, MemOperand(x17, 8 * 16));
  __ Ldr(q20, MemOperand(x17, 9 * 16));
  __ Ldr(q21, MemOperand(x17, 10 * 16));
  __ Ldr(q22, MemOperand(x17, 11 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
    __ Add(x18, x18, 12);
  }
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
  }
  __ Ldr(q23, MemOperand(x17, 12 * 16));
  __ Ldr(q24, MemOperand(x17, 13 * 16));
  __ Ldr(q25, MemOperand(x17, 14 * 16));
  __ Ldr(q26, MemOperand(x17, 15 * 16));
  __ Ldr(q27, MemOperand(x17, 16 * 16));
  __ Ldr(q28, MemOperand(x17, 17 * 16));

  // Test D stores with and without post index.
  // Only the first 48 bytes of this region (the first two stores) are read
  // back; the output of the final register post-index store is not loaded.
  __ Mov(x0, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
  __ Ldr(q29, MemOperand(x17, 18 * 16));
  __ Ldr(q30, MemOperand(x17, 19 * 16));
  __ Ldr(q31, MemOperand(x17, 20 * 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // For B, H and S lanes, the second triple of registers repeats the first:
    // the same lanes are stored again through the other addressing mode.
    ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
    ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
    ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
    ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
    ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
    ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);

    ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
    ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
    ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
    ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
    ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
    ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);

    ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
    ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
    ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
    ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
    ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
    ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);

    // D lanes: lane 1 of v0, v1, v2 followed by lane 0 of v0, v1, v2.
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q29);
    ASSERT_EQUAL_128(0x08090a0b0c0d0e0f, 0x2021222324252627, q30);
    ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x18191a1b1c1d1e1f, q31);
  }
}
2391
2392
// Exercises the single-lane form of St4: B and S lanes with a plain
// base-register address, H and D lanes with post-index addressing.
TEST(neon_st4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * element sizes * vector size.
  uint8_t dst[4 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // x17 stays at the start of the buffer and is used to read results back;
  // x18 is the moving store pointer.
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  // Each source register carries a distinct byte pattern (0x0?, 0x1?, 0x2?,
  // 0x3?) so that the expected values identify which register every stored
  // element came from.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
  __ Movi(v3.V2D(), 0x3031323334353637, 0x38393a3b3c3d3e3f);

  // Test B stores without post index.
  for (int i = 15; i >= 0; i--) {
    __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  __ Ldr(q4, MemOperand(x17, 0 * 16));
  __ Ldr(q5, MemOperand(x17, 1 * 16));
  __ Ldr(q6, MemOperand(x17, 2 * 16));
  __ Ldr(q7, MemOperand(x17, 3 * 16));

  // Test H stores with post index.
  __ Mov(x0, 8);
  for (int i = 7; i >= 0; i--) {
    __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q16, MemOperand(x17, 4 * 16));
  __ Ldr(q17, MemOperand(x17, 5 * 16));
  __ Ldr(q18, MemOperand(x17, 6 * 16));
  __ Ldr(q19, MemOperand(x17, 7 * 16));

  // Test S stores without post index.
  for (int i = 3; i >= 0; i--) {
    __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
    __ Add(x18, x18, 16);
  }
  __ Ldr(q20, MemOperand(x17, 8 * 16));
  __ Ldr(q21, MemOperand(x17, 9 * 16));
  __ Ldr(q22, MemOperand(x17, 10 * 16));
  __ Ldr(q23, MemOperand(x17, 11 * 16));

  // Test D stores with post index.
  __ Mov(x0, 32);
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));

  __ Ldr(q24, MemOperand(x17, 12 * 16));
  __ Ldr(q25, MemOperand(x17, 13 * 16));
  __ Ldr(q26, MemOperand(x17, 14 * 16));
  __ Ldr(q27, MemOperand(x17, 15 * 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // B lanes, highest lane first: v0, v1, v2, v3 interleaved per byte.
    ASSERT_EQUAL_128(0x3323130332221202, 0x3121110130201000, q4);
    ASSERT_EQUAL_128(0x3727170736261606, 0x3525150534241404, q5);
    ASSERT_EQUAL_128(0x3b2b1b0b3a2a1a0a, 0x3929190938281808, q6);
    ASSERT_EQUAL_128(0x3f2f1f0f3e2e1e0e, 0x3d2d1d0d3c2c1c0c, q7);

    // H lanes, highest lane first.
    ASSERT_EQUAL_128(0x3233222312130203, 0x3031202110110001, q16);
    ASSERT_EQUAL_128(0x3637262716170607, 0x3435242514150405, q17);
    ASSERT_EQUAL_128(0x3a3b2a2b1a1b0a0b, 0x3839282918190809, q18);
    ASSERT_EQUAL_128(0x3e3f2e2f1e1f0e0f, 0x3c3d2c2d1c1d0c0d, q19);

    // S lanes, highest lane first.
    ASSERT_EQUAL_128(0x3031323320212223, 0x1011121300010203, q20);
    ASSERT_EQUAL_128(0x3435363724252627, 0x1415161704050607, q21);
    ASSERT_EQUAL_128(0x38393a3b28292a2b, 0x18191a1b08090a0b, q22);
    ASSERT_EQUAL_128(0x3c3d3e3f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);

    // D lanes: lane 0 of v0..v3, then lane 1 of v0..v3.
    ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
    ASSERT_EQUAL_128(0x38393a3b3c3d3e3f, 0x28292a2b2c2d2e2f, q25);
    ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
    ASSERT_EQUAL_128(0x3031323334353637, 0x2021222324252627, q27);
  }
}
2474
2475
// Exercises the single-lane form of Ld1 with post-index addressing, using
// both immediate and register offsets, for B, H, S and D lanes.
TEST(neon_ld1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which each byte holds its own offset (0x00 to 0x3f).
  uint8_t bytes[64];
  for (size_t offset = 0; offset < sizeof(bytes); ++offset) {
    bytes[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t base_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  // One independent base register per sub-test, all starting at the buffer.
  __ Mov(x17, base_addr);
  __ Mov(x18, base_addr);
  __ Mov(x19, base_addr);
  __ Mov(x20, base_addr);
  __ Mov(x21, base_addr);
  __ Mov(x22, base_addr);
  __ Mov(x23, base_addr);
  __ Mov(x24, base_addr);

  // Part 1: populate every lane of a register, from the highest lane down to
  // lane 0, post-incrementing the base by one element each time.
  for (int step = 0; step < 16; ++step) {
    const int lane = 15 - step;
    __ Ld1(v0.B(), lane, MemOperand(x17, 1, PostIndex));
  }

  for (int step = 0; step < 8; ++step) {
    const int lane = 7 - step;
    __ Ld1(v1.H(), lane, MemOperand(x18, 2, PostIndex));
  }

  for (int step = 0; step < 4; ++step) {
    const int lane = 3 - step;
    __ Ld1(v2.S(), lane, MemOperand(x19, 4, PostIndex));
  }

  for (int step = 0; step < 2; ++step) {
    const int lane = 1 - step;
    __ Ld1(v3.D(), lane, MemOperand(x20, 8, PostIndex));
  }

  // Part 2: replace one lane of a register that already holds the first 16
  // bytes of the buffer, using a register post-index. x25 supplies the offset
  // and is bumped after each load, so x21..x24 end up advanced by 1, 2, 3 and
  // 4 bytes respectively.
  __ Mov(x25, 1);
  __ Ldr(q4, MemOperand(x21));
  __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q5, MemOperand(x22));
  __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q6, MemOperand(x23));
  __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q7, MemOperand(x24));
  __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    // Part 1: the first 16 bytes with B, H, S and D lanes reversed.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
    // Part 2: a single lane replaced by the element at the buffer start.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
    // Base registers after write-back.
    ASSERT_EQUAL_64(base_addr + 16, x17);
    ASSERT_EQUAL_64(base_addr + 16, x18);
    ASSERT_EQUAL_64(base_addr + 16, x19);
    ASSERT_EQUAL_64(base_addr + 16, x20);
    ASSERT_EQUAL_64(base_addr + 1, x21);
    ASSERT_EQUAL_64(base_addr + 2, x22);
    ASSERT_EQUAL_64(base_addr + 3, x23);
    ASSERT_EQUAL_64(base_addr + 4, x24);
  }
}
2552
2553
// Exercises the single-lane form of St1 with immediate post-index addressing,
// for B, H, S and D lanes.
TEST(neon_st1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Buffer in which each byte initially holds its own offset (0x00 to 0x3f).
  // The test both reads its input from it and stores its results into it.
  uint8_t buffer[64];
  for (size_t offset = 0; offset < sizeof(buffer); ++offset) {
    buffer[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  __ Mov(x17, buffer_base);
  // x18 = -16: used to read back the 16 bytes that were just stored.
  __ Mov(x18, -16);
  // q0 is the value being stored: the first 16 bytes of the buffer.
  __ Ldr(q0, MemOperand(x17));

  // Store q0 lane by lane, highest lane first, letting the post-index advance
  // x17 by one element per store.
  for (int step = 0; step < 16; ++step) {
    const int lane = 15 - step;
    __ St1(v0.B(), lane, MemOperand(x17, 1, PostIndex));
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int step = 0; step < 8; ++step) {
    const int lane = 7 - step;
    __ St1(v0.H(), lane, MemOperand(x17, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int step = 0; step < 4; ++step) {
    const int lane = 3 - step;
    __ St1(v0.S(), lane, MemOperand(x17, 4, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int step = 0; step < 2; ++step) {
    const int lane = 1 - step;
    __ St1(v0.D(), lane, MemOperand(x17, 8, PostIndex));
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    // B, H, S and D lanes of q0 in reversed order, respectively.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2599
2600
// Exercises Ld1r (load one element and replicate it across all lanes) with a
// plain base-register address, for every vector arrangement from 8B to 2D.
TEST(neon_ld1_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which each byte holds its own offset (0x00 to 0x3f).
  uint8_t bytes[64];
  for (size_t offset = 0; offset < sizeof(bytes); ++offset) {
    bytes[offset] = static_cast<uint8_t>(offset);
  }
  uintptr_t base_addr = reinterpret_cast<uintptr_t>(bytes);

  START();
  // The first load reads from offset 1; the address then moves on by a single
  // byte before each following load, whatever the element size.
  __ Mov(x17, base_addr + 1);
  __ Ld1r(v0.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v1.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v2.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v3.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v4.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v5.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v6.V1D(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v7.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements (8B, 4H, 2S, 1D) leave the upper half of the Q
    // register zero.
    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
    ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);
  }
}
2642
2643
// Checks LD1R with post-index addressing, mixing immediate post-increments
// with a register post-increment (x18 holds 1), and verifies the final value
// of the base register.
TEST(neon_ld1_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[64];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  // Post-increment by the contents of x18.
  const MemOperand step_by_x18(x17, x18, PostIndex);

  __ Mov(x17, buffer_base + 1);
  __ Mov(x18, 1);
  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
  __ Ld1r(v1.V16B(), step_by_x18);
  __ Ld1r(v2.V4H(), step_by_x18);
  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
  __ Ld1r(v4.V2S(), step_by_x18);
  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
    ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
    // Start offset 1 plus increments 1 + 1 + 1 + 2 + 1 + 4 + 8.
    ASSERT_EQUAL_64(buffer_base + 19, x17);
  }
}
2678
2679
// Checks the multiple-structure form of ST1 with one to four 64-bit
// registers and a plain base-register address. After each store the data is
// reloaded with post-indexed LDRs, which also move x17 on to the next store
// location.
TEST(neon_st1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[14 * kDRegSizeInBytes];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand here(x17);
  const MemOperand then_skip_8(x17, 8, PostIndex);
  const MemOperand then_skip_16(x17, 16, PostIndex);

  // Capture the first 64 bytes of the pattern in q0-q3, then rewind.
  __ Mov(x17, buffer_base);
  __ Ldr(q0, then_skip_16);
  __ Ldr(q1, then_skip_16);
  __ Ldr(q2, then_skip_16);
  __ Ldr(q3, then_skip_16);
  __ Mov(x17, buffer_base);

  // One register.
  __ St1(v0.V8B(), here);
  __ Ldr(d16, then_skip_8);

  // Two registers.
  __ St1(v0.V8B(), v1.V8B(), here);
  __ Ldr(q17, then_skip_16);

  // Three registers.
  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), here);
  __ Ldr(d18, then_skip_8);
  __ Ldr(d19, then_skip_8);
  __ Ldr(d20, then_skip_8);

  // Four registers, 2S arrangement.
  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), here);
  __ Ldr(q21, then_skip_16);
  __ Ldr(q22, then_skip_16);

  // Four registers, 1D arrangement.
  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), here);
  __ Ldr(q23, then_skip_16);
  __ Ldr(q24, here);
  END();

  if (CAN_RUN()) {
    RUN();

    // The source registers still hold the original pattern.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);

    // Each store wrote the low 64 bits of its registers back to back.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2735
2736
// Checks the multiple-structure form of ST1 with one to four 64-bit
// registers and immediate post-index addressing. Each store advances x17 past
// the data it wrote; the data is then reloaded through negative register
// offsets held in x18-x21.
TEST(neon_st1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[64 + 14 * kDRegSizeInBytes];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  // Look-back operands; the offsets are loaded into x18-x21 just below.
  const MemOperand back_8(x17, x18);
  const MemOperand back_16(x17, x19);
  const MemOperand back_24(x17, x20);
  const MemOperand back_32(x17, x21);

  __ Mov(x17, buffer_base);
  __ Mov(x18, -8);
  __ Mov(x19, -16);
  __ Mov(x20, -24);
  __ Mov(x21, -32);

  // Capture the first 64 bytes of the pattern in q0-q3, then rewind.
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, buffer_base);

  // One register.
  __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
  __ Ldr(d16, back_8);

  // Two registers.
  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q17, back_16);

  // Three registers.
  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
  __ Ldr(d18, back_24);
  __ Ldr(d19, back_16);
  __ Ldr(d20, back_8);

  // Four registers, 2S arrangement.
  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q21, back_32);
  __ Ldr(q22, back_16);

  // Four registers, 1D arrangement.
  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q23, back_32);
  __ Ldr(q24, back_16);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2800
2801
// Checks the multiple-structure form of ST1 with one to four 128-bit
// registers and a plain base-register address. The stores land in the area
// after the first 64 bytes of the buffer, and post-indexed LDRs read the data
// back while stepping x17 to the next store location.
TEST(neon_st1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[64 + 160];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand here(x17);
  const MemOperand then_skip_16(x17, 16, PostIndex);

  // Capture the first 64 bytes of the pattern in q0-q3. x17 is left pointing
  // just past them.
  __ Mov(x17, buffer_base);
  __ Ldr(q0, then_skip_16);
  __ Ldr(q1, then_skip_16);
  __ Ldr(q2, then_skip_16);
  __ Ldr(q3, then_skip_16);

  // One register.
  __ St1(v0.V16B(), here);
  __ Ldr(q16, then_skip_16);

  // Two registers.
  __ St1(v0.V8H(), v1.V8H(), here);
  __ Ldr(q17, then_skip_16);
  __ Ldr(q18, then_skip_16);

  // Three registers.
  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), here);
  __ Ldr(q19, then_skip_16);
  __ Ldr(q20, then_skip_16);
  __ Ldr(q21, then_skip_16);

  // Four registers.
  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), here);
  __ Ldr(q22, then_skip_16);
  __ Ldr(q23, then_skip_16);
  __ Ldr(q24, then_skip_16);
  __ Ldr(q25, here);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2852
2853
// Checks the multiple-structure form of ST1 with one to four 128-bit
// registers and immediate post-index addressing. Each store advances x17 past
// the data it wrote; the data is then reloaded through negative register
// offsets held in x18-x21.
TEST(neon_st1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[64 + 160];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  // Look-back operands; the offsets are loaded into x18-x21 just below.
  const MemOperand back_16(x17, x18);
  const MemOperand back_32(x17, x19);
  const MemOperand back_48(x17, x20);
  const MemOperand back_64(x17, x21);

  __ Mov(x17, buffer_base);
  __ Mov(x18, -16);
  __ Mov(x19, -32);
  __ Mov(x20, -48);
  __ Mov(x21, -64);

  // Capture the first 64 bytes of the pattern in q0-q3. x17 is left pointing
  // just past them.
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // One register.
  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q16, back_16);

  // Two registers.
  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q17, back_32);
  __ Ldr(q18, back_16);

  // Three registers.
  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
  __ Ldr(q19, back_48);
  __ Ldr(q20, back_32);
  __ Ldr(q21, back_16);

  // Four registers.
  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17, 64, PostIndex));
  __ Ldr(q22, back_64);
  __ Ldr(q23, back_48);
  __ Ldr(q24, back_32);
  __ Ldr(q25, back_16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2913
2914
// Checks ST2 with 64-bit registers and a plain base-register address. The
// three stores go to buffer offsets 0, 22 and 33, so they partially overwrite
// one another; the whole buffer is then reloaded and compared.
TEST(neon_st2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[4 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand store_at(x18);
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);

  __ St2(v0.V8B(), v1.V8B(), store_at);
  __ Add(x18, x18, 22);
  __ St2(v0.V4H(), v1.V4H(), store_at);
  __ Add(x18, x18, 11);
  __ St2(v0.V2S(), v1.V2S(), store_at);

  // Read the resulting memory image back into q0-q3.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
    ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);
  }
}
2953
2954
// Checks ST2 with 64-bit registers and post-index addressing: first a register
// post-increment (x22 holds 5), then an immediate post-increment of 16, then
// a final store with no write-back. The overlapping results are reloaded and
// compared.
TEST(neon_st2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[4 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x22, 5);
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));


  // Read the resulting memory image back into q0-q2.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);
  }
}
2991
2992
// Checks ST2 with 128-bit registers and a plain base-register address. The
// four stores go to buffer offsets 0, 8, 30 and 32, so they partially
// overwrite one another; the first 64 bytes are then reloaded and compared.
TEST(neon_st2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[5 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand store_at(x18);
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);

  __ St2(v0.V16B(), v1.V16B(), store_at);
  __ Add(x18, x18, 8);
  __ St2(v0.V8H(), v1.V8H(), store_at);
  __ Add(x18, x18, 22);
  __ St2(v0.V4S(), v1.V4S(), store_at);
  __ Add(x18, x18, 2);
  __ St2(v0.V2D(), v1.V2D(), store_at);

  // Read the resulting memory image back into q0-q3.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);
  }
}
3033
3034
// Checks ST2 with 128-bit registers and post-index addressing, alternating a
// register post-increment (x22 holds 5) with an immediate post-increment of
// 32, and finishing with a store that has no write-back. The whole buffer is
// then reloaded and compared.
TEST(neon_st2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[5 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand step_by_x22(x18, x22, PostIndex);
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x22, 5);
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);

  __ St2(v0.V16B(), v1.V16B(), step_by_x22);
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
  __ St2(v0.V4S(), v1.V4S(), step_by_x22);
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Read the resulting memory image back into q0-q4.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);
  __ Ldr(q4, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
    ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);
  }
}
3075
3076
// Checks ST3 with 64-bit registers and a plain base-register address. The
// three stores go to buffer offsets 0, 3 and 5, so they overlap heavily; the
// first 32 bytes are then reloaded and compared.
TEST(neon_st3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[3 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand store_at(x18);
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);
  __ Ldr(q2, load_next);

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), store_at);
  __ Add(x18, x18, 3);
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), store_at);
  __ Add(x18, x18, 2);
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), store_at);


  // Read the resulting memory image back into q0-q1.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
    ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);
  }
}
3113
3114
// Checks ST3 with 64-bit registers and post-index addressing: a register
// post-increment (x22 holds 5), then an immediate post-increment of 24, then
// a final store with no write-back. The whole buffer is then reloaded and
// compared.
TEST(neon_st3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[4 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x22, 5);
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);
  __ Ldr(q2, load_next);

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));


  // Read the resulting memory image back into q0-q3.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);
  }
}
3154
3155
// Checks ST3 with 128-bit registers and a plain base-register address. The
// four stores go to buffer offsets 0, 5, 17 and 39, so they partially
// overwrite one another; the whole buffer is then reloaded and compared.
TEST(neon_st3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[6 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand store_at(x18);
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);
  __ Ldr(q2, load_next);

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), store_at);
  __ Add(x18, x18, 5);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), store_at);
  __ Add(x18, x18, 12);
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), store_at);
  __ Add(x18, x18, 22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), store_at);

  // Read the resulting memory image back into q0-q5.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);
  __ Ldr(q4, reload_next);
  __ Ldr(q5, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
    ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);
  }
}
3201
3202
// Checks ST3 with 128-bit registers and post-index addressing, alternating a
// register post-increment (x22 holds 5) with an immediate post-increment of
// 48, and finishing with a store that has no write-back. The whole buffer is
// then reloaded and compared.
TEST(neon_st3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[7 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand step_by_x22(x18, x22, PostIndex);
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x22, 5);
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);
  __ Ldr(q2, load_next);

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), step_by_x22);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), step_by_x22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Read the resulting memory image back into q0-q6.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);
  __ Ldr(q4, reload_next);
  __ Ldr(q5, reload_next);
  __ Ldr(q6, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
    ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
    ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
  }
}
3248
3249
// Checks ST4 with 64-bit registers and a plain base-register address. The
// three stores go to buffer offsets 0, 12 and 27, so they partially overwrite
// one another; the whole buffer is then reloaded and compared.
TEST(neon_st4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[4 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand store_at(x18);
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);
  __ Ldr(q2, load_next);
  __ Ldr(q3, load_next);

  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), store_at);
  __ Add(x18, x18, 12);
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), store_at);
  __ Add(x18, x18, 15);
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), store_at);


  // Read the resulting memory image back into q0-q3.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1110010032221202, 0x3121110130201000, q0);
    ASSERT_EQUAL_128(0x1003020100322322, 0x1312030231302120, q1);
    ASSERT_EQUAL_128(0x1407060504333231, 0x3023222120131211, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);
  }
}
3291
3292
// Checks ST4 with 64-bit registers and post-index addressing: a register
// post-increment (x22 holds 5), then an immediate post-increment of 32, then
// a final store with no write-back. The whole buffer is then reloaded and
// compared.
TEST(neon_st4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[5 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x22, 5);
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);
  __ Ldr(q2, load_next);
  __ Ldr(q3, load_next);

  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18, x22, PostIndex));
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18, 32, PostIndex));
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));


  // Read the resulting memory image back into q0-q4.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);
  __ Ldr(q4, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
    ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
    ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);
  }
}
3343
3344
// Checks ST4 with 128-bit registers and a plain base-register address. The
// four stores go to buffer offsets 0, 5, 17 and 39, so they partially
// overwrite one another; the whole buffer is then reloaded and compared.
TEST(neon_st4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[7 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand store_at(x18);
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);
  __ Ldr(q2, load_next);
  __ Ldr(q3, load_next);

  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), store_at);
  __ Add(x18, x18, 5);
  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), store_at);
  __ Add(x18, x18, 12);
  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), store_at);
  __ Add(x18, x18, 22);
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), store_at);
  // NOTE(review): x18 is not read again in this test after the adjustment
  // below; it looks vestigial but is kept so the emitted code is unchanged.
  __ Add(x18, x18, 10);

  // Read the resulting memory image back into q0-q6.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);
  __ Ldr(q4, reload_next);
  __ Ldr(q5, reload_next);
  __ Ldr(q6, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
    ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
    ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);
  }
}
3394
3395
// Checks ST4 with 128-bit registers and post-index addressing, alternating a
// register post-increment (x22 holds 5) with an immediate post-increment of
// 64, and finishing with a store that has no write-back. The whole buffer is
// then reloaded and compared.
TEST(neon_st4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Scratch memory holding the byte pattern 0x00, 0x01, 0x02, ...
  uint8_t buffer[9 * 16];
  for (unsigned idx = 0; idx < sizeof(buffer); ++idx) {
    buffer[idx] = idx;
  }
  uintptr_t buffer_base = reinterpret_cast<uintptr_t>(buffer);

  START();
  const MemOperand step_by_x22(x18, x22, PostIndex);
  const MemOperand load_next(x17, 16, PostIndex);
  const MemOperand reload_next(x19, 16, PostIndex);

  // x17 walks the source data, x18 is the store cursor.
  __ Mov(x22, 5);
  __ Mov(x17, buffer_base);
  __ Mov(x18, buffer_base);
  __ Ldr(q0, load_next);
  __ Ldr(q1, load_next);
  __ Ldr(q2, load_next);
  __ Ldr(q3, load_next);

  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), step_by_x22);
  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18, 64, PostIndex));
  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), step_by_x22);
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));

  // Read the resulting memory image back into q0-q8.
  __ Mov(x19, buffer_base);
  __ Ldr(q0, reload_next);
  __ Ldr(q1, reload_next);
  __ Ldr(q2, reload_next);
  __ Ldr(q3, reload_next);
  __ Ldr(q4, reload_next);
  __ Ldr(q5, reload_next);
  __ Ldr(q6, reload_next);
  __ Ldr(q7, reload_next);
  __ Ldr(q8, reload_next);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
    ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
    ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
    ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
    ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
    ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
    ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);
  }
}
3458
3459
// Checks the pairwise min/max instructions (SMINP, SMAXP, UMINP, UMAXP) when
// the destination register aliases one or both source registers. For each
// instruction four cases are generated: distinct registers, destination equal
// to the first source, destination equal to the second source, and all three
// operands the same register.
TEST(neon_destructive_minmaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v0.V2D(), 0, 0x2222222233333333);
  __ Movi(v1.V2D(), 0, 0x0000000011111111);

  // Signed pairwise minimum.
  __ Sminp(v16.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v17, v0);
  __ Sminp(v17.V2S(), v17.V2S(), v1.V2S());
  __ Mov(v18, v1);
  __ Sminp(v18.V2S(), v0.V2S(), v18.V2S());
  __ Mov(v19, v0);
  __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());

  // Signed pairwise maximum.
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v21, v0);
  __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S());
  __ Mov(v22, v1);
  __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S());
  __ Mov(v23, v0);
  __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());

  // Unsigned pairwise minimum.
  __ Uminp(v24.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v25, v0);
  __ Uminp(v25.V2S(), v25.V2S(), v1.V2S());
  __ Mov(v26, v1);
  __ Uminp(v26.V2S(), v0.V2S(), v26.V2S());
  __ Mov(v27, v0);
  __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());

  // Unsigned pairwise maximum.
  __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v29, v0);
  __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S());
  __ Mov(v30, v1);
  __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S());
  __ Mov(v31, v0);
  __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected low 64 bits. The signed and unsigned variants share the same
    // expectations for these inputs.
    const uint64_t min_of_v0_v1 = 0x0000000022222222;
    const uint64_t min_of_v0_v0 = 0x2222222222222222;
    const uint64_t max_of_v0_v1 = 0x1111111133333333;
    const uint64_t max_of_v0_v0 = 0x3333333333333333;

    ASSERT_EQUAL_128(0, min_of_v0_v1, q16);
    ASSERT_EQUAL_128(0, min_of_v0_v1, q17);
    ASSERT_EQUAL_128(0, min_of_v0_v1, q18);
    ASSERT_EQUAL_128(0, min_of_v0_v0, q19);

    ASSERT_EQUAL_128(0, max_of_v0_v1, q20);
    ASSERT_EQUAL_128(0, max_of_v0_v1, q21);
    ASSERT_EQUAL_128(0, max_of_v0_v1, q22);
    ASSERT_EQUAL_128(0, max_of_v0_v0, q23);

    ASSERT_EQUAL_128(0, min_of_v0_v1, q24);
    ASSERT_EQUAL_128(0, min_of_v0_v1, q25);
    ASSERT_EQUAL_128(0, min_of_v0_v1, q26);
    ASSERT_EQUAL_128(0, min_of_v0_v0, q27);

    ASSERT_EQUAL_128(0, max_of_v0_v1, q28);
    ASSERT_EQUAL_128(0, max_of_v0_v1, q29);
    ASSERT_EQUAL_128(0, max_of_v0_v1, q30);
    ASSERT_EQUAL_128(0, max_of_v0_v0, q31);
  }
}
3524
3525
// Checks TBL (table lookup) when the destination register aliases the index
// register, a table register, or both. The one-register-table and
// four-register-table forms are each generated in four aliasing
// configurations.
TEST(neon_destructive_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // v0 supplies the lookup indices; v1-v4 supply the table contents.
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // One-register table: no aliasing (destination pre-filled with 0x55).
  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v16.V16B(), v1.V16B(), v0.V16B());
  // Destination is also the index register.
  __ Mov(v17, v0);
  __ Tbl(v17.V16B(), v1.V16B(), v17.V16B());
  // Destination is also the table register.
  __ Mov(v18, v1);
  __ Tbl(v18.V16B(), v18.V16B(), v0.V16B());
  // Destination, table and index are all the same register.
  __ Mov(v19, v0);
  __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-register table: no aliasing (destination pre-filled with 0x55).
  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  // Destination is also the index register.
  __ Mov(v21, v0);
  __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  // Destination is also the first table register (table copied to v22-v25).
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  // Destination is the first table register and the index register; the
  // table here is v0, v1, v2, v3 (copied to v26-v29).
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbl(v26.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v29.V16B(), v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected result of looking v0 up in the one-register table {v1}.
    const uint64_t one_reg_hi = 0xa000000000000000;
    const uint64_t one_reg_lo = 0x0000000000adaeaf;
    ASSERT_EQUAL_128(one_reg_hi, one_reg_lo, q16);
    ASSERT_EQUAL_128(one_reg_hi, one_reg_lo, q17);
    ASSERT_EQUAL_128(one_reg_hi, one_reg_lo, q18);
    ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19);

    // Expected result of looking v0 up in the four-register table {v1-v4}.
    const uint64_t four_reg_hi = 0xa0000000d4d5d6c7;
    const uint64_t four_reg_lo = 0xc8c9babbbcadaeaf;
    ASSERT_EQUAL_128(four_reg_hi, four_reg_lo, q20);
    ASSERT_EQUAL_128(four_reg_hi, four_reg_lo, q21);
    ASSERT_EQUAL_128(four_reg_hi, four_reg_lo, q22);
    ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3580
3581
// Check Tbx when its destination register is also one of the table registers,
// the index register, or both. Unlike the Tbl results, the expected values
// retain the destination's previous bytes in lanes that are not looked up.
TEST(neon_destructive_tbx) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // v0 supplies the byte indices; v1-v4 supply the table contents.
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // Single-register table.
  // v16: no overlap; the destination is pre-filled with 0x55 bytes.
  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
  // v17: the destination is also the index register.
  __ Mov(v17, v0);
  __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
  // v18: the destination is also the table register.
  __ Mov(v18, v1);
  __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
  // v19: the destination is the table and the index register (a copy of v0).
  __ Mov(v19, v0);
  __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());

  // Four-register table, with the same overlap patterns.
  // v20: no overlap; the destination is pre-filled with 0x55 bytes.
  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  // v21: the destination is also the index register.
  __ Mov(v21, v0);
  __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  // v22: the destination is also the first table register (v22-v25 are copies
  // of v1-v4).
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  // v26: the destination is the first table register and the index register.
  // The table here is { v0, v1, v2, v3 }.
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbx(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  if (CAN_RUN()) {
    RUN();

    // The looked-up bytes match across q16-q18; the remaining bytes are
    // whatever each destination held beforehand (0x55 fill, v0 or v1).
    ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16);
    ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17);
    ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18);
    ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19);

    // The same holds for the four-register table.
    ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
    ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
    ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22);
    ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26);
  }
}
3636
3637
// Check Fcvtl and Fcvtl2 when the destination register is also the source
// register. Each aliased result must match the non-aliased one.
TEST(neon_destructive_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single to double precision. v0 holds { 2.0f, 1.0f, -1.0f, -2.0f } from the
  // highest lane to the lowest.
  __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
  // Reference results, with distinct source and destination registers.
  __ Fcvtl(v16.V2D(), v0.V2S());
  __ Fcvtl2(v17.V2D(), v0.V4S());
  // The same conversions, performed in place on copies of v0.
  __ Mov(v18, v0);
  __ Mov(v19, v0);
  __ Fcvtl(v18.V2D(), v18.V2S());
  __ Fcvtl2(v19.V2D(), v19.V4S());

  // Half to single precision, following the same pattern with v1 as input.
  __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
  __ Fcvtl(v20.V4S(), v1.V4H());
  __ Fcvtl2(v21.V4S(), v1.V8H());
  __ Mov(v22, v1);
  __ Mov(v23, v1);
  __ Fcvtl(v22.V4S(), v22.V4H());
  __ Fcvtl2(v23.V4S(), v23.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Single to double: q18 and q19 (in place) must equal q16 and q17.
    ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q16);
    ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q17);
    ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q18);
    ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q19);

    // Half to single: q22 and q23 (in place) must equal q20 and q21.
    ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q20);
    ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q21);
    ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q22);
    ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q23);
  }
}
3674
// Check half-precision vector Fadd: exact and rounded sums, infinities of both
// signs, and signalling and quiet NaN inputs.
TEST(fadd_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V8H(), kFP16PositiveInfinity);
  __ Fmov(v5.V8H(), kFP16NegativeInfinity);
  // 0x7c2f is a signalling NaN and 0xfe0f is a (negative) quiet NaN.
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c2f));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe0f));

  __ Fadd(v8.V4H(), v1.V4H(), v0.V4H());
  __ Fadd(v9.V8H(), v3.V8H(), v2.V8H());
  __ Fadd(v10.V4H(), v4.V4H(), v3.V4H());

  __ Fadd(v11.V4H(), v6.V4H(), v1.V4H());
  __ Fadd(v12.V4H(), v7.V4H(), v7.V4H());

  // Exercise the negative infinity input too, as fmul_h_neon and fdiv_h_neon
  // do.
  __ Fadd(v13.V4H(), v5.V4H(), v3.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // 1024.0 + 24.0 = 1048.0 (0x6418); the 4H form leaves the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x6418641864186418, q8);
    // 2053.5 is unrepresentable in FP16.
    ASSERT_EQUAL_128(0x6803680368036803, 0x6803680368036803, q9);

    // +infinity + 2048.0 is +infinity (0x7c00).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q10);

    // Note: we test NaNs here as vectors aren't covered by process_nans_half
    // and we don't have traces for half-precision enabled hardware.
    // Quiet NaN from Signalling.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e2f7e2f7e2f7e2f, q11);
    // Quiet NaN.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfe0ffe0ffe0ffe0f, q12);

    // -infinity + 2048.0 is -infinity (0xfc00).
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q13);
  }
}
3716
// Check half-precision vector Fsub: exact and rounded differences, a
// destructive (in-place) subtraction, infinities of both signs, and signalling
// and quiet NaN inputs.
TEST(fsub_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);
  // 0x7c22 is a signalling NaN and 0xfe02 is a (negative) quiet NaN.
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c22));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe02));

  // This overwrites v0 with 1024.0 - 24.0 = 1000.0; the v10 calculation below
  // depends on that updated value.
  __ Fsub(v0.V4H(), v1.V4H(), v0.V4H());
  __ Fsub(v8.V8H(), v3.V8H(), v2.V8H());
  __ Fsub(v9.V4H(), v4.V4H(), v3.V4H());
  __ Fsub(v10.V4H(), v0.V4H(), v1.V4H());

  __ Fsub(v11.V4H(), v6.V4H(), v2.V4H());
  __ Fsub(v12.V4H(), v7.V4H(), v7.V4H());

  // Exercise the negative infinity input too, as fmul_h_neon and fdiv_h_neon
  // do.
  __ Fsub(v13.V4H(), v5.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 1024.0 - 24.0 = 1000.0 (0x63d0).
    ASSERT_EQUAL_128(0x0000000000000000, 0x63d063d063d063d0, q0);
    // 2042.5 is unrepresentable in FP16:
    ASSERT_EQUAL_128(0x67fa67fa67fa67fa, 0x67fa67fa67fa67fa, q8);

    // +infinity - 2048.0 is +infinity (0x7c00).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q9);
    // 1000.0 - 1024.0 = -24.0 (0xce00).
    ASSERT_EQUAL_128(0x0000000000000000, 0xce00ce00ce00ce00, q10);

    // Note: we test NaNs here as vectors aren't covered by process_nans_half
    // and we don't have traces for half-precision enabled hardware.
    // Quiet NaN from Signalling.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e227e227e227e22, q11);
    // Quiet NaN.
    ASSERT_EQUAL_128(0x0000000000000000, 0xfe02fe02fe02fe02, q12);

    // -infinity - 2048.0 is -infinity (0xfc00).
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q13);
  }
}
3759
// Check half-precision vector Fmul with finite operands (in both operand
// orders) and with infinities of both signs.
TEST(fmul_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fmul(v6.V4H(), v1.V4H(), v0.V4H());
  __ Fmul(v7.V8H(), v3.V8H(), v2.V8H());
  __ Fmul(v8.V4H(), v4.V4H(), v3.V4H());
  // The same operands as for v6, in the opposite order.
  __ Fmul(v9.V4H(), v0.V4H(), v1.V4H());
  __ Fmul(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // -2.0 * 24.0 = -48.0 (0xd200); the 4H form leaves the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q6);
    // 0.5 * 5.5 = 2.75 (0x4180).
    ASSERT_EQUAL_128(0x4180418041804180, 0x4180418041804180, q7);
    // +infinity * 0.5 is +infinity (0x7c00).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
    // 24.0 * -2.0 must match q6.
    ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q9);
    // -infinity * 24.0 is -infinity (0xfc00).
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
  }
}
3790
// Check half-precision vector Fdiv with exact and inexact quotients, and with
// infinities of both signs as the dividend.
TEST(fdiv_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fdiv(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fdiv(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fdiv(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fdiv(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fdiv(v10.V4H(), v5.V4H(), v0.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 24.0 / -2.0 = -12.0 (0xca00); the 4H form leaves the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0xca00ca00ca00ca00, q6);
    // 5.5 / 0.5 = 11.0 (0x4980).
    ASSERT_EQUAL_128(0x4980498049804980, 0x4980498049804980, q7);
    // +infinity / 0.5 is +infinity (0x7c00).
    ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
    // -0.083333... is unrepresentable in FP16:
    ASSERT_EQUAL_128(0x0000000000000000, 0xad55ad55ad55ad55, q9);
    // -infinity / 24.0 is -infinity (0xfc00).
    ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
  }
}
3822
// Check Fcvtl and Fcvtl2 (widening floating-point conversions) for both the
// half-to-single and single-to-double forms, using inputs that include zeros,
// infinities, NaNs and very small magnitudes, each with both signs.
TEST(neon_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v0 and v1 are read as half-precision lanes; v2-v4 are read as
  // single-precision lanes.
  __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
  __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
  __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  // Half to single: Fcvtl reads the low four lanes, Fcvtl2 the high four.
  __ Fcvtl(v16.V4S(), v0.V4H());
  __ Fcvtl2(v17.V4S(), v0.V8H());
  __ Fcvtl(v18.V4S(), v1.V4H());
  __ Fcvtl2(v19.V4S(), v1.V8H());

  // Single to double: Fcvtl reads the low two lanes, Fcvtl2 the high two.
  __ Fcvtl(v20.V2D(), v2.V2S());
  __ Fcvtl2(v21.V2D(), v2.V4S());
  __ Fcvtl(v22.V2D(), v3.V2S());
  __ Fcvtl2(v23.V2D(), v3.V4S());
  __ Fcvtl(v24.V2D(), v4.V2S());
  __ Fcvtl2(v25.V2D(), v4.V4S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Half to single results.
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
    ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
    ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
    // Single to double results.
    ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
    ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
    ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
    ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
  }
}
3861
3862
// Check Fcvtn and Fcvtn2 (narrowing floating-point conversions) for both the
// single-to-half and double-to-single forms. Fcvtn writes the low half of the
// destination and clears the high half; Fcvtn2 writes the high half and keeps
// the low half.
TEST(neon_fcvtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v0-v2 are read as single-precision lanes; v3-v8 are read as
  // double-precision lanes.
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);

  // Single to half. v17 is only written by Fcvtn, so its high half must end up
  // zero.
  __ Fcvtn(v16.V4H(), v0.V4S());
  __ Fcvtn2(v16.V8H(), v1.V4S());
  __ Fcvtn(v17.V4H(), v2.V4S());
  // Double to single.
  __ Fcvtn(v18.V2S(), v3.V2D());
  __ Fcvtn2(v18.V4S(), v4.V2D());
  __ Fcvtn(v19.V2S(), v5.V2D());
  __ Fcvtn2(v19.V4S(), v6.V2D());
  __ Fcvtn(v20.V2S(), v7.V2D());
  __ Fcvtn2(v20.V4S(), v8.V2D());
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
    // Check the whole register so that the cleared high half is verified too.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7e7ffe7f00008000, q17);
    ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
  }
}
3898
3899
// Check Fcvtxn and Fcvtxn2 (double-to-single narrowing using round-to-odd) in
// vector form, plus the scalar form of Fcvtxn.
TEST(neon_fcvtxn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Every input register is read as two double-precision lanes.
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
  __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
  // Each destination is built from an Fcvtxn (low half) followed by an
  // Fcvtxn2 (high half).
  __ Fcvtxn(v16.V2S(), v0.V2D());
  __ Fcvtxn2(v16.V4S(), v1.V2D());
  __ Fcvtxn(v17.V2S(), v2.V2D());
  __ Fcvtxn2(v17.V4S(), v3.V2D());
  __ Fcvtxn(v18.V2S(), v4.V2D());
  __ Fcvtxn2(v18.V4S(), v5.V2D());
  __ Fcvtxn(v19.V2S(), v6.V2D());
  __ Fcvtxn2(v19.V4S(), v7.V2D());
  __ Fcvtxn(v20.V2S(), v8.V2D());
  __ Fcvtxn2(v20.V4S(), v9.V2D());
  // Scalar form, converting the low double of v0.
  __ Fcvtxn(s21, d0);
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
    ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
    ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
    ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
    ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
    // The scalar result matches the lowest lane of q16, and the rest of q21 is
    // expected to be zero.
    ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
  }
}
3937
// Check the vector form of Addp (add pairwise) on 16 byte lanes.
TEST(neon_3same_addp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Addp(v16.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // The low half holds the sums of adjacent byte pairs from v0 (every pair
    // sums to 0xff); the high half holds the sums of adjacent pairs from v1.
    ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
  }
}
3954
// Check the three-same (vector by vector) and scalar forms of Sqdmulh and
// Sqrdmulh on 16-bit and 32-bit elements.
TEST(neon_3same_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 are the 16-bit operands and v2/v3 the 32-bit operands. The lowest
  // lane of each pair multiplies the most negative value by itself; the
  // expected result for that lane is the most positive value.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqdmulh(h18, h0, h1);
  __ Sqdmulh(s19, s2, s3);

  // The same operands, using the rounding variant.
  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmulh(h22, h0, h1);
  __ Sqrdmulh(s23, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    // Sqdmulh results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0x7fffffff, q19);
    // Sqrdmulh results; these differ from the above only in lanes where
    // rounding changes the outcome.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  }
}
3989
// Check the by-element (vector by indexed lane) and scalar-by-element forms of
// Sqdmulh and Sqrdmulh on 16-bit and 32-bit elements.
TEST(neon_byelement_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v2 are the vector operands; v1 and v3 provide the indexed element.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // The vector forms use lane 1 of the second operand; the scalar forms use
  // lane 0.
  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqdmulh(h18, h0, v1.H(), 0);
  __ Sqdmulh(s19, s2, v3.S(), 0);

  // The same operands, using the rounding variant.
  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmulh(h22, h0, v1.H(), 0);
  __ Sqrdmulh(s23, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Sqdmulh results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
    ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0x7fffffff, q19);
    // Sqrdmulh results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  }
}
4024
// Check the three-same (vector by vector) and scalar forms of Sqrdmlah on
// 16-bit and 32-bit elements. This requires the RDM feature.
TEST(neon_3same_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // Multiplication operands: v0/v1 for 16-bit lanes, v2/v3 for 32-bit lanes.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlah(h18, h0, h1);
  __ Sqrdmlah(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    // The 4H form is expected to leave the upper half of q16 zero, and the
    // scalar forms to leave everything above the result element zero, even
    // though those bits were non-zero in the initial accumulators.
    ASSERT_EQUAL_128(0, 0x0000040104010000, q16);
    ASSERT_EQUAL_128(0x000000017fffffff, 0x000000217fffffff, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4055
// Check the by-element (vector by indexed lane) and scalar-by-element forms of
// Sqrdmlah on 16-bit and 32-bit elements. This requires the RDM feature.
TEST(neon_byelement_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0 and v2 are the vector operands; v1 and v3 provide the indexed element.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  // The vector forms use lane 1 of the second operand; the scalar forms use
  // lane 0.
  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlah(h18, h0, v1.H(), 0);
  __ Sqrdmlah(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x0000040104018000, q16);
    ASSERT_EQUAL_128(0x00000001fffffff0, 0x0000002100107ff0, q17);
    ASSERT_EQUAL_128(0, 0x7fff, q18);
    ASSERT_EQUAL_128(0, 0, q19);
  }
}
4086
// Check the three-same (vector by vector) and scalar forms of Sqrdmlsh on
// 16-bit and 32-bit elements. This requires the RDM feature.
TEST(neon_3same_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // Multiplication operands: v0/v1 for 16-bit lanes, v2/v3 for 32-bit lanes.
  // Note that the 16-bit operands differ from those used in the other
  // Sqrdmlah/Sqrdmlsh tests.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004000500);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000100080);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlsh(h18, h0, h1);
  __ Sqrdmlsh(s19, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    // The 4H form is expected to leave the upper half of q16 zero, and the
    // scalar forms to leave everything above the result element zero.
    ASSERT_EQUAL_128(0, 0x40003fff40003ffb, q16);
    ASSERT_EQUAL_128(0x40003fffc0004000, 0x40004000c0004000, q17);
    ASSERT_EQUAL_128(0, 0x3ffb, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4117
// Check the by-element (vector by indexed lane) and scalar-by-element forms of
// Sqrdmlsh on 16-bit and 32-bit elements. This requires the RDM feature.
TEST(neon_byelement_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  // v0 and v2 are the vector operands; v1 and v3 provide the indexed element.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  // The vector forms use lane 1 of the second operand; the scalar forms use
  // lane 0.
  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlsh(h18, h0, v1.H(), 0);
  __ Sqrdmlsh(s19, s2, v3.S(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0, 0x4000400040004010, q16);
    ASSERT_EQUAL_128(0x4000400040004010, 0x4000400040004010, q17);
    ASSERT_EQUAL_128(0, 0xc000, q18);
    ASSERT_EQUAL_128(0, 0xc0004000, q19);
  }
}
4148
// Check the vector-by-vector forms of Sdot and Udot (signed and unsigned dot
// product of byte groups, accumulated into 32-bit lanes). This requires the
// DotProduct feature.
TEST(neon_3same_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte operands. v1 contains bytes with the top bit set, so the signed and
  // unsigned results differ.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.V16B());
  __ Sdot(v17.V2S(), v1.V8B(), v2.V8B());

  __ Udot(v18.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v19.V2S(), v1.V8B(), v2.V8B());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 2S forms are expected to leave the upper half zero. Their lowest
    // lane multiplies by the zero bytes of v2, so it keeps the accumulator's
    // initial value (0x4000).
    ASSERT_EQUAL_128(0x000037d8000045f8, 0x000037d8000045f8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e00004000, q17);
    ASSERT_EQUAL_128(0x000119d8000127f8, 0x000119d8000127f8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e00004000, q19);
  }
}
4179
// Check the by-element forms of Sdot and Udot, where the second operand is a
// single indexed group of four bytes. This requires the DotProduct feature.
TEST(neon_byelement_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte operands; the same values as in neon_3same_sdot_udot.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // Initial accumulator values; v16-v19 are both read and written below.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  // Every instruction selects four-byte group 1 of its second operand.
  __ Sdot(v16.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Sdot(v17.V2S(), v1.V8B(), v2.S4B(), 1);

  __ Udot(v18.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Udot(v19.V2S(), v1.V8B(), v2.S4B(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    // All lanes of v0 are identical and are multiplied by the same indexed
    // group, so q16 and q18 each hold one value repeated in every lane.
    ASSERT_EQUAL_128(0x000037d8000037d8, 0x000037d8000037d8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e0000587e, q17);
    ASSERT_EQUAL_128(0x000119d8000119d8, 0x000119d8000119d8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e0000ca7e, q19);
  }
}
4210
4211
// Check Saddlp (signed add long pairwise) for every source lane size, in both
// the 128-bit and 64-bit forms.
TEST(neon_2regmisc_saddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  // Bytes to halfwords.
  __ Saddlp(v16.V8H(), v0.V16B());
  __ Saddlp(v17.V4H(), v0.V8B());

  // Halfwords to words.
  __ Saddlp(v18.V4S(), v0.V8H());
  __ Saddlp(v19.V2S(), v0.V4H());

  // Words to doublewords.
  __ Saddlp(v20.V2D(), v0.V4S());
  __ Saddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form must match the low half of the corresponding 128-bit
    // form, with the upper half zero.
    ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
    ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
    ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
  }
}
4240
// Check Uaddlp (unsigned add long pairwise) for every source lane size, in
// both the 128-bit and 64-bit forms. The input is the same as for
// neon_2regmisc_saddlp.
TEST(neon_2regmisc_uaddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  // Bytes to halfwords.
  __ Uaddlp(v16.V8H(), v0.V16B());
  __ Uaddlp(v17.V4H(), v0.V8B());

  // Halfwords to words.
  __ Uaddlp(v18.V4S(), v0.V8H());
  __ Uaddlp(v19.V2S(), v0.V4H());

  // Words to doublewords.
  __ Uaddlp(v20.V2D(), v0.V4S());
  __ Uaddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form must match the low half of the corresponding 128-bit
    // form, with the upper half zero.
    ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
    ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
    ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
  }
}
4269
// Check Sadalp (signed add and accumulate long pairwise) for every source lane
// size, in both the 128-bit and 64-bit forms.
TEST(neon_2regmisc_sadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 are used as sources; v1-v4 also provide initial accumulator values.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Bytes (v0) accumulated into halfwords, starting from v1.
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Sadalp(v16.V8H(), v0.V16B());
  __ Sadalp(v17.V4H(), v0.V8B());

  // Halfwords (v1) accumulated into words, starting from v2.
  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Sadalp(v18.V4S(), v1.V8H());
  __ Sadalp(v19.V2S(), v1.V4H());

  // Words (v2) accumulated into doublewords, starting from v3 and v4.
  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Sadalp(v20.V2D(), v2.V4S());
  __ Sadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit forms are expected to leave the upper half zero, even where
    // the initial accumulator had non-zero upper bits.
    ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
    ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4308
// Check Uadalp (unsigned add and accumulate long pairwise) for every source
// lane size, in both the 128-bit and 64-bit forms. The inputs are the same as
// for neon_2regmisc_sadalp.
TEST(neon_2regmisc_uadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0-v2 are used as sources; v1-v4 also provide initial accumulator values.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Bytes (v0) accumulated into halfwords, starting from v1.
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Uadalp(v16.V8H(), v0.V16B());
  __ Uadalp(v17.V4H(), v0.V8B());

  // Halfwords (v1) accumulated into words, starting from v2.
  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Uadalp(v18.V4S(), v1.V8H());
  __ Uadalp(v19.V2S(), v1.V4H());

  // Words (v2) accumulated into doublewords, starting from v3 and v4.
  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Uadalp(v20.V2D(), v2.V4S());
  __ Uadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit forms are expected to leave the upper half zero, even where
    // the initial accumulator had non-zero upper bits.
    ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
    ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
    ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4347
// Check the vector-by-vector forms of Mla, Mls and Mul on 16 byte lanes.
TEST(neon_3same_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the multiplication operands. v16 and v17 start with the same
  // accumulator value, so that Mla and Mls can be compared.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
  __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
  __ Mul(v18.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // q16 is the accumulator plus the product, q17 is the accumulator minus
    // the product, and q18 is the product alone.
    ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
    ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
    ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
  }
}
4371
4372
// Check the signed and unsigned absolute-difference instructions on 16 byte
// lanes, both with accumulation (Saba, Uaba) and without (Sabd, Uabd).
TEST(neon_3same_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the operands. v16 and v17 start with the same accumulator
  // value.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
  __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
  __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
  __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // q16 and q17 are the accumulating results (signed, then unsigned); q18
    // and q19 are the plain absolute differences (signed, then unsigned).
    ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
    ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
    ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
    ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
  }
}
4398
4399
// Check the by-element (vector by indexed lane) forms of Mul, Mla and Mls on
// 16-bit and 32-bit lanes, in both the 64-bit and 128-bit vector forms.
TEST(neon_byelement_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 is the vector operand; v1 provides the indexed element.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  // Plain products, using the lowest and highest lane index for each lane
  // size.
  __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
  __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
  __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
  __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);

  // Mla on 16-bit lanes, starting from small accumulator values.
  __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
  __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
  __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
  __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);

  // Mla on 32-bit lanes, starting from small accumulator values.
  __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
  __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
  __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
  __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);

  // Mls on 16-bit lanes. The accumulators start at the values expected from
  // the corresponding Mul (v16 and v17), so the results should be zero.
  __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
  __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
  __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);

  // Mls on 32-bit lanes. The accumulators start at the values expected from
  // the corresponding Mul (v18 and v19), so the results should be zero.
  __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
  __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
  __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
  __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);

  END();

  if (CAN_RUN()) {
    RUN();
    // Mul results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
    ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);

    // Mla results: the products above plus the initial accumulator values.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
    ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
    ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);

    // Mls results: each product subtracted from an equal accumulator.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4454
4455
// Check the by-element (vector by indexed lane) forms of the widening
// multiplies: Smull/Umull, Smlal/Umlal and Smlsl/Umlsl, together with their
// "2" variants, from 16-bit source lanes to 32-bit results.
TEST(neon_byelement_mull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 is the vector operand; v1 provides the indexed element.
  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  // Plain widening products. The base forms take their vector operand as 4H
  // and use lane 7 of v1; the "2" forms take it as 8H and use lane 0.
  __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
  __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);

  // Multiply-accumulate, starting from small accumulator values.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);

  // Multiply-subtract. The accumulators start at the values expected from the
  // corresponding plain products (v16-v19), so the results should be zero.
  __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
  __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
  __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
  __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);

  __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Plain widening products.
    ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
    ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
    ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
    ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);

    // Multiply-accumulate: the products above plus the initial accumulator
    // values.
    ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
    ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
    ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
    ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);

    // Multiply-subtract: each product subtracted from an equal accumulator.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4511
4512
// Checks the by-element Sqdmull, Sqdmlal and Sqdmlsl instructions in three
// shapes each: vector (V4H source), the "2" vector variant (V8H source) and
// scalar (h0 source, s destination).
TEST(neon_byelement_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Shared inputs: v0 is the vector operand, v1 supplies the indexed element.
  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  // Plain products.
  __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmull(s18, h0, v1.H(), 7);

  // Accumulating forms, starting from a small non-zero accumulator.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlal(s22, h0, v1.H(), 7);

  // Subtracting forms: v24-v26 are preloaded with the values asserted below
  // for q16-q18, and the results are asserted to be zero.
  __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
  __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
  __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);

  __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlsl(s26, h0, v1.H(), 7);

  END();

  if (CAN_RUN()) {
    RUN();

    // Products.
    ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
    ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
    ASSERT_EQUAL_128(0, 0x0000ab54, q18);

    // Products added to the initial accumulator contents.
    ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
    ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
    ASSERT_EQUAL_128(0, 0x0000ab55, q22);

    // Products subtracted from themselves.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x00000000, q26);
  }
}
4559
4560
// Checks Sabal/Uabal (8B sources) and Sabal2/Uabal2 (16B sources), each
// accumulating into an 8H destination that starts from the same known value.
TEST(neon_3diff_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the two source operands; v16-v19 are the accumulators.
  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
    ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
    ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
  }
}
4588
4589
// Checks the three-register Sqdmull/Sqdmull2 for H->S and S->D element sizes,
// plus the scalar forms. The inputs are built from the extreme signed values
// (0x7fff/0x8000 and 0x7fffffff/0x80000000), and several expected results are
// the maximum positive value of the destination element.
TEST(neon_3diff_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 hold the halfword operands, v2/v3 the word operands.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
  // Scalar forms.
  __ Sqdmull(s20, h0, h1);
  __ Sqdmull(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
    ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4619
4620
// Checks the three-register Sqdmlal/Sqdmlal2 for H->S and S->D element sizes,
// plus the scalar forms. The sources are the same extreme-value operands used
// by neon_3diff_sqdmull; the accumulators are preloaded with values close to
// the ends of the signed range.
TEST(neon_3diff_sqdmlal) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 hold the halfword operands, v2/v3 the word operands.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Initial accumulator contents.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
  __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
  // Scalar forms.
  __ Sqdmlal(s20, h0, h1);
  __ Sqdmlal(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
    ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4657
4658
// Checks the three-register Sqdmlsl/Sqdmlsl2 for H->S and S->D element sizes,
// plus the scalar forms. The sources are the same extreme-value operands used
// by neon_3diff_sqdmull; the accumulators are preloaded with values close to
// the ends of the signed range.
TEST(neon_3diff_sqdmlsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 hold the halfword operands, v2/v3 the word operands.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Initial accumulator contents.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
  __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
  // Scalar forms.
  __ Sqdmlsl(s20, h0, h1);
  __ Sqdmlsl(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
    ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
    ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x80000002, q20);
    ASSERT_EQUAL_128(0, 0x8000000000000002, q21);
  }
}
4695
4696
// Checks Smlal/Umlal (8B sources) and Smlal2/Umlal2 (16B sources), each
// accumulating into an 8H destination that starts from the same known value.
TEST(neon_3diff_mla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the two source operands; v16-v19 are the accumulators.
  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
    ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
    ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
    ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
  }
}
4724
4725
// Checks Smlsl/Umlsl (8B sources) and Smlsl2/Umlsl2 (16B sources), each
// updating an 8H destination that starts from the same known value.
TEST(neon_3diff_mls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the two source operands; v16-v19 are the accumulators.
  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
    ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
    ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
    ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
  }
}
4753
4754
// Checks the register-register integer compares Cmeq, Cmge, Cmgt, Cmhi and
// Cmhs on 16B vectors. Each instruction is run twice: once comparing v0 with
// itself and once comparing v0 with v1.
TEST(neon_3same_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  // Even-numbered destinations: v0 vs. v0. Odd-numbered: v0 vs. v1.
  __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
  __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
  __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
  __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
  __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
  __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
  __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
  __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
  __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
  __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // Self-comparisons give all-ones for Cmeq/Cmge/Cmhs and all-zeros for
    // Cmgt/Cmhi.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
    ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
  }
}
4790
4791
// Checks the scalar (D register) forms of Cmeq, Cmge, Cmgt, Cmhi and Cmhs.
// Every instruction is run on (d0, d0) and (d0, d1); Cmeq, Cmge and Cmhs are
// additionally run with the operands swapped, (d1, d0).
TEST(neon_3same_scalar_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits of v0 and v1 are read by the scalar compares below.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(d16, d0, d0);
  __ Cmeq(d17, d0, d1);
  __ Cmeq(d18, d1, d0);
  __ Cmge(d19, d0, d0);
  __ Cmge(d20, d0, d1);
  __ Cmge(d21, d1, d0);
  __ Cmgt(d22, d0, d0);
  __ Cmgt(d23, d0, d1);
  __ Cmhi(d24, d0, d0);
  __ Cmhi(d25, d0, d1);
  __ Cmhs(d26, d0, d0);
  __ Cmhs(d27, d0, d1);
  __ Cmhs(d28, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Each result is asserted as a full Q register with a zero upper half.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q28);
  }
}
4834
// Checks the half-precision vector Fcmeq (register form) in both the 8H and
// 4H arrangements. Four inputs (0, NaN, -1.0, 1.0) are each compared against
// the all-zero vector v0.
TEST(neon_fcmeq_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // 8H arrangement.
  __ Fcmeq(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmeq(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmeq(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmeq(v7.V8H(), v3.V8H(), v0.V8H());
  // 4H arrangement.
  __ Fcmeq(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmeq(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmeq(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmeq(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the zero input compares equal to zero.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
    // The 4H results are asserted with a zero upper half.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v11);
  }
}
4871
// Checks the half-precision scalar Fcmeq (register form). Four inputs (0.0,
// raw 0xffff, -1.0, 1.0) are each compared against h0, which holds 0.0.
TEST(neon_fcmeq_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmeq(h4, h0, h0);
  __ Fcmeq(h5, h1, h0);
  __ Fcmeq(h6, h2, h0);
  __ Fcmeq(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The results are bit masks, so they are checked as raw bit patterns.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h7);
  }
}
4900
// Checks the half-precision vector Fcmge (register form) in both the 8H and
// 4H arrangements. Four inputs (0, NaN, -1.0, 1.0) are each compared against
// the all-zero vector v0.
TEST(neon_fcmge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // 8H arrangement.
  __ Fcmge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmge(v7.V8H(), v3.V8H(), v0.V8H());
  // 4H arrangement.
  __ Fcmge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // The zero and 1.0 inputs compare true; NaN and -1.0 compare false.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // The 4H results are asserted with a zero upper half.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
4937
// Checks the half-precision scalar Fcmge (register form). Four inputs (0.0,
// raw 0xffff, -1.0, 1.0) are each compared against h0, which holds 0.0.
TEST(neon_fcmge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmge(h4, h0, h0);
  __ Fcmge(h5, h1, h0);
  __ Fcmge(h6, h2, h0);
  __ Fcmge(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The results are bit masks, so they are checked as raw bit patterns.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
4966
// Checks the half-precision vector Fcmgt (register form) in both the 8H and
// 4H arrangements. Four inputs (0, NaN, -1.0, 1.0) are each compared against
// the all-zero vector v0.
TEST(neon_fcmgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // 8H arrangement.
  __ Fcmgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmgt(v7.V8H(), v3.V8H(), v0.V8H());
  // 4H arrangement.
  __ Fcmgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the 1.0 input compares greater than zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // The 4H results are asserted with a zero upper half.
    ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5003
// Checks the half-precision scalar Fcmgt (register form). Four inputs (0.0,
// raw 0xffff, -1.0, 1.0) are each compared against h0, which holds 0.0.
TEST(neon_fcmgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmgt(h4, h0, h0);
  __ Fcmgt(h5, h1, h0);
  __ Fcmgt(h6, h2, h0);
  __ Fcmgt(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The results are bit masks, so they are checked as raw bit patterns.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5032
// Checks the half-precision vector Facge in both the 8H and 4H arrangements.
// Four inputs (0, NaN, -1.0, 1.0) are each compared against the all-zero
// vector v0. Unlike the Fcmge test, the -1.0 input is expected to compare
// true here.
TEST(neon_facge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // 8H arrangement.
  __ Facge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facge(v7.V8H(), v3.V8H(), v0.V8H());
  // 4H arrangement.
  __ Facge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Everything except the NaN input compares true.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // The 4H results are asserted with a zero upper half.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5069
// Checks the half-precision scalar Facge. Four inputs (0.0, raw 0xffff, -1.0,
// 1.0) are each compared against h0, which holds 0.0.
TEST(neon_facge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Facge(h4, h0, h0);
  __ Facge(h5, h1, h0);
  __ Facge(h6, h2, h0);
  __ Facge(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The results are bit masks, so they are checked as raw bit patterns.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5098
// Checks the half-precision vector Facgt in both the 8H and 4H arrangements.
// Four inputs (0, NaN, -1.0, 1.0) are each compared against the all-zero
// vector v0. Unlike the Fcmgt test, the -1.0 input is expected to compare
// true here.
TEST(neon_facgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  // 8H arrangement.
  __ Facgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facgt(v7.V8H(), v3.V8H(), v0.V8H());
  // 4H arrangement.
  __ Facgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the -1.0 and 1.0 inputs compare true.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    // The 4H results are asserted with a zero upper half.
    ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5135
// Checks the half-precision scalar Facgt. Four inputs (0.0, raw 0xffff, -1.0,
// 1.0) are each compared against h0, which holds 0.0.
TEST(neon_facgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Facgt(h4, h0, h0);
  __ Facgt(h5, h1, h0);
  __ Facgt(h6, h2, h0);
  __ Facgt(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // The results are bit masks, so they are checked as raw bit patterns.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
5164
// Checks Fcmeq against the immediate 0.0 in scalar (S and D) and vector (2S,
// 4S, 2D) forms, using zero, NaN and negative inputs.
TEST(neon_2regmisc_fcmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // Note: v3 is loaded for symmetry with the sibling fcm* tests, but none of
  // the instructions below reads it.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar forms.
  __ Fcmeq(s16, s0, 0.0);
  __ Fcmeq(s17, s1, 0.0);
  __ Fcmeq(s18, s2, 0.0);
  __ Fcmeq(d19, d0, 0.0);
  __ Fcmeq(d20, d1, 0.0);
  __ Fcmeq(d21, d2, 0.0);
  // Vector forms.
  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Only the zero inputs produce an all-ones mask.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  }
}
5202
// Checks Fcmge against the immediate 0.0 in scalar (S and D) and vector (2S,
// 4S, 2D) forms, using zero, NaN, negative and positive inputs.
TEST(neon_2regmisc_fcmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar forms.
  __ Fcmge(s16, s0, 0.0);
  __ Fcmge(s17, s1, 0.0);
  __ Fcmge(s18, s2, 0.0);
  __ Fcmge(d19, d0, 0.0);
  __ Fcmge(d20, d1, 0.0);
  __ Fcmge(d21, d3, 0.0);
  // Vector forms.
  __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmge(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Zero and positive inputs produce all-ones; NaN and negative produce zero.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5240
5241
// Checks Fcmgt against the immediate 0.0 in scalar (S and D) and vector (2S,
// 4S, 2D) forms, using zero, NaN, negative and positive inputs.
TEST(neon_2regmisc_fcmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar forms.
  __ Fcmgt(s16, s0, 0.0);
  __ Fcmgt(s17, s1, 0.0);
  __ Fcmgt(s18, s2, 0.0);
  __ Fcmgt(d19, d0, 0.0);
  __ Fcmgt(d20, d1, 0.0);
  __ Fcmgt(d21, d3, 0.0);
  // Vector forms.
  __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Only the positive inputs produce an all-ones mask.
    ASSERT_EQUAL_128(0, 0x00000000, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5279
// Checks Fcmle against the immediate 0.0 in scalar (S and D) and vector (2S,
// 4S, 2D) forms, using zero, NaN, negative and positive inputs.
TEST(neon_2regmisc_fcmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar forms.
  __ Fcmle(s16, s0, 0.0);
  __ Fcmle(s17, s1, 0.0);
  __ Fcmle(s18, s3, 0.0);
  __ Fcmle(d19, d0, 0.0);
  __ Fcmle(d20, d1, 0.0);
  __ Fcmle(d21, d2, 0.0);
  // Vector forms.
  __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmle(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Zero and negative inputs produce all-ones; NaN and positive produce zero.
    ASSERT_EQUAL_128(0, 0xffffffff, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5317
5318
// Checks Fcmlt against the immediate 0.0 in scalar (S and D) and vector (2S,
// 4S, 2D) forms, using zero, NaN, negative and positive inputs.
TEST(neon_2regmisc_fcmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // Nan.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  // Scalar forms.
  __ Fcmlt(s16, s0, 0.0);
  __ Fcmlt(s17, s1, 0.0);
  __ Fcmlt(s18, s3, 0.0);
  __ Fcmlt(d19, d0, 0.0);
  __ Fcmlt(d20, d1, 0.0);
  __ Fcmlt(d21, d2, 0.0);
  // Vector forms.
  __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);

  END();

  if (CAN_RUN()) {
    RUN();
    // Only the negative inputs produce an all-ones mask.
    ASSERT_EQUAL_128(0, 0x00000000, q16);
    ASSERT_EQUAL_128(0, 0x00000000, q17);
    ASSERT_EQUAL_128(0, 0x00000000, q18);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  }
}
5356
// Checks the integer Cmeq against the immediate 0 for the 8B, 16B, 4H, 8H,
// 2S, 4S and 2D vector arrangements and for the scalar D form.
TEST(neon_2regmisc_cmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 has an all-zero low half and a non-zero high half; v1 mixes zero and
  // non-zero bytes.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmeq(v16.V8B(), v1.V8B(), 0);
  __ Cmeq(v17.V16B(), v1.V16B(), 0);
  __ Cmeq(v18.V4H(), v1.V4H(), 0);
  __ Cmeq(v19.V8H(), v1.V8H(), 0);
  __ Cmeq(v20.V2S(), v0.V2S(), 0);
  __ Cmeq(v21.V4S(), v0.V4S(), 0);
  __ Cmeq(d22, d0, 0);
  __ Cmeq(d23, d1, 0);
  __ Cmeq(v24.V2D(), v0.V2D(), 0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
    ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
    ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5390
5391
// Checks the compare-against-zero form of Cmge for each vector arrangement and
// for the scalar D form. Lanes are treated as signed: a result lane is all
// ones when the source lane is greater than or equal to zero.
TEST(neon_2regmisc_cmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // The top byte of v0 is 0xff so that its highest S and D lanes are negative.
  __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmge(v16.V8B(), v1.V8B(), 0);
  __ Cmge(v17.V16B(), v1.V16B(), 0);
  __ Cmge(v18.V4H(), v1.V4H(), 0);
  __ Cmge(v19.V8H(), v1.V8H(), 0);
  __ Cmge(v20.V2S(), v0.V2S(), 0);
  __ Cmge(v21.V4S(), v0.V4S(), 0);
  __ Cmge(d22, d0, 0);
  __ Cmge(d23, d1, 0);
  __ Cmge(v24.V2D(), v0.V2D(), 0);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // Lanes with the sign bit set (e.g. 0xff, 0xaa, 0xff55) yield zero; zero
    // and positive lanes yield all ones.
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
    ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5425
5426
// Checks the compare-against-zero form of Cmlt for each vector arrangement and
// for the scalar D form. Lanes are treated as signed: a result lane is all
// ones when the source lane is less than zero.
TEST(neon_2regmisc_cmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // The low half of v0 has its sign bit set, so d0 is negative as a D lane.
  __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmlt(v16.V8B(), v1.V8B(), 0);
  __ Cmlt(v17.V16B(), v1.V16B(), 0);
  __ Cmlt(v18.V4H(), v1.V4H(), 0);
  __ Cmlt(v19.V8H(), v1.V8H(), 0);
  __ Cmlt(v20.V2S(), v1.V2S(), 0);
  __ Cmlt(v21.V4S(), v1.V4S(), 0);
  __ Cmlt(d22, d0, 0);
  __ Cmlt(d23, d1, 0);
  __ Cmlt(v24.V2D(), v0.V2D(), 0);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // Only lanes with the sign bit set yield all ones.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
    ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5460
5461
// Checks the compare-against-zero form of Cmle for each vector arrangement and
// for the scalar D form. Lanes are treated as signed: a result lane is all
// ones when the source lane is less than or equal to zero.
TEST(neon_2regmisc_cmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: zero low half, positive high half. v1: a mix of zero, positive and
  // negative lanes.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmle(v16.V8B(), v1.V8B(), 0);
  __ Cmle(v17.V16B(), v1.V16B(), 0);
  __ Cmle(v18.V4H(), v1.V4H(), 0);
  __ Cmle(v19.V8H(), v1.V8H(), 0);
  __ Cmle(v20.V2S(), v1.V2S(), 0);
  __ Cmle(v21.V4S(), v1.V4S(), 0);
  __ Cmle(d22, d0, 0);
  __ Cmle(d23, d1, 0);
  __ Cmle(v24.V2D(), v0.V2D(), 0);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // Zero lanes and lanes with the sign bit set yield all ones.
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
    ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
    ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  }
}
5495
5496
// Checks the compare-against-zero form of Cmgt for each vector arrangement and
// for the scalar D form. Lanes are treated as signed: a result lane is all
// ones when the source lane is strictly greater than zero.
TEST(neon_2regmisc_cmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: zero low half, positive high half. v1: a mix of zero, positive and
  // negative lanes.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmgt(v16.V8B(), v1.V8B(), 0);
  __ Cmgt(v17.V16B(), v1.V16B(), 0);
  __ Cmgt(v18.V4H(), v1.V4H(), 0);
  __ Cmgt(v19.V8H(), v1.V8H(), 0);
  __ Cmgt(v20.V2S(), v0.V2S(), 0);
  __ Cmgt(v21.V4S(), v0.V4S(), 0);
  __ Cmgt(d22, d0, 0);
  __ Cmgt(d23, d1, 0);
  __ Cmgt(v24.V2D(), v0.V2D(), 0);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // Zero lanes and lanes with the sign bit set yield zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
    ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
  }
}
5530
5531
// Checks Neg (two's complement negation of each lane) for each vector
// arrangement and for the scalar D form. The negation is non-saturating: the
// most negative value of a lane is expected to come back unchanged.
TEST(neon_2regmisc_neg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Inputs hold the boundary values for each lane size (minimum, maximum, -1,
  // 0 and 1): v0 for bytes, v1 for halfwords, v2 for words, v3/v4 for
  // doublewords.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Neg(v16.V8B(), v0.V8B());
  __ Neg(v17.V16B(), v0.V16B());
  __ Neg(v18.V4H(), v1.V4H());
  __ Neg(v19.V8H(), v1.V8H());
  __ Neg(v20.V2S(), v2.V2S());
  __ Neg(v21.V4S(), v2.V4S());
  __ Neg(d22, d3);
  __ Neg(v23.V2D(), v3.V2D());
  __ Neg(v24.V2D(), v4.V2D());

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
    ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
    ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
    ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
    // INT64_MIN negates to itself.
    ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
  }
}
5568
5569
// Checks Sqneg (signed saturating negate) for each vector arrangement and for
// the scalar B, H, S and D forms. Unlike Neg, the most negative value of a
// lane is expected to saturate to the most positive value.
TEST(neon_2regmisc_sqneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Same boundary-value inputs as the Neg test: v0 for bytes, v1 for
  // halfwords, v2 for words, v3/v4 for doublewords.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqneg(v16.V8B(), v0.V8B());
  __ Sqneg(v17.V16B(), v0.V16B());
  __ Sqneg(v18.V4H(), v1.V4H());
  __ Sqneg(v19.V8H(), v1.V8H());
  __ Sqneg(v20.V2S(), v2.V2S());
  __ Sqneg(v21.V4S(), v2.V4S());
  __ Sqneg(v22.V2D(), v3.V2D());
  __ Sqneg(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each input.
  __ Sqneg(b24, b0);
  __ Sqneg(h25, h1);
  __ Sqneg(s26, s2);
  __ Sqneg(d27, d3);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
    ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
    ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
    ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

    // Scalar results: everything above the written lane is expected to be
    // zero.
    ASSERT_EQUAL_128(0, 0x7f, q24);
    ASSERT_EQUAL_128(0, 0x8001, q25);
    ASSERT_EQUAL_128(0, 0x80000001, q26);
    ASSERT_EQUAL_128(0, 0x8000000000000001, q27);
  }
}
5614
5615
// Checks Abs (absolute value of each signed lane) for each vector arrangement
// and for the scalar D form. The operation is non-saturating: the most
// negative value of a lane is expected to come back unchanged.
TEST(neon_2regmisc_abs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Boundary-value inputs: v0 for bytes, v1 for halfwords, v2 for words,
  // v3/v4 for doublewords.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Abs(v16.V8B(), v0.V8B());
  __ Abs(v17.V16B(), v0.V16B());
  __ Abs(v18.V4H(), v1.V4H());
  __ Abs(v19.V8H(), v1.V8H());
  __ Abs(v20.V2S(), v2.V2S());
  __ Abs(v21.V4S(), v2.V4S());
  __ Abs(d22, d3);
  __ Abs(v23.V2D(), v3.V2D());
  __ Abs(v24.V2D(), v4.V2D());

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
    ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
    ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
    // The absolute value of INT64_MIN is left as INT64_MIN.
    ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
  }
}
5652
5653
// Checks Sqabs (signed saturating absolute value) for each vector arrangement
// and for the scalar B, H, S and D forms. Unlike Abs, the most negative value
// of a lane is expected to saturate to the most positive value.
TEST(neon_2regmisc_sqabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Same boundary-value inputs as the Abs test: v0 for bytes, v1 for
  // halfwords, v2 for words, v3/v4 for doublewords.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqabs(v16.V8B(), v0.V8B());
  __ Sqabs(v17.V16B(), v0.V16B());
  __ Sqabs(v18.V4H(), v1.V4H());
  __ Sqabs(v19.V8H(), v1.V8H());
  __ Sqabs(v20.V2S(), v2.V2S());
  __ Sqabs(v21.V4S(), v2.V4S());
  __ Sqabs(v22.V2D(), v3.V2D());
  __ Sqabs(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each input.
  __ Sqabs(b24, b0);
  __ Sqabs(h25, h1);
  __ Sqabs(s26, s2);
  __ Sqabs(d27, d3);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
    ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
    ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

    // Scalar results: everything above the written lane is expected to be
    // zero.
    ASSERT_EQUAL_128(0, 0x7f, q24);
    ASSERT_EQUAL_128(0, 0x7fff, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
5698
// Checks Suqadd for each vector arrangement and for the scalar B, H, S and D
// forms. The destination is also the accumulator: its lanes are treated as
// signed, the source lanes as unsigned, and the sum saturates to the signed
// range of the lane.
TEST(neon_2regmisc_suqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Input pairs (signed accumulator, unsigned addend) per lane size:
  // v0/v1 for bytes, v2/v3 for halfwords, v4/v5 for words and v6/v7 for
  // doublewords.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);

  __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);

  __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);

  // Seed each destination with its accumulator value, since Suqadd reads and
  // overwrites the destination. v16-v22 are used by the vector forms.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  // v23-v26 are used by the scalar forms.
  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Suqadd(v16.V8B(), v1.V8B());
  __ Suqadd(v17.V16B(), v1.V16B());
  __ Suqadd(v18.V4H(), v3.V4H());
  __ Suqadd(v19.V8H(), v3.V8H());
  __ Suqadd(v20.V2S(), v5.V2S());
  __ Suqadd(v21.V4S(), v5.V4S());
  __ Suqadd(v22.V2D(), v7.V2D());

  __ Suqadd(b23, b1);
  __ Suqadd(h24, h3);
  __ Suqadd(s25, s5);
  __ Suqadd(d26, d7);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // The 64-bit arrangements are expected to clear the upper half of the
    // destination, even though it was seeded with a full 128-bit value.
    ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
    ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
    ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
    ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);

    // Scalar results: everything above the written lane is expected to be
    // zero.
    ASSERT_EQUAL_128(0, 0x7f, q23);
    ASSERT_EQUAL_128(0, 0x7ffe, q24);
    ASSERT_EQUAL_128(0, 0x7fffffff, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
  }
}
5760
// Checks Usqadd for each vector arrangement and for the scalar B, H, S and D
// forms. The destination is also the accumulator: its lanes are treated as
// unsigned, the source lanes as signed, and the sum saturates to the unsigned
// range of the lane (clamping at both zero and the maximum).
TEST(neon_2regmisc_usqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Input pairs (unsigned accumulator, signed addend) per lane size:
  // v0/v1 for bytes, v2/v3 for halfwords, v4/v5 for words and v6/v7 for
  // doublewords.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);

  __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);

  __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);

  // Seed each destination with its accumulator value, since Usqadd reads and
  // overwrites the destination. v16-v22 are used by the vector forms.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  // v23-v26 are used by the scalar forms.
  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Usqadd(v16.V8B(), v1.V8B());
  __ Usqadd(v17.V16B(), v1.V16B());
  __ Usqadd(v18.V4H(), v3.V4H());
  __ Usqadd(v19.V8H(), v3.V8H());
  __ Usqadd(v20.V2S(), v5.V2S());
  __ Usqadd(v21.V4S(), v5.V4S());
  __ Usqadd(v22.V2D(), v7.V2D());

  __ Usqadd(b23, b1);
  __ Usqadd(h24, h3);
  __ Usqadd(s25, s5);
  __ Usqadd(d26, d7);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // The 64-bit arrangements are expected to clear the upper half of the
    // destination, even though it was seeded with a full 128-bit value.
    ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
    ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
    ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
    ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);

    // Scalar results: everything above the written lane is expected to be
    // zero.
    ASSERT_EQUAL_128(0, 0xff, q23);
    ASSERT_EQUAL_128(0, 0x7ffe, q24);
    ASSERT_EQUAL_128(0, 0xffffffff, q25);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  }
}
5822
// Checks Xtn/Xtn2 (narrowing by truncation). Each result lane is the low half
// of the corresponding source lane. Xtn fills the lower 64 bits of the
// destination and Xtn2 fills the upper 64 bits.
TEST(neon_2regmisc_xtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is written twice: Xtn for the lower half, then Xtn2 for
  // the upper half from a different source register.
  __ Xtn(v16.V8B(), v0.V8H());
  __ Xtn2(v16.V16B(), v1.V8H());
  __ Xtn(v17.V4H(), v1.V4S());
  __ Xtn2(v17.V8H(), v2.V4S());
  __ Xtn(v18.V2S(), v3.V2D());
  __ Xtn2(v18.V4S(), v4.V2D());

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
  }
}
5850
5851
// Checks Sqxtn/Sqxtn2 (signed saturating narrow) in vector and scalar forms.
// Source lanes that do not fit the signed range of the narrower lane are
// expected to clamp to that lane's minimum or maximum.
TEST(neon_2regmisc_sqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Sqxtn writes the lower half of the destination; Sqxtn2 then writes the
  // upper half from a different source register.
  __ Sqxtn(v16.V8B(), v0.V8H());
  __ Sqxtn2(v16.V16B(), v1.V8H());
  __ Sqxtn(v17.V4H(), v1.V4S());
  __ Sqxtn2(v17.V8H(), v2.V4S());
  __ Sqxtn(v18.V2S(), v3.V2D());
  __ Sqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all narrowing the lowest lane of v0.
  __ Sqxtn(b19, h0);
  __ Sqxtn(h20, s0);
  __ Sqxtn(s21, d0);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
    // Scalar results: h0 and d0 are negative and clamp to the minimum, s0 is
    // positive and clamps to the maximum.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
5885
5886
// Checks Uqxtn/Uqxtn2 (unsigned saturating narrow) in vector and scalar forms.
// Source lanes are treated as unsigned; values too large for the narrower
// lane are expected to clamp to that lane's all-ones maximum.
TEST(neon_2regmisc_uqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Uqxtn writes the lower half of the destination; Uqxtn2 then writes the
  // upper half from a different source register.
  __ Uqxtn(v16.V8B(), v0.V8H());
  __ Uqxtn2(v16.V16B(), v1.V8H());
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms, all narrowing the lowest lane of v0.
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
    // Every scalar input exceeds the narrow range, so each result saturates.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  }
}
5920
5921
// Checks Sqxtun/Sqxtun2 (signed-to-unsigned saturating narrow) in vector and
// scalar forms. Source lanes are treated as signed and results as unsigned:
// negative inputs are expected to clamp to zero and inputs too large for the
// narrower lane to its all-ones maximum.
TEST(neon_2regmisc_sqxtun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Sqxtun writes the lower half of the destination; Sqxtun2 then writes the
  // upper half from a different source register.
  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  // Scalar forms, all narrowing the lowest lane of v0.
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    // Scalar results: h0 and d0 are negative (clamp to zero), s0 is positive
    // and too large (clamps to 0xffff).
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  }
}
5955
// Checks the vector bitwise And in its 128-bit (16B) and 64-bit (8B) forms.
// The 8B form is expected to leave the upper 64 bits of the destination zero.
TEST(neon_3same_and) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ And(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ And(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ And(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ And(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // x & x == x, so the self tests must reproduce v0.
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
    ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
  }
}
5978
// Checks the vector register form of Bic (first operand AND NOT second
// operand) in its 128-bit (16B) and 64-bit (8B) forms. The 8B form is expected
// to leave the upper 64 bits of the destination zero.
TEST(neon_3same_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // x & ~x == 0, so the self tests must produce zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
  }
}
6001
// Checks the vector register form of Orr (bitwise OR) in its 128-bit (16B) and
// 64-bit (8B) forms. The 8B form is expected to leave the upper 64 bits of the
// destination zero.
TEST(neon_3same_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // x | x == x, so the self tests must reproduce v0.
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
    ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
  }
}
6024
// Checks the vector-to-vector Mov for every arrangement. The 128-bit
// arrangements must copy the whole register; the 64-bit arrangements must copy
// the low half and leave the upper 64 bits of the destination zero.
TEST(neon_3same_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);

  // 128-bit arrangements.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);

    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);
  }
}
6055
// Checks the vector Orn (first operand OR NOT second operand) in its 128-bit
// (16B) and 64-bit (8B) forms. The 8B form is expected to leave the upper 64
// bits of the destination zero.
TEST(neon_3same_orn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // x | ~x is all ones, so the self tests must produce all ones.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
    ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
  }
}
6078
// Checks the vector Eor (bitwise exclusive OR) in its 128-bit (16B) and 64-bit
// (8B) forms. The 8B form is expected to leave the upper 64 bits of the
// destination zero.
TEST(neon_3same_eor) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();
    // x ^ x == 0, so the self tests must produce zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
  }
}
6101
// Checks Bif (bitwise insert if false) in its 16B and 8B forms. For
// Bif(vd, vn, vm), each destination bit is replaced by the bit from vn where
// the corresponding bit of vm is clear, and kept where it is set:
//   vd = (vd & vm) | (vn & ~vm)
TEST(neon_3same_bif) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is (initial destination, vn, vm) for one Bif below.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
    ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  }
}
6132
// Checks Bit (bitwise insert if true) in its 16B and 8B forms. For
// Bit(vd, vn, vm), each destination bit is replaced by the bit from vn where
// the corresponding bit of vm is set, and kept where it is clear:
//   vd = (vd & ~vm) | (vn & vm)
TEST(neon_3same_bit) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is (initial destination, vn, vm) for one Bit below.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
    ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  }
}
6163
// Checks Bsl (bitwise select) in its 16B and 8B forms. For Bsl(vd, vn, vm),
// the initial value of vd is the selector: each result bit comes from vn where
// the selector bit is set and from vm where it is clear:
//   vd = (vn & vd) | (vm & ~vd)
TEST(neon_3same_bsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is (selector / destination, vn, vm) for one Bsl below.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
    ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
    ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  }
}
6194
6195
// Checks Smax (lane-wise signed maximum) for the B, H and S lane sizes in both
// 64-bit and 128-bit arrangements. The 64-bit arrangements are expected to
// leave the upper 64 bits of the destination zero.
TEST(neon_3same_smax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements (even-numbered destinations).
  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd-numbered destinations).
  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
    ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  }
}
6224
6225
// Checks Smaxp (pairwise signed maximum) for the B, H and S lane sizes in both
// 64-bit and 128-bit arrangements. Each result lane is the signed maximum of
// two adjacent source lanes: the lower half of the result is formed from the
// first source operand and the upper half from the second.
TEST(neon_3same_smaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements (even-numbered destinations).
  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd-numbered destinations).
  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
    ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
    ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
    ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  }
}
6254
6255
// Checks the scalar form of Addp, which adds the two 64-bit lanes of a 2D
// vector and writes the (wrapping) sum to a D register.
TEST(neon_addp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of each destination are expected to be zero. The sum
    // for v2 overflows 64 bits and the carry is discarded.
    ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
    ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
    ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
  }
}
6279
// Checks Addv, which sums all lanes of a vector into a scalar of the same lane
// size. The sum wraps at the lane width (compare the wider sums in the
// Saddlv/Uaddlv tests, which use the same inputs).
TEST(neon_acrosslanes_addv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());
  __ Addv(s20, v2.V4S());

  END();

  // Results are only checked when the generated code can actually be run.
  if (CAN_RUN()) {
    RUN();

    // Everything above the written scalar lane is expected to be zero.
    ASSERT_EQUAL_128(0x0, 0xc7, q16);
    ASSERT_EQUAL_128(0x0, 0x99, q17);
    ASSERT_EQUAL_128(0x0, 0x55a9, q18);
    ASSERT_EQUAL_128(0x0, 0x55fc, q19);
    ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
  }
}
6307
6308
// Check Saddlv (signed add long across lanes): source lanes are treated as
// signed, summed, and written to a scalar destination that is twice the lane
// width.
TEST(neon_acrosslanes_saddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());
  __ Saddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // The byte sums are negative, so they appear sign-extended to 16 bits
    // (0xffc7, 0xff99); bits above the destination lane must be zero.
    ASSERT_EQUAL_128(0x0, 0xffc7, q16);
    ASSERT_EQUAL_128(0x0, 0xff99, q17);
    ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
    ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
    // Written with the full 16 hex digits of a 64-bit result (the value is
    // unchanged) so that it lines up with the Uaddlv expectation.
    ASSERT_EQUAL_128(0x0, 0x000000001100a9fe, q20);
  }
}
6336
6337
// Check Uaddlv (unsigned add long across lanes): source lanes are treated as
// unsigned, summed, and written to a scalar destination that is twice the lane
// width, so carries out of the source lane width are kept.
TEST(neon_acrosslanes_uaddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());
  __ Uaddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Same low bits as the Addv results for these inputs, plus the carry bits
    // that the wider destination retains.
    ASSERT_EQUAL_128(0x0, 0x02c7, q16);
    ASSERT_EQUAL_128(0x0, 0x0599, q17);
    ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
    ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
    ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
  }
}
6365
6366
// Check Smaxv (signed maximum across lanes) for byte, halfword and word lanes.
TEST(neon_acrosslanes_smaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());
  __ Smaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Lanes with the top bit set (e.g. 0xfc, 0xffaa, 0xffffffff) are negative
    // and therefore never selected here.
    ASSERT_EQUAL_128(0x0, 0x33, q16);
    ASSERT_EQUAL_128(0x0, 0x44, q17);
    ASSERT_EQUAL_128(0x0, 0x55ff, q18);
    ASSERT_EQUAL_128(0x0, 0x55ff, q19);
    ASSERT_EQUAL_128(0x0, 0x66555555, q20);
  }
}
6394
6395
// Check Sminv (signed minimum across lanes) for byte, halfword and word lanes.
TEST(neon_acrosslanes_sminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  // With these halfwords the minimum over the low four lanes (0xffaa) differs
  // from the minimum over all eight (0xaaaa).
  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());
  __ Sminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // The result is written to the low lane only; it is not sign-extended
    // into the rest of the register.
    ASSERT_EQUAL_128(0x0, 0xaa, q16);
    ASSERT_EQUAL_128(0x0, 0x80, q17);
    ASSERT_EQUAL_128(0x0, 0xffaa, q18);
    ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
    ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
  }
}
6423
// Check Umaxv (unsigned maximum across lanes) for byte, halfword and word
// lanes.
TEST(neon_acrosslanes_umaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  // The largest halfword of the low half is 0xffaa; the upper half holds the
  // larger 0xffab, so the V4H and V8H results differ.
  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());
  __ Umaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0xfc, q16);
    ASSERT_EQUAL_128(0x0, 0xfe, q17);
    ASSERT_EQUAL_128(0x0, 0xffaa, q18);
    ASSERT_EQUAL_128(0x0, 0xffab, q19);
    ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
  }
}
6451
6452
// Check Uminv (unsigned minimum across lanes) for byte, halfword and word
// lanes.
TEST(neon_acrosslanes_uminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // The low halves of v0 and v1 contain no zero lane while the upper halves
  // do, so the 64-bit and 128-bit arrangements give different minima.
  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());
  __ Uminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x01, q16);
    ASSERT_EQUAL_128(0x0, 0x00, q17);
    ASSERT_EQUAL_128(0x0, 0x0001, q18);
    ASSERT_EQUAL_128(0x0, 0x0000, q19);
    ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
  }
}
6480
6481
// Check the vector form of Smin (lane-wise signed minimum) for every byte,
// halfword and word arrangement.
TEST(neon_3same_smin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements (even destinations).
  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destinations).
  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit forms must leave the upper half of the destination zero; the
    // 128-bit forms produce the same low half plus a result for the upper
    // lanes.
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  }
}
6510
6511
// Check the vector form of Umax (lane-wise unsigned maximum) for every byte,
// halfword and word arrangement.
TEST(neon_3same_umax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements (even destinations).
  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destinations).
  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // For these particular inputs the unsigned maximum of every lane happens
    // to equal the signed minimum, so the expected values coincide with those
    // of neon_3same_smin.
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  }
}
6540
6541
// Check the vector form of Umin (lane-wise unsigned minimum) for every byte,
// halfword and word arrangement.
TEST(neon_3same_umin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit arrangements (even destinations).
  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit arrangements (odd destinations).
  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit forms must leave the upper half of the destination zero.
    ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
    ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  }
}
6570
6571
// Check Fcadd (floating-point complex add with rotation) for the 2D, 2S and 4S
// arrangements.
//
// In the comments below a complex value is written "(Ai, B)": A is the
// imaginary part, held in the upper element of the pair, and B is the real
// part, held in the lower element. "(d)" and "(f)" mark double- and
// single-precision data.
TEST(neon_3same_extra_fcadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (0i, 5) (d)
  __ Movi(v0.V2D(), 0x0, 0x4014000000000000);
  // (5i, 0) (d)
  __ Movi(v1.V2D(), 0x4014000000000000, 0x0);
  // (10i, 10) (d)
  __ Movi(v2.V2D(), 0x4024000000000000, 0x4024000000000000);
  // (5i, 5), (5i, 5) (f)
  __ Movi(v3.V2D(), 0x40A0000040A00000, 0x40A0000040A00000);
  // (5i, 5), (0i, 0) (f)
  __ Movi(v4.V2D(), 0x40A0000040A00000, 0x0);
  // 324567i, 16000 (f)
  __ Movi(v5.V2D(), 0x0, 0x489E7AE0467A0000);

  // Subtraction (10, 10) - (5, 5) == (5, 5)
  // Done in two steps: the first Fcadd adjusts one component using v1, the
  // second adjusts the other using v0 with the opposite rotation.
  __ Fcadd(v31.V2D(), v2.V2D(), v1.V2D(), 90);
  __ Fcadd(v31.V2D(), v31.V2D(), v0.V2D(), 270);

  // Addition (10, 10) + (5, 5) == (15, 15)
  // Same two-step scheme with the rotations swapped.
  __ Fcadd(v30.V2D(), v2.V2D(), v1.V2D(), 270);
  __ Fcadd(v30.V2D(), v30.V2D(), v0.V2D(), 90);

  // 2S
  // The low pair of v4 is zero, so these results are just the rotated v5.
  __ Fcadd(v29.V2S(), v4.V2S(), v5.V2S(), 90);
  __ Fcadd(v28.V2S(), v4.V2S(), v5.V2S(), 270);

  // 4S
  __ Fcadd(v27.V4S(), v3.V4S(), v4.V4S(), 90);
  __ Fcadd(v26.V4S(), v3.V4S(), v4.V4S(), 270);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x4014000000000000, 0x4014000000000000, q31);
    ASSERT_EQUAL_128(0x402E000000000000, 0x402E000000000000, q30);
    ASSERT_EQUAL_128(0x0, 0x467a0000c89e7ae0, q29);  // (16000i, -324567)
    ASSERT_EQUAL_128(0x0, 0xc67a0000489e7ae0, q28);  // (-16000i, 324567)
    // Upper pair: (5i, 5) combined with rotated (5i, 5); lower pair: (5i, 5)
    // combined with zero, so it is unchanged.
    ASSERT_EQUAL_128(0x4120000000000000, 0x40A0000040A00000, q27);
    ASSERT_EQUAL_128(0x0000000041200000, 0x40A0000040A00000, q26);
  }
}
6618
6619
// Check the vector form of Fcmla (floating-point complex multiply accumulate
// with rotation) for the 2S, 4S and 2D arrangements.
//
// A complex value is written "(Ai, B)": A is the imaginary part (upper element
// of the pair) and B the real part (lower element). Where two pairs are
// listed, the first is the upper 64 bits of the register. "(f)" and "(d)" mark
// single- and double-precision data.
TEST(neon_3same_extra_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  __ Movi(v1.V2D(), 0x0, 0x40A0000040400000);  // (5i, 3) (f)
  __ Movi(v2.V2D(), 0x0, 0x4040000040A00000);  // (3i, 5) (f)

  __ Movi(v3.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v4.V2D(), 0x0, 0x40E000003F800000);  // (7i, 1) (f)

  __ Movi(v5.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v6.V2D(), 0x0, 0x408000003F800000);  // (4i, 1) (f)

  // (1.5i, 2.5), (31.5i, 1024) (f)
  __ Movi(v7.V2D(), 0x3FC0000040200000, 0x41FC000044800000);
  // (2048i, 412.75), (3645i, 0) (f)
  __ Movi(v8.V2D(), 0x4500000043CE6000, 0x4563D00000000000);
  // (2000i, 450,000) (d)
  __ Movi(v9.V2D(), 0x409F400000000000, 0x411B774000000000);
  // (30,000i, 1250) (d)
  __ Movi(v10.V2D(), 0x40DD4C0000000000, 0x4093880000000000);

  // DST
  // Fcmla accumulates into its destination, so every destination starts at
  // zero.
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculations
  // A rotation-0 and a rotation-90 Fcmla on the same destination (in either
  // order) together accumulate the full complex product of the two operands.
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 90);
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 0);

  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 0);
  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 90);

  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 90);
  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 0);

  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 0);
  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 90);

  // Partial checks
  // A single rotation on its own yields only part of the product.
  __ Fcmla(v27.V2S(), v1.V2S(), v2.V2S(), 0);
  __ Fcmla(v26.V2S(), v2.V2S(), v1.V2S(), 0);

  __ Fcmla(v25.V4S(), v7.V4S(), v8.V4S(), 270);
  __ Fcmla(v24.V4S(), v7.V4S(), v8.V4S(), 180);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x4208000000000000, q31);  // (34i, 0)
    ASSERT_EQUAL_128(0x0, 0x41B80000C1300000, q30);  // (23i, -11)
    ASSERT_EQUAL_128(0x0, 0x41600000C0A00000, q29);  // (14i, -5)

    // (13502500000i, 502500000)
    ASSERT_EQUAL_128(0x4209267E65000000, 0x41BDF38AA0000000, q28);
    ASSERT_EQUAL_128(0x0, 0x4110000041700000, q27);  // (9i, 15)
    ASSERT_EQUAL_128(0x0, 0x41C8000041700000, q26);  // (25i, 15)
    // (-619.125i, 3072), (0i, 114817.5)
    ASSERT_EQUAL_128(0xc41ac80045400000, 0x0000000047e040c0, q25);
    // (-5120i, -1031.875), (-3732480i, 0)
    ASSERT_EQUAL_128(0xc5a00000c480fc00, 0xca63d00000000000, q24);
  }
}
6692
6693
// Check the by-element form of Fcmla: every complex pair of the first source
// is combined with one complex pair of the second source, selected by index,
// using each of the four rotations.
//
// A complex value is written "(Ai, B)": A is the imaginary part (upper element
// of the pair) and B the real part (lower element). Where two pairs are
// listed, the first is the upper 64 bits of the register.
TEST(neon_byelement_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (5i, 3), (5i, 3) (f)
  __ Movi(v1.V2D(), 0x40A0000040400000, 0x40A0000040400000);
  // (3i, 5), (3i, 5) (f)
  __ Movi(v2.V2D(), 0x4040000040A00000, 0x4040000040A00000);
  // (7i, 1), (5i, 3) (f)
  __ Movi(v3.V2D(), 0x40E000003F800000, 0x40A0000040400000);
  // (4i, 1), (3i, 5) (f)
  __ Movi(v4.V2D(), 0x408000003F800000, 0x4040000040A00000);
  // (4i, 1), (7i, 1) (f)
  __ Movi(v5.V2D(), 0x408000003F800000, 0x40E000003F800000);
  // (2i, 3), (0, 0) (f)
  __ Movi(v6.V2D(), 0x4000000040400000, 0x0);

  // DST
  // Fcmla accumulates into its destination, so every destination starts at
  // zero.
  __ Movi(v22.V2D(), 0x0, 0x0);
  __ Movi(v23.V2D(), 0x0, 0x0);
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculation (pairs)
  // Rotations 90 and 0 on the same destination accumulate the full complex
  // product. Index 1 selects the upper pair of the indexed operand, index 0
  // the lower pair.
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 90);
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 0);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 90);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 0);

  // Rotations
  // Each rotation on its own, for both indices of v4.
  __ Fcmla(v29.V4S(), v3.V4S(), v4.S(), 1, 0);
  __ Fcmla(v28.V4S(), v3.V4S(), v4.S(), 1, 90);
  __ Fcmla(v27.V4S(), v3.V4S(), v4.S(), 1, 180);
  __ Fcmla(v26.V4S(), v3.V4S(), v4.S(), 1, 270);
  __ Fcmla(v25.V4S(), v3.V4S(), v4.S(), 0, 270);
  __ Fcmla(v24.V4S(), v3.V4S(), v4.S(), 0, 180);
  __ Fcmla(v23.V4S(), v3.V4S(), v4.S(), 0, 90);
  __ Fcmla(v22.V4S(), v3.V4S(), v4.S(), 0, 0);

  END();

  if (CAN_RUN()) {
    RUN();
    // (34i, 0), (34i, 0)
    ASSERT_EQUAL_128(0x4208000000000000, 0x4208000000000000, q31);
    // (14i, -5), (23i, -11)
    ASSERT_EQUAL_128(0x41600000C0A00000, 0x41B80000C1300000, q30);
    // (4i, 1), (12i, 3)
    ASSERT_EQUAL_128(0x408000003f800000, 0x4140000040400000, q29);
    // (7i, -28), (5i, -20)
    ASSERT_EQUAL_128(0x40e00000c1e00000, 0x40a00000c1a00000, q28);
    // (-4i, -1), (-12i, -3)
    ASSERT_EQUAL_128(0xc0800000bf800000, 0xc1400000c0400000, q27);
    // (-7i, 28), (-5i, 20)
    ASSERT_EQUAL_128(0xc0e0000041e00000, 0xc0a0000041a00000, q26);
    // (-35i, 21), (-25i, 15)
    ASSERT_EQUAL_128(0xc20c000041a80000, 0xc1c8000041700000, q25);
    // (-3i, -5), (-9i, -15)
    ASSERT_EQUAL_128(0xc0400000c0a00000, 0xc1100000c1700000, q24);
    // (35i, -21), (25i, -15)
    ASSERT_EQUAL_128(0x420c0000c1a80000, 0x41c80000c1700000, q23);
    // (3i, 5), (9i, 15)
    ASSERT_EQUAL_128(0x4040000040a00000, 0x4110000041700000, q22);
  }
}
6766
6767
// Check Mvn (bitwise NOT) with every vector arrangement the MacroAssembler is
// asked to accept here. The expected result is the same bit pattern whatever
// the lane size; only the 64-bit/128-bit width changes the outcome.
TEST(neon_2regmisc_mvn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  // 128-bit arrangements.
  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);
    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);

    // The 64-bit forms invert the low half and leave the upper half zero.
    ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q24);
    ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q25);
    ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q26);
  }
}
6799
6800
// Check Not (bitwise NOT) for the 16B and 8B arrangements.
TEST(neon_2regmisc_not) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
  __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);

  __ Not(v16.V16B(), v0.V16B());
  __ Not(v17.V8B(), v1.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
    // The 8B form leaves the upper half zero rather than inverting it.
    ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
  }
}
6820
6821
// Check the per-lane bit-counting instructions:
//  - Cls: number of consecutive bits below the sign bit that equal it.
//  - Clz: number of leading zero bits.
//  - Cnt: number of set bits (byte lanes only).
TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Cls over v1, for byte, halfword and word lanes.
  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  // Clz over v0, for byte, halfword and word lanes.
  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  // Cnt: note that the 8B form reads v0 while the 16B form reads v1.
  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Cls results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
    ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
    ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
    ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);

    // Clz results. The all-zero top byte of v0 gives a count of 8 for the
    // byte lane and contributes to the 0xf counts of the wider lanes.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
    ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
    ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
    ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);

    // Cnt results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
    ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);
  }
}
6870
// Check the element-reversal instructions:
//  - Rev16/Rev32/Rev64 reverse the order of the lanes inside each 16-, 32- or
//    64-bit container.
//  - Rbit reverses the bits inside each byte.
TEST(neon_2regmisc_rev) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Bytes within halfwords.
  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  // Bytes or halfwords within words.
  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  // Bytes, halfwords or words within doublewords.
  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  // Bits within bytes (reads v1, not v0).
  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // In every pair below, the 64-bit form must leave the upper half zero and
    // the 128-bit form must produce the same low half.
    ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);

    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
    ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);

    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
    ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
    ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);

    ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
    ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);
  }
}
6921
6922
// Check Sli (shift left and insert): each source lane is shifted left by the
// immediate and written over the destination lane, except for the low `shift`
// bits of the destination lane, which are preserved.
TEST(neon_sli) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Sli reads its destination, so seed every destination with v0.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  // Vector forms. The shift amounts include 0 (for V2S), where nothing of the
  // destination lane is kept, and lane-width minus one (for 16B, 8H and 4S),
  // where only the top bit comes from the source.
  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Sli(v17.V16B(), v1.V16B(), 7);
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Sli(v19.V8H(), v1.V8H(), 15);
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Sli(v21.V4S(), v1.V4S(), 31);
  __ Sli(v22.V2D(), v1.V2D(), 48);

  // Scalar form.
  __ Sli(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements (q16, q18, q20) and the scalar form (q23) clear
    // the upper half, even though it was seeded with v0.
    ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
    ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);

    ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);
  }
}
6966
6967
// Check Sri (shift right and insert): each source lane is shifted right by the
// immediate and written over the destination lane, except for the top `shift`
// bits of the destination lane, which are preserved.
TEST(neon_sri) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  // Sri reads its destination, so seed every destination with v0.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

  // Vector forms. Unlike the Sli test, the smallest shift used here is 1.
  __ Sri(v16.V8B(), v1.V8B(), 4);
  __ Sri(v17.V16B(), v1.V16B(), 7);
  __ Sri(v18.V4H(), v1.V4H(), 8);
  __ Sri(v19.V8H(), v1.V8H(), 15);
  __ Sri(v20.V2S(), v1.V2S(), 1);
  __ Sri(v21.V4S(), v1.V4S(), 31);
  __ Sri(v22.V2D(), v1.V2D(), 48);

  // Scalar form.
  __ Sri(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements (q16, q18, q20) and the scalar form (q23) clear
    // the upper half, even though it was seeded with v0.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
    ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
    ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
    ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
    ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);

    ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);
  }
}
7011
7012
// Check Shrn/Shrn2 (shift right narrow): each source lane is shifted right and
// truncated to half its width. Shrn writes the lower half of the destination;
// Shrn2 writes the upper half and keeps the lower half.
TEST(neon_shrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is built by a Shrn (maximum shift for the lane size)
  // followed by a Shrn2 (shift of one).
  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    // Lower half: Shrn result; upper half: Shrn2 result.
    ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
  }
}
7040
7041
// Check Rshrn/Rshrn2 (rounding shift right narrow): like Shrn/Shrn2, but the
// value is rounded (half of the discarded range is added) before the shift.
TEST(neon_rshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is built by an Rshrn (maximum shift for the lane size)
  // followed by an Rshrn2 (shift of one).
  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    // Lower half: Rshrn result; upper half: Rshrn2 result. Results are
    // truncated, not saturated, so e.g. a rounded 0xffff >> 1 narrows to 0x00.
    ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
  }
}
7069
7070
// Check Uqshrn/Uqshrn2 (unsigned saturating shift right narrow): each source
// lane is shifted right as an unsigned value and saturated to the unsigned
// range of the narrower lane. Vector and scalar forms are covered.
TEST(neon_uqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the first instruction of each pair fills the lower half of
  // the destination, the "2" variant fills the upper half.
  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Uqshrn(b19, h0, 8);
  __ Uqshrn(h20, s1, 16);
  __ Uqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // With a shift of one, most non-zero inputs no longer fit and saturate to
    // all-ones in the upper halves.
    ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
    // Scalar results occupy the low lane only; the rest must be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7105
7106
// Check Uqrshrn/Uqrshrn2 (unsigned saturating rounding shift right narrow):
// like Uqshrn/Uqshrn2, but the value is rounded before the shift. Vector and
// scalar forms are covered.
TEST(neon_uqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the first instruction of each pair fills the lower half of
  // the destination, the "2" variant fills the upper half.
  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Uqrshrn(b19, h0, 8);
  __ Uqrshrn(h20, s1, 16);
  __ Uqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Compared with the Uqshrn expectations, several lanes are one larger
    // because of rounding (e.g. 0x80 -> 0x81, 0x7fffffff -> 0x80000000).
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
    // Scalar results occupy the low lane only; the rest must be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7141
7142
// Check Sqshrn/Sqshrn2 (signed saturating shift right narrow): each source
// lane is shifted right as a signed value and saturated to the signed range of
// the narrower lane. Vector and scalar forms are covered.
TEST(neon_sqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the first instruction of each pair fills the lower half of
  // the destination, the "2" variant fills the upper half.
  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqshrn(b19, h0, 8);
  __ Sqshrn(h20, s1, 16);
  __ Sqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // The upper halves show both saturation limits: the most positive value
    // (0x7f, 0x7fff, 0x7fffffff) and the most negative (0x80, 0x8000,
    // 0x80000000).
    ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
    // Scalar results occupy the low lane only; the rest must be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7177
7178
// Check Sqrshrn/Sqrshrn2 (signed saturating rounding shift right narrow): like
// Sqshrn/Sqshrn2, but the value is rounded before the shift. Vector and scalar
// forms are covered.
TEST(neon_sqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the first instruction of each pair fills the lower half of
  // the destination, the "2" variant fills the upper half.
  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqrshrn(b19, h0, 8);
  __ Sqrshrn(h20, s1, 16);
  __ Sqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Rounding turns -1 >> 1 into 0 (the 0xff/0xffff lanes of the Sqshrn
    // expectations become 0 here). Rounding 0x7fffffffffffffff up before the
    // 32-bit shift exceeds the signed range and saturates to 0x7fffffff.
    ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
    // Scalar results occupy the low lane only; the rest must be zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7213
7214
// Check Sqshrun/Sqshrun2 (signed saturating shift right unsigned narrow): each
// source lane is shifted right as a signed value and saturated to the
// *unsigned* range of the narrower lane, so negative inputs produce zero.
// Vector and scalar forms are covered.
TEST(neon_sqshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the first instruction of each pair fills the lower half of
  // the destination, the "2" variant fills the upper half.
  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqshrun(b19, h0, 8);
  __ Sqshrun(h20, s1, 16);
  __ Sqshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Lanes whose source has the top bit set are zero; large positive sources
    // saturate to all-ones.
    ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
    // h0 (0x8081) is negative, so the byte result is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7249
7250
// Check Sqrshrun/Sqrshrun2 (signed saturating rounding shift right unsigned
// narrow): like Sqshrun/Sqshrun2, but the value is rounded before the shift.
// Vector and scalar forms are covered.
TEST(neon_sqrshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // v4 is initialised but not read by any instruction in this test.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms: the first instruction of each pair fills the lower half of
  // the destination, the "2" variant fills the upper half.
  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();
    // Compared with the Sqshrun expectations, rounding bumps a few lanes by
    // one (e.g. 0x7fffffff -> 0x80000000, which still fits the unsigned lane).
    ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
    // h0 (0x8081) is negative, so the byte result is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7285
// Exercises Bic (vector, immediate) with 8-bit immediates of 0x00 and 0xff at
// every shift accepted by the 16-bit and 32-bit lane arrangements.
TEST(neon_modimm_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Every destination starts from the same 128-bit pattern so that the effect
  // of each immediate/shift pair can be read directly from the expected
  // values below.
  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  // 16-bit lanes: shifts of 0 and 8.
  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  // 32-bit lanes, 64-bit vectors: shifts of 0, 8, 16 and 24.
  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  // 32-bit lanes, 128-bit vectors: shifts of 0, 8, 16 and 24.
  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A zero immediate leaves the lanes unchanged; the 64-bit arrangements
    // (V4H, V2S) are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
    ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
    ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
    ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

    ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
    ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);
  }
}
7340
7341
// Checks that the Movi macro can replicate a range of 16-bit immediates across
// the V4H and V8H arrangements. Only the final register contents are checked,
// not the instruction sequence the macro chooses.
TEST(neon_modimm_movi_16bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V4H(), 0xabab);
  __ Movi(v1.V4H(), 0xab00);
  __ Movi(v2.V4H(), 0xabff);
  __ Movi(v3.V8H(), 0x00ab);
  __ Movi(v4.V8H(), 0xffab);
  __ Movi(v5.V8H(), 0xabcd);

  END();

  if (CAN_RUN()) {
    RUN();

    // The V4H forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
    ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
    ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);
    ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
    ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);
  }
}
7367
7368
// Checks that the Movi macro can replicate a range of 32-bit immediates across
// the V2S and V4S arrangements. Only the final register contents are checked,
// not the instruction sequence the macro chooses.
TEST(neon_modimm_movi_32bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // A single non-zero byte, all other bytes zero.
  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  // A single byte that is not 0xff, all other bytes 0xff.
  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  // One arbitrary byte with 0xff bytes on one side and zero bytes on the
  // other.
  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  // Repeating-byte, repeating-halfword and arbitrary patterns.
  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  if (CAN_RUN()) {
    RUN();

    // The V2S forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
    ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
    ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
    ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

    ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
    ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
    ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
    ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

    ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
    ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
    ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
    ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
    ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  }
}
7420
7421
// Checks that the Movi macro can materialise a range of 64-bit immediates in
// the V1D and V2D arrangements. Only the final register contents are checked,
// not the instruction sequence the macro chooses.
TEST(neon_modimm_movi_64bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V1D(), 0x00ffff0000ffffff);
  __ Movi(v1.V2D(), 0xabababababababab);
  __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
  __ Movi(v3.V2D(), 0xabcdef01abcdef01);
  __ Movi(v4.V1D(), 0xabcdef0123456789);
  __ Movi(v5.V2D(), 0xabcdef0123456789);

  END();

  if (CAN_RUN()) {
    RUN();

    // The V1D results are only checked through their D register view; the V2D
    // results must hold the immediate in both 64-bit lanes.
    ASSERT_EQUAL_64(0x00ffff0000ffffff, d0);
    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
    ASSERT_EQUAL_64(0xabcdef0123456789, d4);
    ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);
  }
}
7447
7448
// Exercises the Movi forms that take an 8-bit immediate: byte replication,
// 64-bit immediates, and the LSL / MSL shifted forms for 16-bit and 32-bit
// lanes.
TEST(neon_modimm_movi) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Per-byte replication.
  __ Movi(v0.V8B(), 0xaa);
  __ Movi(v1.V16B(), 0x55);

  // 64-bit immediates, to a scalar D register and to both lanes of a vector.
  __ Movi(d2, 0x00ffff0000ffffff);
  __ Movi(v3.V2D(), 0x00ffff0000ffffff);

  // LSL, 16-bit lanes: shifts of 0 and 8.
  __ Movi(v16.V4H(), 0x00, LSL, 0);
  __ Movi(v17.V4H(), 0xff, LSL, 8);
  __ Movi(v18.V8H(), 0x00, LSL, 0);
  __ Movi(v19.V8H(), 0xff, LSL, 8);

  // LSL, 32-bit lanes, 64-bit vectors: shifts of 0, 8, 16 and 24.
  __ Movi(v20.V2S(), 0x00, LSL, 0);
  __ Movi(v21.V2S(), 0xff, LSL, 8);
  __ Movi(v22.V2S(), 0x00, LSL, 16);
  __ Movi(v23.V2S(), 0xff, LSL, 24);

  // LSL, 32-bit lanes, 128-bit vectors: shifts of 0, 8, 16 and 24.
  __ Movi(v24.V4S(), 0xff, LSL, 0);
  __ Movi(v25.V4S(), 0x00, LSL, 8);
  __ Movi(v26.V4S(), 0xff, LSL, 16);
  __ Movi(v27.V4S(), 0x00, LSL, 24);

  // MSL: the expected values below show the vacated low bits filled with
  // ones rather than zeros.
  __ Movi(v28.V2S(), 0xaa, MSL, 8);
  __ Movi(v29.V2S(), 0x55, MSL, 16);
  __ Movi(v30.V4S(), 0xff, MSL, 8);
  __ Movi(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
    ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);

    ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
    ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);

    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
    ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);

    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
    ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);

    ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

    ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);
  }
}
7512
7513
// Exercises Mvni with 8-bit immediates in the LSL and MSL shifted forms for
// 16-bit and 32-bit lanes. The operands mirror the shifted cases of
// TEST(neon_modimm_movi); the expected values are their bitwise inverses
// within the written lanes.
TEST(neon_modimm_mvni) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // LSL, 16-bit lanes: shifts of 0 and 8.
  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  // LSL, 32-bit lanes, 64-bit vectors: shifts of 0, 8, 16 and 24.
  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  // LSL, 32-bit lanes, 128-bit vectors: shifts of 0, 8, 16 and 24.
  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  // MSL forms.
  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements (V4H, V2S) are expected to leave the upper 64
    // bits zero.
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

    ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

    ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
    ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);
  }
}
7565
7566
// Exercises Orr (vector, immediate) with 8-bit immediates of 0x00 and 0xff at
// every shift accepted by the 16-bit and 32-bit lane arrangements.
TEST(neon_modimm_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Every destination starts from the same 128-bit pattern so that the effect
  // of each immediate/shift pair can be read directly from the expected
  // values below.
  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  // 16-bit lanes: shifts of 0 and 8.
  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  // 32-bit lanes, 64-bit vectors: shifts of 0, 8, 16 and 24.
  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  // 32-bit lanes, 128-bit vectors: shifts of 0, 8, 16 and 24.
  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    // A zero immediate leaves the lanes unchanged; the 64-bit arrangements
    // (V4H, V2S) are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
    ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

    ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
    ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
    ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);
  }
}
7621
// Loads every (high, low) combination of kHalfValues into a Q register with
// the literal form of Ldr and checks, in generated code, that both 64-bit
// halves match the values that were requested.
//
// x0 counts the mismatching loads and must be zero at the end.
TEST(ldr_literal_values_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  static const uint64_t kHalfValues[] = {0x8000000000000000,
                                         0x7fffffffffffffff,
                                         0x0000000000000000,
                                         0xffffffffffffffff,
                                         0x00ff00ff00ff00ff,
                                         0x1234567890abcdef};
  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
  const Register& ref_low64 = x1;
  const Register& ref_high64 = x2;
  const Register& loaded_low64 = x3;
  const Register& loaded_high64 = x4;
  const VRegister& tgt = q0;

  START();
  __ Mov(x0, 0);

  for (int i = 0; i < card; i++) {
    __ Mov(ref_low64, kHalfValues[i]);
    for (int j = 0; j < card; j++) {
      __ Mov(ref_high64, kHalfValues[j]);
      // The literal is passed as (high64, low64).
      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
      __ Mov(loaded_low64, tgt.V2D(), 0);
      __ Mov(loaded_high64, tgt.V2D(), 1);
      // The flags end up "eq" only if both halves match: the high halves are
      // compared only when the low halves were equal; otherwise the flags are
      // forced to NoFlag, which reads as "ne".
      __ Cmp(loaded_low64, ref_low64);
      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
      // Accumulate failures. A plain Cset would overwrite x0 on every
      // iteration, so only the last combination would reach the assertion
      // below.
      __ Cinc(x0, x0, ne);
    }
  }
  END();

  if (CAN_RUN()) {
    RUN();

    // If one of the values differs, the trace can be used to identify which
    // one.
    ASSERT_EQUAL_64(0, x0);
  }
}
7663
// Checks vector Fmov with floating-point immediates for single-precision
// (V2S, V4S) and half-precision (V4H, V8H) lanes by comparing the raw bit
// patterns replicated into each lane.
TEST(fmov_vec_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Fmov(v0.V2S(), 20.0);
  __ Fmov(v1.V4S(), 1024.0);

  // Half-precision immediates are supplied as raw bit patterns.
  __ Fmov(v2.V4H(), RawbitsToFloat16(0xC500U));
  __ Fmov(v3.V8H(), RawbitsToFloat16(0x4A80U));

  END();
  if (CAN_RUN()) {
    RUN();

    // 0x41A00000 and 0x44800000 are the single-precision encodings of 20.0
    // and 1024.0. The 64-bit arrangements are checked through the D view only.
    ASSERT_EQUAL_64(0x41A0000041A00000, d0);
    ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1);
    ASSERT_EQUAL_64(0xC500C500C500C500, d2);
    ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3);
  }
}
7687
7688 // TODO: add arbitrary values once load literal to Q registers is supported.
// Checks the vector Fmov macro with single- and double-precision immediates:
// values that fit the instruction's immediate encoding, values that do not,
// zero and positive infinity. Only the final register contents are checked.
TEST(neon_modimm_fmov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Immediates which can be encoded in the instructions.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Immediates which cannot be encoded in the instructions.
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  if (CAN_RUN()) {
    RUN();

    // Expected lane patterns, derived from the same constants that were
    // passed to Fmov above. The "1S"/"1D" values hold one lane; the "2S"
    // values hold two single-precision lanes packed into 64 bits.
    const uint64_t kOne1S = FloatToRawbits(kOne);
    const uint64_t kOne2S = (kOne1S << 32) | kOne1S;
    const uint64_t kPointFive1S = FloatToRawbits(kPointFive);
    const uint64_t kPointFive2S = (kPointFive1S << 32) | kPointFive1S;
    const uint64_t kMinusThirteen1D = DoubleToRawbits(kMinusThirteen);
    const uint64_t kNonImmFP321S = FloatToRawbits(kNonImmFP32);
    const uint64_t kNonImmFP322S = (kNonImmFP321S << 32) | kNonImmFP321S;
    const uint64_t kNonImmFP641D = DoubleToRawbits(kNonImmFP64);
    const uint64_t kFP32Inf1S = FloatToRawbits(kFP32PositiveInfinity);
    const uint64_t kFP32Inf2S = (kFP32Inf1S << 32) | kFP32Inf1S;
    const uint64_t kFP64Inf1D = DoubleToRawbits(kFP64PositiveInfinity);

    // The V2S forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, kOne2S, q11);
    ASSERT_EQUAL_128(kPointFive2S, kPointFive2S, q12);
    ASSERT_EQUAL_128(kMinusThirteen1D, kMinusThirteen1D, q22);
    ASSERT_EQUAL_128(0x0, kNonImmFP322S, q13);
    ASSERT_EQUAL_128(kNonImmFP322S, kNonImmFP322S, q14);
    ASSERT_EQUAL_128(kNonImmFP641D, kNonImmFP641D, q23);
    ASSERT_EQUAL_128(0x0, 0x0, q1);
    ASSERT_EQUAL_128(0x0, 0x0, q2);
    ASSERT_EQUAL_128(0x0, 0x0, q3);
    ASSERT_EQUAL_128(0x0, kFP32Inf2S, q4);
    ASSERT_EQUAL_128(kFP32Inf2S, kFP32Inf2S, q5);
    ASSERT_EQUAL_128(kFP64Inf1D, kFP64Inf1D, q6);
  }
}
7744
7745
// Exercises the NEON permute instructions Trn1/Trn2, Zip1/Zip2 and Uzp1/Uzp2
// on 16B vectors. The source bytes are all distinct, so each expected value
// shows exactly which source byte lands in which destination byte.
TEST(neon_perm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 holds bytes 0x00..0x0f and v1 holds bytes 0x10..0x1f.
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
    ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
    ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
    ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
    ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
    ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);
  }
}
7774
7775
// Exercises Dup (element): replicating one lane of a vector register into a
// 128-bit vector, a 64-bit vector or a scalar, including cases where the
// destination is the same register as the source.
TEST(neon_copy_dup_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
  __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);

  // 128-bit destinations.
  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  // 64-bit destinations.
  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  // Scalar destinations.
  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  // Destination aliases the source register.
  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
    ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

    // 64-bit destinations are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
    ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

    // Scalar destinations are expected to hold the element alone, with every
    // higher bit zero.
    ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
    ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
    ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
    ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);

    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
    ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
    ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
  }
}
7832
7833
// Exercises Dup (general): replicating a general-purpose register, or the
// zero register, into every lane of a vector.
TEST(neon_copy_dup_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Mov(x0, 0x0011223344556677);

  // 128-bit destinations. The expected values show that only the low
  // lane-sized part of the source register is replicated.
  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  // 64-bit destinations.
  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  // Zero register as the source.
  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
    ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

    // 64-bit destinations are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
    ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
    ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

    ASSERT_EQUAL_128(0, 0, q2);
    ASSERT_EQUAL_128(0, 0, q3);
    ASSERT_EQUAL_128(0, 0, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7875
7876
// Exercises Ins (element): copying one lane of a source vector into one lane
// of a destination vector while leaving the other destination lanes intact.
// Arguments are (destination, destination lane, source, source lane).
TEST(neon_copy_ins_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources (v0, v1) and destinations with distinct initial contents.
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  // Registers used as both source and destination; upper 64 bits are zero.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  // Destination aliases the source register.
  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
    ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7920
7921
// Exercises the element-to-element form of Mov. The operands and expected
// results are the same as those used for Ins in TEST(neon_copy_ins_element).
// Arguments are (destination, destination lane, source, source lane).
TEST(neon_copy_mov_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Sources (v0, v1) and destinations with distinct initial contents.
  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  // Registers used as both source and destination; upper 64 bits are zero.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
  __ Mov(v17.V8H(), 0, v1.V8H(), 7);
  __ Mov(v18.V4S(), 3, v1.V4S(), 0);
  __ Mov(v19.V2D(), 1, v0.V2D(), 0);

  // Destination aliases the source register.
  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
  __ Mov(v3.V8H(), 0, v3.V8H(), 7);
  __ Mov(v4.V4S(), 3, v4.V4S(), 0);
  __ Mov(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
    ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7965
7966
// Exercises Smov: moving a B, H or S lane of a vector register into a W or X
// register with sign extension. For each lane size, one selected lane has
// its top bit set and the other does not.
TEST(neon_copy_smov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  // Byte lanes to W: lane 7 is 0xfe (negative), lane 15 is 0x01.
  __ Smov(w0, v0.B(), 7);
  __ Smov(w1, v0.B(), 15);

  // Halfword lanes to W: lane 0 is 0x3210, lane 3 is 0xfedc (negative).
  __ Smov(w2, v0.H(), 0);
  __ Smov(w3, v0.H(), 3);

  // The same byte lanes to X.
  __ Smov(x4, v0.B(), 7);
  __ Smov(x5, v0.B(), 15);

  // The same halfword lanes to X.
  __ Smov(x6, v0.H(), 0);
  __ Smov(x7, v0.H(), 3);

  // Word lanes to X: lane 0 is 0x76543210, lane 1 is 0xfedcba98 (negative).
  __ Smov(x16, v0.S(), 0);
  __ Smov(x17, v0.S(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_32(0xfffffffe, w0);
    ASSERT_EQUAL_32(0x00000001, w1);
    ASSERT_EQUAL_32(0x00003210, w2);
    ASSERT_EQUAL_32(0xfffffedc, w3);
    ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
    ASSERT_EQUAL_64(0x0000000000000001, x5);
    ASSERT_EQUAL_64(0x0000000000003210, x6);
    ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
    ASSERT_EQUAL_64(0x0000000076543210, x16);
    ASSERT_EQUAL_64(0xfffffffffedcba98, x17);
  }
}
8006
8007
// Exercises Umov for B, H, S and D lanes, and the Mov form that takes the same
// vector-lane source for S and D lanes. The lane value is expected to arrive
// in the general-purpose register without sign extension.
TEST(neon_copy_umov_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  __ Umov(w0, v0.B(), 15);
  __ Umov(w1, v0.H(), 0);
  __ Umov(w2, v0.S(), 3);
  __ Umov(x3, v0.D(), 1);

  // Mov with the same operands as the S and D Umov cases above; the same
  // results are expected.
  __ Mov(w4, v0.S(), 3);
  __ Mov(x5, v0.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_32(0x00000001, w0);
    ASSERT_EQUAL_32(0x00003210, w1);
    ASSERT_EQUAL_32(0x01234567, w2);
    ASSERT_EQUAL_64(0x0123456789abcdef, x3);
    ASSERT_EQUAL_32(0x01234567, w4);
    ASSERT_EQUAL_64(0x0123456789abcdef, x5);
  }
}
8036
8037
// Exercises Ins (general): inserting a general-purpose register into a single
// lane of a vector register while leaving the other lanes intact.
// Arguments are (destination, destination lane, source register).
TEST(neon_copy_ins_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Mov(x0, 0x0011223344556677);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  // Destinations whose upper 64 bits start as zero.
  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  // The expected values show that only the low lane-sized part of w0/x0 is
  // inserted.
  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
    ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
    ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
    ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
  }
}
8080
8081
// Exercises Ext on 16B and 8B vectors with the smallest, largest and a
// mid-range byte index, including cases where the destination aliases one or
// both sources.
TEST(neon_extract_ext) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);

  // 128-bit vectors. The aliasing cases overwrite v1 and then v0, so they
  // must come after the instructions that read the original values.
  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Dest is same as one Src
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All reg are the same

  // 64-bit vectors, following the same pattern with v2 and v3.
  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // Dest is same as one Src
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // All reg are the same

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
    ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
    ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
    ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);

    // The 8B forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
    ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
    ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
    ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
  }
}
8119
8120
// Exercises Uaddl: adding the lanes in the lower 64 bits of two vectors into
// lanes of twice the width (8B->8H, 4H->4S, 2S->2D). Several inputs hold the
// maximum lane value so that the carry into the widened lane is checked.
TEST(neon_3different_uaddl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte operands: v0 is the destination, v1 + v2 are the sources.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
  __ Movi(v2.V2D(), 0, 0x0101010101010101);

  // Halfword operands: v3 and v4 are destinations; v5 and v6 are each added
  // to v7.
  __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v5.V2D(), 0, 0x0000000180008001);
  __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
  __ Movi(v7.V2D(), 0, 0x0001000100010001);

  // Word operands: v16 and v17 are destinations; v18 and v19 are each added
  // to v20.
  __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v18.V2D(), 0, 0x0000000000000001);
  __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
  __ Movi(v20.V2D(), 0, 0x0000000100000001);

  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());

  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());

  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
    ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
    ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
    ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
    ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
  }
}
8163
8164
// Exercises the narrowing add/subtract instructions Addhn, Raddhn, Subhn and
// Rsubhn together with their "2" variants, using 8H sources and 8B/16B
// destinations.
TEST(neon_3different_addhn_subhn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands. v4 is initialised but is not read by any instruction in
  // this test.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Each destination is written twice: the plain form produces the lower 64
  // bits and the "2" form then fills the upper 64 bits, so the order of each
  // pair matters.
  __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
  __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
  __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
  __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
    ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
  }
}
8196
// Exercises the scalar NEON operations that are used here with D registers
// only: Add, Sub, Ushl, Sshl, Ushr, Sshr and Shl. The sources have non-zero
// upper 64 bits, and every result is expected to have zero upper 64 bits.
TEST(neon_d_only_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
  // Shift amounts for Ushl/Sshl: d3 holds 2 and d4 holds -2. The expected
  // values show the negative amount producing a right shift.
  __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
  __ Movi(v4.V2D(), 0xffffffffffffffff, -2);

  __ Add(d16, d0, d0);
  __ Add(d17, d1, d1);
  __ Add(d18, d2, d2);
  __ Sub(d19, d0, d0);
  __ Sub(d20, d0, d1);
  __ Sub(d21, d1, d0);
  __ Ushl(d22, d0, d3);
  __ Ushl(d23, d0, d4);
  __ Sshl(d24, d0, d3);
  __ Sshl(d25, d0, d4);
  __ Ushr(d26, d0, 1);
  __ Sshr(d27, d0, 3);
  __ Shl(d28, d0, 0);
  __ Shl(d29, d0, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
    ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
    ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
    ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
    ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
    ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
    ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);
  }
}
8244
8245
// Exercises scalar Sqshl with an immediate shift of 1 for B, H, S and D
// element sizes. For each size three inputs are used: the largest positive
// value, the most negative value, and 1.
TEST(neon_sqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshl(b16, b0, 1);
  __ Sqshl(b17, b1, 1);
  __ Sqshl(b18, b2, 1);

  // Halfword-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshl(h19, h0, 1);
  __ Sqshl(h20, h1, 1);
  __ Sqshl(h21, h2, 1);

  // Word-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshl(s22, s0, 1);
  __ Sqshl(s23, s1, 1);
  __ Sqshl(s24, s2, 1);

  // Doubleword-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshl(d25, d0, 1);
  __ Sqshl(d26, d1, 1);
  __ Sqshl(d27, d2, 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // In every size the two extreme inputs are expected back unchanged
    // (signed saturation), and 1 becomes 2.

    // B results.
    ASSERT_EQUAL_128(0, 0x7f, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    // H results.
    ASSERT_EQUAL_128(0, 0x7fff, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    // S results.
    ASSERT_EQUAL_128(0, 0x7fffffff, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    // D results.
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8301
8302
// Exercises scalar Uqshl with an immediate shift of 1 for B, H, S and D
// element sizes. For each size three inputs are used: a value with only the
// top bit clear, a value with only the top bit set, and 1.
TEST(neon_uqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Uqshl(b16, b0, 1);
  __ Uqshl(b17, b1, 1);
  __ Uqshl(b18, b2, 1);

  // Halfword-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Uqshl(h19, h0, 1);
  __ Uqshl(h20, h1, 1);
  __ Uqshl(h21, h2, 1);

  // Word-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Uqshl(s22, s0, 1);
  __ Uqshl(s23, s1, 1);
  __ Uqshl(s24, s2, 1);

  // Doubleword-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Uqshl(d25, d0, 1);
  __ Uqshl(d26, d1, 1);
  __ Uqshl(d27, d2, 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // In every size the first input doubles without overflow, the second is
    // expected to saturate to all-ones, and 1 becomes 2.

    // B results.
    ASSERT_EQUAL_128(0, 0xfe, q16);
    ASSERT_EQUAL_128(0, 0xff, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    // H results.
    ASSERT_EQUAL_128(0, 0xfffe, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    // S results.
    ASSERT_EQUAL_128(0, 0xfffffffe, q22);
    ASSERT_EQUAL_128(0, 0xffffffff, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    // D results.
    ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8358
8359
// Exercises scalar Sqshlu with an immediate shift of 2 for B, H, S and D
// element sizes. For each size three inputs are used: the largest positive
// signed value, the most negative signed value, and 1.
TEST(neon_sqshlu_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshlu(b16, b0, 2);
  __ Sqshlu(b17, b1, 2);
  __ Sqshlu(b18, b2, 2);

  // Halfword-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshlu(h19, h0, 2);
  __ Sqshlu(h20, h1, 2);
  __ Sqshlu(h21, h2, 2);

  // Word-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshlu(s22, s0, 2);
  __ Sqshlu(s23, s1, 2);
  __ Sqshlu(s24, s2, 2);

  // Doubleword-sized inputs.
  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshlu(d25, d0, 2);
  __ Sqshlu(d26, d1, 2);
  __ Sqshlu(d27, d2, 2);

  END();

  if (CAN_RUN()) {
    RUN();

    // In every size the positive extreme is expected to saturate to the
    // unsigned maximum, the negative extreme to zero, and 1 becomes 4.

    // B results.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x00, q17);
    ASSERT_EQUAL_128(0, 0x04, q18);

    // H results.
    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0x0000, q20);
    ASSERT_EQUAL_128(0, 0x0004, q21);

    // S results.
    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0x00000000, q23);
    ASSERT_EQUAL_128(0, 0x00000004, q24);

    // D results.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000004, q27);
  }
}
8415
8416
// Exercises Sshll and Sshll2 (signed widening shift left) for the B->H, H->S
// and S->D widenings, using shift amounts of 4, 8 and 16 respectively.
TEST(neon_sshll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source vector per source lane size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Sshll(v16.V8H(), v0.V8B(), 4);
  __ Sshll2(v17.V8H(), v0.V16B(), 4);

  // Halfwords to words.
  __ Sshll(v18.V4S(), v1.V4H(), 8);
  __ Sshll2(v19.V4S(), v1.V8H(), 8);

  // Words to doublewords.
  __ Sshll(v20.V2D(), v2.V2S(), 16);
  __ Sshll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // Even-numbered results come from Sshll, odd-numbered from Sshll2.
    ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
    ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
    ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
  }
}
8448
// Exercises Shll and Shll2 for the B->H, H->S and S->D widenings. The shift
// amount passed in each case equals the source lane width (8, 16, 32).
TEST(neon_shll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source vector per source lane size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Shll(v16.V8H(), v0.V8B(), 8);
  __ Shll2(v17.V8H(), v0.V16B(), 8);

  // Halfwords to words.
  __ Shll(v18.V4S(), v1.V4H(), 16);
  __ Shll2(v19.V4S(), v1.V8H(), 16);

  // Words to doublewords.
  __ Shll(v20.V2D(), v2.V2S(), 32);
  __ Shll2(v21.V2D(), v2.V4S(), 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Even-numbered results come from Shll, odd-numbered from Shll2. Each
    // source lane is expected in the top half of its widened lane.
    ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
    ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
    ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
    ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
    ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
  }
}
8480
// Exercises Ushll and Ushll2 (unsigned widening shift left) for the B->H,
// H->S and S->D widenings, using shift amounts of 4, 8 and 16 respectively.
TEST(neon_ushll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source vector per source lane size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Ushll(v16.V8H(), v0.V8B(), 4);
  __ Ushll2(v17.V8H(), v0.V16B(), 4);

  // Halfwords to words.
  __ Ushll(v18.V4S(), v1.V4H(), 8);
  __ Ushll2(v19.V4S(), v1.V8H(), 8);

  // Words to doublewords.
  __ Ushll(v20.V2D(), v2.V2S(), 16);
  __ Ushll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // Even-numbered results come from Ushll, odd-numbered from Ushll2.
    ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
    ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
    ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
  }
}
8512
8513
// Exercises Sxtl and Sxtl2 (signed lane extension) for the B->H, H->S and
// S->D widenings.
TEST(neon_sxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source vector per source lane size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Sxtl(v16.V8H(), v0.V8B());
  __ Sxtl2(v17.V8H(), v0.V16B());

  // Halfwords to words.
  __ Sxtl(v18.V4S(), v1.V4H());
  __ Sxtl2(v19.V4S(), v1.V8H());

  // Words to doublewords.
  __ Sxtl(v20.V2D(), v2.V2S());
  __ Sxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Even-numbered results come from Sxtl, odd-numbered from Sxtl2.
    ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
    ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
  }
}
8545
8546
// Exercises Uxtl and Uxtl2 (unsigned lane extension) for the B->H, H->S and
// S->D widenings.
TEST(neon_uxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source vector per source lane size.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Uxtl(v16.V8H(), v0.V8B());
  __ Uxtl2(v17.V8H(), v0.V16B());

  // Halfwords to words.
  __ Uxtl(v18.V4S(), v1.V4H());
  __ Uxtl2(v19.V4S(), v1.V8H());

  // Words to doublewords.
  __ Uxtl(v20.V2D(), v2.V2S());
  __ Uxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Even-numbered results come from Uxtl, odd-numbered from Uxtl2.
    ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
    ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
  }
}
8578
8579
// Exercises Ssra (signed shift right and accumulate) on every vector
// arrangement plus the scalar D form. Each destination is first loaded with a
// copy of the matching source so the accumulate step has a known start value.
TEST(neon_ssra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Initial accumulator values.
  // NOTE(review): v25 is initialised here but is neither written by an Ssra
  // nor checked below.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes.
  __ Ssra(v16.V8B(), v0.V8B(), 4);
  __ Ssra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes.
  __ Ssra(v18.V4H(), v1.V4H(), 8);
  __ Ssra(v19.V8H(), v1.V8H(), 8);

  // Word lanes.
  __ Ssra(v20.V2S(), v2.V2S(), 16);
  __ Ssra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes.
  __ Ssra(v22.V2D(), v3.V2D(), 32);
  __ Ssra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Ssra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements (q16, q18, q20) and the scalar form (q24) are
    // expected to leave the upper half of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
    ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
    ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8632
// Exercises Srsra (signed rounding shift right and accumulate) on every
// vector arrangement plus the scalar D form. Each destination is first loaded
// with a copy of the matching source so the accumulate step has a known start
// value.
TEST(neon_srsra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Initial accumulator values.
  // NOTE(review): v25 is initialised here but is neither written by an Srsra
  // nor checked below.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes.
  __ Srsra(v16.V8B(), v0.V8B(), 4);
  __ Srsra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes.
  __ Srsra(v18.V4H(), v1.V4H(), 8);
  __ Srsra(v19.V8H(), v1.V8H(), 8);

  // Word lanes.
  __ Srsra(v20.V2S(), v2.V2S(), 16);
  __ Srsra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes.
  __ Srsra(v22.V2D(), v3.V2D(), 32);
  __ Srsra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Srsra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements (q16, q18, q20) and the scalar form (q24) are
    // expected to leave the upper half of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
    ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
    ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8685
// Exercises Usra (unsigned shift right and accumulate) on every vector
// arrangement plus the scalar D form. Each destination is first loaded with a
// copy of the matching source so the accumulate step has a known start value.
TEST(neon_usra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Initial accumulator values.
  // NOTE(review): v25 is initialised here but is neither written by a Usra
  // nor checked below.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes.
  __ Usra(v16.V8B(), v0.V8B(), 4);
  __ Usra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes.
  __ Usra(v18.V4H(), v1.V4H(), 8);
  __ Usra(v19.V8H(), v1.V8H(), 8);

  // Word lanes.
  __ Usra(v20.V2S(), v2.V2S(), 16);
  __ Usra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes.
  __ Usra(v22.V2D(), v3.V2D(), 32);
  __ Usra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Usra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements (q16, q18, q20) and the scalar form (q24) are
    // expected to leave the upper half of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
    ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
    ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8738
// Exercises Ursra (unsigned rounding shift right and accumulate) on every
// vector arrangement plus the scalar D form. Each destination is first loaded
// with a copy of the matching source so the accumulate step has a known start
// value.
TEST(neon_ursra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source operands.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Initial accumulator values.
  // NOTE(review): v25 is initialised here but is neither written by a Ursra
  // nor checked below.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes.
  __ Ursra(v16.V8B(), v0.V8B(), 4);
  __ Ursra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes.
  __ Ursra(v18.V4H(), v1.V4H(), 8);
  __ Ursra(v19.V8H(), v1.V8H(), 8);

  // Word lanes.
  __ Ursra(v20.V2S(), v2.V2S(), 16);
  __ Ursra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes.
  __ Ursra(v22.V2D(), v3.V2D(), 32);
  __ Ursra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Ursra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements (q16, q18, q20) and the scalar form (q24) are
    // expected to leave the upper half of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
    ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
    ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8791
8792
// Exercises the register-shift scalar form of Uqshl for B, H, S and D sizes.
// Two data operands (v0, v1) are each shifted by two amounts: v2, whose low
// element is 1, and v3, whose low element is all-ones.
TEST(neon_uqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Data operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  // Shift amounts.
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  // B size.
  __ Uqshl(b16, b0, b2);
  __ Uqshl(b17, b0, b3);
  __ Uqshl(b18, b1, b2);
  __ Uqshl(b19, b1, b3);

  // H size.
  __ Uqshl(h20, h0, h2);
  __ Uqshl(h21, h0, h3);
  __ Uqshl(h22, h1, h2);
  __ Uqshl(h23, h1, h3);

  // S size.
  __ Uqshl(s24, s0, s2);
  __ Uqshl(s25, s0, s3);
  __ Uqshl(s26, s1, s2);
  __ Uqshl(s27, s1, s3);

  // D size.
  __ Uqshl(d28, d0, d2);
  __ Uqshl(d29, d0, d3);
  __ Uqshl(d30, d1, d2);
  __ Uqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // Per size, in order: v0 by v2 (expected all-ones), v0 by v3, v1 by v2,
    // v1 by v3.

    // B results.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x3f, q19);

    // H results.
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fbf, q23);

    // S results.
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);

    // D results.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);
  }
}
8843
8844
// Exercises the register-shift scalar form of Sqshl for B, H, S and D sizes.
// Two data operands (v0, v1) are each shifted by two amounts: v2, whose low
// element is 1, and v3, whose low element is all-ones.
TEST(neon_sqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Data operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  // Shift amounts.
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  // B size.
  __ Sqshl(b16, b0, b2);
  __ Sqshl(b17, b0, b3);
  __ Sqshl(b18, b1, b2);
  __ Sqshl(b19, b1, b3);

  // H size.
  __ Sqshl(h20, h0, h2);
  __ Sqshl(h21, h0, h3);
  __ Sqshl(h22, h1, h2);
  __ Sqshl(h23, h1, h3);

  // S size.
  __ Sqshl(s24, s0, s2);
  __ Sqshl(s25, s0, s3);
  __ Sqshl(s26, s1, s2);
  __ Sqshl(s27, s1, s3);

  // D size.
  __ Sqshl(d28, d0, d2);
  __ Sqshl(d29, d0, d3);
  __ Sqshl(d30, d1, d2);
  __ Sqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // Per size, in order: v0 by v2 (expected signed minimum), v0 by v3,
    // v1 by v2 (expected signed maximum), v1 by v3.

    // B results.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xdf, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);

    // H results.
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfdf, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);

    // S results.
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);

    // D results.
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8895
8896
// Exercises the scalar D-register form of Urshl. Two data operands (v0, v1)
// are each shifted by two amounts: v2, whose low element is 1, and v3, whose
// low element is all-ones.
TEST(neon_urshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Data operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  // Shift amounts.
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Urshl(d28, d0, d2);
  __ Urshl(d29, d0, d3);
  __ Urshl(d30, d1, d2);
  __ Urshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // In order: v0 by v2, v0 by v3, v1 by v2, v1 by v3.
    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
8923
8924
// Exercises the scalar D-register form of Srshl. Two data operands (v0, v1)
// are each shifted by two amounts: v2, whose low element is 1, and v3, whose
// low element is all-ones.
TEST(neon_srshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Data operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  // Shift amounts.
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Srshl(d28, d0, d2);
  __ Srshl(d29, d0, d3);
  __ Srshl(d30, d1, d2);
  __ Srshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // In order: v0 by v2, v0 by v3, v1 by v2, v1 by v3.
    ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8951
8952
// Exercises the register-shift scalar form of Uqrshl for B, H, S and D sizes.
// Two data operands (v0, v1) are each shifted by two amounts: v2, whose low
// element is 1, and v3, whose low element is all-ones.
TEST(neon_uqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Data operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  // Shift amounts.
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  // B size.
  __ Uqrshl(b16, b0, b2);
  __ Uqrshl(b17, b0, b3);
  __ Uqrshl(b18, b1, b2);
  __ Uqrshl(b19, b1, b3);

  // H size.
  __ Uqrshl(h20, h0, h2);
  __ Uqrshl(h21, h0, h3);
  __ Uqrshl(h22, h1, h2);
  __ Uqrshl(h23, h1, h3);

  // S size.
  __ Uqrshl(s24, s0, s2);
  __ Uqrshl(s25, s0, s3);
  __ Uqrshl(s26, s1, s2);
  __ Uqrshl(s27, s1, s3);

  // D size.
  __ Uqrshl(d28, d0, d2);
  __ Uqrshl(d29, d0, d3);
  __ Uqrshl(d30, d1, d2);
  __ Uqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // Per size, in order: v0 by v2 (expected all-ones), v0 by v3, v1 by v2,
    // v1 by v3.

    // B results.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x40, q19);

    // H results.
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fc0, q23);

    // S results.
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);

    // D results.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
9003
9004
// Exercises the register-shift scalar form of Sqrshl for B, H, S and D sizes.
// Two data operands (v0, v1) are each shifted by two amounts: v2, whose low
// element is 1, and v3, whose low element is all-ones.
TEST(neon_sqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Data operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  // Shift amounts.
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  // B size.
  __ Sqrshl(b16, b0, b2);
  __ Sqrshl(b17, b0, b3);
  __ Sqrshl(b18, b1, b2);
  __ Sqrshl(b19, b1, b3);

  // H size.
  __ Sqrshl(h20, h0, h2);
  __ Sqrshl(h21, h0, h3);
  __ Sqrshl(h22, h1, h2);
  __ Sqrshl(h23, h1, h3);

  // S size.
  __ Sqrshl(s24, s0, s2);
  __ Sqrshl(s25, s0, s3);
  __ Sqrshl(s26, s1, s2);
  __ Sqrshl(s27, s1, s3);

  // D size.
  __ Sqrshl(d28, d0, d2);
  __ Sqrshl(d29, d0, d3);
  __ Sqrshl(d30, d1, d2);
  __ Sqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // Per size, in order: v0 by v2 (expected signed minimum), v0 by v3,
    // v1 by v2 (expected signed maximum), v1 by v3.

    // B results.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xe0, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);

    // H results.
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfe0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);

    // S results.
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);

    // D results.
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
9055
9056
// Exercises scalar Uqadd for B, H, S and D sizes. Each of three operands is
// added to itself at every size.
TEST(neon_uqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  // B size.
  __ Uqadd(b16, b0, b0);
  __ Uqadd(b17, b1, b1);
  __ Uqadd(b18, b2, b2);

  // H size.
  __ Uqadd(h19, h0, h0);
  __ Uqadd(h20, h1, h1);
  __ Uqadd(h21, h2, h2);

  // S size.
  __ Uqadd(s22, s0, s0);
  __ Uqadd(s23, s1, s1);
  __ Uqadd(s24, s2, s2);

  // D size.
  __ Uqadd(d25, d0, d0);
  __ Uqadd(d26, d1, d1);
  __ Uqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    // Per size, in order: v0 + v0 (expected all-ones), v1 + v1, v2 + v2.

    // B results.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0xfe, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);

    // H results.
    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0xfefe, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);

    // S results.
    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0xfffefefe, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);

    // D results.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9098
9099
// Exercises scalar Sqadd for B, H, S and D sizes. Each of three operands is
// added to itself at every size.
TEST(neon_sqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  // B size.
  __ Sqadd(b16, b0, b0);
  __ Sqadd(b17, b1, b1);
  __ Sqadd(b18, b2, b2);

  // H size.
  __ Sqadd(h19, h0, h0);
  __ Sqadd(h20, h1, h1);
  __ Sqadd(h21, h2, h2);

  // S size.
  __ Sqadd(s22, s0, s0);
  __ Sqadd(s23, s1, s1);
  __ Sqadd(s24, s2, s2);

  // D size.
  __ Sqadd(d25, d0, d0);
  __ Sqadd(d26, d1, d1);
  __ Sqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    // Per size, in order: v0 + v0 (expected signed minimum), v1 + v1
    // (expected signed maximum), v2 + v2.

    // B results.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0x7f, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);

    // H results.
    ASSERT_EQUAL_128(0, 0x8000, q19);
    ASSERT_EQUAL_128(0, 0x7fff, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);

    // S results.
    ASSERT_EQUAL_128(0, 0x80000000, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);

    // D results.
    ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9141
9142
// Exercises scalar Uqsub for B, H, S and D sizes. At every size three
// subtractions are performed: v0 - v0, v0 - v1 and v1 - v0.
TEST(neon_uqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);

  // B size.
  __ Uqsub(b16, b0, b0);
  __ Uqsub(b17, b0, b1);
  __ Uqsub(b18, b1, b0);

  // H size.
  __ Uqsub(h19, h0, h0);
  __ Uqsub(h20, h0, h1);
  __ Uqsub(h21, h1, h0);

  // S size.
  __ Uqsub(s22, s0, s0);
  __ Uqsub(s23, s0, s1);
  __ Uqsub(s24, s1, s0);

  // D size.
  __ Uqsub(d25, d0, d0);
  __ Uqsub(d26, d0, d1);
  __ Uqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Per size: v0 - v0 is zero, v0 - v1 is the plain difference, and
    // v1 - v0 is expected to be zero as well.

    // B results.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x71, q17);
    ASSERT_EQUAL_128(0, 0, q18);

    // H results.
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7171, q20);
    ASSERT_EQUAL_128(0, 0, q21);

    // S results.
    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x70017171, q23);
    ASSERT_EQUAL_128(0, 0, q24);

    // D results.
    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
    ASSERT_EQUAL_128(0, 0, q27);
  }
}
9186
9187
// Exercises scalar Sqsub for B, H, S and D sizes. At every size three
// subtractions are performed: v0 - v0, v0 - v1 and v1 - v0.
TEST(neon_sqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Operands.
  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);

  // B size.
  __ Sqsub(b16, b0, b0);
  __ Sqsub(b17, b0, b1);
  __ Sqsub(b18, b1, b0);

  // H size.
  __ Sqsub(h19, h0, h0);
  __ Sqsub(h20, h0, h1);
  __ Sqsub(h21, h1, h0);

  // S size.
  __ Sqsub(s22, s0, s0);
  __ Sqsub(s23, s0, s1);
  __ Sqsub(s24, s1, s0);

  // D size.
  __ Sqsub(d25, d0, d0);
  __ Sqsub(d26, d0, d1);
  __ Sqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Per size: v0 - v0 is zero, v0 - v1 is expected to be the signed
    // minimum, and v1 - v0 the signed maximum.

    // B results.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);

    // H results.
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x7fff, q21);

    // S results.
    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x7fffffff, q24);

    // D results.
    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
9231
9232
// Exercises vector Fmla and Fmls with the 2S, 4S and 2D arrangements. Every
// destination starts as a copy of v0 and is then combined with the v1 and v2
// operands.
TEST(neon_fmla_fmls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  // v0 provides the initial accumulator value; v1 and v2 are the operands.
  __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
  __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
  __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);

  // Seed all six destinations with v0.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V16B(), v0.V16B());
  __ Mov(v18.V16B(), v0.V16B());
  __ Mov(v19.V16B(), v0.V16B());
  __ Mov(v20.V16B(), v0.V16B());
  __ Mov(v21.V16B(), v0.V16B());

  // Fmla into v16-v18.
  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());

  // Fmls into v19-v21.
  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // Fmla results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
    ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
    ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);

    // Fmls results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
    ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
    ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);
  }
}
9266
9267
// Vector FMLA on half-precision lanes (8H and 4H forms), covering a finite
// accumulation, an infinite product and NaN accumulators.
TEST(neon_fmla_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 bit patterns, each replicated across four H lanes (64 bits).
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t forty_five_x4 = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  const uint64_t qnan_x4 = 0x7e007e007e007e00;        // quiet NaN
  const uint64_t snan_x4 = 0x7c017c017c017c01;        // signalling NaN
  const uint64_t zero_x4 = 0x0000000000000000;        // +0.0
  // Expected result patterns.
  const uint64_t ninety_two_x4 = 0x55c055c055c055c0;  // 2.0 + (2.0 * 45.0)
  // snan_x4 with the quiet bit set.
  const uint64_t quieted_snan_x4 = 0x7e017e017e017e01;

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), forty_five_x4, forty_five_x4);
  __ Movi(v2.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v3.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v4.V2D(), qnan_x4, qnan_x4);
  __ Movi(v5.V2D(), snan_x4, snan_x4);
  __ Movi(v6.V2D(), zero_x4, zero_x4);
  // Accumulators for the 8H forms: 2.0, 2.0, quiet NaN, signalling NaN.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  // The same accumulators again for the 4H forms.
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmla(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmla(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmla(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmla(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmla(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmla(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmla(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmla(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H forms: every lane of the Q register holds the result.
    ASSERT_EQUAL_128(ninety_two_x4, ninety_two_x4, v16);
    // 2.0 + (+Inf * -Inf) = -Inf
    ASSERT_EQUAL_128(neg_inf_x4, neg_inf_x4, v17);
    ASSERT_EQUAL_128(qnan_x4, qnan_x4, v18);
    ASSERT_EQUAL_128(quieted_snan_x4, quieted_snan_x4, v19);
    // 4H forms: same values in the low half, zero expected in the high half.
    ASSERT_EQUAL_128(0, ninety_two_x4, v20);
    ASSERT_EQUAL_128(0, neg_inf_x4, v21);
    ASSERT_EQUAL_128(0, qnan_x4, v22);
    ASSERT_EQUAL_128(0, quieted_snan_x4, v23);
  }
}
9313
9314
// Vector FMLS on half-precision lanes (8H and 4H forms). Uses the same inputs
// as the FMLA test, but the product is subtracted from the accumulator.
TEST(neon_fmls_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 bit patterns, each replicated across four H lanes (64 bits).
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t forty_five_x4 = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  const uint64_t qnan_x4 = 0x7e007e007e007e00;        // quiet NaN
  const uint64_t snan_x4 = 0x7c017c017c017c01;        // signalling NaN
  const uint64_t zero_x4 = 0x0000000000000000;        // +0.0
  // Expected result patterns.
  const uint64_t neg_88_x4 = 0xd580d580d580d580;  // 2.0 - (2.0 * 45.0)
  // snan_x4 with the quiet bit set.
  const uint64_t quieted_snan_x4 = 0x7e017e017e017e01;

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), forty_five_x4, forty_five_x4);
  __ Movi(v2.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v3.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v4.V2D(), qnan_x4, qnan_x4);
  __ Movi(v5.V2D(), snan_x4, snan_x4);
  __ Movi(v6.V2D(), zero_x4, zero_x4);
  // Accumulators for the 8H forms: 2.0, 2.0, quiet NaN, signalling NaN.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  // The same accumulators again for the 4H forms.
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmls(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmls(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmls(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmls(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmls(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmls(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmls(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmls(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H forms: every lane of the Q register holds the result.
    ASSERT_EQUAL_128(neg_88_x4, neg_88_x4, v16);
    // 2.0 - (+Inf * -Inf) = +Inf
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v17);
    ASSERT_EQUAL_128(qnan_x4, qnan_x4, v18);
    ASSERT_EQUAL_128(quieted_snan_x4, quieted_snan_x4, v19);
    // 4H forms: same values in the low half, zero expected in the high half.
    ASSERT_EQUAL_128(0, neg_88_x4, v20);
    ASSERT_EQUAL_128(0, pos_inf_x4, v21);
    ASSERT_EQUAL_128(0, qnan_x4, v22);
    ASSERT_EQUAL_128(0, quieted_snan_x4, v23);
  }
}
9360
9361
TEST(neon_fhm) {
  // Test basic operation of fmlal{2} and fmlsl{2}. The simulator tests have
  // more comprehensive input sets.
  //
  // The H-lane products of v30 and v31 are accumulated into S-lane
  // accumulators: Fmlal/Fmlsl take the low H lanes ([0]-[3]) and
  // Fmlal2/Fmlsl2 the high ones ([4]-[7]), as the expectations below show.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kNEON,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFHM);

  START();
  // Test multiplications:
  //        v30                               v31
  //  [0]   65504 (max normal)          *     65504 (max normal)
  //  [1]   -1                          *     0
  //  [2]   2^-24 (min subnormal)       *     2^-24 (min subnormal)
  //  [3]   -2^-24 (min subnormal)      *     65504 (max normal)
  //  [4]   6.10e-5 (min normal)        *     0.99...
  //  [5]   0                           *     -0
  //  [6]   -0                          *     0
  //  [7]   -Inf                        *     -Inf
  __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
  __ Movi(v31.V8H(), 0xfc00000080003bff, 0x7bff000100007bff);

  // Accumulators for use with Fmlal{2}:
  // v0.S[0] = 384
  // v0.S[1] = -0
  // (The 0xdeadbeef upper lanes are filler; the 2S forms are expected to leave
  // the upper half zero.)
  __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x8000000043c00000);
  // v1.S[0] = -(2^-48 + 2^-71)
  // v1.S[1] = 0
  __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7800001);
  // v2.S[0] = 128
  // v2.S[1] = 0
  // v2.S[2] = 1
  // v2.S[3] = 1
  __ Movi(v2.V4S(), 0x3f8000003f800000, 0x0000000043000000);
  // v3.S[0] = 0
  // v3.S[1] = -0
  // v3.S[2] = -0
  // v3.S[3] = 0
  __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
  // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
  // results are just the negation of the Fmlal{2} results.
  __ Fneg(v4.V4S(), v0.V4S());
  __ Fneg(v5.V4S(), v1.V4S());
  __ Fneg(v6.V4S(), v2.V4S());
  __ Fneg(v7.V4S(), v3.V4S());

  __ Fmlal(v0.V2S(), v30.V2H(), v31.V2H());
  __ Fmlal2(v1.V2S(), v30.V2H(), v31.V2H());
  __ Fmlal(v2.V4S(), v30.V4H(), v31.V4H());
  __ Fmlal2(v3.V4S(), v30.V4H(), v31.V4H());

  __ Fmlsl(v4.V2S(), v30.V2H(), v31.V2H());
  __ Fmlsl2(v5.V2S(), v30.V2H(), v31.V2H());
  __ Fmlsl(v6.V4S(), v30.V4H(), v31.V4H());
  __ Fmlsl2(v7.V4S(), v30.V4H(), v31.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Fmlal(2S)
    // v0.S[0] = 384 + (65504 * 65504) = 4290774528 (rounded from 4290774400)
    // v0.S[1] = -0 + (-1 * 0) = -0
    ASSERT_EQUAL_128(0x0000000000000000, 0x800000004f7fc006, v0);
    // Fmlal2(2S)
    // v1.S[0] = -(2^-48 + 2^-71) + (2^-24 * 2^-24) = -2^-71
    // v1.S[1] = 0 + (-2^-24 * 65504) = -0.003904...
    ASSERT_EQUAL_128(0x0000000000000000, 0xbb7fe0009c000000, v1);
    // Fmlal(4S)
    // v2.S[0] = 128 + (65504 * 65504) = 4290774016 (rounded from 4290774144)
    // v2.S[1] = 0 + (-1 * 0) = 0
    // v2.S[2] = 1 + (2^-24 * 2^-24) = 1 (rounded)
    // v2.S[3] = 1 + (-2^-24 * 65504) = 0.996...
    ASSERT_EQUAL_128(0x3f7f00203f800000, 0x000000004f7fc004, v2);
    // Fmlal2(4S)
    // v3.S[0] = 0 + (6.103516e-5 * 0.99...) = 6.100535e-5
    // v3.S[1] = -0 + (0 * -0) = -0
    // v3.S[2] = -0 + (-0 * 0) = -0
    // v3.S[3] = 0 + (-Inf * -Inf) = Inf
    ASSERT_EQUAL_128(0x7f80000080000000, 0x80000000387fe000, v3);

    // Fmlsl results are mostly the same, but negated. The exceptions are the
    // lanes where a zero accumulator meets a zero product: those come out as
    // +0 rather than as the negation of the Fmlal result.
    // v4.S[1] = 0 - (-1 * 0) = 0
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000cf7fc006, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3b7fe0001c000000, v5);
    // In this case: v6.S[1] = -0 - (-1 * 0) = 0
    ASSERT_EQUAL_128(0xbf7f0020bf800000, 0x00000000cf7fc004, v6);
    // v7.S[1] = 0 - (0 * -0) = 0
    // v7.S[2] = 0 - (-0 * 0) = 0
    ASSERT_EQUAL_128(0xff80000000000000, 0x00000000b87fe000, v7);
  }
}
9451
9452
TEST(neon_byelement_fhm) {
  // Test basic operation of fmlal{2} and fmlsl{2} (by element). The simulator
  // tests have more comprehensive input sets.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kNEON,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFHM);

  START();
  // Set up multiplication inputs.
  //
  // v30.H[0] = 65504 (max normal)
  // v30.H[1] = -1
  // v30.H[2] = 2^-24 (min subnormal)
  // v30.H[3] = -2^-24 (min subnormal)
  // v30.H[4] = 6.10e-5 (min normal)
  // v30.H[5] = 0
  // v30.H[6] = -0
  // v30.H[7] = -Inf
  __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);

  // Each test instruction should only use one lane of vm, so set up unique
  // registers with poison values in other lanes. The poison NaN avoids the
  // default NaN (so it shouldn't be encountered accidentally), but is otherwise
  // arbitrary.
  VRegister poison = v29;
  __ Movi(v29.V8H(), 0x7f417f417f417f41, 0x7f417f417f417f41);
  // v31.H[0,2,4,...]: 1.000977 (0x3c01, the value just above 1)
  // v31.H[1,3,5,...]: 0.9995117 (0x3bff, the value just below 1)
  __ Movi(v31.V8H(), 0x3bff3c013bff3c01, 0x3bff3c013bff3c01);
  // Set up [v8,v15] as vm inputs: v(8 + i) is all poison except for lane i,
  // which receives v31.H[i].
  for (int i = 0; i <= 7; i++) {
    VRegister vm(i + 8);
    __ Mov(vm, poison);
    __ Ins(vm.V8H(), i, v31.V8H(), i);
  }

  // Accumulators for use with Fmlal{2}:
  // v0.S[0] = 2^-8
  // v0.S[1] = 1
  __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x3f8000003b800000);
  // v1.S[0] = -1.5 * 2^-49
  // v1.S[1] = 0
  __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7400000);
  // v2.S[0] = 0
  // v2.S[1] = -2^14
  // v2.S[2] = 1.5 * 2^-48
  // v2.S[3] = Inf
  __ Movi(v2.V4S(), 0x7f80000027c00000, 0xc680000000000000);
  // v3.S[0] = 0
  // v3.S[1] = -0
  // v3.S[2] = -0
  // v3.S[3] = 0
  __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
  // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
  // results are just the negation of the Fmlal{2} results.
  __ Fneg(v4.V4S(), v0.V4S());
  __ Fneg(v5.V4S(), v1.V4S());
  __ Fneg(v6.V4S(), v2.V4S());
  __ Fneg(v7.V4S(), v3.V4S());

  // Each instruction selects the one non-poison lane of its vm register.
  __ Fmlal(v0.V2S(), v30.V2H(), v8.H(), 0);
  __ Fmlal2(v1.V2S(), v30.V2H(), v9.H(), 1);
  __ Fmlal(v2.V4S(), v30.V4H(), v10.H(), 2);
  __ Fmlal2(v3.V4S(), v30.V4H(), v11.H(), 3);

  __ Fmlsl(v4.V2S(), v30.V2H(), v12.H(), 4);
  __ Fmlsl2(v5.V2S(), v30.V2H(), v13.H(), 5);
  __ Fmlsl(v6.V4S(), v30.V4H(), v14.H(), 6);
  __ Fmlsl2(v7.V4S(), v30.V4H(), v15.H(), 7);
  END();

  if (CAN_RUN()) {
    RUN();

    // Fmlal(2S)
    // v0.S[0] = 2^-8 + (65504 * 1.000977) = 65567.96875 (rounded)
    // v0.S[1] = 1 + (-1 * 1.000977) = -0.000976...
    ASSERT_EQUAL_128(0x0000000000000000, 0xba80000047800ffc, v0);
    // Fmlal2(2S)
    // v1.S[0] = (-1.5 * 2^-49) + (2^-24 * 0.9995117) = 5.958e-8 (rounded)
    // v1.S[1] = 0 + (-2^-24 * 0.9995117) = -5.958e-8
    ASSERT_EQUAL_128(0x0000000000000000, 0xb37fe000337fdfff, v1);
    // Fmlal(4S)
    // v2.S[0] = 0 + (65504 * 1.000977) = 65567.96875
    // v2.S[1] = -2^14 + (-1 * 1.000977) = -16385 (rounded from -16385.000977)
    // v2.S[2] = (1.5 * 2^-48) + (2^-24 * 1.000977) = 5.966e-8 (rounded up)
    // v2.S[3] = Inf + (-2^-24 * 1.000977) = Inf
    ASSERT_EQUAL_128(0x7f80000033802001, 0xc680020047800ffc, v2);
    // Fmlal2(4S)
    // v3.S[0] = 0 + (6.103516e-5 * 0.9995117) = 6.100535e-5
    // v3.S[1] = -0 + (0 * 0.9995117) = 0
    // v3.S[2] = -0 + (-0 * 0.9995117) = -0
    // v3.S[3] = 0 + (-Inf * 0.9995117) = -Inf
    ASSERT_EQUAL_128(0xff80000080000000, 0x00000000387fe000, v3);

    // Fmlsl results are mostly the same, but negated.
    ASSERT_EQUAL_128(0x0000000000000000, 0x3a800000c7800ffc, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x337fe000b37fdfff, v5);
    ASSERT_EQUAL_128(0xff800000b3802001, 0x46800200c7800ffc, v6);
    // In this case: v7.S[2] = 0 - (-0 * 0.9995117) = 0
    ASSERT_EQUAL_128(0x7f80000000000000, 0x00000000b87fe000, v7);
  }
}
9557
9558
// Scalar FMULX on S and D registers: one ordinary product, then every sign
// combination of (zero * infinity), for which the test expects +/-2.0.
TEST(neon_fmulx_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Operand registers, named after the value loaded into each of them.
  const VRegister s_two = s0;
  const VRegister s_half = s1;
  const VRegister s_pos_zero = s2;
  const VRegister s_neg_zero = s3;
  const VRegister s_pos_inf = s4;
  const VRegister s_neg_inf = s5;
  const VRegister d_two = d21;
  const VRegister d_half = d22;
  const VRegister d_pos_zero = d23;
  const VRegister d_neg_zero = d24;
  const VRegister d_pos_inf = d25;
  const VRegister d_neg_inf = d26;

  START();
  // Single precision.
  __ Fmov(s_two, 2.0);
  __ Fmov(s_half, 0.5);
  __ Fmov(s_pos_zero, 0.0);
  __ Fmov(s_neg_zero, -0.0);
  __ Fmov(s_pos_inf, kFP32PositiveInfinity);
  __ Fmov(s_neg_inf, kFP32NegativeInfinity);
  __ Fmulx(s16, s_two, s_half);
  __ Fmulx(s17, s_pos_zero, s_pos_inf);
  __ Fmulx(s18, s_pos_zero, s_neg_inf);
  __ Fmulx(s19, s_neg_zero, s_pos_inf);
  __ Fmulx(s20, s_neg_zero, s_neg_inf);

  // Double precision.
  __ Fmov(d_two, 2.0);
  __ Fmov(d_half, 0.5);
  __ Fmov(d_pos_zero, 0.0);
  __ Fmov(d_neg_zero, -0.0);
  __ Fmov(d_pos_inf, kFP64PositiveInfinity);
  __ Fmov(d_neg_inf, kFP64NegativeInfinity);
  __ Fmulx(d27, d_two, d_half);
  __ Fmulx(d28, d_pos_zero, d_pos_inf);
  __ Fmulx(d29, d_pos_zero, d_neg_inf);
  __ Fmulx(d30, d_neg_zero, d_pos_inf);
  __ Fmulx(d31, d_neg_zero, d_neg_inf);
  END();

  if (CAN_RUN()) {
    RUN();

    // The (zero * infinity) results are 2.0, negative when the operand signs
    // differ.
    ASSERT_EQUAL_FP32(1.0, s16);
    ASSERT_EQUAL_FP32(2.0, s17);
    ASSERT_EQUAL_FP32(-2.0, s18);
    ASSERT_EQUAL_FP32(-2.0, s19);
    ASSERT_EQUAL_FP32(2.0, s20);
    ASSERT_EQUAL_FP64(1.0, d27);
    ASSERT_EQUAL_FP64(2.0, d28);
    ASSERT_EQUAL_FP64(-2.0, d29);
    ASSERT_EQUAL_FP64(-2.0, d30);
    ASSERT_EQUAL_FP64(2.0, d31);
  }
}
9603
9604
// Vector FMULX on half-precision lanes (8H and 4H forms): one ordinary
// product, then every sign combination of (zero * infinity).
TEST(neon_fmulx_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 bit patterns, each replicated across four H lanes (64 bits).
  const uint64_t two_x4 = 0x4000400040004000;       // 2.0
  const uint64_t half_x4 = 0x3800380038003800;      // 0.5
  const uint64_t pos_zero_x4 = 0x0000000000000000;  // +0.0
  const uint64_t neg_zero_x4 = 0x8000800080008000;  // -0.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;   // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;   // -Inf
  // Expected result patterns that are not also inputs.
  const uint64_t one_x4 = 0x3c003c003c003c00;      // 1.0
  const uint64_t neg_two_x4 = 0xc000c000c000c000;  // -2.0

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), half_x4, half_x4);
  __ Movi(v2.V2D(), pos_zero_x4, pos_zero_x4);
  __ Movi(v3.V2D(), neg_zero_x4, neg_zero_x4);
  __ Movi(v4.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v5.V2D(), neg_inf_x4, neg_inf_x4);
  // 8H forms.
  __ Fmulx(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmulx(v7.V8H(), v2.V8H(), v4.V8H());
  __ Fmulx(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fmulx(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fmulx(v10.V8H(), v3.V8H(), v5.V8H());
  // 4H forms, same operand pairs.
  __ Fmulx(v11.V4H(), v0.V4H(), v1.V4H());
  __ Fmulx(v12.V4H(), v2.V4H(), v4.V4H());
  __ Fmulx(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fmulx(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fmulx(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();
    // (zero * infinity) is expected to give 2.0, negative when the operand
    // signs differ.
    ASSERT_EQUAL_128(one_x4, one_x4, v6);
    ASSERT_EQUAL_128(two_x4, two_x4, v7);
    ASSERT_EQUAL_128(neg_two_x4, neg_two_x4, v8);
    ASSERT_EQUAL_128(neg_two_x4, neg_two_x4, v9);
    ASSERT_EQUAL_128(two_x4, two_x4, v10);
    // 4H forms: zero expected in the high half.
    ASSERT_EQUAL_128(0, one_x4, v11);
    ASSERT_EQUAL_128(0, two_x4, v12);
    ASSERT_EQUAL_128(0, neg_two_x4, v13);
    ASSERT_EQUAL_128(0, neg_two_x4, v14);
    ASSERT_EQUAL_128(0, two_x4, v15);
  }
}
9643
9644
// Scalar FMULX on H registers: one ordinary product, then every sign
// combination of (zero * infinity).
TEST(neon_fmulx_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Operand registers, named after the value loaded into each of them.
  const VRegister two = h0;
  const VRegister half = h1;
  const VRegister pos_zero = h2;
  const VRegister neg_zero = h3;
  const VRegister pos_inf = h4;
  const VRegister neg_inf = h5;

  START();
  __ Fmov(two, Float16(2.0));
  __ Fmov(half, Float16(0.5));
  __ Fmov(pos_zero, Float16(0.0));
  __ Fmov(neg_zero, Float16(-0.0));
  __ Fmov(pos_inf, kFP16PositiveInfinity);
  __ Fmov(neg_inf, kFP16NegativeInfinity);
  __ Fmulx(h6, two, half);
  __ Fmulx(h7, pos_zero, pos_inf);
  __ Fmulx(h8, pos_zero, neg_inf);
  __ Fmulx(h9, neg_zero, pos_inf);
  __ Fmulx(h10, neg_zero, neg_inf);
  END();

  if (CAN_RUN()) {
    RUN();
    // (zero * infinity) is expected to give 2.0, negative when the operand
    // signs differ.
    ASSERT_EQUAL_FP16(Float16(1.0), h6);
    ASSERT_EQUAL_FP16(Float16(2.0), h7);
    ASSERT_EQUAL_FP16(Float16(-2.0), h8);
    ASSERT_EQUAL_FP16(Float16(-2.0), h9);
    ASSERT_EQUAL_FP16(Float16(2.0), h10);
  }
}
9674
// Vector FABD (absolute difference) on half-precision lanes, 8H and 4H forms,
// with finite, zero and infinite operands.
TEST(neon_fabd_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 bit patterns, each replicated across four H lanes (64 bits).
  const uint64_t two_x4 = 0x4000400040004000;       // 2.0
  const uint64_t half_x4 = 0x3800380038003800;      // 0.5
  const uint64_t pos_zero_x4 = 0x0000000000000000;  // +0.0
  const uint64_t neg_zero_x4 = 0x8000800080008000;  // -0.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;   // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;   // -Inf
  // Expected result pattern that is not also an input.
  const uint64_t one_and_half_x4 = 0x3e003e003e003e00;  // |0.5 - 2.0|

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), half_x4, half_x4);
  __ Movi(v2.V2D(), pos_zero_x4, pos_zero_x4);
  __ Movi(v3.V2D(), neg_zero_x4, neg_zero_x4);
  __ Movi(v4.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v5.V2D(), neg_inf_x4, neg_inf_x4);

  // 8H forms.
  __ Fabd(v6.V8H(), v1.V8H(), v0.V8H());
  __ Fabd(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fabd(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fabd(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fabd(v10.V8H(), v3.V8H(), v5.V8H());
  // 4H forms, same operand pairs.
  __ Fabd(v11.V4H(), v1.V4H(), v0.V4H());
  __ Fabd(v12.V4H(), v2.V4H(), v3.V4H());
  __ Fabd(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fabd(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fabd(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(one_and_half_x4, one_and_half_x4, v6);
    // |+0.0 - -0.0| = +0.0
    ASSERT_EQUAL_128(pos_zero_x4, pos_zero_x4, v7);
    // Any difference involving an infinity is +Inf.
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v8);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v9);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v10);
    // 4H forms: zero expected in the high half.
    ASSERT_EQUAL_128(0, one_and_half_x4, v11);
    ASSERT_EQUAL_128(0, pos_zero_x4, v12);
    ASSERT_EQUAL_128(0, pos_inf_x4, v13);
    ASSERT_EQUAL_128(0, pos_inf_x4, v14);
    ASSERT_EQUAL_128(0, pos_inf_x4, v15);
  }
}
9715
9716
// Scalar FABD (absolute difference) on H registers with finite, zero and
// infinite operands.
TEST(neon_fabd_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Operand registers, named after the value loaded into each of them.
  const VRegister two = h0;
  const VRegister half = h1;
  const VRegister pos_zero = h2;
  const VRegister neg_zero = h3;
  const VRegister pos_inf = h4;
  const VRegister neg_inf = h5;

  START();
  __ Fmov(two, Float16(2.0));
  __ Fmov(half, Float16(0.5));
  __ Fmov(pos_zero, Float16(0.0));
  __ Fmov(neg_zero, Float16(-0.0));
  __ Fmov(pos_inf, kFP16PositiveInfinity);
  __ Fmov(neg_inf, kFP16NegativeInfinity);
  __ Fabd(h16, half, two);
  __ Fabd(h17, pos_zero, neg_zero);
  __ Fabd(h18, pos_zero, neg_inf);
  __ Fabd(h19, neg_zero, pos_inf);
  __ Fabd(h20, neg_zero, neg_inf);
  END();

  if (CAN_RUN()) {
    RUN();
    // |0.5 - 2.0| = 1.5; any difference involving an infinity is +Inf.
    ASSERT_EQUAL_FP16(Float16(1.5), h16);
    ASSERT_EQUAL_FP16(Float16(0.0), h17);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h18);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h19);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h20);
  }
}
9746
9747
// Scalar FABD (absolute difference) on S and D registers with finite, zero
// and infinite operands.
TEST(neon_fabd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Operand registers, named after the value loaded into each of them.
  const VRegister s_two = s0;
  const VRegister s_half = s1;
  const VRegister s_pos_zero = s2;
  const VRegister s_neg_zero = s3;
  const VRegister s_pos_inf = s4;
  const VRegister s_neg_inf = s5;
  const VRegister d_two = d21;
  const VRegister d_half = d22;
  const VRegister d_pos_zero = d23;
  const VRegister d_neg_zero = d24;
  const VRegister d_pos_inf = d25;
  const VRegister d_neg_inf = d26;

  START();
  // Single precision.
  __ Fmov(s_two, 2.0);
  __ Fmov(s_half, 0.5);
  __ Fmov(s_pos_zero, 0.0);
  __ Fmov(s_neg_zero, -0.0);
  __ Fmov(s_pos_inf, kFP32PositiveInfinity);
  __ Fmov(s_neg_inf, kFP32NegativeInfinity);
  __ Fabd(s16, s_half, s_two);
  __ Fabd(s17, s_pos_zero, s_neg_zero);
  __ Fabd(s18, s_pos_zero, s_neg_inf);
  __ Fabd(s19, s_neg_zero, s_pos_inf);
  __ Fabd(s20, s_neg_zero, s_neg_inf);

  // Double precision. Note the first pair is (2.0, 0.5), the reverse of the
  // single-precision case; the absolute difference is the same.
  __ Fmov(d_two, 2.0);
  __ Fmov(d_half, 0.5);
  __ Fmov(d_pos_zero, 0.0);
  __ Fmov(d_neg_zero, -0.0);
  __ Fmov(d_pos_inf, kFP64PositiveInfinity);
  __ Fmov(d_neg_inf, kFP64NegativeInfinity);
  __ Fabd(d27, d_two, d_half);
  __ Fabd(d28, d_pos_zero, d_neg_zero);
  __ Fabd(d29, d_pos_zero, d_neg_inf);
  __ Fabd(d30, d_neg_zero, d_pos_inf);
  __ Fabd(d31, d_neg_zero, d_neg_inf);
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(1.5, s16);
    ASSERT_EQUAL_FP32(0.0, s17);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
    ASSERT_EQUAL_FP64(1.5, d27);
    ASSERT_EQUAL_FP64(0.0, d28);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);
  }
}
9792
9793
// Vector FRECPS on half-precision lanes (8H and 4H forms). The expected
// values below correspond to 2.0 - (a * b) for each operand pair.
TEST(neon_frecps_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 bit patterns, each replicated across four H lanes (64 bits).
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t neg_one_x4 = 0xbc00bc00bc00bc00;     // -1.0
  const uint64_t forty_five_x4 = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  // Expected result patterns that are not also inputs.
  const uint64_t neg_88_x4 = 0xd580d580d580d580;  // 2.0 - (2.0 * 45.0)
  const uint64_t pos_47_x4 = 0x51e051e051e051e0;  // 2.0 - (-1.0 * 45.0)

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), neg_one_x4, neg_one_x4);
  __ Movi(v2.V2D(), forty_five_x4, forty_five_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), neg_inf_x4, neg_inf_x4);

  // 8H forms.
  __ Frecps(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frecps(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frecps(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frecps(v8.V8H(), v0.V8H(), v4.V8H());
  // 4H forms, same operand pairs.
  __ Frecps(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frecps(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frecps(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frecps(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(neg_88_x4, neg_88_x4, v5);
    ASSERT_EQUAL_128(pos_47_x4, pos_47_x4, v6);
    // An infinite operand gives an infinity of the opposite sign.
    ASSERT_EQUAL_128(neg_inf_x4, neg_inf_x4, v7);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v8);
    // 4H forms: zero expected in the high half.
    ASSERT_EQUAL_128(0, neg_88_x4, v9);
    ASSERT_EQUAL_128(0, pos_47_x4, v10);
    ASSERT_EQUAL_128(0, neg_inf_x4, v11);
    ASSERT_EQUAL_128(0, pos_inf_x4, v12);
  }
}
9829
9830
// Scalar FRECPS on H registers. The expected values below correspond to
// 2.0 - (a * b) for each operand pair.
TEST(neon_frecps_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Operand registers, named after the value loaded into each of them.
  const VRegister two = h0;
  const VRegister neg_one = h1;
  const VRegister forty_five = h2;
  const VRegister pos_inf = h3;
  const VRegister neg_inf = h4;

  START();
  __ Fmov(two, Float16(2.0));
  __ Fmov(neg_one, Float16(-1.0));
  __ Fmov(forty_five, Float16(45.0));
  __ Fmov(pos_inf, kFP16PositiveInfinity);
  __ Fmov(neg_inf, kFP16NegativeInfinity);

  __ Frecps(h5, two, forty_five);
  __ Frecps(h6, neg_one, forty_five);
  __ Frecps(h7, two, pos_inf);
  __ Frecps(h8, two, neg_inf);
  END();

  if (CAN_RUN()) {
    RUN();

    // 2.0 - (2.0 * 45.0) = -88.0 and 2.0 - (-1.0 * 45.0) = 47.0.
    ASSERT_EQUAL_FP16(Float16(-88.0), h5);
    ASSERT_EQUAL_FP16(Float16(47.0), h6);
    // An infinite operand gives an infinity of the opposite sign.
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9859
9860
// Vector FRSQRTS on half-precision lanes (8H and 4H forms). The expected
// values below correspond to (3.0 - (a * b)) / 2.0 for each operand pair.
TEST(neon_frsqrts_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 bit patterns, each replicated across four H lanes (64 bits).
  const uint64_t two_x4 = 0x4000400040004000;         // 2.0
  const uint64_t neg_one_x4 = 0xbc00bc00bc00bc00;     // -1.0
  const uint64_t forty_five_x4 = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;     // -Inf
  // Expected result patterns that are not also inputs.
  const uint64_t neg_43_5_x4 = 0xd170d170d170d170;  // (3.0 - 90.0) / 2.0
  const uint64_t pos_24_x4 = 0x4e004e004e004e00;    // (3.0 + 45.0) / 2.0

  START();
  __ Movi(v0.V2D(), two_x4, two_x4);
  __ Movi(v1.V2D(), neg_one_x4, neg_one_x4);
  __ Movi(v2.V2D(), forty_five_x4, forty_five_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), neg_inf_x4, neg_inf_x4);

  // 8H forms.
  __ Frsqrts(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frsqrts(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frsqrts(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frsqrts(v8.V8H(), v0.V8H(), v4.V8H());
  // 4H forms, same operand pairs.
  __ Frsqrts(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frsqrts(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frsqrts(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frsqrts(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(neg_43_5_x4, neg_43_5_x4, v5);
    ASSERT_EQUAL_128(pos_24_x4, pos_24_x4, v6);
    // An infinite operand gives an infinity of the opposite sign.
    ASSERT_EQUAL_128(neg_inf_x4, neg_inf_x4, v7);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v8);
    // 4H forms: zero expected in the high half.
    ASSERT_EQUAL_128(0, neg_43_5_x4, v9);
    ASSERT_EQUAL_128(0, pos_24_x4, v10);
    ASSERT_EQUAL_128(0, neg_inf_x4, v11);
    ASSERT_EQUAL_128(0, pos_inf_x4, v12);
  }
}
9896
9897
// Scalar FRSQRTS on H registers. The expected values below correspond to
// (3.0 - (a * b)) / 2.0 for each operand pair.
TEST(neon_frsqrts_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Operand registers, named after the value loaded into each of them.
  const VRegister two = h0;
  const VRegister neg_one = h1;
  const VRegister forty_five = h2;
  const VRegister pos_inf = h3;
  const VRegister neg_inf = h4;

  START();
  __ Fmov(two, Float16(2.0));
  __ Fmov(neg_one, Float16(-1.0));
  __ Fmov(forty_five, Float16(45.0));
  __ Fmov(pos_inf, kFP16PositiveInfinity);
  __ Fmov(neg_inf, kFP16NegativeInfinity);

  __ Frsqrts(h5, two, forty_five);
  __ Frsqrts(h6, neg_one, forty_five);
  __ Frsqrts(h7, two, pos_inf);
  __ Frsqrts(h8, two, neg_inf);
  END();

  if (CAN_RUN()) {
    RUN();

    // (3.0 - 90.0) / 2.0 = -43.5 and (3.0 + 45.0) / 2.0 = 24.0.
    ASSERT_EQUAL_FP16(Float16(-43.5), h5);
    ASSERT_EQUAL_FP16(Float16(24.0), h6);
    // An infinite operand gives an infinity of the opposite sign.
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9926
9927
// Vector FADDP (pairwise add) on half-precision lanes, 8H and 4H forms.
// Sums from the first source land in the low half of the result and sums from
// the second source in the high half, as the expectations below show.
TEST(neon_faddp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Inputs: two adjacent-lane pairs per 64 bits, written (odd lane, even lane).
  const uint64_t one_two_pairs = 0x3c0040003c004000;   // (1.0, 2.0)
  const uint64_t inf_pairs = 0xfc007c00fc007c00;       // (-Inf, +Inf)
  const uint64_t zero_pairs = 0x0000800000008000;      // (+0.0, -0.0)
  const uint64_t qnan_snan_pairs = 0x7e007c017e007c01; // (quiet, signalling)
  // Expected pairwise sums, replicated across four H lanes.
  const uint64_t three_x4 = 0x4200420042004200;      // 1.0 + 2.0
  const uint64_t default_nan_x4 = 0x7e007e007e007e00;  // -Inf + +Inf
  const uint64_t quieted_snan_x4 = 0x7e017e017e017e01;
  const uint64_t pos_zero_x4 = 0x0000000000000000;   // +0.0 + -0.0

  START();
  __ Movi(v0.V2D(), one_two_pairs, one_two_pairs);
  __ Movi(v1.V2D(), inf_pairs, inf_pairs);
  __ Movi(v2.V2D(), zero_pairs, zero_pairs);
  __ Movi(v3.V2D(), qnan_snan_pairs, qnan_snan_pairs);

  __ Faddp(v4.V8H(), v1.V8H(), v0.V8H());
  __ Faddp(v5.V8H(), v3.V8H(), v2.V8H());
  __ Faddp(v6.V4H(), v1.V4H(), v0.V4H());
  __ Faddp(v7.V4H(), v3.V4H(), v2.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(three_x4, default_nan_x4, v4);
    ASSERT_EQUAL_128(pos_zero_x4, quieted_snan_x4, v5);
    // 4H forms: two sums per source in the low 64 bits, zero above.
    ASSERT_EQUAL_128(0, 0x420042007e007e00, v6);
    ASSERT_EQUAL_128(0, 0x000000007e017e01, v7);
  }
}
9954
9955
// Scalar FADDP: adds the two lanes of a 2S or 2D vector into an S or D
// register. The destination overlaps the source vector in every case.
TEST(neon_faddp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Packed single-precision pairs, written (S[1], S[0]).
  const uint64_t s_one_two = 0x3f80000040000000;      // (1.0, 2.0)
  const uint64_t s_opposite_inf = 0xff8000007f800000; // (-Inf, +Inf)
  const uint64_t s_opposite_zero = 0x0000000080000000; // (+0.0, -0.0)
  // Double-precision lane values.
  const uint64_t d_two = 0x4000000000000000;       // 2.0
  const uint64_t d_neg_two = 0xc000000000000000;   // -2.0
  const uint64_t d_qnan = 0x7ff8000000000000;      // quiet NaN
  const uint64_t d_neg_qnan = 0xfff8000000000000;  // quiet NaN, sign bit set
  const uint64_t d_pos_zero = 0x0000000000000000;  // +0.0
  const uint64_t d_neg_zero = 0x8000000000000000;  // -0.0

  START();
  __ Movi(d0, s_one_two);
  __ Movi(d1, s_opposite_inf);
  __ Movi(d2, s_opposite_zero);
  __ Faddp(s0, v0.V2S());
  __ Faddp(s1, v1.V2S());
  __ Faddp(s2, v2.V2S());

  // Movi takes the high lane (D[1]) first, then the low lane (D[0]).
  __ Movi(v3.V2D(), d_neg_two, d_two);
  __ Movi(v4.V2D(), d_neg_qnan, d_qnan);
  __ Movi(v5.V2D(), d_pos_zero, d_neg_zero);
  __ Faddp(d3, v3.V2D());
  __ Faddp(d4, v4.V2D());
  __ Faddp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(3.0, s0);
    // Infinities of opposite sign sum to the default NaN.
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
    ASSERT_EQUAL_FP32(0.0, s2);
    ASSERT_EQUAL_FP64(0.0, d3);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
    ASSERT_EQUAL_FP64(0.0, d5);
  }
}
9986
9987
// Scalar FADDP on half precision: adds the two lanes of a 2H vector into an
// H register. The destination overlaps the source vector in every case.
TEST(neon_faddp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Packed half-precision pairs, written (H[1], H[0]).
  const uint64_t h_one_two = 0x3c004000;        // (1.0, 2.0)
  const uint64_t h_opposite_inf = 0xfc007c00;   // (-Inf, +Inf)
  const uint64_t h_opposite_zero = 0x00008000;  // (+0.0, -0.0)

  START();
  __ Movi(s0, h_one_two);
  __ Movi(s1, h_opposite_inf);
  __ Movi(s2, h_opposite_zero);
  __ Faddp(h0, v0.V2H());
  __ Faddp(h1, v1.V2H());
  __ Faddp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(3.0), h0);
    // Infinities of opposite sign sum to the default NaN.
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h1);
    ASSERT_EQUAL_FP16(Float16(0.0), h2);
  }
}
10010
10011
// Scalar FMAXP: takes the maximum of the two lanes of a 2S or 2D vector. A
// NaN lane is expected to produce a NaN result.
TEST(neon_fmaxp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Packed single-precision pairs, written (S[1], S[0]).
  const uint64_t s_one_two = 0x3f80000040000000;       // (1.0, 2.0)
  const uint64_t s_opposite_inf = 0xff8000007f800000;  // (-Inf, +Inf)
  const uint64_t s_qnan_neg_inf = 0x7fc00000ff800000;  // (quiet NaN, -Inf)
  // Double-precision lane values.
  const uint64_t d_one = 0x3ff0000000000000;      // 1.0
  const uint64_t d_two = 0x4000000000000000;      // 2.0
  const uint64_t d_neg_inf = 0xfff0000000000000;  // -Inf
  const uint64_t d_pos_inf = 0x7ff0000000000000;  // +Inf
  const uint64_t d_qnan = 0x7ff8000000000000;     // quiet NaN

  START();
  __ Movi(d0, s_one_two);
  __ Movi(d1, s_opposite_inf);
  __ Movi(d2, s_qnan_neg_inf);
  __ Fmaxp(s0, v0.V2S());
  __ Fmaxp(s1, v1.V2S());
  __ Fmaxp(s2, v2.V2S());

  // Movi takes the high lane (D[1]) first, then the low lane (D[0]).
  __ Movi(v3.V2D(), d_one, d_two);
  __ Movi(v4.V2D(), d_neg_inf, d_pos_inf);
  __ Movi(v5.V2D(), d_pos_inf, d_qnan);
  __ Fmaxp(d3, v3.V2D());
  __ Fmaxp(d4, v4.V2D());
  __ Fmaxp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10042
10043
// Scalar FMAXP on half precision: takes the maximum of the two lanes of a 2H
// vector. A NaN lane is expected to produce a NaN result.
TEST(neon_fmaxp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Packed half-precision pairs, written (H[1], H[0]).
  const uint64_t h_one_two = 0x3c004000;       // (1.0, 2.0)
  const uint64_t h_opposite_inf = 0xfc007c00;  // (-Inf, +Inf)
  const uint64_t h_qnan_neg_inf = 0x7e00fc00;  // (quiet NaN, -Inf)

  START();
  __ Movi(s0, h_one_two);
  __ Movi(s1, h_opposite_inf);
  __ Movi(s2, h_qnan_neg_inf);
  __ Fmaxp(h0, v0.V2H());
  __ Fmaxp(h1, v1.V2H());
  __ Fmaxp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10066
10067
// Vector FMAX on half-precision lanes (8H and 4H forms). A NaN operand is
// expected to produce a NaN result.
TEST(neon_fmax_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 bit patterns, each replicated across four H lanes (64 bits).
  const uint64_t one_x4 = 0x3c003c003c003c00;      // 1.0
  const uint64_t two_x4 = 0x4000400040004000;      // 2.0
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;  // -Inf
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;  // +Inf
  const uint64_t qnan_x4 = 0x7e007e007e007e00;     // quiet NaN
  const uint64_t snan_x4 = 0x7c017c017c017c01;     // signalling NaN
  // Expected result: snan_x4 with the quiet bit set.
  const uint64_t quieted_snan_x4 = 0x7e017e017e017e01;

  START();
  __ Movi(v0.V2D(), one_x4, one_x4);
  __ Movi(v1.V2D(), two_x4, two_x4);
  __ Movi(v2.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), qnan_x4, qnan_x4);
  __ Movi(v5.V2D(), snan_x4, snan_x4);

  // 8H forms.
  __ Fmax(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmax(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmax(v9.V8H(), v5.V8H(), v1.V8H());
  // 4H forms, same operand pairs.
  __ Fmax(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmax(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmax(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(two_x4, two_x4, v6);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v7);
    ASSERT_EQUAL_128(qnan_x4, qnan_x4, v8);
    ASSERT_EQUAL_128(quieted_snan_x4, quieted_snan_x4, v9);
    // 4H forms: zero expected in the high half.
    ASSERT_EQUAL_128(0, two_x4, v10);
    ASSERT_EQUAL_128(0, pos_inf_x4, v11);
    ASSERT_EQUAL_128(0, qnan_x4, v12);
    ASSERT_EQUAL_128(0, quieted_snan_x4, v13);
  }
}
10104
10105
// Vector FMAXP (pairwise maximum) on half-precision lanes, 8H and 4H forms.
// Results from the first source land in the low half and results from the
// second source in the high half, as the expectations below show.
TEST(neon_fmaxp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Inputs: two adjacent-lane pairs per 64 bits, written (odd lane, even lane).
  const uint64_t one_two_pairs = 0x3c0040003c004000;  // (1.0, 2.0)
  const uint64_t inf_pairs = 0xfc007c00fc007c00;      // (-Inf, +Inf)
  const uint64_t qnan_one_pairs = 0x7e003c007e003c00; // (quiet NaN, 1.0)
  const uint64_t snan_two_pairs = 0x7c0140007c014000; // (signalling NaN, 2.0)
  // Expected pairwise maxima, replicated across four H lanes.
  const uint64_t two_x4 = 0x4000400040004000;
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;
  const uint64_t qnan_x4 = 0x7e007e007e007e00;
  const uint64_t quieted_snan_x4 = 0x7e017e017e017e01;

  START();
  __ Movi(v0.V2D(), one_two_pairs, one_two_pairs);
  __ Movi(v1.V2D(), inf_pairs, inf_pairs);
  __ Movi(v2.V2D(), qnan_one_pairs, qnan_one_pairs);
  __ Movi(v3.V2D(), snan_two_pairs, snan_two_pairs);

  __ Fmaxp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(pos_inf_x4, two_x4, v6);
    // A NaN in the pair wins over the number.
    ASSERT_EQUAL_128(quieted_snan_x4, qnan_x4, v7);
    // 4H forms: two results per source in the low 64 bits, zero above.
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10132
10133
// Vector FMAXNM on half-precision lanes (8H and 4H forms). Unlike FMAX, a
// quiet NaN paired with a number is expected to give the number; a
// signalling NaN still gives a (quieted) NaN.
TEST(neon_fmaxnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 bit patterns, each replicated across four H lanes (64 bits).
  const uint64_t one_x4 = 0x3c003c003c003c00;      // 1.0
  const uint64_t two_x4 = 0x4000400040004000;      // 2.0
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;  // -Inf
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;  // +Inf
  const uint64_t qnan_x4 = 0x7e007e007e007e00;     // quiet NaN
  const uint64_t snan_x4 = 0x7c017c017c017c01;     // signalling NaN
  // Expected result: snan_x4 with the quiet bit set.
  const uint64_t quieted_snan_x4 = 0x7e017e017e017e01;

  START();
  __ Movi(v0.V2D(), one_x4, one_x4);
  __ Movi(v1.V2D(), two_x4, two_x4);
  __ Movi(v2.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), qnan_x4, qnan_x4);
  __ Movi(v5.V2D(), snan_x4, snan_x4);

  // 8H forms.
  __ Fmaxnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmaxnm(v9.V8H(), v5.V8H(), v1.V8H());
  // 4H forms, same operand pairs.
  __ Fmaxnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmaxnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmaxnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(two_x4, two_x4, v6);
    ASSERT_EQUAL_128(pos_inf_x4, pos_inf_x4, v7);
    // max(quiet NaN, 1.0) = 1.0
    ASSERT_EQUAL_128(one_x4, one_x4, v8);
    ASSERT_EQUAL_128(quieted_snan_x4, quieted_snan_x4, v9);
    // 4H forms: zero expected in the high half.
    ASSERT_EQUAL_128(0, two_x4, v10);
    ASSERT_EQUAL_128(0, pos_inf_x4, v11);
    ASSERT_EQUAL_128(0, one_x4, v12);
    ASSERT_EQUAL_128(0, quieted_snan_x4, v13);
  }
}
10170
10171
// Vector FMAXNMP (pairwise maxNum) on half-precision lanes, 8H and 4H forms.
// Results from the first source land in the low half and results from the
// second source in the high half, as the expectations below show.
TEST(neon_fmaxnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Inputs: two adjacent-lane pairs per 64 bits, written (odd lane, even lane).
  const uint64_t one_two_pairs = 0x3c0040003c004000;  // (1.0, 2.0)
  const uint64_t inf_pairs = 0xfc007c00fc007c00;      // (-Inf, +Inf)
  const uint64_t qnan_one_pairs = 0x7e003c007e003c00; // (quiet NaN, 1.0)
  const uint64_t snan_two_pairs = 0x7c0140007c014000; // (signalling NaN, 2.0)
  // Expected pairwise results, replicated across four H lanes.
  const uint64_t one_x4 = 0x3c003c003c003c00;
  const uint64_t two_x4 = 0x4000400040004000;
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;
  const uint64_t quieted_snan_x4 = 0x7e017e017e017e01;

  START();
  __ Movi(v0.V2D(), one_two_pairs, one_two_pairs);
  __ Movi(v1.V2D(), inf_pairs, inf_pairs);
  __ Movi(v2.V2D(), qnan_one_pairs, qnan_one_pairs);
  __ Movi(v3.V2D(), snan_two_pairs, snan_two_pairs);

  __ Fmaxnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(pos_inf_x4, two_x4, v6);
    // The quiet NaN loses to 1.0; the signalling NaN still yields a NaN.
    ASSERT_EQUAL_128(quieted_snan_x4, one_x4, v7);
    // 4H forms: two results per source in the low 64 bits, zero above.
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10198
10199
// Checks the scalar (pairwise-reduction) forms of Fmaxnmp, which reduce a
// two-lane vector to a single S or D result.
TEST(neon_fmaxnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (lowest lane first):
  //   d0: (2.0, 1.0), d1: (+infinity, -infinity), d2: (-infinity, quiet NaN).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  // Each result overwrites the register that supplied its inputs.
  __ Fmaxnmp(s0, v0.V2S());
  __ Fmaxnmp(s1, v1.V2S());
  __ Fmaxnmp(s2, v2.V2S());

  // Double-precision pairs: {1.0, 2.0}, {-infinity, +infinity} and
  // {quiet NaN, -infinity}.
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fmaxnmp(d3, v3.V2D());
  __ Fmaxnmp(d4, v4.V2D());
  __ Fmaxnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    // The quiet NaN is ignored, so the numeric operand (-infinity) is returned.
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10230
10231
// Checks the half-precision scalar (pairwise-reduction) form of Fmaxnmp, which
// reduces a 2H vector to a single H result.
TEST(neon_fmaxnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // FP16 pairs (lowest lane first):
  //   s0: (2.0, 1.0), s1: (+infinity, -infinity), s2: (-infinity, quiet NaN).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  // Each result overwrites the register that supplied its inputs.
  __ Fmaxnmp(h0, v0.V2H());
  __ Fmaxnmp(h1, v1.V2H());
  __ Fmaxnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    // The quiet NaN is ignored, so the numeric operand (-infinity) is returned.
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10254
10255
// Checks the scalar (pairwise-reduction) forms of Fminp, which reduce a
// two-lane vector to a single S or D result.
TEST(neon_fminp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (lowest lane first):
  //   d0: (2.0, 1.0), d1: (+infinity, -infinity), d2: (-infinity, quiet NaN).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  // Each result overwrites the register that supplied its inputs.
  __ Fminp(s0, v0.V2S());
  __ Fminp(s1, v1.V2S());
  __ Fminp(s2, v2.V2S());

  // Double-precision pairs: {1.0, 2.0}, {-infinity, +infinity} and
  // {+infinity, quiet NaN}.
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fminp(d3, v3.V2D());
  __ Fminp(d4, v4.V2D());
  __ Fminp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    // Unlike the "nm" variants, a quiet NaN operand makes the result NaN.
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10286
10287
// Checks the half-precision scalar (pairwise-reduction) form of Fminp, which
// reduces a 2H vector to a single H result.
TEST(neon_fminp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // FP16 pairs (lowest lane first):
  //   s0: (2.0, 1.0), s1: (+infinity, -infinity), s2: (-infinity, quiet NaN).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  // Each result overwrites the register that supplied its inputs.
  __ Fminp(h0, v0.V2H());
  __ Fminp(h1, v1.V2H());
  __ Fminp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    // Unlike the "nm" variants, a quiet NaN operand makes the result NaN.
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10310
10311
// Checks the half-precision vector forms (8H and 4H) of Fmin for ordinary
// values, infinities, a quiet NaN and a signalling NaN.
TEST(neon_fmin_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every lane of a register holds the same FP16 bit pattern:
  //   v0: 1.0         v1: 2.0
  //   v2: -infinity   v3: +infinity
  //   v4: quiet NaN   v5: signalling NaN (0x7c01)
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  // The same four operand combinations are used for the 8H and 4H forms.
  __ Fmin(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmin(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmin(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmin(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmin(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmin(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmin(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    // A quiet NaN operand is propagated to the result.
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
    // A signalling NaN operand is returned in its quiet form (0x7e01).
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // For the 4H forms the upper 64 bits of the destination must be zero.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10348
10349
// Checks the half-precision vector forms (8H and 4H) of Fminp against
// hand-computed results for ordinary values, infinities and NaNs.
TEST(neon_fminp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each 16-bit lane holds an FP16 bit pattern. Adjacent lanes (listed lowest
  // lane first) form the pairs being reduced:
  //   v0: (2.0, 1.0)           v1: (+infinity, -infinity)
  //   v2: (1.0, quiet NaN)     v3: (2.0, signalling NaN 0x7c01)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result comes from the pairs of the first source and
    // the high half from the pairs of the second source.
    // (2.0, 1.0) -> 1.0 and (+infinity, -infinity) -> -infinity.
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
    // The quiet NaN is propagated (0x7e00); the signalling NaN is returned in
    // its quiet form (0x7e01).
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
    // For the 4H forms the upper 64 bits of the destination must be zero.
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10376
10377
// Checks the half-precision vector forms (8H and 4H) of Fminnm for ordinary
// values, infinities, a quiet NaN and a signalling NaN.
TEST(neon_fminnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Every lane of a register holds the same FP16 bit pattern:
  //   v0: 1.0         v1: 2.0
  //   v2: -infinity   v3: +infinity
  //   v4: quiet NaN   v5: signalling NaN (0x7c01)
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  // The same four operand combinations are used for the 8H and 4H forms.
  __ Fminnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fminnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fminnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fminnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fminnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
    // A quiet NaN operand is ignored in favour of the numeric operand (1.0).
    ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
    // A signalling NaN operand is still returned, in its quiet form (0x7e01).
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
    // For the 4H forms the upper 64 bits of the destination must be zero.
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
    ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
  }
}
10414
10415
// Checks the half-precision vector forms (8H and 4H) of Fminnmp against
// hand-computed results for ordinary values, infinities and NaNs.
TEST(neon_fminnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // Each 16-bit lane holds an FP16 bit pattern. Adjacent lanes (listed lowest
  // lane first) form the pairs being reduced:
  //   v0: (2.0, 1.0)           v1: (+infinity, -infinity)
  //   v2: (1.0, quiet NaN)     v3: (2.0, signalling NaN 0x7c01)
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low half of each result comes from the pairs of the first source and
    // the high half from the pairs of the second source.
    // (2.0, 1.0) -> 1.0 and (+infinity, -infinity) -> -infinity.
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
    // The quiet NaN is ignored in favour of 1.0; the signalling NaN is
    // returned in its quiet form (0x7e01).
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
    // For the 4H forms the upper 64 bits of the destination must be zero.
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10442
10443
// Checks the scalar (pairwise-reduction) forms of Fminnmp, which reduce a
// two-lane vector to a single S or D result.
TEST(neon_fminnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  // Single-precision pairs (lowest lane first):
  //   d0: (2.0, 1.0), d1: (+infinity, -infinity), d2: (-infinity, quiet NaN).
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  // Each result overwrites the register that supplied its inputs.
  __ Fminnmp(s0, v0.V2S());
  __ Fminnmp(s1, v1.V2S());
  __ Fminnmp(s2, v2.V2S());

  // Double-precision pairs: {1.0, 2.0}, {-infinity, +infinity} and
  // {quiet NaN, -infinity}.
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fminnmp(d3, v3.V2D());
  __ Fminnmp(d4, v4.V2D());
  __ Fminnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    // The quiet NaN is ignored, so the numeric operand (-infinity) is returned.
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10474
10475
// Checks the half-precision scalar (pairwise-reduction) form of Fminnmp, which
// reduces a 2H vector to a single H result.
TEST(neon_fminnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  // FP16 pairs (lowest lane first):
  //   s0: (2.0, 1.0), s1: (+infinity, -infinity), s2: (-infinity, quiet NaN).
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  // Each result overwrites the register that supplied its inputs.
  __ Fminnmp(h0, v0.V2H());
  __ Fminnmp(h1, v1.V2H());
  __ Fminnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    // The quiet NaN is ignored, so the numeric operand (-infinity) is returned.
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10498
Float16ToV4H(Float16 f)10499 static uint64_t Float16ToV4H(Float16 f) {
10500 uint64_t bits = static_cast<uint64_t>(Float16ToRawbits(f));
10501 return (bits << 48) | (bits << 32) | (bits << 16) | bits;
10502 }
10503
10504
// Generates and runs Fmin, Fmax, Fminnm and Fmaxnm on the half-precision
// operands `n` and `m`, each in scalar (H), 4H and 8H form, and checks the
// results against the caller-supplied expectations:
//   min   - expected Fmin result,     max   - expected Fmax result,
//   minnm - expected Fminnm result,   maxnm - expected Fmaxnm result.
// Every lane of a vector result is expected to hold the same value as the
// corresponding scalar result.
static void FminFmaxFloat16Helper(Float16 n,
                                  Float16 m,
                                  Float16 min,
                                  Float16 max,
                                  Float16 minnm,
                                  Float16 maxnm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  // Load the operands. h0/h1 are the lowest lanes of v0/v1, so after the
  // vector moves the scalar and vector views hold the same values.
  __ Fmov(h0, n);
  __ Fmov(h1, m);
  __ Fmov(v0.V8H(), n);
  __ Fmov(v1.V8H(), m);
  // Scalar results go to h28-h31, vector results to v2-v9.
  __ Fmin(h28, h0, h1);
  __ Fmin(v2.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v3.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(h29, h0, h1);
  __ Fmax(v4.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v5.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(h30, h0, h1);
  __ Fminnm(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v7.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(h31, h0, h1);
  __ Fmaxnm(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v9.V8H(), v0.V8H(), v1.V8H());
  END();

  // Expected 64-bit patterns: the expected value replicated into four lanes.
  uint64_t min_vec = Float16ToV4H(min);
  uint64_t max_vec = Float16ToV4H(max);
  uint64_t minnm_vec = Float16ToV4H(minnm);
  uint64_t maxnm_vec = Float16ToV4H(maxnm);

  if (CAN_RUN()) {
    RUN();

    // Scalar forms.
    ASSERT_EQUAL_FP16(min, h28);
    ASSERT_EQUAL_FP16(max, h29);
    ASSERT_EQUAL_FP16(minnm, h30);
    ASSERT_EQUAL_FP16(maxnm, h31);


    // Vector forms: 4H results must have a zero upper half, 8H results hold
    // the expected pattern in both halves.
    ASSERT_EQUAL_128(0, min_vec, v2);
    ASSERT_EQUAL_128(min_vec, min_vec, v3);
    ASSERT_EQUAL_128(0, max_vec, v4);
    ASSERT_EQUAL_128(max_vec, max_vec, v5);
    ASSERT_EQUAL_128(0, minnm_vec, v6);
    ASSERT_EQUAL_128(minnm_vec, minnm_vec, v7);
    ASSERT_EQUAL_128(0, maxnm_vec, v8);
    ASSERT_EQUAL_128(maxnm_vec, maxnm_vec, v9);
  }
}
10559
MinMaxHelper(Float16 n,Float16 m,bool min,Float16 quiet_nan_substitute=Float16 (0.0))10560 static Float16 MinMaxHelper(Float16 n,
10561 Float16 m,
10562 bool min,
10563 Float16 quiet_nan_substitute = Float16(0.0)) {
10564 const uint64_t kFP16QuietNaNMask = 0x0200;
10565 uint16_t raw_n = Float16ToRawbits(n);
10566 uint16_t raw_m = Float16ToRawbits(m);
10567
10568 if (IsSignallingNaN(n)) {
10569 // n is signalling NaN.
10570 return RawbitsToFloat16(raw_n | kFP16QuietNaNMask);
10571 } else if (IsSignallingNaN(m)) {
10572 // m is signalling NaN.
10573 return RawbitsToFloat16(raw_m | kFP16QuietNaNMask);
10574 } else if (IsZero(quiet_nan_substitute)) {
10575 if (IsNaN(n)) {
10576 // n is quiet NaN.
10577 return n;
10578 } else if (IsNaN(m)) {
10579 // m is quiet NaN.
10580 return m;
10581 }
10582 } else {
10583 // Substitute n or m if one is quiet, but not both.
10584 if (IsNaN(n) && !IsNaN(m)) {
10585 // n is quiet NaN: replace with substitute.
10586 n = quiet_nan_substitute;
10587 } else if (!IsNaN(n) && IsNaN(m)) {
10588 // m is quiet NaN: replace with substitute.
10589 m = quiet_nan_substitute;
10590 }
10591 }
10592
10593 uint16_t sign_mask = 0x8000;
10594 if (IsZero(n) && IsZero(m) && ((raw_n & sign_mask) != (raw_m & sign_mask))) {
10595 return min ? Float16(-0.0) : Float16(0.0);
10596 }
10597
10598 if (FPToDouble(n, kIgnoreDefaultNaN) < FPToDouble(m, kIgnoreDefaultNaN)) {
10599 return min ? n : m;
10600 }
10601 return min ? m : n;
10602 }
10603
// Checks half-precision Fmin, Fmax, Fminnm and Fmaxnm through
// FminFmaxFloat16Helper: first against hand-written expectations, then for
// every ordered pair drawn from a table of inputs, with MinMaxHelper supplying
// the expected results.
TEST(fmax_fmin_h) {
  // NaNs with distinctive payloads, so that payload propagation is checked.
  Float16 snan = RawbitsToFloat16(0x7c12);
  Float16 qnan = RawbitsToFloat16(0x7e34);

  // Expected outputs when one of the NaNs above is propagated: the signalling
  // NaN becomes quiet (0x7c12 -> 0x7e12), the quiet NaN is unchanged.
  Float16 snan_processed = RawbitsToFloat16(0x7e12);
  Float16 qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests: operands and hand-written expectations, run in order.
  struct BootstrapCase {
    Float16 n;
    Float16 m;
    Float16 min;
    Float16 max;
    Float16 minnm;
    Float16 maxnm;
  };
  const BootstrapCase bootstrap_cases[] = {
      {Float16(0), Float16(0), Float16(0), Float16(0), Float16(0), Float16(0)},
      {Float16(0), Float16(1), Float16(0), Float16(1), Float16(0), Float16(1)},
      {kFP16PositiveInfinity,
       kFP16NegativeInfinity,
       kFP16NegativeInfinity,
       kFP16PositiveInfinity,
       kFP16NegativeInfinity,
       kFP16PositiveInfinity},
      // A signalling NaN always wins, whichever operand it is.
      {snan,
       Float16(0),
       snan_processed,
       snan_processed,
       snan_processed,
       snan_processed},
      {Float16(0),
       snan,
       snan_processed,
       snan_processed,
       snan_processed,
       snan_processed},
      // A quiet NaN is propagated by Fmin/Fmax but ignored by Fminnm/Fmaxnm.
      {qnan,
       Float16(0),
       qnan_processed,
       qnan_processed,
       Float16(0),
       Float16(0)},
      {Float16(0),
       qnan,
       qnan_processed,
       qnan_processed,
       Float16(0),
       Float16(0)},
      // A signalling NaN takes priority over a quiet NaN.
      {qnan,
       snan,
       snan_processed,
       snan_processed,
       snan_processed,
       snan_processed},
      {snan,
       qnan,
       snan_processed,
       snan_processed,
       snan_processed,
       snan_processed},
  };
  const int bootstrap_count =
      sizeof(bootstrap_cases) / sizeof(bootstrap_cases[0]);
  for (int i = 0; i < bootstrap_count; i++) {
    const BootstrapCase& c = bootstrap_cases[i];
    FminFmaxFloat16Helper(c.n, c.m, c.min, c.max, c.minnm, c.maxnm);
  }

  // Exhaustive part: every ordered (n, m) pair from this table.
  Float16 inputs[] = {RawbitsToFloat16(0x7bff),
                      RawbitsToFloat16(0x0400),
                      Float16(1.0),
                      Float16(0.0),
                      RawbitsToFloat16(0xfbff),
                      RawbitsToFloat16(0x8400),
                      Float16(-1.0),
                      Float16(-0.0),
                      kFP16PositiveInfinity,
                      kFP16NegativeInfinity,
                      kFP16QuietNaN,
                      kFP16SignallingNaN};

  const int input_count = sizeof(inputs) / sizeof(inputs[0]);

  for (int n_index = 0; n_index < input_count; n_index++) {
    for (int m_index = 0; m_index < input_count; m_index++) {
      Float16 n = inputs[n_index];
      Float16 m = inputs[m_index];

      // Fmin/Fmax use the default (no substitute) mode; Fminnm/Fmaxnm replace
      // a lone quiet NaN with +infinity/-infinity respectively.
      Float16 expected_min = MinMaxHelper(n, m, true);
      Float16 expected_max = MinMaxHelper(n, m, false);
      Float16 expected_minnm = MinMaxHelper(n, m, true, kFP16PositiveInfinity);
      Float16 expected_maxnm = MinMaxHelper(n, m, false, kFP16NegativeInfinity);

      FminFmaxFloat16Helper(n,
                            m,
                            expected_min,
                            expected_max,
                            expected_minnm,
                            expected_maxnm);
    }
  }
}
10702
// Checks the vector forms of Frint32x, Frint64x, Frint32z and Frint64z on
// ordinary values and on infinities, using the 2S, 4S and 2D arrangements.
TEST(neon_frint_saturating) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kFrintToFixedSizedInt);

  START();

  // v0: single-precision 1.0, 1.1, 1.5 and 1.9 (the 2S forms only read the
  //     low 64 bits, i.e. 1.5 and 1.9).
  // v1: single-precision 0.15625, -0.15625, +infinity and -infinity.
  // v2: double-precision -infinity and +infinity.
  __ Movi(v0.V2D(), 0x3f8000003f8ccccd, 0x3fc000003ff33333);
  __ Movi(v1.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v2.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Frint32x(v16.V2S(), v0.V2S());
  __ Frint32x(v17.V4S(), v1.V4S());
  __ Frint32x(v18.V2D(), v2.V2D());
  __ Frint64x(v19.V2S(), v0.V2S());
  __ Frint64x(v20.V4S(), v1.V4S());
  __ Frint64x(v21.V2D(), v2.V2D());
  __ Frint32z(v22.V2S(), v0.V2S());
  __ Frint32z(v23.V4S(), v1.V4S());
  __ Frint32z(v24.V2D(), v2.V2D());
  __ Frint64z(v25.V2S(), v0.V2S());
  __ Frint64z(v26.V4S(), v1.V4S());
  __ Frint64z(v27.V2D(), v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // Expected results, as raw floating-point bit patterns:
    //  - 1.5 and 1.9 become 2.0 for the "x" forms and 1.0 for the "z" forms;
    //  - +/-0.15625 become +/-0.0;
    //  - infinities of either sign become -2^31 for the 32-bit forms
    //    (0xcf000000 / 0xc1e0000000000000) and -2^63 for the 64-bit forms
    //    (0xdf000000 / 0xc3e0000000000000).
    ASSERT_EQUAL_128(0x0000000000000000, 0x4000000040000000, q16);
    ASSERT_EQUAL_128(0x0000000080000000, 0xcf000000cf000000, q17);
    ASSERT_EQUAL_128(0xc1e0000000000000, 0xc1e0000000000000, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x4000000040000000, q19);
    ASSERT_EQUAL_128(0x0000000080000000, 0xdf000000df000000, q20);
    ASSERT_EQUAL_128(0xc3e0000000000000, 0xc3e0000000000000, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f8000003f800000, q22);
    ASSERT_EQUAL_128(0x0000000080000000, 0xcf000000cf000000, q23);
    ASSERT_EQUAL_128(0xc1e0000000000000, 0xc1e0000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x3f8000003f800000, q25);
    ASSERT_EQUAL_128(0x0000000080000000, 0xdf000000df000000, q26);
    ASSERT_EQUAL_128(0xc3e0000000000000, 0xc3e0000000000000, q27);
  }
}
10745
10746
// Checks Tbl and Tbx with one to four table registers, for both the 16B and
// 8B destination arrangements.
TEST(neon_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // Table data. The table lists below use v30, v31, v0, v1, so the register
  // numbering wraps from v31 to v0.
  __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
  __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
  __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
  __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);

  // Index vectors, one per table size (v4 for one register up to v7 for four).
  __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
  __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
  __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
  __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);

  // Pre-fill the Tbl destinations so that bytes left over from these values
  // would be detected in the results.
  __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
  __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
  __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());

  // The Tbx destinations start with the same values as the Tbl destinations,
  // so the two sets of results can be compared directly.
  __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
  __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
  __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    // Tbl: out-of-range indices produce zero bytes. The 8B forms also leave
    // the upper 64 bits of the destination zero.
    ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
    ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
    ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
    ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);

    // Tbx: same lookups, but out-of-range indices keep the byte previously
    // held by the destination. The 8B forms still clear the upper 64 bits.
    ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
    ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
    ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
    ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
    ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);
  }
}
10820
10821
10822 } // namespace aarch64
10823 } // namespace vixl
10824