// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include <sys/mman.h>
28 
29 #include <cfloat>
30 #include <cmath>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 
35 #include "test-runner.h"
36 #include "test-utils.h"
37 #include "aarch64/test-utils-aarch64.h"
38 
39 #include "aarch64/cpu-aarch64.h"
40 #include "aarch64/disasm-aarch64.h"
41 #include "aarch64/macro-assembler-aarch64.h"
42 #include "aarch64/simulator-aarch64.h"
43 #include "test-assembler-aarch64.h"
44 
45 namespace vixl {
46 namespace aarch64 {
47 
// Exercises Ldr/Str on B registers with three MemOperand forms: immediate
// offset, post-index and pre-index. The test checks the loaded register
// contents, the bytes written to `dst`, and the final value of every base
// register.
TEST(load_store_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[3] = {0x12, 0x23, 0x34};
  uint8_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // Every access uses its own base register so that each base can be checked
  // independently after the run.
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  // Immediate-offset load paired with a post-index store.
  __ Ldr(b0, MemOperand(x17, sizeof(src[0])));
  __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  // Post-index load paired with a pre-index store.
  __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  // Pre-index load paired with an immediate-offset store.
  __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(b2, MemOperand(x22, sizeof(dst[0])));
  END();

  if (CAN_RUN()) {
    RUN();

    // Each loaded value is compared against the full Q register (upper bits
    // expected to be zero) and against the slot it was stored to.
    ASSERT_EQUAL_128(0, 0x23, q0);
    ASSERT_EQUAL_64(0x23, dst[0]);
    ASSERT_EQUAL_128(0, 0x12, q1);
    ASSERT_EQUAL_64(0x12, dst[2]);
    ASSERT_EQUAL_128(0, 0x34, q2);
    ASSERT_EQUAL_64(0x34, dst[1]);
    // Bases used with a plain offset (x17, x22) must be unchanged; the pre-
    // and post-index bases must have advanced by their immediate.
    ASSERT_EQUAL_64(src_base, x17);
    ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
    ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
    ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
    ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
    ASSERT_EQUAL_64(dst_base, x22);
  }
}
88 
89 
// Exercises Ldr/Str on H registers with three MemOperand forms: immediate
// offset, post-index and pre-index. The test checks the loaded register
// contents, the halfwords written to `dst`, and the final value of every base
// register.
TEST(load_store_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint16_t src[3] = {0x1234, 0x2345, 0x3456};
  uint16_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // Every access uses its own base register so that each base can be checked
  // independently after the run.
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  // Immediate-offset load paired with a post-index store.
  __ Ldr(h0, MemOperand(x17, sizeof(src[0])));
  __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  // Post-index load paired with a pre-index store.
  __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  // Pre-index load paired with an immediate-offset store.
  __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(h2, MemOperand(x22, sizeof(dst[0])));
  END();

  if (CAN_RUN()) {
    RUN();

    // Each loaded value is compared against the full Q register (upper bits
    // expected to be zero) and against the slot it was stored to.
    ASSERT_EQUAL_128(0, 0x2345, q0);
    ASSERT_EQUAL_64(0x2345, dst[0]);
    ASSERT_EQUAL_128(0, 0x1234, q1);
    ASSERT_EQUAL_64(0x1234, dst[2]);
    ASSERT_EQUAL_128(0, 0x3456, q2);
    ASSERT_EQUAL_64(0x3456, dst[1]);
    // Bases used with a plain offset (x17, x22) must be unchanged; the pre-
    // and post-index bases must have advanced by their immediate.
    ASSERT_EQUAL_64(src_base, x17);
    ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
    ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
    ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
    ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
    ASSERT_EQUAL_64(dst_base, x22);
  }
}
130 
131 
// Exercises Ldr/Str on Q registers with three MemOperand forms: immediate
// offset, post-index and pre-index. `src` holds three 16-byte values; each is
// loaded with a different form and stored to a different 16-byte slot of
// `dst`.
TEST(load_store_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, 0x01, 0x23,
                     0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x21, 0x43, 0x65, 0x87,
                     0xa9, 0xcb, 0xed, 0x0f, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc,
                     0xde, 0xf0, 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
                     0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};

  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  // Every access uses its own base register so that each base can be checked
  // independently after the run.
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  // Immediate-offset load paired with a post-index store.
  __ Ldr(q0, MemOperand(x17, 16));
  __ Str(q0, MemOperand(x18, 16, PostIndex));
  // Post-index load paired with a pre-index store.
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Str(q1, MemOperand(x20, 32, PreIndex));
  // Pre-index load paired with an immediate-offset store.
  __ Ldr(q2, MemOperand(x21, 32, PreIndex));
  __ Str(q2, MemOperand(x22, 16));
  END();

  if (CAN_RUN()) {
    RUN();

    // Each Q value is checked in the register and as the two 64-bit halves of
    // the slot it was stored to (low half first).
    ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
    ASSERT_EQUAL_64(0x0fedcba987654321, dst[0]);
    ASSERT_EQUAL_64(0xf0debc9a78563412, dst[1]);
    ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
    ASSERT_EQUAL_64(0xfedcba9876543210, dst[4]);
    ASSERT_EQUAL_64(0xefcdab8967452301, dst[5]);
    ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
    ASSERT_EQUAL_64(0x02e0ceac8a684624, dst[2]);
    ASSERT_EQUAL_64(0x200eeccaa8866442, dst[3]);
    // Bases used with a plain offset (x17, x22) must be unchanged; the pre-
    // and post-index bases must have advanced by their immediate.
    ASSERT_EQUAL_64(src_base, x17);
    ASSERT_EQUAL_64(dst_base + 16, x18);
    ASSERT_EQUAL_64(src_base + 16, x19);
    ASSERT_EQUAL_64(dst_base + 32, x20);
    ASSERT_EQUAL_64(src_base + 32, x21);
    ASSERT_EQUAL_64(dst_base, x22);
  }
}
180 
181 
TEST(load_store_v_regoffset)182 TEST(load_store_v_regoffset) {
183   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
184 
185   uint8_t src[64];
186   for (unsigned i = 0; i < sizeof(src); i++) {
187     src[i] = i;
188   }
189   uint8_t dst[64];
190   memset(dst, 0, sizeof(dst));
191 
192   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
193   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
194 
195   START();
196   __ Mov(x17, src_base + 16);
197   __ Mov(x18, 1);
198   __ Mov(w19, -1);
199   __ Mov(x20, dst_base - 1);
200 
201   __ Ldr(b0, MemOperand(x17, x18));
202   __ Ldr(b1, MemOperand(x17, x19, SXTW));
203 
204   __ Ldr(h2, MemOperand(x17, x18));
205   __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
206   __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
207   __ Ldr(h5, MemOperand(x17, x18, LSL, 1));
208 
209   __ Ldr(s16, MemOperand(x17, x18));
210   __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
211   __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
212   __ Ldr(s19, MemOperand(x17, x18, LSL, 2));
213 
214   __ Ldr(d20, MemOperand(x17, x18));
215   __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
216   __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
217   __ Ldr(d23, MemOperand(x17, x18, LSL, 3));
218 
219   __ Ldr(q24, MemOperand(x17, x18));
220   __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
221   __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
222   __ Ldr(q27, MemOperand(x17, x18, LSL, 4));
223 
224   // Store [bhsdq]27 to adjacent memory locations, then load again to check.
225   __ Str(b27, MemOperand(x20, x18));
226   __ Str(h27, MemOperand(x20, x18, UXTW, 1));
227   __ Add(x20, x20, 8);
228   __ Str(s27, MemOperand(x20, x19, SXTW, 2));
229   __ Sub(x20, x20, 8);
230   __ Str(d27, MemOperand(x20, x18, LSL, 3));
231   __ Add(x20, x20, 32);
232   __ Str(q27, MemOperand(x20, x19, SXTW, 4));
233 
234   __ Sub(x20, x20, 32);
235   __ Ldr(q6, MemOperand(x20, x18));
236   __ Ldr(q7, MemOperand(x20, x18, LSL, 4));
237 
238   END();
239 
240   if (CAN_RUN()) {
241     RUN();
242 
243     ASSERT_EQUAL_128(0, 0x11, q0);
244     ASSERT_EQUAL_128(0, 0x0f, q1);
245     ASSERT_EQUAL_128(0, 0x1211, q2);
246     ASSERT_EQUAL_128(0, 0x1312, q3);
247     ASSERT_EQUAL_128(0, 0x0f0e, q4);
248     ASSERT_EQUAL_128(0, 0x1312, q5);
249     ASSERT_EQUAL_128(0, 0x14131211, q16);
250     ASSERT_EQUAL_128(0, 0x17161514, q17);
251     ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
252     ASSERT_EQUAL_128(0, 0x17161514, q19);
253     ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
254     ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
255     ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
256     ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
257     ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
258     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
259     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
260     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
261     ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
262     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);
263   }
264 }
265 
// Exercises Ldp/Stp on a pair of Q registers: a post-index pair load from
// `src`, followed by a pre-index pair store of the same registers in swapped
// order into `dst`.
TEST(ldp_stp_quad) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint64_t src[4] = {0x0123456789abcdef,
                     0xaaaaaaaa55555555,
                     0xfedcba9876543210,
                     0x55555555aaaaaaaa};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  // The load uses q31 then q0; the store writes them back as q0 then q31.
  __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(src[0]), PostIndex));
  __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(dst[1]), PreIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
    ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
    // The pre-index store starts 16 bytes into dst, so the first two entries
    // must still be zero.
    ASSERT_EQUAL_64(0, dst[0]);
    ASSERT_EQUAL_64(0, dst[1]);
    ASSERT_EQUAL_64(0xfedcba9876543210, dst[2]);
    ASSERT_EQUAL_64(0x55555555aaaaaaaa, dst[3]);
    ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
    ASSERT_EQUAL_64(0xaaaaaaaa55555555, dst[5]);
    // Both bases must reflect their write-back.
    ASSERT_EQUAL_64(src_base + 4 * sizeof(src[0]), x16);
    ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[1]), x17);
  }
}
299 
// Exercises Ld1 with lists of one to four registers in the 64-bit vector
// formats (V8B, V4H, V2S, V1D), using a base register with no offset. The
// base is advanced by one byte between loads, and `src` holds src[i] == i, so
// each expected value identifies the bytes that were read.
TEST(neon_ld1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 32 bytes for the largest (four-register) load plus the 5 bytes by which
  // the base address is advanced.
  uint8_t src[32 + 5];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // Fill all 128 bits of q2 first, so the check on q2 below shows that the
  // following 64-bit Ld1 leaves the top half zero.
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
  __ Ld1(v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps around from v31 to v0.
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
    ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
    ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
    ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
    ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
    ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
    ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
    ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
    ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
    ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
    ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
    ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
  }
}
348 
349 
TEST(neon_ld1_d_postindex)350 TEST(neon_ld1_d_postindex) {
351   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
352 
353   uint8_t src[32 + 5];
354   for (unsigned i = 0; i < sizeof(src); i++) {
355     src[i] = i;
356   }
357   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
358 
359   START();
360   __ Mov(x17, src_base);
361   __ Mov(x18, src_base + 1);
362   __ Mov(x19, src_base + 2);
363   __ Mov(x20, src_base + 3);
364   __ Mov(x21, src_base + 4);
365   __ Mov(x22, src_base + 5);
366   __ Mov(x23, 1);
367   __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
368   __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
369   __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
370   __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
371   __ Ld1(v16.V2S(),
372          v17.V2S(),
373          v18.V2S(),
374          v19.V2S(),
375          MemOperand(x20, 32, PostIndex));
376   __ Ld1(v30.V2S(),
377          v31.V2S(),
378          v0.V2S(),
379          v1.V2S(),
380          MemOperand(x21, 32, PostIndex));
381   __ Ld1(v20.V1D(),
382          v21.V1D(),
383          v22.V1D(),
384          v23.V1D(),
385          MemOperand(x22, 32, PostIndex));
386   END();
387 
388   if (CAN_RUN()) {
389     RUN();
390 
391     ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
392     ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
393     ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
394     ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
395     ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
396     ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
397     ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
398     ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
399     ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
400     ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
401     ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
402     ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
403     ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
404     ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
405     ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
406     ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
407     ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
408     ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
409     ASSERT_EQUAL_64(src_base + 1, x17);
410     ASSERT_EQUAL_64(src_base + 1 + 16, x18);
411     ASSERT_EQUAL_64(src_base + 2 + 24, x19);
412     ASSERT_EQUAL_64(src_base + 3 + 32, x20);
413     ASSERT_EQUAL_64(src_base + 4 + 32, x21);
414     ASSERT_EQUAL_64(src_base + 5 + 32, x22);
415   }
416 }
417 
418 
// Exercises Ld1 with lists of one to four registers in the 128-bit vector
// formats (V16B, V8H, V4S, V2D), using a base register with no offset. The
// base is advanced by one byte between loads, and `src` holds src[i] == i, so
// each expected value identifies the bytes that were read.
TEST(neon_ld1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64 bytes for the largest (four-register) load plus the 4 bytes by which
  // the base address is advanced.
  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld1(v2.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register list wraps around from v31 to v0.
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
  }
}
460 
461 
// Exercises Ld1 with lists of one to four registers in the 128-bit vector
// formats (V16B, V8H, V4S, V2D) using post-index MemOperands. The first load
// takes its post-index amount from a register (x22); the others use an
// immediate. Loaded data and the written-back base registers are both
// checked.
TEST(neon_ld1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64 bytes for the largest (four-register) load plus the largest starting
  // offset used below (4).
  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One base register per load, each starting one byte further into src.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  // Post-index amount for the register-offset form.
  __ Mov(x22, 1);
  __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld1(v16.V4S(),
         v17.V4S(),
         v18.V4S(),
         v19.V4S(),
         MemOperand(x20, 64, PostIndex));
  // This register list wraps around from v31 to v0.
  __ Ld1(v30.V2D(),
         v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
    ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
    ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
    ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
    ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
    ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
    ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
    // Each base must equal its starting address plus its post-index amount.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 32, x18);
    ASSERT_EQUAL_64(src_base + 2 + 48, x19);
    ASSERT_EQUAL_64(src_base + 3 + 64, x20);
    ASSERT_EQUAL_64(src_base + 4 + 64, x21);
  }
}
517 
518 
// Exercises the single-lane form of Ld1 for B, H, S and D lanes. The first
// part fills every lane of a register one lane at a time; the second part
// loads one lane into a register that already holds data and checks that
// only that lane changes.
TEST(neon_ld1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  // Each iteration issues one Ld1 for lane i and then bumps x17 by one byte,
  // going from the highest lane down to lane 0.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld1(v0.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld1(v1.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld1(v2.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld1(v3.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  // Each register is first filled with src[0..15] by Ldr, then one lane is
  // replaced with the element at the start of src.
  __ Mov(x17, src_base);
  __ Ldr(q4, MemOperand(x17));
  __ Ld1(v4.B(), 4, MemOperand(x17));
  __ Ldr(q5, MemOperand(x17));
  __ Ld1(v5.H(), 3, MemOperand(x17));
  __ Ldr(q6, MemOperand(x17));
  __ Ld1(v6.S(), 2, MemOperand(x17));
  __ Ldr(q7, MemOperand(x17));
  __ Ld1(v7.D(), 1, MemOperand(x17));

  END();

  if (CAN_RUN()) {
    RUN();

    // q0-q3: registers built lane by lane.
    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
    ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
    ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
    // q4-q7: initialised registers with a single lane replaced.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
    ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
  }
}
581 
// Exercises Ld2 with the 64-bit vector formats (V8B, V4H, V2S), using a base
// register with no offset. The base is advanced by one byte between loads,
// and `src` holds src[i] == i, so the expected values show how elements are
// split between the two destination registers.
TEST(neon_ld2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register pair wraps around from v31 to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Even-numbered elements are expected in the first register of each pair
    // and odd-numbered elements in the second.
    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);
  }
}
615 
// Exercises Ld2 with the 64-bit vector formats (V8B, V4H, V2S) using
// post-index MemOperands. The first load takes its post-index amount from a
// register (x22); the others use an immediate of 16. Loaded data and the
// written-back base registers are both checked.
TEST(neon_ld2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One base register per load, each starting one byte further into src.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  // Post-index amount for the register-offset form.
  __ Mov(x22, 1);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
  // v5 is written by this load and again by the next one, so only v4 keeps
  // this load's result; the check on q5 below matches the V4H load.
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
  // This register pair wraps around from v31 to v0.
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5);
    ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
    ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);

    // Each base must equal its starting address plus its post-index amount.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 16, x18);
    ASSERT_EQUAL_64(src_base + 2 + 16, x19);
    ASSERT_EQUAL_64(src_base + 3 + 16, x20);
    ASSERT_EQUAL_64(src_base + 4 + 16, x21);
  }
}
659 
660 
// Exercises Ld2 with the 128-bit vector formats (V16B, V8H, V4S, V2D), using
// a base register with no offset. The base is advanced by one byte between
// loads, and `src` holds src[i] == i, so the expected values show how
// elements are split between the two destination registers.
TEST(neon_ld2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  // This register pair wraps around from v31 to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Even-numbered elements are expected in the first register of each pair
    // and odd-numbered elements in the second.
    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
  }
}
698 
699 
// Exercises Ld2 with the 128-bit vector formats (V16B, V8H, V4S, V2D) using
// post-index MemOperands. The first load takes its post-index amount from a
// register (x22); the others use an immediate of 32. Loaded data and the
// written-back base registers are both checked.
TEST(neon_ld2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // One base register per load, each starting one byte further into src.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  // Post-index amount for the register-offset form.
  __ Mov(x22, 1);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
  // This register pair wraps around from v31 to v0.
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
    ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
    ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
    ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
    ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
    ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
    ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
    ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

    // Each base must equal its starting address plus its post-index amount.
    ASSERT_EQUAL_64(src_base + 1, x17);
    ASSERT_EQUAL_64(src_base + 1 + 32, x18);
    ASSERT_EQUAL_64(src_base + 2 + 32, x19);
    ASSERT_EQUAL_64(src_base + 3 + 32, x20);
    ASSERT_EQUAL_64(src_base + 4 + 32, x21);
  }
}
745 
746 
TEST(neon_ld2_lane)747 TEST(neon_ld2_lane) {
748   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
749 
750   uint8_t src[64];
751   for (unsigned i = 0; i < sizeof(src); i++) {
752     src[i] = i;
753   }
754   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
755 
756   START();
757 
758   // Test loading whole register by element.
759   __ Mov(x17, src_base);
760   for (int i = 15; i >= 0; i--) {
761     __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
762     __ Add(x17, x17, 1);
763   }
764 
765   __ Mov(x17, src_base);
766   for (int i = 7; i >= 0; i--) {
767     __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
768     __ Add(x17, x17, 1);
769   }
770 
771   __ Mov(x17, src_base);
772   for (int i = 3; i >= 0; i--) {
773     __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
774     __ Add(x17, x17, 1);
775   }
776 
777   __ Mov(x17, src_base);
778   for (int i = 1; i >= 0; i--) {
779     __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
780     __ Add(x17, x17, 1);
781   }
782 
783   // Test loading a single element into an initialised register.
784   __ Mov(x17, src_base);
785   __ Mov(x4, x17);
786   __ Ldr(q8, MemOperand(x4, 16, PostIndex));
787   __ Ldr(q9, MemOperand(x4));
788   __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
789   __ Mov(x5, x17);
790   __ Ldr(q10, MemOperand(x5, 16, PostIndex));
791   __ Ldr(q11, MemOperand(x5));
792   __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
793   __ Mov(x6, x17);
794   __ Ldr(q12, MemOperand(x6, 16, PostIndex));
795   __ Ldr(q13, MemOperand(x6));
796   __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
797   __ Mov(x7, x17);
798   __ Ldr(q14, MemOperand(x7, 16, PostIndex));
799   __ Ldr(q15, MemOperand(x7));
800   __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));
801 
802   END();
803 
804   if (CAN_RUN()) {
805     RUN();
806 
807     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
808     ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
809     ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
810     ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
811     ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
812     ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
813     ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
814     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
815     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
816     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
817     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
818     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
819     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
820     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
821     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
822     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
823   }
824 }
825 
826 
TEST(neon_ld2_lane_postindex)827 TEST(neon_ld2_lane_postindex) {
828   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
829 
830   uint8_t src[64];
831   for (unsigned i = 0; i < sizeof(src); i++) {
832     src[i] = i;
833   }
834   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
835 
836   START();
837   __ Mov(x17, src_base);
838   __ Mov(x18, src_base);
839   __ Mov(x19, src_base);
840   __ Mov(x20, src_base);
841   __ Mov(x21, src_base);
842   __ Mov(x22, src_base);
843   __ Mov(x23, src_base);
844   __ Mov(x24, src_base);
845 
846   // Test loading whole register by element.
847   for (int i = 15; i >= 0; i--) {
848     __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
849   }
850 
851   for (int i = 7; i >= 0; i--) {
852     __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
853   }
854 
855   for (int i = 3; i >= 0; i--) {
856     __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
857   }
858 
859   for (int i = 1; i >= 0; i--) {
860     __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
861   }
862 
863   // Test loading a single element into an initialised register.
864   __ Mov(x25, 1);
865   __ Mov(x4, x21);
866   __ Ldr(q8, MemOperand(x4, 16, PostIndex));
867   __ Ldr(q9, MemOperand(x4));
868   __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
869   __ Add(x25, x25, 1);
870 
871   __ Mov(x5, x22);
872   __ Ldr(q10, MemOperand(x5, 16, PostIndex));
873   __ Ldr(q11, MemOperand(x5));
874   __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
875   __ Add(x25, x25, 1);
876 
877   __ Mov(x6, x23);
878   __ Ldr(q12, MemOperand(x6, 16, PostIndex));
879   __ Ldr(q13, MemOperand(x6));
880   __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
881   __ Add(x25, x25, 1);
882 
883   __ Mov(x7, x24);
884   __ Ldr(q14, MemOperand(x7, 16, PostIndex));
885   __ Ldr(q15, MemOperand(x7));
886   __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));
887 
888   END();
889 
890   if (CAN_RUN()) {
891     RUN();
892 
893     ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
894     ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
895     ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
896     ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
897     ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
898     ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
899     ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
900     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
901     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
902     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
903     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
904     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
905     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
906     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
907     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
908     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
909 
910 
911     ASSERT_EQUAL_64(src_base + 32, x17);
912     ASSERT_EQUAL_64(src_base + 32, x18);
913     ASSERT_EQUAL_64(src_base + 32, x19);
914     ASSERT_EQUAL_64(src_base + 32, x20);
915     ASSERT_EQUAL_64(src_base + 1, x21);
916     ASSERT_EQUAL_64(src_base + 2, x22);
917     ASSERT_EQUAL_64(src_base + 3, x23);
918     ASSERT_EQUAL_64(src_base + 4, x24);
919   }
920 }
921 
922 
// LD2R: load one two-element structure and replicate each element to every
// lane of its destination register. Each arrangement is exercised with plain
// base-register addressing; x17 is advanced manually between loads.
TEST(neon_ld2_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Only x17 is needed here: no load in this test uses a register offset.
  __ Mov(x17, src_base + 1);
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Add(x17, x17, 2);
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements are expected to leave the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
  }
}
969 
970 
// LD2R with post-index addressing. A single base register (x17) is threaded
// through every load, alternating between immediate post-index values and the
// register post-index held in x18 (always 1).
TEST(neon_ld2_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t data[64];
  for (unsigned pos = 0; pos < sizeof(data); ++pos) {
    data[pos] = pos;
  }
  const uintptr_t base = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, base + 1);
  __ Mov(x18, 1);  // Register post-index amount.

  // Byte arrangements.
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));

  // Halfword arrangements.
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));

  // Word arrangements.
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));

  // Doubleword arrangement.
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements are expected to leave the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
    ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
    ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
    ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
    ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
    ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
    ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);

    // Accumulated write-back: 1 + 2 + 1 + 1 + 4 + 1 + 8 + 16.
    ASSERT_EQUAL_64(base + 34, x17);
  }
}
1012 
1013 
// LD3 (multiple structures) into 64-bit (D-sized) register triples. Each load
// starts one byte further into the buffer than the previous one; the last
// register list wraps around from v31 to v0.
TEST(neon_ld3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t data[64 + 4];
  for (unsigned pos = 0; pos < sizeof(data); ++pos) {
    data[pos] = pos;
  }
  const uintptr_t base = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, base);

  // Offset 0, bytes.
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 1, bytes.
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 2, halfwords.
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 3, words.
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);
  }
}
1051 
1052 
// LD3 (multiple structures) into 64-bit register triples with post-index
// addressing. The first load uses a register post-index (x22 == 1); the
// remaining loads use the immediate form with a 24-byte step.
TEST(neon_ld3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t data[32 + 4];
  for (unsigned pos = 0; pos < sizeof(data); ++pos) {
    data[pos] = pos;
  }
  const uintptr_t base = reinterpret_cast<uintptr_t>(data);

  START();
  // Each load gets its own base register, starting at offsets 0 to 4.
  __ Mov(x17, base);
  __ Mov(x18, base + 1);
  __ Mov(x19, base + 2);
  __ Mov(x20, base + 3);
  __ Mov(x21, base + 4);
  __ Mov(x22, 1);  // Register post-index amount.

  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
  // The register list wraps around from v31 to v0.
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
    ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
    ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
    ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);

    // Base register write-back.
    ASSERT_EQUAL_64(base + 1, x17);
    ASSERT_EQUAL_64(base + 1 + 24, x18);
    ASSERT_EQUAL_64(base + 2 + 24, x19);
    ASSERT_EQUAL_64(base + 3 + 24, x20);
    ASSERT_EQUAL_64(base + 4 + 24, x21);
  }
}
1102 
1103 
// LD3 (multiple structures) into 128-bit (Q-sized) register triples. Each
// load starts one byte further into the buffer than the previous one; the
// last register list wraps around from v31 to v0.
TEST(neon_ld3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t data[64 + 4];
  for (unsigned pos = 0; pos < sizeof(data); ++pos) {
    data[pos] = pos;
  }
  const uintptr_t base = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, base);

  // Offset 0, bytes.
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 1, bytes.
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 2, halfwords.
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 3, words.
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 4, doublewords.
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
  }
}
1146 
1147 
// LD3 (multiple structures) into 128-bit register triples with post-index
// addressing. The first load uses a register post-index (x22 == 1); the
// remaining loads use the immediate form with a 48-byte step.
TEST(neon_ld3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t data[64 + 4];
  for (unsigned pos = 0; pos < sizeof(data); ++pos) {
    data[pos] = pos;
  }
  const uintptr_t base = reinterpret_cast<uintptr_t>(data);

  START();
  // Each load gets its own base register, starting at offsets 0 to 4.
  __ Mov(x17, base);
  __ Mov(x18, base + 1);
  __ Mov(x19, base + 2);
  __ Mov(x20, base + 3);
  __ Mov(x21, base + 4);
  __ Mov(x22, 1);  // Register post-index amount.

  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
  // The register list wraps around from v31 to v0.
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
    ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
    ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
    ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
    ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
    ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
    ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
    ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
    ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
    ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
    ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

    // Base register write-back.
    ASSERT_EQUAL_64(base + 1, x17);
    ASSERT_EQUAL_64(base + 1 + 48, x18);
    ASSERT_EQUAL_64(base + 2 + 48, x19);
    ASSERT_EQUAL_64(base + 3 + 48, x20);
    ASSERT_EQUAL_64(base + 4 + 48, x21);
  }
}
1198 
1199 
TEST(neon_ld3_lane)1200 TEST(neon_ld3_lane) {
1201   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1202 
1203   uint8_t src[64];
1204   for (unsigned i = 0; i < sizeof(src); i++) {
1205     src[i] = i;
1206   }
1207   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1208 
1209   START();
1210 
1211   // Test loading whole register by element.
1212   __ Mov(x17, src_base);
1213   for (int i = 15; i >= 0; i--) {
1214     __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
1215     __ Add(x17, x17, 1);
1216   }
1217 
1218   __ Mov(x17, src_base);
1219   for (int i = 7; i >= 0; i--) {
1220     __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
1221     __ Add(x17, x17, 1);
1222   }
1223 
1224   __ Mov(x17, src_base);
1225   for (int i = 3; i >= 0; i--) {
1226     __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
1227     __ Add(x17, x17, 1);
1228   }
1229 
1230   __ Mov(x17, src_base);
1231   for (int i = 1; i >= 0; i--) {
1232     __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
1233     __ Add(x17, x17, 1);
1234   }
1235 
1236   // Test loading a single element into an initialised register.
1237   __ Mov(x17, src_base);
1238   __ Mov(x4, x17);
1239   __ Ldr(q12, MemOperand(x4, 16, PostIndex));
1240   __ Ldr(q13, MemOperand(x4, 16, PostIndex));
1241   __ Ldr(q14, MemOperand(x4));
1242   __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
1243   __ Mov(x5, x17);
1244   __ Ldr(q15, MemOperand(x5, 16, PostIndex));
1245   __ Ldr(q16, MemOperand(x5, 16, PostIndex));
1246   __ Ldr(q17, MemOperand(x5));
1247   __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
1248   __ Mov(x6, x17);
1249   __ Ldr(q18, MemOperand(x6, 16, PostIndex));
1250   __ Ldr(q19, MemOperand(x6, 16, PostIndex));
1251   __ Ldr(q20, MemOperand(x6));
1252   __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
1253   __ Mov(x7, x17);
1254   __ Ldr(q21, MemOperand(x7, 16, PostIndex));
1255   __ Ldr(q22, MemOperand(x7, 16, PostIndex));
1256   __ Ldr(q23, MemOperand(x7));
1257   __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));
1258 
1259   END();
1260 
1261   if (CAN_RUN()) {
1262     RUN();
1263 
1264     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
1265     ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
1266     ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
1267     ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
1268     ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
1269     ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
1270     ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
1271     ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
1272     ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
1273     ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
1274     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
1275     ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);
1276     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
1277     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
1278     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
1279     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
1280     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
1281     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
1282   }
1283 }
1284 
1285 
TEST(neon_ld3_lane_postindex)1286 TEST(neon_ld3_lane_postindex) {
1287   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1288 
1289   uint8_t src[64];
1290   for (unsigned i = 0; i < sizeof(src); i++) {
1291     src[i] = i;
1292   }
1293   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1294 
1295   START();
1296 
1297   // Test loading whole register by element.
1298   __ Mov(x17, src_base);
1299   __ Mov(x18, src_base);
1300   __ Mov(x19, src_base);
1301   __ Mov(x20, src_base);
1302   __ Mov(x21, src_base);
1303   __ Mov(x22, src_base);
1304   __ Mov(x23, src_base);
1305   __ Mov(x24, src_base);
1306   for (int i = 15; i >= 0; i--) {
1307     __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex));
1308   }
1309 
1310   for (int i = 7; i >= 0; i--) {
1311     __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex));
1312   }
1313 
1314   for (int i = 3; i >= 0; i--) {
1315     __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex));
1316   }
1317 
1318   for (int i = 1; i >= 0; i--) {
1319     __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex));
1320   }
1321 
1322 
1323   // Test loading a single element into an initialised register.
1324   __ Mov(x25, 1);
1325   __ Mov(x4, x21);
1326   __ Ldr(q12, MemOperand(x4, 16, PostIndex));
1327   __ Ldr(q13, MemOperand(x4, 16, PostIndex));
1328   __ Ldr(q14, MemOperand(x4));
1329   __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
1330   __ Add(x25, x25, 1);
1331 
1332   __ Mov(x5, x22);
1333   __ Ldr(q15, MemOperand(x5, 16, PostIndex));
1334   __ Ldr(q16, MemOperand(x5, 16, PostIndex));
1335   __ Ldr(q17, MemOperand(x5));
1336   __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
1337   __ Add(x25, x25, 1);
1338 
1339   __ Mov(x6, x23);
1340   __ Ldr(q18, MemOperand(x6, 16, PostIndex));
1341   __ Ldr(q19, MemOperand(x6, 16, PostIndex));
1342   __ Ldr(q20, MemOperand(x6));
1343   __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
1344   __ Add(x25, x25, 1);
1345 
1346   __ Mov(x7, x24);
1347   __ Ldr(q21, MemOperand(x7, 16, PostIndex));
1348   __ Ldr(q22, MemOperand(x7, 16, PostIndex));
1349   __ Ldr(q23, MemOperand(x7));
1350   __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));
1351 
1352   END();
1353 
1354   if (CAN_RUN()) {
1355     RUN();
1356 
1357     ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
1358     ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
1359     ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
1360     ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
1361     ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
1362     ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
1363     ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
1364     ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
1365     ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
1366     ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
1367     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
1368     ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);
1369     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
1370     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
1371     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
1372     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
1373     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
1374     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
1375     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
1376     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
1377     ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
1378     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
1379     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
1380     ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);
1381 
1382     ASSERT_EQUAL_64(src_base + 48, x17);
1383     ASSERT_EQUAL_64(src_base + 48, x18);
1384     ASSERT_EQUAL_64(src_base + 48, x19);
1385     ASSERT_EQUAL_64(src_base + 48, x20);
1386     ASSERT_EQUAL_64(src_base + 1, x21);
1387     ASSERT_EQUAL_64(src_base + 2, x22);
1388     ASSERT_EQUAL_64(src_base + 3, x23);
1389     ASSERT_EQUAL_64(src_base + 4, x24);
1390   }
1391 }
1392 
1393 
// LD3R: load one three-element structure and replicate each element to every
// lane of its destination register. Each arrangement is exercised with plain
// base-register addressing; x17 is advanced manually between loads.
TEST(neon_ld3_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  // Only x17 is needed here: no load in this test uses a register offset.
  __ Mov(x17, src_base + 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 3);
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements are expected to leave the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
  }
}
1447 
1448 
// LD3R with post-index addressing. A single base register (x17) is threaded
// through every load, alternating between immediate post-index values and the
// register post-index held in x18 (always 1).
TEST(neon_ld3_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  // All code generation happens between START() and END(), as in the other
  // tests in this file.
  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    // 64-bit arrangements are expected to leave the upper half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
    ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
    ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
    ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
    ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
    ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
    ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
    ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
    ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
    ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
    ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
    ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
    ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
    ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

    // Accumulated write-back: 1 + 3 + 1 + 1 + 6 + 1 + 12 + 24.
    ASSERT_EQUAL_64(src_base + 49, x17);
  }
}
1498 
1499 
// LD4 (multiple structures) into 64-bit (D-sized) register quads. Each load
// starts one byte further into the buffer than the previous one; the last
// register list wraps around from v31 to v0.
TEST(neon_ld4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t data[64 + 4];
  for (unsigned pos = 0; pos < sizeof(data); ++pos) {
    data[pos] = pos;
  }
  const uintptr_t base = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, base);

  // Offset 0, bytes.
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 1, bytes.
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 2, halfwords.
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 3, words.
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The upper 64 bits of every destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
    ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
    ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
    ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
    ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
    ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
    ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);
  }
}
1541 
1542 
TEST(neon_ld4_d_postindex)1543 TEST(neon_ld4_d_postindex) {
1544   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1545 
1546   uint8_t src[32 + 4];
1547   for (unsigned i = 0; i < sizeof(src); i++) {
1548     src[i] = i;
1549   }
1550   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1551 
1552   START();
1553   __ Mov(x17, src_base);
1554   __ Mov(x18, src_base + 1);
1555   __ Mov(x19, src_base + 2);
1556   __ Mov(x20, src_base + 3);
1557   __ Mov(x21, src_base + 4);
1558   __ Mov(x22, 1);
1559   __ Ld4(v2.V8B(),
1560          v3.V8B(),
1561          v4.V8B(),
1562          v5.V8B(),
1563          MemOperand(x17, x22, PostIndex));
1564   __ Ld4(v6.V8B(),
1565          v7.V8B(),
1566          v8.V8B(),
1567          v9.V8B(),
1568          MemOperand(x18, 32, PostIndex));
1569   __ Ld4(v10.V4H(),
1570          v11.V4H(),
1571          v12.V4H(),
1572          v13.V4H(),
1573          MemOperand(x19, 32, PostIndex));
1574   __ Ld4(v14.V2S(),
1575          v15.V2S(),
1576          v16.V2S(),
1577          v17.V2S(),
1578          MemOperand(x20, 32, PostIndex));
1579   __ Ld4(v30.V2S(),
1580          v31.V2S(),
1581          v0.V2S(),
1582          v1.V2S(),
1583          MemOperand(x21, 32, PostIndex));
1584   END();
1585 
1586   if (CAN_RUN()) {
1587     RUN();
1588 
1589     ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
1590     ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
1591     ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
1592     ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
1593     ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
1594     ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
1595     ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
1596     ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
1597     ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
1598     ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
1599     ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
1600     ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
1601     ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
1602     ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
1603     ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
1604     ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
1605     ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
1606     ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
1607     ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
1608     ASSERT_EQUAL_128(0, 0x2322212013121110, q1);
1609 
1610 
1611     ASSERT_EQUAL_64(src_base + 1, x17);
1612     ASSERT_EQUAL_64(src_base + 1 + 32, x18);
1613     ASSERT_EQUAL_64(src_base + 2 + 32, x19);
1614     ASSERT_EQUAL_64(src_base + 3 + 32, x20);
1615     ASSERT_EQUAL_64(src_base + 4 + 32, x21);
1616   }
1617 }
1618 
1619 
// LD4 (multiple structures) into 128-bit (Q-sized) register quads. Each load
// starts one byte further into the buffer than the previous one, so the final
// load reads up to the last byte of the 68-byte buffer.
TEST(neon_ld4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source buffer in which every byte holds its own offset.
  uint8_t data[64 + 4];
  for (unsigned pos = 0; pos < sizeof(data); ++pos) {
    data[pos] = pos;
  }
  const uintptr_t base = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, base);

  // Offset 0, bytes.
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 1, bytes.
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 2, halfwords.
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 3, words.
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);

  // Offset 4, doublewords.
  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
    ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
    ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
    ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
    ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
    ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
    ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
    ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
    ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
    ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
    ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
    ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
    ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
    ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
    ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
    ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
    ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);
  }
}
1667 
1668 
TEST(neon_ld4_q_postindex)1669 TEST(neon_ld4_q_postindex) {
1670   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1671 
1672   uint8_t src[64 + 4];
1673   for (unsigned i = 0; i < sizeof(src); i++) {
1674     src[i] = i;
1675   }
1676   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1677 
1678   START();
1679   __ Mov(x17, src_base);
1680   __ Mov(x18, src_base + 1);
1681   __ Mov(x19, src_base + 2);
1682   __ Mov(x20, src_base + 3);
1683   __ Mov(x21, src_base + 4);
1684   __ Mov(x22, 1);
1685 
1686   __ Ld4(v2.V16B(),
1687          v3.V16B(),
1688          v4.V16B(),
1689          v5.V16B(),
1690          MemOperand(x17, x22, PostIndex));
1691   __ Ld4(v6.V16B(),
1692          v7.V16B(),
1693          v8.V16B(),
1694          v9.V16B(),
1695          MemOperand(x18, 64, PostIndex));
1696   __ Ld4(v10.V8H(),
1697          v11.V8H(),
1698          v12.V8H(),
1699          v13.V8H(),
1700          MemOperand(x19, 64, PostIndex));
1701   __ Ld4(v14.V4S(),
1702          v15.V4S(),
1703          v16.V4S(),
1704          v17.V4S(),
1705          MemOperand(x20, 64, PostIndex));
1706   __ Ld4(v30.V2D(),
1707          v31.V2D(),
1708          v0.V2D(),
1709          v1.V2D(),
1710          MemOperand(x21, 64, PostIndex));
1711   END();
1712 
1713   if (CAN_RUN()) {
1714     RUN();
1715 
1716     ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
1717     ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
1718     ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
1719     ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
1720     ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
1721     ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
1722     ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
1723     ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
1724     ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
1725     ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
1726     ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
1727     ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
1728     ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
1729     ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
1730     ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
1731     ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
1732     ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
1733     ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
1734     ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
1735     ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);
1736 
1737 
1738     ASSERT_EQUAL_64(src_base + 1, x17);
1739     ASSERT_EQUAL_64(src_base + 1 + 64, x18);
1740     ASSERT_EQUAL_64(src_base + 2 + 64, x19);
1741     ASSERT_EQUAL_64(src_base + 3 + 64, x20);
1742     ASSERT_EQUAL_64(src_base + 4 + 64, x21);
1743   }
1744 }
1745 
1746 
TEST(neon_ld4_lane)1747 TEST(neon_ld4_lane) {
1748   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1749 
1750   uint8_t src[64];
1751   for (unsigned i = 0; i < sizeof(src); i++) {
1752     src[i] = i;
1753   }
1754   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1755 
1756   START();
1757 
1758   // Test loading whole register by element.
1759   __ Mov(x17, src_base);
1760   for (int i = 15; i >= 0; i--) {
1761     __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17));
1762     __ Add(x17, x17, 1);
1763   }
1764 
1765   __ Mov(x17, src_base);
1766   for (int i = 7; i >= 0; i--) {
1767     __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17));
1768     __ Add(x17, x17, 1);
1769   }
1770 
1771   __ Mov(x17, src_base);
1772   for (int i = 3; i >= 0; i--) {
1773     __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17));
1774     __ Add(x17, x17, 1);
1775   }
1776 
1777   __ Mov(x17, src_base);
1778   for (int i = 1; i >= 0; i--) {
1779     __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17));
1780     __ Add(x17, x17, 1);
1781   }
1782 
1783   // Test loading a single element into an initialised register.
1784   __ Mov(x17, src_base);
1785   __ Mov(x4, x17);
1786   __ Ldr(q16, MemOperand(x4, 16, PostIndex));
1787   __ Ldr(q17, MemOperand(x4, 16, PostIndex));
1788   __ Ldr(q18, MemOperand(x4, 16, PostIndex));
1789   __ Ldr(q19, MemOperand(x4));
1790   __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));
1791 
1792   __ Mov(x5, x17);
1793   __ Ldr(q20, MemOperand(x5, 16, PostIndex));
1794   __ Ldr(q21, MemOperand(x5, 16, PostIndex));
1795   __ Ldr(q22, MemOperand(x5, 16, PostIndex));
1796   __ Ldr(q23, MemOperand(x5));
1797   __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));
1798 
1799   __ Mov(x6, x17);
1800   __ Ldr(q24, MemOperand(x6, 16, PostIndex));
1801   __ Ldr(q25, MemOperand(x6, 16, PostIndex));
1802   __ Ldr(q26, MemOperand(x6, 16, PostIndex));
1803   __ Ldr(q27, MemOperand(x6));
1804   __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));
1805 
1806   __ Mov(x7, x17);
1807   __ Ldr(q28, MemOperand(x7, 16, PostIndex));
1808   __ Ldr(q29, MemOperand(x7, 16, PostIndex));
1809   __ Ldr(q30, MemOperand(x7, 16, PostIndex));
1810   __ Ldr(q31, MemOperand(x7));
1811   __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));
1812 
1813   END();
1814 
1815   if (CAN_RUN()) {
1816     RUN();
1817 
1818     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
1819     ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
1820     ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
1821     ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
1822     ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
1823     ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
1824     ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
1825     ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
1826     ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
1827     ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
1828     ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
1829     ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
1830     ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
1831     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
1832     ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
1833     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);
1834     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
1835     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
1836     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
1837     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
1838     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
1839     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
1840     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
1841     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
1842     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
1843     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
1844     ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
1845     ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
1846     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
1847     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
1848     ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
1849     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
1850   }
1851 }
1852 
1853 
TEST(neon_ld4_lane_postindex)1854 TEST(neon_ld4_lane_postindex) {
1855   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
1856 
1857   uint8_t src[64];
1858   for (unsigned i = 0; i < sizeof(src); i++) {
1859     src[i] = i;
1860   }
1861   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
1862 
1863   START();
1864 
1865   // Test loading whole register by element.
1866   __ Mov(x17, src_base);
1867   for (int i = 15; i >= 0; i--) {
1868     __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex));
1869   }
1870 
1871   __ Mov(x18, src_base);
1872   for (int i = 7; i >= 0; i--) {
1873     __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex));
1874   }
1875 
1876   __ Mov(x19, src_base);
1877   for (int i = 3; i >= 0; i--) {
1878     __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex));
1879   }
1880 
1881   __ Mov(x20, src_base);
1882   for (int i = 1; i >= 0; i--) {
1883     __ Ld4(v12.D(),
1884            v13.D(),
1885            v14.D(),
1886            v15.D(),
1887            i,
1888            MemOperand(x20, 32, PostIndex));
1889   }
1890 
1891   // Test loading a single element into an initialised register.
1892   __ Mov(x25, 1);
1893   __ Mov(x21, src_base);
1894   __ Mov(x22, src_base);
1895   __ Mov(x23, src_base);
1896   __ Mov(x24, src_base);
1897 
1898   __ Mov(x4, x21);
1899   __ Ldr(q16, MemOperand(x4, 16, PostIndex));
1900   __ Ldr(q17, MemOperand(x4, 16, PostIndex));
1901   __ Ldr(q18, MemOperand(x4, 16, PostIndex));
1902   __ Ldr(q19, MemOperand(x4));
1903   __ Ld4(v16.B(),
1904          v17.B(),
1905          v18.B(),
1906          v19.B(),
1907          4,
1908          MemOperand(x21, x25, PostIndex));
1909   __ Add(x25, x25, 1);
1910 
1911   __ Mov(x5, x22);
1912   __ Ldr(q20, MemOperand(x5, 16, PostIndex));
1913   __ Ldr(q21, MemOperand(x5, 16, PostIndex));
1914   __ Ldr(q22, MemOperand(x5, 16, PostIndex));
1915   __ Ldr(q23, MemOperand(x5));
1916   __ Ld4(v20.H(),
1917          v21.H(),
1918          v22.H(),
1919          v23.H(),
1920          3,
1921          MemOperand(x22, x25, PostIndex));
1922   __ Add(x25, x25, 1);
1923 
1924   __ Mov(x6, x23);
1925   __ Ldr(q24, MemOperand(x6, 16, PostIndex));
1926   __ Ldr(q25, MemOperand(x6, 16, PostIndex));
1927   __ Ldr(q26, MemOperand(x6, 16, PostIndex));
1928   __ Ldr(q27, MemOperand(x6));
1929   __ Ld4(v24.S(),
1930          v25.S(),
1931          v26.S(),
1932          v27.S(),
1933          2,
1934          MemOperand(x23, x25, PostIndex));
1935   __ Add(x25, x25, 1);
1936 
1937   __ Mov(x7, x24);
1938   __ Ldr(q28, MemOperand(x7, 16, PostIndex));
1939   __ Ldr(q29, MemOperand(x7, 16, PostIndex));
1940   __ Ldr(q30, MemOperand(x7, 16, PostIndex));
1941   __ Ldr(q31, MemOperand(x7));
1942   __ Ld4(v28.D(),
1943          v29.D(),
1944          v30.D(),
1945          v31.D(),
1946          1,
1947          MemOperand(x24, x25, PostIndex));
1948 
1949   END();
1950 
1951   if (CAN_RUN()) {
1952     RUN();
1953 
1954     ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
1955     ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
1956     ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
1957     ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
1958     ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
1959     ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
1960     ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
1961     ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
1962     ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
1963     ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
1964     ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
1965     ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
1966     ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
1967     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
1968     ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
1969     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);
1970     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
1971     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
1972     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
1973     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
1974     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
1975     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
1976     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
1977     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
1978     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
1979     ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
1980     ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
1981     ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
1982     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
1983     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
1984     ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
1985     ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
1986 
1987     ASSERT_EQUAL_64(src_base + 64, x17);
1988     ASSERT_EQUAL_64(src_base + 64, x18);
1989     ASSERT_EQUAL_64(src_base + 64, x19);
1990     ASSERT_EQUAL_64(src_base + 64, x20);
1991     ASSERT_EQUAL_64(src_base + 1, x21);
1992     ASSERT_EQUAL_64(src_base + 2, x22);
1993     ASSERT_EQUAL_64(src_base + 3, x23);
1994     ASSERT_EQUAL_64(src_base + 4, x24);
1995   }
1996 }
1997 
1998 
TEST(neon_ld4_alllanes)1999 TEST(neon_ld4_alllanes) {
2000   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2001 
2002   uint8_t src[64];
2003   for (unsigned i = 0; i < sizeof(src); i++) {
2004     src[i] = i;
2005   }
2006   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2007 
2008   START();
2009   __ Mov(x17, src_base + 1);
2010   __ Mov(x18, 1);
2011   __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
2012   __ Add(x17, x17, 4);
2013   __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
2014   __ Add(x17, x17, 1);
2015   __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
2016   __ Add(x17, x17, 1);
2017   __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
2018   __ Add(x17, x17, 8);
2019   __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
2020   __ Add(x17, x17, 1);
2021   __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
2022   __ Add(x17, x17, 16);
2023   __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
2024 
2025 
2026   END();
2027 
2028   if (CAN_RUN()) {
2029     RUN();
2030 
2031     ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
2032     ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
2033     ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
2034     ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
2035     ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
2036     ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
2037     ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
2038     ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
2039     ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
2040     ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
2041     ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
2042     ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
2043     ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
2044     ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
2045     ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
2046     ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
2047     ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
2048     ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
2049     ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
2050     ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
2051     ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
2052     ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
2053     ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
2054     ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
2055     ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
2056     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
2057     ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
2058     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
2059   }
2060 }
2061 
2062 
TEST(neon_ld4_alllanes_postindex)2063 TEST(neon_ld4_alllanes_postindex) {
2064   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2065 
2066   uint8_t src[64];
2067   for (unsigned i = 0; i < sizeof(src); i++) {
2068     src[i] = i;
2069   }
2070   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2071   __ Mov(x17, src_base + 1);
2072   __ Mov(x18, 1);
2073 
2074   START();
2075   __ Mov(x17, src_base + 1);
2076   __ Mov(x18, 1);
2077   __ Ld4r(v0.V8B(),
2078           v1.V8B(),
2079           v2.V8B(),
2080           v3.V8B(),
2081           MemOperand(x17, 4, PostIndex));
2082   __ Ld4r(v4.V16B(),
2083           v5.V16B(),
2084           v6.V16B(),
2085           v7.V16B(),
2086           MemOperand(x17, x18, PostIndex));
2087   __ Ld4r(v8.V4H(),
2088           v9.V4H(),
2089           v10.V4H(),
2090           v11.V4H(),
2091           MemOperand(x17, x18, PostIndex));
2092   __ Ld4r(v12.V8H(),
2093           v13.V8H(),
2094           v14.V8H(),
2095           v15.V8H(),
2096           MemOperand(x17, 8, PostIndex));
2097   __ Ld4r(v16.V2S(),
2098           v17.V2S(),
2099           v18.V2S(),
2100           v19.V2S(),
2101           MemOperand(x17, x18, PostIndex));
2102   __ Ld4r(v20.V4S(),
2103           v21.V4S(),
2104           v22.V4S(),
2105           v23.V4S(),
2106           MemOperand(x17, 16, PostIndex));
2107   __ Ld4r(v24.V2D(),
2108           v25.V2D(),
2109           v26.V2D(),
2110           v27.V2D(),
2111           MemOperand(x17, 32, PostIndex));
2112   END();
2113 
2114   if (CAN_RUN()) {
2115     RUN();
2116 
2117     ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
2118     ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
2119     ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
2120     ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
2121     ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
2122     ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
2123     ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
2124     ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
2125     ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
2126     ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
2127     ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
2128     ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
2129     ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
2130     ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
2131     ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
2132     ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
2133     ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
2134     ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
2135     ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
2136     ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
2137     ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
2138     ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
2139     ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
2140     ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
2141     ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
2142     ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
2143     ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
2144     ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
2145     ASSERT_EQUAL_64(src_base + 64, x17);
2146   }
2147 }
2148 
2149 
// ST1 (single structure): stores q0 back to memory one lane at a time, from
// the highest lane down to lane 0, for each element size. Every 16-byte
// result is then reloaded and compared against q0 with its elements in
// reverse order.
TEST(neon_st1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte k of the buffer initially holds the value k. The first 16 bytes
  // provide the data for q0; the buffer is then overwritten by the stores.
  uint8_t src[64];
  for (size_t offset = 0; offset < sizeof(src); ++offset) {
    src[offset] = static_cast<uint8_t>(offset);
  }
  const uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  // Offset used to reload the 16 bytes that were just written.
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  // Byte lanes.
  for (int lane = 15; lane >= 0; --lane) {
    __ St1(v0.B(), lane, MemOperand(x17));
    __ Add(x17, x17, 1);
  }
  __ Ldr(q1, MemOperand(x17, x18));

  // Halfword lanes.
  for (int lane = 7; lane >= 0; --lane) {
    __ St1(v0.H(), lane, MemOperand(x17));
    __ Add(x17, x17, 2);
  }
  __ Ldr(q2, MemOperand(x17, x18));

  // Word lanes.
  for (int lane = 3; lane >= 0; --lane) {
    __ St1(v0.S(), lane, MemOperand(x17));
    __ Add(x17, x17, 4);
  }
  __ Ldr(q3, MemOperand(x17, x18));

  // Doubleword lanes.
  for (int lane = 1; lane >= 0; --lane) {
    __ St1(v0.D(), lane, MemOperand(x17));
    __ Add(x17, x17, 8);
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2199 
2200 
TEST(neon_st2_lane)2201 TEST(neon_st2_lane) {
2202   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2203 
2204   // Struct size * addressing modes * element sizes * vector size.
2205   uint8_t dst[2 * 2 * 4 * 16];
2206   memset(dst, 0, sizeof(dst));
2207   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2208 
2209   START();
2210   __ Mov(x17, dst_base);
2211   __ Mov(x18, dst_base);
2212   __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2213   __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2214 
2215   // Test B stores with and without post index.
2216   for (int i = 15; i >= 0; i--) {
2217     __ St2(v0.B(), v1.B(), i, MemOperand(x18));
2218     __ Add(x18, x18, 2);
2219   }
2220   for (int i = 15; i >= 0; i--) {
2221     __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
2222   }
2223   __ Ldr(q2, MemOperand(x17, 0 * 16));
2224   __ Ldr(q3, MemOperand(x17, 1 * 16));
2225   __ Ldr(q4, MemOperand(x17, 2 * 16));
2226   __ Ldr(q5, MemOperand(x17, 3 * 16));
2227 
2228   // Test H stores with and without post index.
2229   __ Mov(x0, 4);
2230   for (int i = 7; i >= 0; i--) {
2231     __ St2(v0.H(), v1.H(), i, MemOperand(x18));
2232     __ Add(x18, x18, 4);
2233   }
2234   for (int i = 7; i >= 0; i--) {
2235     __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
2236   }
2237   __ Ldr(q6, MemOperand(x17, 4 * 16));
2238   __ Ldr(q7, MemOperand(x17, 5 * 16));
2239   __ Ldr(q16, MemOperand(x17, 6 * 16));
2240   __ Ldr(q17, MemOperand(x17, 7 * 16));
2241 
2242   // Test S stores with and without post index.
2243   for (int i = 3; i >= 0; i--) {
2244     __ St2(v0.S(), v1.S(), i, MemOperand(x18));
2245     __ Add(x18, x18, 8);
2246   }
2247   for (int i = 3; i >= 0; i--) {
2248     __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
2249   }
2250   __ Ldr(q18, MemOperand(x17, 8 * 16));
2251   __ Ldr(q19, MemOperand(x17, 9 * 16));
2252   __ Ldr(q20, MemOperand(x17, 10 * 16));
2253   __ Ldr(q21, MemOperand(x17, 11 * 16));
2254 
2255   // Test D stores with and without post index.
2256   __ Mov(x0, 16);
2257   __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
2258   __ Add(x18, x18, 16);
2259   __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
2260   __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
2261   __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
2262   __ Ldr(q22, MemOperand(x17, 12 * 16));
2263   __ Ldr(q23, MemOperand(x17, 13 * 16));
2264   __ Ldr(q24, MemOperand(x17, 14 * 16));
2265   __ Ldr(q25, MemOperand(x17, 15 * 16));
2266   END();
2267 
2268   if (CAN_RUN()) {
2269     RUN();
2270 
2271     ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
2272     ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
2273     ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
2274     ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);
2275 
2276     ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
2277     ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
2278     ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
2279     ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);
2280 
2281     ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
2282     ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
2283     ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
2284     ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);
2285 
2286     ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
2287     ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
2288     ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
2289     ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
2290   }
2291 }
2292 
2293 
TEST(neon_st3_lane)2294 TEST(neon_st3_lane) {
2295   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2296 
2297   // Struct size * addressing modes * element sizes * vector size.
2298   uint8_t dst[3 * 2 * 4 * 16];
2299   memset(dst, 0, sizeof(dst));
2300   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2301 
2302   START();
2303   __ Mov(x17, dst_base);
2304   __ Mov(x18, dst_base);
2305   __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2306   __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2307   __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2308 
2309   // Test B stores with and without post index.
2310   for (int i = 15; i >= 0; i--) {
2311     __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
2312     __ Add(x18, x18, 3);
2313   }
2314   for (int i = 15; i >= 0; i--) {
2315     __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
2316   }
2317   __ Ldr(q3, MemOperand(x17, 0 * 16));
2318   __ Ldr(q4, MemOperand(x17, 1 * 16));
2319   __ Ldr(q5, MemOperand(x17, 2 * 16));
2320   __ Ldr(q6, MemOperand(x17, 3 * 16));
2321   __ Ldr(q7, MemOperand(x17, 4 * 16));
2322   __ Ldr(q16, MemOperand(x17, 5 * 16));
2323 
2324   // Test H stores with and without post index.
2325   __ Mov(x0, 6);
2326   for (int i = 7; i >= 0; i--) {
2327     __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
2328     __ Add(x18, x18, 6);
2329   }
2330   for (int i = 7; i >= 0; i--) {
2331     __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
2332   }
2333   __ Ldr(q17, MemOperand(x17, 6 * 16));
2334   __ Ldr(q18, MemOperand(x17, 7 * 16));
2335   __ Ldr(q19, MemOperand(x17, 8 * 16));
2336   __ Ldr(q20, MemOperand(x17, 9 * 16));
2337   __ Ldr(q21, MemOperand(x17, 10 * 16));
2338   __ Ldr(q22, MemOperand(x17, 11 * 16));
2339 
2340   // Test S stores with and without post index.
2341   for (int i = 3; i >= 0; i--) {
2342     __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
2343     __ Add(x18, x18, 12);
2344   }
2345   for (int i = 3; i >= 0; i--) {
2346     __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
2347   }
2348   __ Ldr(q23, MemOperand(x17, 12 * 16));
2349   __ Ldr(q24, MemOperand(x17, 13 * 16));
2350   __ Ldr(q25, MemOperand(x17, 14 * 16));
2351   __ Ldr(q26, MemOperand(x17, 15 * 16));
2352   __ Ldr(q27, MemOperand(x17, 16 * 16));
2353   __ Ldr(q28, MemOperand(x17, 17 * 16));
2354 
2355   // Test D stores with and without post index.
2356   __ Mov(x0, 24);
2357   __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
2358   __ Add(x18, x18, 24);
2359   __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
2360   __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
2361   __ Ldr(q29, MemOperand(x17, 18 * 16));
2362   __ Ldr(q30, MemOperand(x17, 19 * 16));
2363   __ Ldr(q31, MemOperand(x17, 20 * 16));
2364   END();
2365 
2366   if (CAN_RUN()) {
2367     RUN();
2368 
2369     ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
2370     ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
2371     ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
2372     ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
2373     ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
2374     ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);
2375 
2376     ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
2377     ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
2378     ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
2379     ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
2380     ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
2381     ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);
2382 
2383     ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
2384     ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
2385     ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
2386     ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
2387     ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
2388     ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);
2389   }
2390 }
2391 
2392 
TEST(neon_st4_lane)2393 TEST(neon_st4_lane) {
2394   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2395 
2396   // Struct size * element sizes * vector size.
2397   uint8_t dst[4 * 4 * 16];
2398   memset(dst, 0, sizeof(dst));
2399   uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
2400 
2401   START();
2402   __ Mov(x17, dst_base);
2403   __ Mov(x18, dst_base);
2404   __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
2405   __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
2406   __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2407   __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
2408 
2409   // Test B stores without post index.
2410   for (int i = 15; i >= 0; i--) {
2411     __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
2412     __ Add(x18, x18, 4);
2413   }
2414   __ Ldr(q4, MemOperand(x17, 0 * 16));
2415   __ Ldr(q5, MemOperand(x17, 1 * 16));
2416   __ Ldr(q6, MemOperand(x17, 2 * 16));
2417   __ Ldr(q7, MemOperand(x17, 3 * 16));
2418 
2419   // Test H stores with post index.
2420   __ Mov(x0, 8);
2421   for (int i = 7; i >= 0; i--) {
2422     __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
2423   }
2424   __ Ldr(q16, MemOperand(x17, 4 * 16));
2425   __ Ldr(q17, MemOperand(x17, 5 * 16));
2426   __ Ldr(q18, MemOperand(x17, 6 * 16));
2427   __ Ldr(q19, MemOperand(x17, 7 * 16));
2428 
2429   // Test S stores without post index.
2430   for (int i = 3; i >= 0; i--) {
2431     __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
2432     __ Add(x18, x18, 16);
2433   }
2434   __ Ldr(q20, MemOperand(x17, 8 * 16));
2435   __ Ldr(q21, MemOperand(x17, 9 * 16));
2436   __ Ldr(q22, MemOperand(x17, 10 * 16));
2437   __ Ldr(q23, MemOperand(x17, 11 * 16));
2438 
2439   // Test D stores with post index.
2440   __ Mov(x0, 32);
2441   __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
2442   __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));
2443 
2444   __ Ldr(q24, MemOperand(x17, 12 * 16));
2445   __ Ldr(q25, MemOperand(x17, 13 * 16));
2446   __ Ldr(q26, MemOperand(x17, 14 * 16));
2447   __ Ldr(q27, MemOperand(x17, 15 * 16));
2448   END();
2449 
2450   if (CAN_RUN()) {
2451     RUN();
2452 
2453     ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4);
2454     ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5);
2455     ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6);
2456     ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7);
2457 
2458     ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16);
2459     ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17);
2460     ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18);
2461     ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19);
2462 
2463     ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20);
2464     ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21);
2465     ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22);
2466     ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);
2467 
2468     ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
2469     ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25);
2470     ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
2471     ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27);
2472   }
2473 }
2474 
2475 
TEST(neon_ld1_lane_postindex)2476 TEST(neon_ld1_lane_postindex) {
2477   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2478 
2479   uint8_t src[64];
2480   for (unsigned i = 0; i < sizeof(src); i++) {
2481     src[i] = i;
2482   }
2483   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2484 
2485   START();
2486   __ Mov(x17, src_base);
2487   __ Mov(x18, src_base);
2488   __ Mov(x19, src_base);
2489   __ Mov(x20, src_base);
2490   __ Mov(x21, src_base);
2491   __ Mov(x22, src_base);
2492   __ Mov(x23, src_base);
2493   __ Mov(x24, src_base);
2494 
2495   // Test loading whole register by element.
2496   for (int i = 15; i >= 0; i--) {
2497     __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex));
2498   }
2499 
2500   for (int i = 7; i >= 0; i--) {
2501     __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex));
2502   }
2503 
2504   for (int i = 3; i >= 0; i--) {
2505     __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex));
2506   }
2507 
2508   for (int i = 1; i >= 0; i--) {
2509     __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex));
2510   }
2511 
2512   // Test loading a single element into an initialised register.
2513   __ Mov(x25, 1);
2514   __ Ldr(q4, MemOperand(x21));
2515   __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
2516   __ Add(x25, x25, 1);
2517 
2518   __ Ldr(q5, MemOperand(x22));
2519   __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
2520   __ Add(x25, x25, 1);
2521 
2522   __ Ldr(q6, MemOperand(x23));
2523   __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
2524   __ Add(x25, x25, 1);
2525 
2526   __ Ldr(q7, MemOperand(x24));
2527   __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));
2528 
2529   END();
2530 
2531   if (CAN_RUN()) {
2532     RUN();
2533 
2534     ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
2535     ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
2536     ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
2537     ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
2538     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
2539     ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
2540     ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
2541     ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
2542     ASSERT_EQUAL_64(src_base + 16, x17);
2543     ASSERT_EQUAL_64(src_base + 16, x18);
2544     ASSERT_EQUAL_64(src_base + 16, x19);
2545     ASSERT_EQUAL_64(src_base + 16, x20);
2546     ASSERT_EQUAL_64(src_base + 1, x21);
2547     ASSERT_EQUAL_64(src_base + 2, x22);
2548     ASSERT_EQUAL_64(src_base + 3, x23);
2549     ASSERT_EQUAL_64(src_base + 4, x24);
2550   }
2551 }
2552 
2553 
// Checks the lane-indexed form of St1 with an immediate post index, for B, H,
// S and D lanes.
TEST(neon_st1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[64];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr);
  // x18 holds -16 and is used as a register offset to read back the 16 bytes
  // just below the current value of x17.
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  // Each loop stores every lane of v0, highest lane first, then reads back
  // the 16 bytes that were written.
  for (int lane = 15; lane >= 0; --lane) {
    __ St1(v0.B(), lane, MemOperand(x17, 1, PostIndex));
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int lane = 7; lane >= 0; --lane) {
    __ St1(v0.H(), lane, MemOperand(x17, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int lane = 3; lane >= 0; --lane) {
    __ St1(v0.S(), lane, MemOperand(x17, 4, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int lane = 1; lane >= 0; --lane) {
    __ St1(v0.D(), lane, MemOperand(x17, 8, PostIndex));
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
  }
}
2599 
2600 
// Checks Ld1r for every vector arrangement, using plain base addressing.
TEST(neon_ld1_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[64];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // Start one byte into the buffer and step the pointer by one byte between
  // loads.
  __ Mov(x17, data_addr + 1);
  __ Ld1r(v0.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v1.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v2.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v3.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v4.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v5.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v6.V1D(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v7.V2D(), MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit arrangements are expected to leave the top half zero.
    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
    ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
    ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);
  }
}
2642 
2643 
// Checks Ld1r with post-index addressing, mixing immediate post-index values
// with the register post index x18 (1).
TEST(neon_ld1_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[64];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr + 1);
  __ Mov(x18, 1);
  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
  __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
  __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
    ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
    ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
    ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
    ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
    ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
    ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
    // Start offset 1 plus post-index steps of 1 + 1 + 1 + 2 + 1 + 4 + 8.
    ASSERT_EQUAL_64(data_addr + 19, x17);
  }
}
2678 
2679 
// Checks the multiple-register form of St1 with 64-bit arrangements and plain
// base addressing, for one to four registers.
TEST(neon_st1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[14 * kDRegSizeInBytes];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // Load the first 64 bytes into q0-q3, then rewind x17 to the buffer start.
  __ Mov(x17, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, data_addr);

  // Each store is followed by read-back loads; the post-indexed read-backs
  // are what advance x17 to the next store address.
  __ St1(v0.V8B(), MemOperand(x17));
  __ Ldr(d16, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
  __ Ldr(d18, MemOperand(x17, 8, PostIndex));
  __ Ldr(d19, MemOperand(x17, 8, PostIndex));
  __ Ldr(d20, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    // Source registers.
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
    // Read-backs of the stored data.
    ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
    ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
    ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
    ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
  }
}
2735 
2736 
TEST(neon_st1_d_postindex)2737 TEST(neon_st1_d_postindex) {
2738   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
2739 
2740   uint8_t src[64 + 14 * kDRegSizeInBytes];
2741   for (unsigned i = 0; i < sizeof(src); i++) {
2742     src[i] = i;
2743   }
2744   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
2745 
2746   START();
2747   __ Mov(x17, src_base);
2748   __ Mov(x18, -8);
2749   __ Mov(x19, -16);
2750   __ Mov(x20, -24);
2751   __ Mov(x21, -32);
2752   __ Ldr(q0, MemOperand(x17, 16, PostIndex));
2753   __ Ldr(q1, MemOperand(x17, 16, PostIndex));
2754   __ Ldr(q2, MemOperand(x17, 16, PostIndex));
2755   __ Ldr(q3, MemOperand(x17, 16, PostIndex));
2756   __ Mov(x17, src_base);
2757 
2758   __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
2759   __ Ldr(d16, MemOperand(x17, x18));
2760 
2761   __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
2762   __ Ldr(q17, MemOperand(x17, x19));
2763 
2764   __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
2765   __ Ldr(d18, MemOperand(x17, x20));
2766   __ Ldr(d19, MemOperand(x17, x19));
2767   __ Ldr(d20, MemOperand(x17, x18));
2768 
2769   __ St1(v0.V2S(),
2770          v1.V2S(),
2771          v2.V2S(),
2772          v3.V2S(),
2773          MemOperand(x17, 32, PostIndex));
2774   __ Ldr(q21, MemOperand(x17, x21));
2775   __ Ldr(q22, MemOperand(x17, x19));
2776 
2777   __ St1(v0.V1D(),
2778          v1.V1D(),
2779          v2.V1D(),
2780          v3.V1D(),
2781          MemOperand(x17, 32, PostIndex));
2782   __ Ldr(q23, MemOperand(x17, x21));
2783   __ Ldr(q24, MemOperand(x17, x19));
2784   END();
2785 
2786   if (CAN_RUN()) {
2787     RUN();
2788 
2789     ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
2790     ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
2791     ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
2792     ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
2793     ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
2794     ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
2795     ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
2796     ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
2797     ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
2798   }
2799 }
2800 
2801 
// Checks the multiple-register form of St1 with 128-bit arrangements and
// plain base addressing, for one to four registers.
TEST(neon_st1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[64 + 160];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // Load the first 64 bytes into q0-q3; x17 is left just past them and the
  // stores continue from there.
  __ Mov(x17, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  // The post-indexed read-back loads advance x17 to the next store address.
  __ St1(v0.V16B(), MemOperand(x17));
  __ Ldr(q16, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));
  __ Ldr(q18, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
  __ Ldr(q19, MemOperand(x17, 16, PostIndex));
  __ Ldr(q20, MemOperand(x17, 16, PostIndex));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17, 16, PostIndex));
  __ Ldr(q25, MemOperand(x17));
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2852 
2853 
// Checks the multiple-register form of St1 with 128-bit arrangements and an
// immediate post index, for one to four registers.
TEST(neon_st1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[64 + 160];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x17, data_addr);
  // Negative register offsets used to read back data below the post-indexed
  // pointer.
  __ Mov(x18, -16);
  __ Mov(x19, -32);
  __ Mov(x20, -48);
  __ Mov(x21, -64);
  // Load the first 64 bytes into q0-q3; x17 is left just past them.
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q16, MemOperand(x17, x18));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));
  __ Ldr(q18, MemOperand(x17, x18));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
  __ Ldr(q19, MemOperand(x17, x20));
  __ Ldr(q20, MemOperand(x17, x19));
  __ Ldr(q21, MemOperand(x17, x18));

  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(),
         MemOperand(x17, 64, PostIndex));
  __ Ldr(q22, MemOperand(x17, x21));
  __ Ldr(q23, MemOperand(x17, x20));
  __ Ldr(q24, MemOperand(x17, x19));
  __ Ldr(q25, MemOperand(x17, x18));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
    ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
  }
}
2913 
2914 
// Checks St2 with 64-bit arrangements and plain base addressing. The stores
// are issued at byte offsets 0, 22 and 33 from the start of the buffer.
TEST(neon_st2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[4 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
  __ Add(x18, x18, 11);
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  // Read the whole buffer back into q0-q3 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
    ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);
  }
}
2953 
2954 
// Checks St2 with 64-bit arrangements, using a register post index (x22 = 5),
// an immediate post index (16) and finally plain base addressing.
TEST(neon_st2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[4 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x22, 5);
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  // Read the first 48 bytes back into q0-q2 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
    ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);
  }
}
2991 
2992 
// Checks St2 with 128-bit arrangements and plain base addressing. The stores
// are issued at byte offsets 0, 8, 30 and 32 from the start of the buffer.
TEST(neon_st2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[5 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
  __ Add(x18, x18, 8);
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Read the first 64 bytes back into q0-q3 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
    ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
    ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
    ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);
  }
}
3033 
3034 
// Checks St2 with 128-bit arrangements, alternating between a register post
// index (x22 = 5) and an immediate post index (32), then plain base
// addressing for the last store.
TEST(neon_st2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[5 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x22, 5);
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  // Read the whole buffer back into q0-q4 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
    ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
    ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);
  }
}
3075 
3076 
// Checks St3 with 64-bit arrangements and plain base addressing. The stores
// are issued at byte offsets 0, 3 and 5 from the start of the buffer.
TEST(neon_st3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[3 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
  __ Add(x18, x18, 3);
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));

  // Read the first 32 bytes back into q0-q1 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
    ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);
  }
}
3113 
3114 
// Checks St3 with 64-bit arrangements, using a register post index (x22 = 5),
// an immediate post index (24) and finally plain base addressing.
TEST(neon_st3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[4 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x22, 5);
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));

  // Read the whole buffer back into q0-q3 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);
  }
}
3154 
3155 
// Checks St3 with 128-bit arrangements and plain base addressing. The stores
// are issued at byte offsets 0, 5, 17 and 39 from the start of the buffer.
TEST(neon_st3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[6 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Read the whole buffer back into q0-q5 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
    ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
    ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
    ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
    ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);
  }
}
3201 
3202 
// Checks St3 with 128-bit arrangements, alternating between a register post
// index (x22 = 5) and an immediate post index (48), then plain base
// addressing for the last store.
TEST(neon_st3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[7 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x22, 5);
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  // Read the whole buffer back into q0-q6 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
    ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
    ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
    ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
    ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
    ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
    ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
  }
}
3248 
3249 
// Checks St4 with 64-bit arrangements and plain base addressing. The stores
// are issued at byte offsets 0, 12 and 27 from the start of the buffer.
TEST(neon_st4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[4 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
  __ Add(x18, x18, 15);
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));

  // Read the whole buffer back into q0-q3 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1110010032221202, 0x3121110130201000, q0);
    ASSERT_EQUAL_128(0x1003020100322322, 0x1312030231302120, q1);
    ASSERT_EQUAL_128(0x1407060504333231, 0x3023222120131211, q2);
    ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);
  }
}
3291 
3292 
// Checks St4 with 64-bit arrangements, using a register post index (x22 = 5),
// an immediate post index (32) and finally plain base addressing.
TEST(neon_st4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte n of the buffer holds the value n.
  uint8_t data[5 * 16];
  for (unsigned idx = 0; idx < sizeof(data); ++idx) {
    data[idx] = static_cast<uint8_t>(idx);
  }
  const uintptr_t data_addr = reinterpret_cast<uintptr_t>(data);

  START();
  __ Mov(x22, 5);
  // x17 reads the source registers; x18 is the store pointer.
  __ Mov(x17, data_addr);
  __ Mov(x18, data_addr);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(),
         MemOperand(x18, 32, PostIndex));
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));

  // Read the whole buffer back into q0-q4 for checking.
  __ Mov(x19, data_addr);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
    ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
    ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
    ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
    ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);
  }
}
3343 
3344 
// Exercises St4 with the 128-bit arrangements 16B, 8H, 4S and 2D, using a
// plain base-register address.
//
// src is seeded with the byte values 0..111 and its first 64 bytes are loaded
// into q0-q3. Four St4 stores write back into src, with x18 advanced by 5, 12
// and 22 bytes between them so the stores start at unaligned offsets and
// overlap. The buffer is then reloaded into q0-q6 and compared with the
// expected byte patterns.
TEST(neon_st4_q)3345 TEST(neon_st4_q) {
3346   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3347 
3348   uint8_t src[7 * 16];
3349   for (unsigned i = 0; i < sizeof(src); i++) {
3350     src[i] = i;
3351   }
3352   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3353 
3354   START();
       // x17 walks the source data in 16-byte steps; x18 is the store base.
3355   __ Mov(x17, src_base);
3356   __ Mov(x18, src_base);
3357   __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3358   __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3359   __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3360   __ Ldr(q3, MemOperand(x17, 16, PostIndex));
3361 
3362   __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
3363   __ Add(x18, x18, 5);
3364   __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
3365   __ Add(x18, x18, 12);
3366   __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
3367   __ Add(x18, x18, 22);
3368   __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
       // NOTE(review): x18 is not read again in this test after this Add.
3369   __ Add(x18, x18, 10);
3370 
       // Reload the whole buffer so that the stored bytes can be checked.
3371   __ Mov(x19, src_base);
3372   __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3373   __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3374   __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3375   __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3376   __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3377   __ Ldr(q5, MemOperand(x19, 16, PostIndex));
3378   __ Ldr(q6, MemOperand(x19, 16, PostIndex));
3379 
3380   END();
3381 
3382   if (CAN_RUN()) {
3383     RUN();
3384 
3385     ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
3386     ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
3387     ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
3388     ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
3389     ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
3390     ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
3391     ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);
3392   }
3393 }
3394 
3395 
// Exercises St4 with the 128-bit arrangements 16B, 8H, 4S and 2D, using
// post-index addressing.
//
// src is seeded with the byte values 0..143 and its first 64 bytes are loaded
// into q0-q3. The stores post-index x18 by register x22 (5), by the immediate
// 64, and by x22 again; the last store uses a plain base address. The buffer is
// then reloaded into q0-q8 and compared with the expected byte patterns.
TEST(neon_st4_q_postindex)3396 TEST(neon_st4_q_postindex) {
3397   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3398 
3399   uint8_t src[9 * 16];
3400   for (unsigned i = 0; i < sizeof(src); i++) {
3401     src[i] = i;
3402   }
3403   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3404 
3405   START();
       // x22 holds the increment used by the register post-index forms below.
3406   __ Mov(x22, 5);
3407   __ Mov(x17, src_base);
3408   __ Mov(x18, src_base);
3409   __ Ldr(q0, MemOperand(x17, 16, PostIndex));
3410   __ Ldr(q1, MemOperand(x17, 16, PostIndex));
3411   __ Ldr(q2, MemOperand(x17, 16, PostIndex));
3412   __ Ldr(q3, MemOperand(x17, 16, PostIndex));
3413 
3414   __ St4(v0.V16B(),
3415          v1.V16B(),
3416          v2.V16B(),
3417          v3.V16B(),
3418          MemOperand(x18, x22, PostIndex));
3419   __ St4(v0.V8H(),
3420          v1.V8H(),
3421          v2.V8H(),
3422          v3.V8H(),
3423          MemOperand(x18, 64, PostIndex));
3424   __ St4(v0.V4S(),
3425          v1.V4S(),
3426          v2.V4S(),
3427          v3.V4S(),
3428          MemOperand(x18, x22, PostIndex));
3429   __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
3430 
       // Reload the whole buffer so that the stored bytes can be checked.
3431   __ Mov(x19, src_base);
3432   __ Ldr(q0, MemOperand(x19, 16, PostIndex));
3433   __ Ldr(q1, MemOperand(x19, 16, PostIndex));
3434   __ Ldr(q2, MemOperand(x19, 16, PostIndex));
3435   __ Ldr(q3, MemOperand(x19, 16, PostIndex));
3436   __ Ldr(q4, MemOperand(x19, 16, PostIndex));
3437   __ Ldr(q5, MemOperand(x19, 16, PostIndex));
3438   __ Ldr(q6, MemOperand(x19, 16, PostIndex));
3439   __ Ldr(q7, MemOperand(x19, 16, PostIndex));
3440   __ Ldr(q8, MemOperand(x19, 16, PostIndex));
3441 
3442   END();
3443 
3444   if (CAN_RUN()) {
3445     RUN();
3446 
3447     ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
3448     ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
3449     ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
3450     ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
3451     ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
3452     ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
3453     ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
3454     ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
3455     ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);
3456   }
3457 }
3458 
3459 
// Checks the pairwise min/max operations Sminp, Smaxp, Uminp and Umaxp (2S
// arrangement) when the destination register aliases a source register.
//
// For each operation four variants are generated, in this order:
//   1. destination distinct from both sources (the reference result),
//   2. destination is also the first source,
//   3. destination is also the second source,
//   4. destination is both sources (a copy of v0 paired with itself).
// Variants 1-3 are expected to give the same value; variant 4 is expected to
// give the result of operating on v0 with itself.
TEST(neon_destructive_minmaxp)3460 TEST(neon_destructive_minmaxp) {
3461   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3462 
3463   START();
3464   __ Movi(v0.V2D(), 0, 0x2222222233333333);
3465   __ Movi(v1.V2D(), 0, 0x0000000011111111);
3466 
3467   __ Sminp(v16.V2S(), v0.V2S(), v1.V2S());
3468   __ Mov(v17, v0);
3469   __ Sminp(v17.V2S(), v17.V2S(), v1.V2S());
3470   __ Mov(v18, v1);
3471   __ Sminp(v18.V2S(), v0.V2S(), v18.V2S());
3472   __ Mov(v19, v0);
3473   __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());
3474 
3475   __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
3476   __ Mov(v21, v0);
3477   __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S());
3478   __ Mov(v22, v1);
3479   __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S());
3480   __ Mov(v23, v0);
3481   __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());
3482 
3483   __ Uminp(v24.V2S(), v0.V2S(), v1.V2S());
3484   __ Mov(v25, v0);
3485   __ Uminp(v25.V2S(), v25.V2S(), v1.V2S());
3486   __ Mov(v26, v1);
3487   __ Uminp(v26.V2S(), v0.V2S(), v26.V2S());
3488   __ Mov(v27, v0);
3489   __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());
3490 
3491   __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S());
3492   __ Mov(v29, v0);
3493   __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S());
3494   __ Mov(v30, v1);
3495   __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S());
3496   __ Mov(v31, v0);
3497   __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
3498   END();
3499 
3500   if (CAN_RUN()) {
3501     RUN();
3502 
         // Sminp: reference, dst == first source, dst == second source, all same.
3503     ASSERT_EQUAL_128(0, 0x0000000022222222, q16);
3504     ASSERT_EQUAL_128(0, 0x0000000022222222, q17);
3505     ASSERT_EQUAL_128(0, 0x0000000022222222, q18);
3506     ASSERT_EQUAL_128(0, 0x2222222222222222, q19);
3507 
         // Smaxp, same four variants.
3508     ASSERT_EQUAL_128(0, 0x1111111133333333, q20);
3509     ASSERT_EQUAL_128(0, 0x1111111133333333, q21);
3510     ASSERT_EQUAL_128(0, 0x1111111133333333, q22);
3511     ASSERT_EQUAL_128(0, 0x3333333333333333, q23);
3512 
         // Uminp, same four variants.
3513     ASSERT_EQUAL_128(0, 0x0000000022222222, q24);
3514     ASSERT_EQUAL_128(0, 0x0000000022222222, q25);
3515     ASSERT_EQUAL_128(0, 0x0000000022222222, q26);
3516     ASSERT_EQUAL_128(0, 0x2222222222222222, q27);
3517 
         // Umaxp, same four variants.
3518     ASSERT_EQUAL_128(0, 0x1111111133333333, q28);
3519     ASSERT_EQUAL_128(0, 0x1111111133333333, q29);
3520     ASSERT_EQUAL_128(0, 0x1111111133333333, q30);
3521     ASSERT_EQUAL_128(0, 0x3333333333333333, q31);
3522   }
3523 }
3524 
3525 
// Checks Tbl (table lookup) when the destination register aliases the index
// register, a table register, or both.
//
// v0 holds the indices and v1-v4 the table data. Both the one-register-table
// and the four-register-table forms are generated in four variants:
// destination distinct (preloaded with 0x55 bytes), destination == index,
// destination == first table register, and destination == index == first table
// register. The expected values contain 0x00, not the preloaded 0x55, in every
// byte that is not taken from the table.
TEST(neon_destructive_tbl)3526 TEST(neon_destructive_tbl) {
3527   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3528 
3529   START();
3530   __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
3531   __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
3532   __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
3533   __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
3534   __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);
3535 
       // One-register table.
3536   __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
3537   __ Tbl(v16.V16B(), v1.V16B(), v0.V16B());
3538   __ Mov(v17, v0);
3539   __ Tbl(v17.V16B(), v1.V16B(), v17.V16B());
3540   __ Mov(v18, v1);
3541   __ Tbl(v18.V16B(), v18.V16B(), v0.V16B());
3542   __ Mov(v19, v0);
3543   __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());
3544 
       // Four-register table.
3545   __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
3546   __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
3547   __ Mov(v21, v0);
3548   __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
       // v22-v25 are copies of the table v1-v4, so v22 can be table and result.
3549   __ Mov(v22, v1);
3550   __ Mov(v23, v2);
3551   __ Mov(v24, v3);
3552   __ Mov(v25, v4);
3553   __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
       // Here the table is {v0, v1, v2, v3} (as v26-v29) and v26 is also the
       // index and the destination.
3554   __ Mov(v26, v0);
3555   __ Mov(v27, v1);
3556   __ Mov(v28, v2);
3557   __ Mov(v29, v3);
3558   __ Tbl(v26.V16B(),
3559          v26.V16B(),
3560          v27.V16B(),
3561          v28.V16B(),
3562          v29.V16B(),
3563          v26.V16B());
3564   END();
3565 
3566   if (CAN_RUN()) {
3567     RUN();
3568 
3569     ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q16);
3570     ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q17);
3571     ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q18);
3572     ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19);
3573 
3574     ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
3575     ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
3576     ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q22);
3577     ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26);
3578   }
3579 }
3580 
3581 
// Checks Tbx (table lookup extension) when the destination register aliases
// the index register, a table register, or both.
//
// The setup mirrors the Tbl variant of this test: v0 holds the indices, v1-v4
// the table data, and both the one-register and four-register table forms are
// generated with the destination distinct, equal to the index, equal to the
// first table register, and equal to both. Unlike Tbl, the expected values
// keep the destination's previous contents (for example the preloaded 0x55
// bytes in q16 and q20) in every byte that is not taken from the table.
TEST(neon_destructive_tbx)3582 TEST(neon_destructive_tbx) {
3583   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3584 
3585   START();
3586   __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
3587   __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
3588   __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
3589   __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
3590   __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);
3591 
       // One-register table.
3592   __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
3593   __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
3594   __ Mov(v17, v0);
3595   __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
3596   __ Mov(v18, v1);
3597   __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
3598   __ Mov(v19, v0);
3599   __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());
3600 
       // Four-register table.
3601   __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
3602   __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
3603   __ Mov(v21, v0);
3604   __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
       // v22-v25 are copies of the table v1-v4, so v22 can be table and result.
3605   __ Mov(v22, v1);
3606   __ Mov(v23, v2);
3607   __ Mov(v24, v3);
3608   __ Mov(v25, v4);
3609   __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
       // Here the table is {v0, v1, v2, v3} (as v26-v29) and v26 is also the
       // index and the destination.
3610   __ Mov(v26, v0);
3611   __ Mov(v27, v1);
3612   __ Mov(v28, v2);
3613   __ Mov(v29, v3);
3614   __ Tbx(v26.V16B(),
3615          v26.V16B(),
3616          v27.V16B(),
3617          v28.V16B(),
3618          v29.V16B(),
3619          v26.V16B());
3620   END();
3621 
3622   if (CAN_RUN()) {
3623     RUN();
3624 
3625     ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16);
3626     ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17);
3627     ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18);
3628     ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19);
3629 
3630     ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
3631     ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
3632     ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22);
3633     ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26);
3634   }
3635 }
3636 
3637 
// Checks Fcvtl and Fcvtl2 (floating-point lengthening conversions) when the
// destination register is the same as the source register.
//
// Each conversion is generated once with a separate destination (v16/v17 for
// single -> double, v20/v21 for half -> single) and once in place on a copy of
// the input (v18/v19 and v22/v23). The in-place results are expected to equal
// the corresponding separate-destination results.
TEST(neon_destructive_fcvtl)3638 TEST(neon_destructive_fcvtl) {
3639   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
3640 
3641   START();
       // Single-precision inputs, widened to double precision.
3642   __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
3643   __ Fcvtl(v16.V2D(), v0.V2S());
3644   __ Fcvtl2(v17.V2D(), v0.V4S());
3645   __ Mov(v18, v0);
3646   __ Mov(v19, v0);
3647   __ Fcvtl(v18.V2D(), v18.V2S());
3648   __ Fcvtl2(v19.V2D(), v19.V4S());
3649 
       // Half-precision inputs, widened to single precision.
3650   __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
3651   __ Fcvtl(v20.V4S(), v1.V4H());
3652   __ Fcvtl2(v21.V4S(), v1.V8H());
3653   __ Mov(v22, v1);
3654   __ Mov(v23, v1);
3655   __ Fcvtl(v22.V4S(), v22.V4H());
3656   __ Fcvtl2(v23.V4S(), v23.V8H());
3657 
3658   END();
3659 
3660   if (CAN_RUN()) {
3661     RUN();
3662 
3663     ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q16);
3664     ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q17);
3665     ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q18);
3666     ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q19);
3667 
3668     ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q20);
3669     ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q21);
3670     ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q22);
3671     ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q23);
3672   }
3673 }
3674 
// Checks vector half-precision Fadd (4H and 8H arrangements) for ordinary
// values, a result that has to be rounded, an infinity operand, and NaN
// operands (one signalling input and one quiet input).
//
// NOTE(review): v5 (negative infinity) is initialised but is not used by any
// Fadd in this test.
TEST(fadd_h_neon)3675 TEST(fadd_h_neon) {
3676   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
3677                       CPUFeatures::kFP,
3678                       CPUFeatures::kNEONHalf);
3679 
3680   START();
3681   __ Fmov(v0.V4H(), 24.0);
3682   __ Fmov(v1.V4H(), 1024.0);
3683   __ Fmov(v2.V8H(), 5.5);
3684   __ Fmov(v3.V8H(), 2048.0);
3685   __ Fmov(v4.V8H(), kFP16PositiveInfinity);
3686   __ Fmov(v5.V8H(), kFP16NegativeInfinity);
3687   __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c2f));
3688   __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe0f));
3689 
3690   __ Fadd(v8.V4H(), v1.V4H(), v0.V4H());
3691   __ Fadd(v9.V8H(), v3.V8H(), v2.V8H());
3692   __ Fadd(v10.V4H(), v4.V4H(), v3.V4H());
3693 
3694   __ Fadd(v11.V4H(), v6.V4H(), v1.V4H());
3695   __ Fadd(v12.V4H(), v7.V4H(), v7.V4H());
3696 
3697   END();
3698 
3699   if (CAN_RUN()) {
3700     RUN();
3701 
         // 1024.0 + 24.0 = 1048.0 in every lane of the 4H result.
3702     ASSERT_EQUAL_128(0x0000000000000000, 0x6418641864186418, q8);
3703     // 2053.5 is unrepresentable in FP16.
3704     ASSERT_EQUAL_128(0x6803680368036803, 0x6803680368036803, q9);
3705 
3706     // Note: we test NaNs here as vectors aren't covered by process_nans_half
3707     // and we don't have traces for half-precision enabled hardware.
3708     // First, +infinity + 2048.0: 0x7c00 is FP16 +infinity (not a NaN).
3709     ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q10);
3710     // Quiet NaN from Signalling.
3711     ASSERT_EQUAL_128(0x0000000000000000, 0x7e2f7e2f7e2f7e2f, q11);
3712     // Quiet NaN.
3713     ASSERT_EQUAL_128(0x0000000000000000, 0xfe0ffe0ffe0ffe0f, q12);
3714   }
3715 }
3716 
// Checks vector half-precision Fsub (4H and 8H arrangements) for ordinary
// values, a result that has to be rounded, an infinity operand, a negative
// result, and NaN operands (one signalling input and one quiet input).
//
// NOTE(review): v5 (negative infinity) is initialised but is not used by any
// Fsub in this test.
TEST(fsub_h_neon)3717 TEST(fsub_h_neon) {
3718   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
3719                       CPUFeatures::kFP,
3720                       CPUFeatures::kNEONHalf);
3721 
3722   START();
3723   __ Fmov(v0.V4H(), 24.0);
3724   __ Fmov(v1.V4H(), 1024.0);
3725   __ Fmov(v2.V8H(), 5.5);
3726   __ Fmov(v3.V8H(), 2048.0);
3727   __ Fmov(v4.V4H(), kFP16PositiveInfinity);
3728   __ Fmov(v5.V4H(), kFP16NegativeInfinity);
3729   __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c22));
3730   __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe02));
3731 
       // The first Fsub writes its result back into v0, which is also one of its
       // sources; the later Fsub into v10 therefore reads the updated v0.
3732   __ Fsub(v0.V4H(), v1.V4H(), v0.V4H());
3733   __ Fsub(v8.V8H(), v3.V8H(), v2.V8H());
3734   __ Fsub(v9.V4H(), v4.V4H(), v3.V4H());
3735   __ Fsub(v10.V4H(), v0.V4H(), v1.V4H());
3736 
3737   __ Fsub(v11.V4H(), v6.V4H(), v2.V4H());
3738   __ Fsub(v12.V4H(), v7.V4H(), v7.V4H());
3739   END();
3740 
3741   if (CAN_RUN()) {
3742     RUN();
3743 
         // 1024.0 - 24.0 = 1000.0 in every lane of the 4H result.
3744     ASSERT_EQUAL_128(0x0000000000000000, 0x63d063d063d063d0, q0);
3745     // 2042.5 is unrepresentable in FP16:
3746     ASSERT_EQUAL_128(0x67fa67fa67fa67fa, 0x67fa67fa67fa67fa, q8);
3747 
3748     // Note: we test NaNs here as vectors aren't covered by process_nans_half
3749     // and we don't have traces for half-precision enabled hardware.
3750     // First, +infinity - 2048.0: 0x7c00 is FP16 +infinity (not a NaN).
3751     ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q9);
         // v0 already holds 1000.0 here, so this is 1000.0 - 1024.0 = -24.0.
3752     ASSERT_EQUAL_128(0x0000000000000000, 0xce00ce00ce00ce00, q10);
3753     // Quiet NaN from Signalling.
3754     ASSERT_EQUAL_128(0x0000000000000000, 0x7e227e227e227e22, q11);
3755     // Quiet NaN.
3756     ASSERT_EQUAL_128(0x0000000000000000, 0xfe02fe02fe02fe02, q12);
3757   }
3758 }
3759 
// Checks vector half-precision Fmul (4H and 8H arrangements) for ordinary
// values, with the operands in both orders, and with positive and negative
// infinity as the first operand.
TEST(fmul_h_neon)3760 TEST(fmul_h_neon) {
3761   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
3762                       CPUFeatures::kFP,
3763                       CPUFeatures::kNEONHalf);
3764 
3765   START();
3766   __ Fmov(v0.V4H(), 24.0);
3767   __ Fmov(v1.V4H(), -2.0);
3768   __ Fmov(v2.V8H(), 5.5);
3769   __ Fmov(v3.V8H(), 0.5);
3770   __ Fmov(v4.V4H(), kFP16PositiveInfinity);
3771   __ Fmov(v5.V4H(), kFP16NegativeInfinity);
3772 
3773   __ Fmul(v6.V4H(), v1.V4H(), v0.V4H());
3774   __ Fmul(v7.V8H(), v3.V8H(), v2.V8H());
3775   __ Fmul(v8.V4H(), v4.V4H(), v3.V4H());
3776   __ Fmul(v9.V4H(), v0.V4H(), v1.V4H());
3777   __ Fmul(v10.V4H(), v5.V4H(), v0.V4H());
3778   END();
3779 
3780   if (CAN_RUN()) {
3781     RUN();
3782 
         // -2.0 * 24.0 = -48.0.
3783     ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q6);
         // 0.5 * 5.5 = 2.75 in all eight lanes.
3784     ASSERT_EQUAL_128(0x4180418041804180, 0x4180418041804180, q7);
         // +infinity * 0.5 = +infinity.
3785     ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
         // Same product as q6 with the operands swapped.
3786     ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q9);
         // -infinity * 24.0 = -infinity.
3787     ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
3788   }
3789 }
3790 
// Checks vector half-precision Fdiv (4H and 8H arrangements) for exact
// quotients, a quotient that has to be rounded, and positive and negative
// infinity as the dividend.
TEST(fdiv_h_neon)3791 TEST(fdiv_h_neon) {
3792   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
3793                       CPUFeatures::kFP,
3794                       CPUFeatures::kNEONHalf);
3795 
3796   START();
3797   __ Fmov(v0.V4H(), 24.0);
3798   __ Fmov(v1.V4H(), -2.0);
3799   __ Fmov(v2.V8H(), 5.5);
3800   __ Fmov(v3.V8H(), 0.5);
3801   __ Fmov(v4.V4H(), kFP16PositiveInfinity);
3802   __ Fmov(v5.V4H(), kFP16NegativeInfinity);
3803 
3804   __ Fdiv(v6.V4H(), v0.V4H(), v1.V4H());
3805   __ Fdiv(v7.V8H(), v2.V8H(), v3.V8H());
3806   __ Fdiv(v8.V4H(), v4.V4H(), v3.V4H());
3807   __ Fdiv(v9.V4H(), v1.V4H(), v0.V4H());
3808   __ Fdiv(v10.V4H(), v5.V4H(), v0.V4H());
3809   END();
3810 
3811   if (CAN_RUN()) {
3812     RUN();
3813 
         // 24.0 / -2.0 = -12.0.
3814     ASSERT_EQUAL_128(0x0000000000000000, 0xca00ca00ca00ca00, q6);
         // 5.5 / 0.5 = 11.0 in all eight lanes.
3815     ASSERT_EQUAL_128(0x4980498049804980, 0x4980498049804980, q7);
         // +infinity / 0.5 = +infinity.
3816     ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
3817     // -0.083333... is unrepresentable in FP16:
3818     ASSERT_EQUAL_128(0x0000000000000000, 0xad55ad55ad55ad55, q9);
         // -infinity / 24.0 = -infinity.
3819     ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
3820   }
3821 }
3822 
// Checks Fcvtl and Fcvtl2 (floating-point lengthening conversions) on a range
// of input bit patterns.
//
// v0 and v1 are converted from half to single precision, and v2-v4 from single
// to double precision. Fcvtl takes the 4H / 2S arrangement of the source and
// Fcvtl2 the 8H / 4S arrangement. The inputs are raw bit patterns that include
// infinities, signed zeros, NaNs and very small values.
TEST(neon_fcvtl)3823 TEST(neon_fcvtl) {
3824   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
3825 
3826   START();
3827 
3828   __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
3829   __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
3830   __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
3831   __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
3832   __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
       // Half precision -> single precision.
3833   __ Fcvtl(v16.V4S(), v0.V4H());
3834   __ Fcvtl2(v17.V4S(), v0.V8H());
3835   __ Fcvtl(v18.V4S(), v1.V4H());
3836   __ Fcvtl2(v19.V4S(), v1.V8H());
3837 
       // Single precision -> double precision.
3838   __ Fcvtl(v20.V2D(), v2.V2S());
3839   __ Fcvtl2(v21.V2D(), v2.V4S());
3840   __ Fcvtl(v22.V2D(), v3.V2S());
3841   __ Fcvtl2(v23.V2D(), v3.V4S());
3842   __ Fcvtl(v24.V2D(), v4.V2S());
3843   __ Fcvtl2(v25.V2D(), v4.V4S());
3844 
3845   END();
3846 
3847   if (CAN_RUN()) {
3848     RUN();
3849     ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
3850     ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
3851     ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
3852     ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
3853     ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
3854     ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
3855     ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
3856     ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
3857     ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
3858     ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
3859   }
3860 }
3861 
3862 
// Checks Fcvtn and Fcvtn2 (floating-point narrowing conversions) on a range of
// input bit patterns.
//
// v0-v2 are narrowed from single to half precision and v3-v8 from double to
// single precision. Most results are built from an Fcvtn followed by an Fcvtn2
// into the same destination register, and are then checked as a full 128-bit
// value. v17 only receives an Fcvtn, so only its low 64 bits (d17) are checked.
TEST(neon_fcvtn)3863 TEST(neon_fcvtn) {
3864   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
3865 
3866   START();
3867 
3868   __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
3869   __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
3870   __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
3871   __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
3872   __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
3873   __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
3874   __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
3875   __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
3876   __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
3877 
       // Single precision -> half precision.
3878   __ Fcvtn(v16.V4H(), v0.V4S());
3879   __ Fcvtn2(v16.V8H(), v1.V4S());
3880   __ Fcvtn(v17.V4H(), v2.V4S());
       // Double precision -> single precision.
3881   __ Fcvtn(v18.V2S(), v3.V2D());
3882   __ Fcvtn2(v18.V4S(), v4.V2D());
3883   __ Fcvtn(v19.V2S(), v5.V2D());
3884   __ Fcvtn2(v19.V4S(), v6.V2D());
3885   __ Fcvtn(v20.V2S(), v7.V2D());
3886   __ Fcvtn2(v20.V4S(), v8.V2D());
3887   END();
3888 
3889   if (CAN_RUN()) {
3890     RUN();
3891     ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
3892     ASSERT_EQUAL_64(0x7e7ffe7f00008000, d17);
3893     ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
3894     ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
3895     ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
3896   }
3897 }
3898 
// Regression test for Fcvtn and Fcvtxn when the destination register is the
// same as the source register.
//
// Fcvtn is generated with separate destinations (v16, v17) and then in place
// on v0 and v1; the in-place results are expected to equal the
// separate-destination results. Fcvtxn is generated in place on v2, which
// holds the same input as v0. All results are expected to have a zero upper
// 64 bits.
TEST(neon_fcvtn_fcvtxn_regression_test)3899 TEST(neon_fcvtn_fcvtxn_regression_test) {
3900   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
3901 
3902   START();
3903   __ Movi(v0.V2D(), 0x3ff0000000000000, 0xbff0000000000000);
3904   __ Movi(v1.V2D(), 0x3f800000bf800000, 0x40000000c0000000);
3905   __ Movi(v2.V2D(), 0x3ff0000000000000, 0xbff0000000000000);
3906 
3907   __ Fcvtn(v16.V2S(), v0.V2D());
3908   __ Fcvtn(v17.V4H(), v1.V4S());
       // In-place conversions: destination and source are the same register.
3909   __ Fcvtn(v0.V2S(), v0.V2D());
3910   __ Fcvtn(v1.V4H(), v1.V4S());
3911   __ Fcvtxn(v2.V2S(), v2.V2D());
3912   END();
3913 
3914   if (CAN_RUN()) {
3915     RUN();
3916     ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q16);
3917     ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q17);
3918     ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q0);
3919     ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q1);
3920     ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q2);
3921   }
3922 }
3923 
// Checks Fcvtxn and Fcvtxn2 (double to single precision narrowing) on a range
// of input bit patterns, plus the scalar form Fcvtxn(s21, d0).
//
// Each of v16-v20 is built from an Fcvtxn into the 2S arrangement followed by
// an Fcvtxn2 into the 4S arrangement of the same register, and is then checked
// as a full 128-bit value. All inputs are read as pairs of double-precision
// values (2D arrangement).
TEST(neon_fcvtxn)3924 TEST(neon_fcvtxn) {
3925   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
3926 
3927   START();
3928   __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
3929   __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
3930   __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
3931   __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
3932   __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
3933   __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
3934   __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
3935   __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
3936   __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
3937   __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
3938   __ Fcvtxn(v16.V2S(), v0.V2D());
3939   __ Fcvtxn2(v16.V4S(), v1.V2D());
3940   __ Fcvtxn(v17.V2S(), v2.V2D());
3941   __ Fcvtxn2(v17.V4S(), v3.V2D());
3942   __ Fcvtxn(v18.V2S(), v4.V2D());
3943   __ Fcvtxn2(v18.V4S(), v5.V2D());
3944   __ Fcvtxn(v19.V2S(), v6.V2D());
3945   __ Fcvtxn2(v19.V4S(), v7.V2D());
3946   __ Fcvtxn(v20.V2S(), v8.V2D());
3947   __ Fcvtxn2(v20.V4S(), v9.V2D());
       // Scalar form, converting the low double of v0.
3948   __ Fcvtxn(s21, d0);
3949   END();
3950 
3951   if (CAN_RUN()) {
3952     RUN();
3953     ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
3954     ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
3955     ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
3956     ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
3957     ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
3958     ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
3959   }
3960 }
3961 
// Checks Addp (pairwise add) in the 16B arrangement with two different source
// registers, comparing the full 128-bit result with the expected value.
TEST(neon_3same_addp)3962 TEST(neon_3same_addp) {
3963   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3964 
3965   START();
3966 
3967   __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
3968   __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
3969   __ Addp(v16.V16B(), v0.V16B(), v1.V16B());
3970 
3971   END();
3972 
3973   if (CAN_RUN()) {
3974     RUN();
3975     ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
3976   }
3977 }
3978 
// Checks Sqdmulh and Sqrdmulh in their three-register forms: vector (4H and
// 4S arrangements) and scalar (H and S registers).
//
// The inputs include the most negative value multiplied by itself (0x8000 in
// the H lanes, 0x80000000 in the S lanes), for which the expected results are
// the largest positive value (0x7fff / 0x7fffffff). The Sqdmulh and Sqrdmulh
// expected values differ for q16/q20 and q17/q21.
TEST(neon_3same_sqdmulh_sqrdmulh)3979 TEST(neon_3same_sqdmulh_sqrdmulh) {
3980   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3981 
3982   START();
3983 
3984   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
3985   __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
3986   __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
3987   __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
3988 
3989   __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
3990   __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
3991   __ Sqdmulh(h18, h0, h1);
3992   __ Sqdmulh(s19, s2, s3);
3993 
3994   __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
3995   __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
3996   __ Sqrdmulh(h22, h0, h1);
3997   __ Sqrdmulh(s23, s2, s3);
3998 
3999   END();
4000 
4001   if (CAN_RUN()) {
4002     RUN();
4003     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
4004     ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
4005     ASSERT_EQUAL_128(0, 0x7fff, q18);
4006     ASSERT_EQUAL_128(0, 0x7fffffff, q19);
4007     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
4008     ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
4009     ASSERT_EQUAL_128(0, 0x7fff, q22);
4010     ASSERT_EQUAL_128(0, 0x7fffffff, q23);
4011   }
4012 }
4013 
// Checks Sqdmulh and Sqrdmulh in their by-element forms, where the second
// operand is a single lane of a vector register.
//
// The vector forms (4H and 4S) multiply by lane 1 of v1 / v3, and the scalar
// forms (H and S registers) multiply by lane 0. The inputs are the same as in
// the three-register variant of this test.
TEST(neon_byelement_sqdmulh_sqrdmulh)4014 TEST(neon_byelement_sqdmulh_sqrdmulh) {
4015   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
4016 
4017   START();
4018 
4019   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
4020   __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
4021   __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4022   __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4023 
4024   __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
4025   __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
4026   __ Sqdmulh(h18, h0, v1.H(), 0);
4027   __ Sqdmulh(s19, s2, v3.S(), 0);
4028 
4029   __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
4030   __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
4031   __ Sqrdmulh(h22, h0, v1.H(), 0);
4032   __ Sqrdmulh(s23, s2, v3.S(), 0);
4033 
4034   END();
4035 
4036   if (CAN_RUN()) {
4037     RUN();
4038     ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
4039     ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
4040     ASSERT_EQUAL_128(0, 0x7fff, q18);
4041     ASSERT_EQUAL_128(0, 0x7fffffff, q19);
4042     ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
4043     ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
4044     ASSERT_EQUAL_128(0, 0x7fff, q22);
4045     ASSERT_EQUAL_128(0, 0x7fffffff, q23);
4046   }
4047 }
4048 
// Checks Sqrdmlah in its three-register forms: vector (4H and 4S
// arrangements) and scalar (H and S registers). The test is set up with the
// kRDM feature in addition to kNEON.
//
// v0-v3 hold the multiplicands. v16-v19 are preloaded because Sqrdmlah names
// them only as destinations, so their previous contents act as the
// accumulator input.
TEST(neon_3same_sqrdmlah)4049 TEST(neon_3same_sqrdmlah) {
4050   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);
4051 
4052   START();
4053 
4054   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
4055   __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
4056   __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4057   __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4058 
       // Initial accumulator values.
4059   __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
4060   __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
4061   __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
4062   __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);
4063 
4064   __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.V4H());
4065   __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.V4S());
4066   __ Sqrdmlah(h18, h0, h1);
4067   __ Sqrdmlah(s19, s2, s3);
4068 
4069   END();
4070 
4071   if (CAN_RUN()) {
4072     RUN();
4073     ASSERT_EQUAL_128(0, 0x0000040104010000, q16);
4074     ASSERT_EQUAL_128(0x000000017fffffff, 0x000000217fffffff, q17);
4075     ASSERT_EQUAL_128(0, 0x7fff, q18);
4076     ASSERT_EQUAL_128(0, 0, q19);
4077   }
4078 }
4079 
// Checks Sqrdmlah in its by-element forms, where the second operand is a
// single lane of a vector register. The test is set up with the kRDM feature
// in addition to kNEON.
//
// The vector forms (4H and 4S) multiply by lane 1 of v1 / v3, and the scalar
// forms (H and S registers) multiply by lane 0. v16-v19 are preloaded because
// their previous contents act as the accumulator input.
TEST(neon_byelement_sqrdmlah)4080 TEST(neon_byelement_sqrdmlah) {
4081   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);
4082 
4083   START();
4084 
4085   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
4086   __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
4087   __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4088   __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4089 
       // Initial accumulator values.
4090   __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
4091   __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
4092   __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
4093   __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);
4094 
4095   __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.H(), 1);
4096   __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.S(), 1);
4097   __ Sqrdmlah(h18, h0, v1.H(), 0);
4098   __ Sqrdmlah(s19, s2, v3.S(), 0);
4099 
4100   END();
4101 
4102   if (CAN_RUN()) {
4103     RUN();
4104     ASSERT_EQUAL_128(0, 0x0000040104018000, q16);
4105     ASSERT_EQUAL_128(0x00000001fffffff0, 0x0000002100107ff0, q17);
4106     ASSERT_EQUAL_128(0, 0x7fff, q18);
4107     ASSERT_EQUAL_128(0, 0, q19);
4108   }
4109 }
4110 
// Checks Sqrdmlsh in its three-register forms: vector (4H and 4S
// arrangements) and scalar (H and S registers). The test is set up with the
// kRDM feature in addition to kNEON.
//
// v0-v3 hold the multiplicands. v16-v19 are all preloaded with the same
// 0x4000... pattern because their previous contents act as the accumulator
// input.
TEST(neon_3same_sqrdmlsh)4111 TEST(neon_3same_sqrdmlsh) {
4112   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);
4113 
4114   START();
4115 
4116   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004000500);
4117   __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000100080);
4118   __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4119   __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4120 
       // Initial accumulator values.
4121   __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
4122   __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
4123   __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
4124   __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);
4125 
4126   __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.V4H());
4127   __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.V4S());
4128   __ Sqrdmlsh(h18, h0, h1);
4129   __ Sqrdmlsh(s19, s2, s3);
4130 
4131   END();
4132 
4133   if (CAN_RUN()) {
4134     RUN();
4135     ASSERT_EQUAL_128(0, 0x40003fff40003ffb, q16);
4136     ASSERT_EQUAL_128(0x40003fffc0004000, 0x40004000c0004000, q17);
4137     ASSERT_EQUAL_128(0, 0x3ffb, q18);
4138     ASSERT_EQUAL_128(0, 0xc0004000, q19);
4139   }
4140 }
4141 
// Checks Sqrdmlsh in its by-element forms, where the second operand is a
// single lane of a vector register. The test is set up with the kRDM feature
// in addition to kNEON.
//
// The vector forms (4H and 4S) multiply by lane 1 of v1 / v3, and the scalar
// forms (H and S registers) multiply by lane 0. v16-v19 are all preloaded with
// the same 0x4000... pattern because their previous contents act as the
// accumulator input.
TEST(neon_byelement_sqrdmlsh)4142 TEST(neon_byelement_sqrdmlsh) {
4143   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);
4144 
4145   START();
4146 
4147   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
4148   __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
4149   __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
4150   __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
4151 
       // Initial accumulator values.
4152   __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
4153   __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
4154   __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
4155   __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);
4156 
4157   __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.H(), 1);
4158   __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.S(), 1);
4159   __ Sqrdmlsh(h18, h0, v1.H(), 0);
4160   __ Sqrdmlsh(s19, s2, v3.S(), 0);
4161 
4162   END();
4163 
4164   if (CAN_RUN()) {
4165     RUN();
4166     ASSERT_EQUAL_128(0, 0x4000400040004010, q16);
4167     ASSERT_EQUAL_128(0x4000400040004010, 0x4000400040004010, q17);
4168     ASSERT_EQUAL_128(0, 0xc000, q18);
4169     ASSERT_EQUAL_128(0, 0xc0004000, q19);
4170   }
4171 }
4172 
// Exercises the three-register vector forms of Sdot and Udot, in both the
// 128-bit (4S <- 16B) and 64-bit (2S <- 8B) arrangements. Needs the NEON and
// DotProduct features.
TEST(neon_3same_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte-vector source operands.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // Identical initial accumulator contents for all four destinations, so the
  // signed and unsigned results can be compared against the same baseline.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.V16B());
  __ Sdot(v17.V2S(), v1.V8B(), v2.V8B());

  __ Udot(v18.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v19.V2S(), v1.V8B(), v2.V8B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000037d8000045f8, 0x000037d8000045f8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e00004000, q17);
    ASSERT_EQUAL_128(0x000119d8000127f8, 0x000119d8000127f8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e00004000, q19);
  }
}
4203 
// Exercises the by-element forms of Sdot and Udot, where the second source is
// the indexed four-byte group `S4B()` at index 1. Covers the 4S <- 16B and
// 2S <- 8B arrangements. Needs the NEON and DotProduct features.
TEST(neon_byelement_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  // Byte-vector source operands.
  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  // Identical initial accumulator contents for all four destinations.
  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Sdot(v17.V2S(), v1.V8B(), v2.S4B(), 1);

  __ Udot(v18.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Udot(v19.V2S(), v1.V8B(), v2.S4B(), 1);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x000037d8000037d8, 0x000037d8000037d8, q16);
    ASSERT_EQUAL_128(0, 0x0000515e0000587e, q17);
    ASSERT_EQUAL_128(0x000119d8000119d8, 0x000119d8000119d8, q18);
    ASSERT_EQUAL_128(0, 0x0000c35e0000ca7e, q19);
  }
}
4234 
4235 
// Exercises Saddlp for every source/destination arrangement pair
// (B->H, H->S, S->D), in both the 128-bit and 64-bit forms.
TEST(neon_2regmisc_saddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Single source vector shared by all six operations.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Saddlp(v16.V8H(), v0.V16B());
  __ Saddlp(v17.V4H(), v0.V8B());

  __ Saddlp(v18.V4S(), v0.V8H());
  __ Saddlp(v19.V2S(), v0.V4H());

  __ Saddlp(v20.V2D(), v0.V4S());
  __ Saddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form is expected to match the low half of the corresponding
    // 128-bit result, with the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
    ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
    ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
  }
}
4264 
// Exercises Uaddlp for every source/destination arrangement pair
// (B->H, H->S, S->D), in both the 128-bit and 64-bit forms. Uses the same
// source value as the Saddlp test.
TEST(neon_2regmisc_uaddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Single source vector shared by all six operations.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Uaddlp(v16.V8H(), v0.V16B());
  __ Uaddlp(v17.V4H(), v0.V8B());

  __ Uaddlp(v18.V4S(), v0.V8H());
  __ Uaddlp(v19.V2S(), v0.V4H());

  __ Uaddlp(v20.V2D(), v0.V4S());
  __ Uaddlp(v21.V1D(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // Each 64-bit form is expected to match the low half of the corresponding
    // 128-bit result, with the upper 64 bits zero.
    ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
    ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
    ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
  }
}
4293 
// Exercises Sadalp for every arrangement pair (B->H, H->S, S->D) in both the
// 128-bit and 64-bit forms. Each destination is seeded with an initial
// accumulator value before the instruction runs.
TEST(neon_2regmisc_sadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0..v2 are used as sources; v1..v4 also provide the initial accumulators.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Bytes of v0 accumulated into halfword lanes seeded from v1.
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Sadalp(v16.V8H(), v0.V16B());
  __ Sadalp(v17.V4H(), v0.V8B());

  // Halfwords of v1 accumulated into word lanes seeded from v2.
  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Sadalp(v18.V4S(), v1.V8H());
  __ Sadalp(v19.V2S(), v1.V4H());

  // Words of v2 accumulated into doubleword lanes seeded from v3 / v4.
  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Sadalp(v20.V2D(), v2.V4S());
  __ Sadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit destinations were seeded with non-zero upper halves, so the
    // zero upper halves expected here also check that those bits are cleared.
    ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
    ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4332 
// Exercises Uadalp for every arrangement pair (B->H, H->S, S->D) in both the
// 128-bit and 64-bit forms. Inputs and initial accumulators are the same as in
// the Sadalp test.
TEST(neon_2regmisc_uadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0..v2 are used as sources; v1..v4 also provide the initial accumulators.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Bytes of v0 accumulated into halfword lanes seeded from v1.
  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Uadalp(v16.V8H(), v0.V16B());
  __ Uadalp(v17.V4H(), v0.V8B());

  // Halfwords of v1 accumulated into word lanes seeded from v2.
  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Uadalp(v18.V4S(), v1.V8H());
  __ Uadalp(v19.V2S(), v1.V4H());

  // Words of v2 accumulated into doubleword lanes seeded from v3 / v4.
  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Uadalp(v20.V2D(), v2.V4S());
  __ Uadalp(v21.V1D(), v2.V2S());

  END();

  if (CAN_RUN()) {
    RUN();
    // The 64-bit destinations were seeded with non-zero upper halves, so the
    // zero upper halves expected here also check that those bits are cleared.
    ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
    ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
    ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
4371 
// Exercises the three-register byte-vector (16B) forms of Mla, Mls and Mul on
// one shared pair of source operands.
TEST(neon_3same_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the multiplication operands. v16 and v17 hold the same
  // initial accumulator for Mla and Mls respectively; Mul needs none.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
  __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
  __ Mul(v18.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
    ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
    ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
  }
}
4395 
4396 
// Exercises the three-register byte-vector (16B) forms of the absolute
// difference instructions: Saba and Uaba (accumulating) and Sabd and Uabd
// (non-accumulating).
TEST(neon_3same_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the operands. v16 and v17 hold the same initial accumulator
  // for Saba and Uaba.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
  __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
  __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
  __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
    ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
    ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
    ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
  }
}
4422 
4423 
// Exercises the by-element forms of Mul, Mla and Mls for the 4H, 8H, 2S and 4S
// arrangements, using the lowest lane (index 0) for the 64-bit forms and the
// highest lane (index 7 for H, 3 for S) for the 128-bit forms.
TEST(neon_byelement_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 is the vector operand; v1 supplies the indexed element.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
  __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
  __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
  __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);

  // Mla: accumulate the same products onto small non-zero initial values.
  __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
  __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
  __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
  __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
  __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
  __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
  __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);

  // Mls: the accumulators are seeded with the values expected from the
  // matching Mul above (q16..q19), so every Mls result is expected to be zero.
  __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
  __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
  __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
  __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
  __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
  __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);

  END();

  if (CAN_RUN()) {
    RUN();
    // Mul results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
    ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);

    // Mla results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
    ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
    ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);

    // Mls results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4478 
4479 
// Exercises the by-element forms of the widening multiplies (4H/8H -> 4S):
// Smull/Umull, Smlal/Umlal and Smlsl/Umlsl, each together with its "2"
// variant. The plain forms use lane 7 of v1 and the "2" forms use lane 0.
TEST(neon_byelement_mull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 is the vector operand; v1 supplies the indexed halfword element.
  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
  __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);

  // Multiply-accumulate: all four accumulators start from the same small
  // non-zero value.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);

  // Multiply-subtract: the accumulators are seeded with the values expected
  // from the matching multiply above (q16..q19), so every result is expected
  // to be zero.
  __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
  __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
  __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
  __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);

  __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Widening multiply results.
    ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
    ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
    ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
    ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);

    // Multiply-accumulate results.
    ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
    ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
    ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
    ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);

    // Multiply-subtract results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  }
}
4535 
4536 
// Exercises the by-element forms of Sqdmull, Sqdmlal and Sqdmlsl: the vector
// form (4H -> 4S, lane 7), the "2" form (8H -> 4S, lane 0) and the scalar form
// (H -> S, lane 7).
TEST(neon_byelement_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 is the vector/scalar operand; v1 supplies the indexed halfword element.
  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmull(s18, h0, v1.H(), 7);

  // Sqdmlal: all accumulators start from the same small non-zero value.
  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlal(s22, h0, v1.H(), 7);

  // Sqdmlsl: the accumulators are seeded with the values expected from the
  // matching Sqdmull above (q16..q18), so every result is expected to be zero.
  __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
  __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
  __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);

  __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlsl(s26, h0, v1.H(), 7);

  END();

  if (CAN_RUN()) {
    RUN();

    // Sqdmull results.
    ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
    ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
    ASSERT_EQUAL_128(0, 0x0000ab54, q18);

    // Sqdmlal results.
    ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
    ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
    ASSERT_EQUAL_128(0, 0x0000ab55, q22);

    // Sqdmlsl results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x00000000, q26);
  }
}
4583 
4584 
// Exercises the widening absolute-difference-accumulate instructions Sabal and
// Uabal (8B sources) and Sabal2 and Uabal2 (16B sources), all accumulating
// into 8H destinations.
TEST(neon_3diff_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the byte operands; v16..v19 all start from the same
  // accumulator value.
  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
    ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
    ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
    ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
  }
}
4612 
4613 
// Exercises Sqdmull and Sqdmull2 with full vector operands (H -> S and S -> D)
// plus the scalar forms (h -> s, s -> d). The inputs are built from the
// extreme signed values (0x8000/0x7fff and their 32-bit equivalents).
TEST(neon_3diff_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 feed the halfword forms, v2/v3 the word forms.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmull(s20, h0, h1);
  __ Sqdmull(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
    ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
    ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
    // Scalar forms: both multiply the minimum value by itself, and the
    // expected result is the maximum positive value (saturated).
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4643 
4644 
// Exercises Sqdmlal and Sqdmlal2 with full vector operands (H -> S and S -> D)
// plus the scalar forms (h -> s, s -> d). The sources are built from the
// extreme signed values, and each destination is seeded with an initial
// accumulator.
TEST(neon_3diff_sqdmlal) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 feed the halfword forms, v2/v3 the word forms.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Initial accumulator values.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
  __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlal(s20, h0, h1);
  __ Sqdmlal(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
    ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
    ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
    ASSERT_EQUAL_128(0, 0x7fffffff, q20);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  }
}
4681 
4682 
// Exercises Sqdmlsl and Sqdmlsl2 with full vector operands (H -> S and S -> D)
// plus the scalar forms (h -> s, s -> d). The sources are built from the
// extreme signed values, and each destination is seeded with an initial
// accumulator.
TEST(neon_3diff_sqdmlsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0/v1 feed the halfword forms, v2/v3 the word forms.
  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  // Initial accumulator values.
  __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
  __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlsl(s20, h0, h1);
  __ Sqdmlsl(d21, s2, s3);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
    ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
    ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
    ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0x80000002, q20);
    ASSERT_EQUAL_128(0, 0x8000000000000002, q21);
  }
}
4719 
4720 
// Exercises the widening multiply-accumulate instructions Smlal and Umlal
// (8B sources) and Smlal2 and Umlal2 (16B sources), all accumulating into 8H
// destinations.
TEST(neon_3diff_mla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the byte operands; v16..v19 all start from the same
  // accumulator value.
  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
    ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
    ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
    ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
  }
}
4748 
4749 
// Exercises the widening multiply-subtract instructions Smlsl and Umlsl
// (8B sources) and Smlsl2 and Umlsl2 (16B sources), all operating on 8H
// destinations. Inputs match those of the neon_3diff_mla test.
TEST(neon_3diff_mls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0 and v1 are the byte operands; v16..v19 all start from the same
  // accumulator value.
  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
    ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
    ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
    ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
  }
}
4777 
4778 
// Exercises the byte-vector (16B) register-register integer compares Cmeq,
// Cmge, Cmgt, Cmhi and Cmhs. Each instruction is run twice: once comparing v0
// with itself and once comparing v0 with v1.
TEST(neon_3same_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
  __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
  __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
  __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
  __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
  __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
  __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
  __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
  __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
  __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();
    // Self-comparisons yield all-ones for the inclusive compares (eq, ge, hs)
    // and all-zeros for the strict ones (gt, hi).
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
    ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
    ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
  }
}
4814 
4815 
// Exercises the scalar (D register) register-register integer compares Cmeq,
// Cmge, Cmgt, Cmhi and Cmhs, using d0 against itself and d0/d1 in one or both
// operand orders.
TEST(neon_3same_scalar_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Only the low 64 bits (d0, d1) take part in the scalar compares below.
  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(d16, d0, d0);
  __ Cmeq(d17, d0, d1);
  __ Cmeq(d18, d1, d0);
  __ Cmge(d19, d0, d0);
  __ Cmge(d20, d0, d1);
  __ Cmge(d21, d1, d0);
  __ Cmgt(d22, d0, d0);
  __ Cmgt(d23, d0, d1);
  __ Cmhi(d24, d0, d0);
  __ Cmhi(d25, d0, d1);
  __ Cmhs(d26, d0, d0);
  __ Cmhs(d27, d0, d1);
  __ Cmhs(d28, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Each result is an all-ones or all-zeros 64-bit mask; the upper 64 bits
    // of the destination are expected to be zero.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q28);
  }
}
4858 
// Exercises the half-precision vector form of Fcmeq (8H and 4H). The values
// 0, NaN, -1.0 and 1.0 are each compared against a register holding 0.
TEST(neon_fcmeq_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmeq(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmeq(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmeq(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmeq(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmeq(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmeq(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmeq(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmeq(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the 0 == 0 comparison is expected to produce an all-ones mask.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v11);
  }
}
4895 
// Exercises the half-precision scalar form of Fcmeq. The values 0.0, a NaN
// (raw bits 0xffff), -1.0 and 1.0 are each compared against 0.0. Needs FPHalf
// in addition to the features used by the vector test.
TEST(neon_fcmeq_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmeq(h4, h0, h0);
  __ Fcmeq(h5, h1, h0);
  __ Fcmeq(h6, h2, h0);
  __ Fcmeq(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Results are bit masks (0xffff or 0x0000), so they are checked via their
    // raw bit patterns.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h7);
  }
}
4924 
// Exercises the half-precision vector form of Fcmge (8H and 4H). The values
// 0, NaN, -1.0 and 1.0 are each compared against a register holding 0.
TEST(neon_fcmge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // 0 >= 0 and 1.0 >= 0 are expected to produce all-ones masks; the NaN and
    // -1.0 comparisons are expected to produce zero.
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
4961 
// Exercises the half-precision scalar form of Fcmge. The values 0.0, a NaN
// (raw bits 0xffff), -1.0 and 1.0 are each compared against 0.0. Needs FPHalf
// in addition to the features used by the vector test.
TEST(neon_fcmge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmge(h4, h0, h0);
  __ Fcmge(h5, h1, h0);
  __ Fcmge(h6, h2, h0);
  __ Fcmge(h7, h3, h0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Results are bit masks (0xffff or 0x0000), so they are checked via their
    // raw bit patterns.
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
    ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
  }
}
4990 
// Exercises the half-precision vector form of Fcmgt (8H and 4H). The values
// 0, NaN, -1.0 and 1.0 are each compared against a register holding 0.
TEST(neon_fcmgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the 1.0 > 0 comparison is expected to produce an all-ones mask.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
    ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
  }
}
5027 
// Scalar half-precision FCMGT (register form). Each of 0.0, NaN, -1.0 and 1.0
// is compared against 0.0; only 1.0 is expected to produce the all-ones mask.
TEST(neon_fcmgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Raw bit patterns of the two possible comparison results.
  const Float16 kTrueMask = RawbitsToFloat16(0xffff);
  const Float16 kFalseMask = RawbitsToFloat16(0x0000);

  START();

  // Operands: h0 = 0.0, h1 = NaN (raw 0xffff), h2 = -1.0, h3 = 1.0.
  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));

  // Compare every operand with h0 (0.0).
  __ Fcmgt(h4, h0, h0);
  __ Fcmgt(h5, h1, h0);
  __ Fcmgt(h6, h2, h0);
  __ Fcmgt(h7, h3, h0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_FP16(kFalseMask, h4);  // 0.0 > 0.0
  ASSERT_EQUAL_FP16(kFalseMask, h5);  // NaN > 0.0
  ASSERT_EQUAL_FP16(kFalseMask, h6);  // -1.0 > 0.0
  ASSERT_EQUAL_FP16(kTrueMask, h7);   // 1.0 > 0.0
}
5056 
// Vector half-precision FACGE (register form) in the 8H and 4H arrangements.
// Lanes holding 0.0, NaN, -1.0 and 1.0 are each compared against lanes holding
// 0.0. Every input except NaN is expected to compare true, including -1.0.
TEST(neon_facge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // 64-bit patterns that repeat one half-precision value in four lanes.
  const uint64_t kZeroLanes = 0x0000000000000000;
  const uint64_t kNaNLanes = 0xffffffffffffffff;
  const uint64_t kMinusOneLanes = 0xbc00bc00bc00bc00;
  const uint64_t kPlusOneLanes = 0x3c003c003c003c00;

  // Expected contents of one 64-bit half of a result register.
  const uint64_t kLanesTrue = 0xffffffffffffffff;
  const uint64_t kLanesFalse = 0;

  START();

  __ Movi(v0.V2D(), kZeroLanes, kZeroLanes);
  __ Movi(v1.V2D(), kNaNLanes, kNaNLanes);
  __ Movi(v2.V2D(), kMinusOneLanes, kMinusOneLanes);
  __ Movi(v3.V2D(), kPlusOneLanes, kPlusOneLanes);

  // 8H arrangement: every input compared with the zero vector.
  __ Facge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facge(v7.V8H(), v3.V8H(), v0.V8H());

  // 4H arrangement: same comparisons on the lower 64 bits only.
  __ Facge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(kLanesTrue, kLanesTrue, v4);    // |0.0| >= |0.0|
  ASSERT_EQUAL_128(kLanesFalse, kLanesFalse, v5);  // |NaN| >= |0.0|
  ASSERT_EQUAL_128(kLanesTrue, kLanesTrue, v6);    // |-1.0| >= |0.0|
  ASSERT_EQUAL_128(kLanesTrue, kLanesTrue, v7);    // |1.0| >= |0.0|

  // For the 4H forms the upper 64 bits are expected to be zero.
  ASSERT_EQUAL_128(kLanesFalse, kLanesTrue, v8);
  ASSERT_EQUAL_128(kLanesFalse, kLanesFalse, v9);
  ASSERT_EQUAL_128(kLanesFalse, kLanesTrue, v10);
  ASSERT_EQUAL_128(kLanesFalse, kLanesTrue, v11);
}
5093 
// Scalar half-precision FACGE (register form). Each of 0.0, NaN, -1.0 and 1.0
// is compared against 0.0; every operand except NaN is expected to produce the
// all-ones mask.
TEST(neon_facge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Raw bit patterns of the two possible comparison results.
  const Float16 kTrueMask = RawbitsToFloat16(0xffff);
  const Float16 kFalseMask = RawbitsToFloat16(0x0000);

  START();

  // Operands: h0 = 0.0, h1 = NaN (raw 0xffff), h2 = -1.0, h3 = 1.0.
  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));

  // Compare every operand with h0 (0.0).
  __ Facge(h4, h0, h0);
  __ Facge(h5, h1, h0);
  __ Facge(h6, h2, h0);
  __ Facge(h7, h3, h0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_FP16(kTrueMask, h4);   // |0.0| >= |0.0|
  ASSERT_EQUAL_FP16(kFalseMask, h5);  // |NaN| >= |0.0|
  ASSERT_EQUAL_FP16(kTrueMask, h6);   // |-1.0| >= |0.0|
  ASSERT_EQUAL_FP16(kTrueMask, h7);   // |1.0| >= |0.0|
}
5122 
// Vector half-precision FACGT (register form) in the 8H and 4H arrangements.
// Lanes holding 0.0, NaN, -1.0 and 1.0 are each compared against lanes holding
// 0.0. Only the -1.0 and 1.0 inputs are expected to compare true.
TEST(neon_facgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // 64-bit patterns that repeat one half-precision value in four lanes.
  const uint64_t kZeroLanes = 0x0000000000000000;
  const uint64_t kNaNLanes = 0xffffffffffffffff;
  const uint64_t kMinusOneLanes = 0xbc00bc00bc00bc00;
  const uint64_t kPlusOneLanes = 0x3c003c003c003c00;

  // Expected contents of one 64-bit half of a result register.
  const uint64_t kLanesTrue = 0xffffffffffffffff;
  const uint64_t kLanesFalse = 0;

  START();

  __ Movi(v0.V2D(), kZeroLanes, kZeroLanes);
  __ Movi(v1.V2D(), kNaNLanes, kNaNLanes);
  __ Movi(v2.V2D(), kMinusOneLanes, kMinusOneLanes);
  __ Movi(v3.V2D(), kPlusOneLanes, kPlusOneLanes);

  // 8H arrangement: every input compared with the zero vector.
  __ Facgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facgt(v7.V8H(), v3.V8H(), v0.V8H());

  // 4H arrangement: same comparisons on the lower 64 bits only.
  __ Facgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(kLanesFalse, kLanesFalse, v4);  // |0.0| > |0.0|
  ASSERT_EQUAL_128(kLanesFalse, kLanesFalse, v5);  // |NaN| > |0.0|
  ASSERT_EQUAL_128(kLanesTrue, kLanesTrue, v6);    // |-1.0| > |0.0|
  ASSERT_EQUAL_128(kLanesTrue, kLanesTrue, v7);    // |1.0| > |0.0|

  // For the 4H forms the upper 64 bits are expected to be zero.
  ASSERT_EQUAL_128(kLanesFalse, kLanesFalse, v8);
  ASSERT_EQUAL_128(kLanesFalse, kLanesFalse, v9);
  ASSERT_EQUAL_128(kLanesFalse, kLanesTrue, v10);
  ASSERT_EQUAL_128(kLanesFalse, kLanesTrue, v11);
}
5159 
// Scalar half-precision FACGT (register form). Each of 0.0, NaN, -1.0 and 1.0
// is compared against 0.0; only -1.0 and 1.0 are expected to produce the
// all-ones mask.
TEST(neon_facgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  // Raw bit patterns of the two possible comparison results.
  const Float16 kTrueMask = RawbitsToFloat16(0xffff);
  const Float16 kFalseMask = RawbitsToFloat16(0x0000);

  START();

  // Operands: h0 = 0.0, h1 = NaN (raw 0xffff), h2 = -1.0, h3 = 1.0.
  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));

  // Compare every operand with h0 (0.0).
  __ Facgt(h4, h0, h0);
  __ Facgt(h5, h1, h0);
  __ Facgt(h6, h2, h0);
  __ Facgt(h7, h3, h0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_FP16(kFalseMask, h4);  // |0.0| > |0.0|
  ASSERT_EQUAL_FP16(kFalseMask, h5);  // |NaN| > |0.0|
  ASSERT_EQUAL_FP16(kTrueMask, h6);   // |-1.0| > |0.0|
  ASSERT_EQUAL_FP16(kTrueMask, h7);   // |1.0| > |0.0|
}
5188 
// FCMEQ against the immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms. Only the zero inputs are expected to compare equal.
TEST(neon_2regmisc_fcmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Input bit patterns, used for both 64-bit halves of a register.
  const uint64_t kZeroBits = 0x0000000000000000;
  const uint64_t kNaNBits = 0xffffffffffffffff;
  const uint64_t kNegativeBits = 0xbf800000bf800000;
  const uint64_t kPositiveBits = 0x3f8000003f800000;

  // Expected result patterns for one 64-bit half of a register.
  const uint64_t kAllSet = 0xffffffffffffffff;
  const uint64_t kLow32Set = 0xffffffff;
  const uint64_t kAllClear = 0;

  START();

  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegativeBits, kNegativeBits);
  // v3 is initialised like in the sibling tests but not read by this one.
  __ Movi(v3.V2D(), kPositiveBits, kPositiveBits);

  // Scalar single-precision.
  __ Fcmeq(s16, s0, 0.0);
  __ Fcmeq(s17, s1, 0.0);
  __ Fcmeq(s18, s2, 0.0);

  // Scalar double-precision.
  __ Fcmeq(d19, d0, 0.0);
  __ Fcmeq(d20, d1, 0.0);
  __ Fcmeq(d21, d2, 0.0);

  // Vector forms.
  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(kAllClear, kLow32Set, q16);  // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q17);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q18);  // negative

  ASSERT_EQUAL_128(kAllClear, kAllSet, q19);    // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q20);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q21);  // negative

  ASSERT_EQUAL_128(kAllClear, kAllSet, q22);    // 2S, zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q23);  // 4S, NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q24);  // 2D, NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q25);  // 2D, negative
}
5226 
// FCMGE against the immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms. Zero and positive inputs are expected to compare true; NaN and
// negative inputs are not.
TEST(neon_2regmisc_fcmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Input bit patterns, used for both 64-bit halves of a register.
  const uint64_t kZeroBits = 0x0000000000000000;
  const uint64_t kNaNBits = 0xffffffffffffffff;
  const uint64_t kNegativeBits = 0xbf800000bf800000;
  const uint64_t kPositiveBits = 0x3f8000003f800000;

  // Expected result patterns for one 64-bit half of a register.
  const uint64_t kAllSet = 0xffffffffffffffff;
  const uint64_t kLow32Set = 0xffffffff;
  const uint64_t kAllClear = 0;

  START();

  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegativeBits, kNegativeBits);
  __ Movi(v3.V2D(), kPositiveBits, kPositiveBits);

  // Scalar single-precision.
  __ Fcmge(s16, s0, 0.0);
  __ Fcmge(s17, s1, 0.0);
  __ Fcmge(s18, s2, 0.0);

  // Scalar double-precision.
  __ Fcmge(d19, d0, 0.0);
  __ Fcmge(d20, d1, 0.0);
  __ Fcmge(d21, d3, 0.0);

  // Vector forms.
  __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmge(v25.V2D(), v3.V2D(), 0.0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(kAllClear, kLow32Set, q16);  // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q17);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q18);  // negative

  ASSERT_EQUAL_128(kAllClear, kAllSet, q19);    // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q20);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllSet, q21);    // positive

  ASSERT_EQUAL_128(kAllClear, kAllSet, q22);    // 2S, zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q23);  // 4S, NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q24);  // 2D, NaN
  ASSERT_EQUAL_128(kAllSet, kAllSet, q25);      // 2D, positive
}
5264 
5265 
// FCMGT against the immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms. Only the positive inputs are expected to compare true.
TEST(neon_2regmisc_fcmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Input bit patterns, used for both 64-bit halves of a register.
  const uint64_t kZeroBits = 0x0000000000000000;
  const uint64_t kNaNBits = 0xffffffffffffffff;
  const uint64_t kNegativeBits = 0xbf800000bf800000;
  const uint64_t kPositiveBits = 0x3f8000003f800000;

  // Expected result patterns for one 64-bit half of a register.
  const uint64_t kAllSet = 0xffffffffffffffff;
  const uint64_t kAllClear = 0;

  START();

  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegativeBits, kNegativeBits);
  __ Movi(v3.V2D(), kPositiveBits, kPositiveBits);

  // Scalar single-precision.
  __ Fcmgt(s16, s0, 0.0);
  __ Fcmgt(s17, s1, 0.0);
  __ Fcmgt(s18, s2, 0.0);

  // Scalar double-precision.
  __ Fcmgt(d19, d0, 0.0);
  __ Fcmgt(d20, d1, 0.0);
  __ Fcmgt(d21, d3, 0.0);

  // Vector forms.
  __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(kAllClear, kAllClear, q16);  // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q17);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q18);  // negative

  ASSERT_EQUAL_128(kAllClear, kAllClear, q19);  // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q20);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllSet, q21);    // positive

  ASSERT_EQUAL_128(kAllClear, kAllClear, q22);  // 2S, zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q23);  // 4S, NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q24);  // 2D, NaN
  ASSERT_EQUAL_128(kAllSet, kAllSet, q25);      // 2D, positive
}
5303 
// FCMLE against the immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms. Zero and negative inputs are expected to compare true; NaN and
// positive inputs are not.
TEST(neon_2regmisc_fcmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Input bit patterns, used for both 64-bit halves of a register.
  const uint64_t kZeroBits = 0x0000000000000000;
  const uint64_t kNaNBits = 0xffffffffffffffff;
  const uint64_t kNegativeBits = 0xbf800000bf800000;
  const uint64_t kPositiveBits = 0x3f8000003f800000;

  // Expected result patterns for one 64-bit half of a register.
  const uint64_t kAllSet = 0xffffffffffffffff;
  const uint64_t kLow32Set = 0xffffffff;
  const uint64_t kAllClear = 0;

  START();

  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegativeBits, kNegativeBits);
  __ Movi(v3.V2D(), kPositiveBits, kPositiveBits);

  // Scalar single-precision.
  __ Fcmle(s16, s0, 0.0);
  __ Fcmle(s17, s1, 0.0);
  __ Fcmle(s18, s3, 0.0);

  // Scalar double-precision.
  __ Fcmle(d19, d0, 0.0);
  __ Fcmle(d20, d1, 0.0);
  __ Fcmle(d21, d2, 0.0);

  // Vector forms.
  __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmle(v25.V2D(), v2.V2D(), 0.0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(kAllClear, kLow32Set, q16);  // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q17);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q18);  // positive

  ASSERT_EQUAL_128(kAllClear, kAllSet, q19);    // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q20);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllSet, q21);    // negative

  ASSERT_EQUAL_128(kAllClear, kAllSet, q22);    // 2S, zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q23);  // 4S, NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q24);  // 2D, NaN
  ASSERT_EQUAL_128(kAllSet, kAllSet, q25);      // 2D, negative
}
5341 
5342 
// FCMLT against the immediate 0.0, in scalar (S, D) and vector (2S, 4S, 2D)
// forms. Only the negative inputs are expected to compare true.
TEST(neon_2regmisc_fcmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Input bit patterns, used for both 64-bit halves of a register.
  const uint64_t kZeroBits = 0x0000000000000000;
  const uint64_t kNaNBits = 0xffffffffffffffff;
  const uint64_t kNegativeBits = 0xbf800000bf800000;
  const uint64_t kPositiveBits = 0x3f8000003f800000;

  // Expected result patterns for one 64-bit half of a register.
  const uint64_t kAllSet = 0xffffffffffffffff;
  const uint64_t kAllClear = 0;

  START();

  __ Movi(v0.V2D(), kZeroBits, kZeroBits);
  __ Movi(v1.V2D(), kNaNBits, kNaNBits);
  __ Movi(v2.V2D(), kNegativeBits, kNegativeBits);
  __ Movi(v3.V2D(), kPositiveBits, kPositiveBits);

  // Scalar single-precision.
  __ Fcmlt(s16, s0, 0.0);
  __ Fcmlt(s17, s1, 0.0);
  __ Fcmlt(s18, s3, 0.0);

  // Scalar double-precision.
  __ Fcmlt(d19, d0, 0.0);
  __ Fcmlt(d20, d1, 0.0);
  __ Fcmlt(d21, d2, 0.0);

  // Vector forms.
  __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(kAllClear, kAllClear, q16);  // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q17);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q18);  // positive

  ASSERT_EQUAL_128(kAllClear, kAllClear, q19);  // zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q20);  // NaN
  ASSERT_EQUAL_128(kAllClear, kAllSet, q21);    // negative

  ASSERT_EQUAL_128(kAllClear, kAllClear, q22);  // 2S, zero
  ASSERT_EQUAL_128(kAllClear, kAllClear, q23);  // 4S, NaN
  ASSERT_EQUAL_128(kAllClear, kAllClear, q24);  // 2D, NaN
  ASSERT_EQUAL_128(kAllSet, kAllSet, q25);      // 2D, negative
}
5380 
// Integer CMEQ against the immediate zero, for every vector arrangement and
// the scalar D form. Each expected value has all bits set in the lanes whose
// input equals zero and all bits clear elsewhere.
TEST(neon_2regmisc_cmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: lower 64 bits are zero, upper 64 bits are not.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  // v1: a mix of zero and non-zero bytes.
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte and halfword lanes, taken from v1.
  __ Cmeq(v16.V8B(), v1.V8B(), 0);
  __ Cmeq(v17.V16B(), v1.V16B(), 0);
  __ Cmeq(v18.V4H(), v1.V4H(), 0);
  __ Cmeq(v19.V8H(), v1.V8H(), 0);

  // Word lanes, taken from v0.
  __ Cmeq(v20.V2S(), v0.V2S(), 0);
  __ Cmeq(v21.V4S(), v0.V4S(), 0);

  // Doubleword forms: scalar, then vector.
  __ Cmeq(d22, d0, 0);
  __ Cmeq(d23, d1, 0);
  __ Cmeq(v24.V2D(), v0.V2D(), 0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
  ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
  ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);

  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
}
5414 
5415 
// Integer CMGE against the immediate zero, for every vector arrangement and
// the scalar D form. The expected values treat lanes as signed: a lane with
// its top bit set (for example the byte 0xff) yields an all-zeros result.
TEST(neon_2regmisc_cmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: lower 64 bits are zero; the upper 64 bits have the top bit set.
  __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
  // v1: a mix of zero, positive and top-bit-set bytes.
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte and halfword lanes, taken from v1.
  __ Cmge(v16.V8B(), v1.V8B(), 0);
  __ Cmge(v17.V16B(), v1.V16B(), 0);
  __ Cmge(v18.V4H(), v1.V4H(), 0);
  __ Cmge(v19.V8H(), v1.V8H(), 0);

  // Word lanes, taken from v0.
  __ Cmge(v20.V2S(), v0.V2S(), 0);
  __ Cmge(v21.V4S(), v0.V4S(), 0);

  // Doubleword forms: scalar, then vector.
  __ Cmge(d22, d0, 0);
  __ Cmge(d23, d1, 0);
  __ Cmge(v24.V2D(), v0.V2D(), 0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
  ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
  ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);

  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
}
5449 
5450 
// Integer CMLT against the immediate zero, for every vector arrangement and
// the scalar D form. The expected values treat lanes as signed: only lanes
// with their top bit set yield an all-ones result.
TEST(neon_2regmisc_cmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: the lower 64 bits have the top bit set; the upper 64 bits do not.
  __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
  // v1: a mix of zero, positive and top-bit-set bytes.
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte, halfword and word lanes, all taken from v1.
  __ Cmlt(v16.V8B(), v1.V8B(), 0);
  __ Cmlt(v17.V16B(), v1.V16B(), 0);
  __ Cmlt(v18.V4H(), v1.V4H(), 0);
  __ Cmlt(v19.V8H(), v1.V8H(), 0);
  __ Cmlt(v20.V2S(), v1.V2S(), 0);
  __ Cmlt(v21.V4S(), v1.V4S(), 0);

  // Doubleword forms: scalar, then vector.
  __ Cmlt(d22, d0, 0);
  __ Cmlt(d23, d1, 0);
  __ Cmlt(v24.V2D(), v0.V2D(), 0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
  ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
}
5484 
5485 
// Integer CMLE against the immediate zero, for every vector arrangement and
// the scalar D form. The expected values treat lanes as signed: zero lanes and
// lanes with their top bit set yield an all-ones result.
TEST(neon_2regmisc_cmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: lower 64 bits are zero; the upper 64 bits are positive.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  // v1: a mix of zero, positive and top-bit-set bytes.
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte, halfword and word lanes, all taken from v1.
  __ Cmle(v16.V8B(), v1.V8B(), 0);
  __ Cmle(v17.V16B(), v1.V16B(), 0);
  __ Cmle(v18.V4H(), v1.V4H(), 0);
  __ Cmle(v19.V8H(), v1.V8H(), 0);
  __ Cmle(v20.V2S(), v1.V2S(), 0);
  __ Cmle(v21.V4S(), v1.V4S(), 0);

  // Doubleword forms: scalar, then vector.
  __ Cmle(d22, d0, 0);
  __ Cmle(d23, d1, 0);
  __ Cmle(v24.V2D(), v0.V2D(), 0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
  ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
  ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
}
5519 
5520 
// Integer CMGT against the immediate zero, for every vector arrangement and
// the scalar D form. The expected values treat lanes as signed: only non-zero
// lanes with their top bit clear yield an all-ones result.
TEST(neon_2regmisc_cmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // v0: lower 64 bits are zero; the upper 64 bits are positive.
  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  // v1: a mix of zero, positive and top-bit-set bytes.
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  // Byte and halfword lanes, taken from v1.
  __ Cmgt(v16.V8B(), v1.V8B(), 0);
  __ Cmgt(v17.V16B(), v1.V16B(), 0);
  __ Cmgt(v18.V4H(), v1.V4H(), 0);
  __ Cmgt(v19.V8H(), v1.V8H(), 0);

  // Word lanes, taken from v0.
  __ Cmgt(v20.V2S(), v0.V2S(), 0);
  __ Cmgt(v21.V4S(), v0.V4S(), 0);

  // Doubleword forms: scalar, then vector.
  __ Cmgt(d22, d0, 0);
  __ Cmgt(d23, d1, 0);
  __ Cmgt(v24.V2D(), v0.V2D(), 0);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
  ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
  ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
}
5554 
5555 
// Integer NEG for every vector arrangement and the scalar D form. The inputs
// include the most negative value of each lane size, which the expected values
// show is left unchanged (for example the byte 0x80 stays 0x80).
TEST(neon_2regmisc_neg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source register per lane size: bytes, halfwords, words, then two
  // doubleword sources.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // 64-bit and 128-bit form for each of the B, H and S lane sizes.
  __ Neg(v16.V8B(), v0.V8B());
  __ Neg(v17.V16B(), v0.V16B());
  __ Neg(v18.V4H(), v1.V4H());
  __ Neg(v19.V8H(), v1.V8H());
  __ Neg(v20.V2S(), v2.V2S());
  __ Neg(v21.V4S(), v2.V4S());

  // Doubleword forms: scalar, then vector.
  __ Neg(d22, d3);
  __ Neg(v23.V2D(), v3.V2D());
  __ Neg(v24.V2D(), v4.V2D());

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
  ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
  ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
  ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
}
5592 
5593 
// Saturating SQNEG for every vector arrangement and every scalar size. The
// inputs include the most negative value of each lane size, which the expected
// values show saturates to the most positive value (for example the byte 0x80
// becomes 0x7f).
TEST(neon_2regmisc_sqneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source register per lane size: bytes, halfwords, words, then two
  // doubleword sources.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqneg(v16.V8B(), v0.V8B());
  __ Sqneg(v17.V16B(), v0.V16B());
  __ Sqneg(v18.V4H(), v1.V4H());
  __ Sqneg(v19.V8H(), v1.V8H());
  __ Sqneg(v20.V2S(), v2.V2S());
  __ Sqneg(v21.V4S(), v2.V4S());
  __ Sqneg(v22.V2D(), v3.V2D());
  __ Sqneg(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each source.
  __ Sqneg(b24, b0);
  __ Sqneg(h25, h1);
  __ Sqneg(s26, s2);
  __ Sqneg(d27, d3);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  // Vector results.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
  ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
  ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

  // Scalar results; every bit above the scalar is expected to be zero.
  ASSERT_EQUAL_128(0, 0x7f, q24);
  ASSERT_EQUAL_128(0, 0x8001, q25);
  ASSERT_EQUAL_128(0, 0x80000001, q26);
  ASSERT_EQUAL_128(0, 0x8000000000000001, q27);
}
5638 
5639 
// Integer ABS for every vector arrangement and the scalar D form. The inputs
// include the most negative value of each lane size, which the expected values
// show is left unchanged (for example the byte 0x80 stays 0x80).
TEST(neon_2regmisc_abs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source register per lane size: bytes, halfwords, words, then two
  // doubleword sources.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // 64-bit and 128-bit form for each of the B, H and S lane sizes.
  __ Abs(v16.V8B(), v0.V8B());
  __ Abs(v17.V16B(), v0.V16B());
  __ Abs(v18.V4H(), v1.V4H());
  __ Abs(v19.V8H(), v1.V8H());
  __ Abs(v20.V2S(), v2.V2S());
  __ Abs(v21.V4S(), v2.V4S());

  // Doubleword forms: scalar, then vector.
  __ Abs(d22, d3);
  __ Abs(v23.V2D(), v3.V2D());
  __ Abs(v24.V2D(), v4.V2D());

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
  ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
  ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
}
5676 
5677 
// Saturating SQABS for every vector arrangement and every scalar size. The
// inputs include the most negative value of each lane size, which the expected
// values show saturates to the most positive value (for example the byte 0x80
// becomes 0x7f).
TEST(neon_2regmisc_sqabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // One source register per lane size: bytes, halfwords, words, then two
  // doubleword sources.
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqabs(v16.V8B(), v0.V8B());
  __ Sqabs(v17.V16B(), v0.V16B());
  __ Sqabs(v18.V4H(), v1.V4H());
  __ Sqabs(v19.V8H(), v1.V8H());
  __ Sqabs(v20.V2S(), v2.V2S());
  __ Sqabs(v21.V4S(), v2.V4S());
  __ Sqabs(v22.V2D(), v3.V2D());
  __ Sqabs(v23.V2D(), v4.V2D());

  // Scalar forms, operating on the lowest lane of each source.
  __ Sqabs(b24, b0);
  __ Sqabs(h25, h1);
  __ Sqabs(s26, s2);
  __ Sqabs(d27, d3);

  END();

  // Nothing to check when the generated code cannot be executed.
  const bool can_execute = CAN_RUN();
  if (!can_execute) return;

  RUN();

  // Vector results.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
  ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

  // Scalar results; every bit above the scalar is expected to be zero.
  ASSERT_EQUAL_128(0, 0x7f, q24);
  ASSERT_EQUAL_128(0, 0x7fff, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
}
5722 
TEST(neon_2regmisc_suqadd)5723 TEST(neon_2regmisc_suqadd) {
5724   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
5725 
5726   START();
5727 
5728   __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
5729   __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);
5730 
5731   __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
5732   __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);
5733 
5734   __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
5735   __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);
5736 
5737   __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
5738   __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);
5739 
5740   __ Mov(v16.V2D(), v0.V2D());
5741   __ Mov(v17.V2D(), v0.V2D());
5742   __ Mov(v18.V2D(), v2.V2D());
5743   __ Mov(v19.V2D(), v2.V2D());
5744   __ Mov(v20.V2D(), v4.V2D());
5745   __ Mov(v21.V2D(), v4.V2D());
5746   __ Mov(v22.V2D(), v6.V2D());
5747 
5748   __ Mov(v23.V2D(), v0.V2D());
5749   __ Mov(v24.V2D(), v2.V2D());
5750   __ Mov(v25.V2D(), v4.V2D());
5751   __ Mov(v26.V2D(), v6.V2D());
5752 
5753   __ Suqadd(v16.V8B(), v1.V8B());
5754   __ Suqadd(v17.V16B(), v1.V16B());
5755   __ Suqadd(v18.V4H(), v3.V4H());
5756   __ Suqadd(v19.V8H(), v3.V8H());
5757   __ Suqadd(v20.V2S(), v5.V2S());
5758   __ Suqadd(v21.V4S(), v5.V4S());
5759   __ Suqadd(v22.V2D(), v7.V2D());
5760 
5761   __ Suqadd(b23, b1);
5762   __ Suqadd(h24, h3);
5763   __ Suqadd(s25, s5);
5764   __ Suqadd(d26, d7);
5765 
5766   END();
5767 
5768   if (CAN_RUN()) {
5769     RUN();
5770     ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
5771     ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
5772     ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
5773     ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
5774     ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
5775     ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
5776     ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);
5777 
5778     ASSERT_EQUAL_128(0, 0x7f, q23);
5779     ASSERT_EQUAL_128(0, 0x7ffe, q24);
5780     ASSERT_EQUAL_128(0, 0x7fffffff, q25);
5781     ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
5782   }
5783 }
5784 
TEST(neon_2regmisc_usqadd)5785 TEST(neon_2regmisc_usqadd) {
5786   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
5787 
5788   START();
5789 
5790   __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
5791   __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);
5792 
5793   __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
5794   __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);
5795 
5796   __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
5797   __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);
5798 
5799   __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
5800   __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);
5801 
5802   __ Mov(v16.V2D(), v0.V2D());
5803   __ Mov(v17.V2D(), v0.V2D());
5804   __ Mov(v18.V2D(), v2.V2D());
5805   __ Mov(v19.V2D(), v2.V2D());
5806   __ Mov(v20.V2D(), v4.V2D());
5807   __ Mov(v21.V2D(), v4.V2D());
5808   __ Mov(v22.V2D(), v6.V2D());
5809 
5810   __ Mov(v23.V2D(), v0.V2D());
5811   __ Mov(v24.V2D(), v2.V2D());
5812   __ Mov(v25.V2D(), v4.V2D());
5813   __ Mov(v26.V2D(), v6.V2D());
5814 
5815   __ Usqadd(v16.V8B(), v1.V8B());
5816   __ Usqadd(v17.V16B(), v1.V16B());
5817   __ Usqadd(v18.V4H(), v3.V4H());
5818   __ Usqadd(v19.V8H(), v3.V8H());
5819   __ Usqadd(v20.V2S(), v5.V2S());
5820   __ Usqadd(v21.V4S(), v5.V4S());
5821   __ Usqadd(v22.V2D(), v7.V2D());
5822 
5823   __ Usqadd(b23, b1);
5824   __ Usqadd(h24, h3);
5825   __ Usqadd(s25, s5);
5826   __ Usqadd(d26, d7);
5827 
5828   END();
5829 
5830   if (CAN_RUN()) {
5831     RUN();
5832     ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
5833     ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
5834     ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
5835     ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
5836     ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
5837     ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
5838     ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);
5839 
5840     ASSERT_EQUAL_128(0, 0xff, q23);
5841     ASSERT_EQUAL_128(0, 0x7ffe, q24);
5842     ASSERT_EQUAL_128(0, 0xffffffff, q25);
5843     ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
5844   }
5845 }
5846 
// Xtn/Xtn2: narrow every source lane to half its width by keeping the low
// half of the lane. Xtn fills the low 64 bits of the destination and Xtn2
// fills the high 64 bits, so each Xtn/Xtn2 pair below builds one Q register.
TEST(neon_2regmisc_xtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors (Movi takes the high 64 bits first, then the low 64 bits).
  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // H -> B, S -> H and D -> S narrowing.
  __ Xtn(v16.V8B(), v0.V8H());
  __ Xtn2(v16.V16B(), v1.V8H());
  __ Xtn(v17.V4H(), v1.V4S());
  __ Xtn2(v17.V8H(), v2.V4S());
  __ Xtn(v18.V2S(), v3.V2D());
  __ Xtn2(v18.V4S(), v4.V2D());

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
  }
}
5874 
5875 
// Sqxtn/Sqxtn2: signed saturating narrow. Source lanes that do not fit the
// narrower signed lane clamp to its minimum or maximum. Covers the vector
// forms (Sqxtn -> low 64 bits, Sqxtn2 -> high 64 bits) and the scalar forms.
TEST(neon_2regmisc_sqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors (Movi takes the high 64 bits first, then the low 64 bits).
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqxtn(v16.V8B(), v0.V8H());
  __ Sqxtn2(v16.V16B(), v1.V8H());
  __ Sqxtn(v17.V4H(), v1.V4S());
  __ Sqxtn2(v17.V8H(), v2.V4S());
  __ Sqxtn(v18.V2S(), v3.V2D());
  __ Sqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms: only the lowest lane of v0 is narrowed.
  __ Sqxtn(b19, h0);
  __ Sqxtn(h20, s0);
  __ Sqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
    ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
    // Scalar results occupy the lowest lane; the rest of the register is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
5909 
5910 
// Uqxtn/Uqxtn2: unsigned saturating narrow. Source lanes larger than the
// narrower unsigned lane can hold clamp to all-ones. Covers the vector forms
// (Uqxtn -> low 64 bits, Uqxtn2 -> high 64 bits) and the scalar forms.
TEST(neon_2regmisc_uqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors (Movi takes the high 64 bits first, then the low 64 bits).
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Uqxtn(v16.V8B(), v0.V8H());
  __ Uqxtn2(v16.V16B(), v1.V8H());
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  // Scalar forms: only the lowest lane of v0 is narrowed.
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
    // Scalar results occupy the lowest lane; the rest of the register is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  }
}
5944 
5945 
// Sqxtun/Sqxtun2: narrow signed source lanes into unsigned lanes with
// saturation. Negative inputs clamp to zero and inputs too large for the
// narrow lane clamp to all-ones. Covers the vector forms (Sqxtun -> low 64
// bits, Sqxtun2 -> high 64 bits) and the scalar forms.
TEST(neon_2regmisc_sqxtun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Source vectors (Movi takes the high 64 bits first, then the low 64 bits).
  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  // Vector forms.
  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  // Scalar forms: only the lowest lane of v0 is narrowed.
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
    // Scalar results occupy the lowest lane; the rest of the register is zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  }
}
5979 
// And: bitwise AND of two vectors, in the 128-bit (16B) and 64-bit (8B)
// forms. The 8B form is expected to leave the high 64 bits of the
// destination zero.
TEST(neon_3same_and) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ And(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ And(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ And(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ And(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
    ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
  }
}
6002 
// Bic: bitwise bit-clear (first operand AND NOT second operand), in the
// 128-bit (16B) and 64-bit (8B) forms. The 8B form is expected to leave the
// high 64 bits of the destination zero.
TEST(neon_3same_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
  }
}
6025 
// Orr: bitwise OR of two vectors, in the 128-bit (16B) and 64-bit (8B)
// forms. The 8B form is expected to leave the high 64 bits of the
// destination zero.
TEST(neon_3same_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
    ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
  }
}
6048 
// Mov (vector register to vector register) for every arrangement. The
// 128-bit arrangements must copy the whole register; the 64-bit arrangements
// must copy the low 64 bits and leave the high 64 bits zero.
TEST(neon_3same_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);

  // 128-bit arrangements.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
    ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);

    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
    ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);
  }
}
6079 
// Orn: bitwise OR-NOT (first operand OR NOT second operand), in the 128-bit
// (16B) and 64-bit (8B) forms. The 8B form is expected to leave the high 64
// bits of the destination zero.
TEST(neon_3same_orn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
    ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
  }
}
6102 
// Eor: bitwise exclusive OR of two vectors, in the 128-bit (16B) and 64-bit
// (8B) forms. The 8B form is expected to leave the high 64 bits of the
// destination zero.
TEST(neon_3same_eor) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
    ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
    ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
  }
}
6125 
// Bif (bitwise insert if false): for every bit that is 0 in the second
// source, the matching bit of the first source is copied into the
// destination; other destination bits are kept. The destination is therefore
// also an input and is preloaded before each Bif.
TEST(neon_3same_bif) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is: destination, first source, second source (the mask).
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
    ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  }
}
6156 
// Bit (bitwise insert if true): for every bit that is 1 in the second
// source, the matching bit of the first source is copied into the
// destination; other destination bits are kept. The destination is therefore
// also an input and is preloaded before each Bit.
TEST(neon_3same_bit) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is: destination, first source, second source (the mask).
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
    ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
    ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  }
}
6187 
// Bsl (bitwise select): the incoming destination value is the selector. A 1
// bit picks the matching bit of the first source and a 0 bit picks the
// matching bit of the second source. The destination is therefore also an
// input and is preloaded before each Bsl.
TEST(neon_3same_bsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Each group is: destination (selector), first source, second source.
  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
    ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
    ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  }
}
6218 
6219 
// Smax: lane-wise signed maximum of two vectors, for B, H and S lanes in
// both the 64-bit and 128-bit forms. The 64-bit forms are expected to leave
// the high 64 bits of the destination zero.
TEST(neon_3same_smax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit forms.
  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit forms.
  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
    ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  }
}
6248 
6249 
// Smaxp: pairwise signed maximum. Adjacent lane pairs of the first source
// produce the lower half of the result lanes and adjacent lane pairs of the
// second source produce the upper half. Covers B, H and S lanes in both the
// 64-bit and 128-bit forms.
TEST(neon_3same_smaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit forms.
  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit forms.
  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
    ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
    ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
    ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  }
}
6278 
6279 
// Addp (scalar form): adds the two 64-bit lanes of the source vector and
// writes the sum, wrapped to 64 bits, to a D register.
TEST(neon_addp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // The high 64 bits of each destination are expected to be zero.
    ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
    ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
    ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
  }
}
6303 
// Addv: sums all lanes of the source vector into a scalar of the same lane
// size; the sum wraps to the lane width. The 64-bit source forms only read
// the low 64 bits of the source register.
TEST(neon_acrosslanes_addv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());
  __ Addv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Everything above the scalar result is expected to be zero.
    ASSERT_EQUAL_128(0x0, 0xc7, q16);
    ASSERT_EQUAL_128(0x0, 0x99, q17);
    ASSERT_EQUAL_128(0x0, 0x55a9, q18);
    ASSERT_EQUAL_128(0x0, 0x55fc, q19);
    ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
  }
}
6331 
6332 
// Saddlv: signed long add across lanes. Every source lane is treated as a
// signed value and the sum is written to a scalar twice as wide as the lane
// (B -> H, H -> S, S -> D).
TEST(neon_acrosslanes_saddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());
  __ Saddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Everything above the scalar result is expected to be zero.
    ASSERT_EQUAL_128(0x0, 0xffc7, q16);
    ASSERT_EQUAL_128(0x0, 0xff99, q17);
    ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
    ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
    ASSERT_EQUAL_128(0x0, 0x000000001100a9fe, q20);
  }
}
6360 
6361 
// Uaddlv: unsigned long add across lanes. Every source lane is treated as an
// unsigned value and the sum is written to a scalar twice as wide as the
// lane (B -> H, H -> S, S -> D).
TEST(neon_acrosslanes_uaddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());
  __ Uaddlv(d20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Everything above the scalar result is expected to be zero.
    ASSERT_EQUAL_128(0x0, 0x02c7, q16);
    ASSERT_EQUAL_128(0x0, 0x0599, q17);
    ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
    ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
    ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
  }
}
6389 
6390 
// Smaxv: signed maximum across all lanes of the source vector, written to a
// scalar of the same lane size.
TEST(neon_acrosslanes_smaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());
  __ Smaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Everything above the scalar result is expected to be zero.
    ASSERT_EQUAL_128(0x0, 0x33, q16);
    ASSERT_EQUAL_128(0x0, 0x44, q17);
    ASSERT_EQUAL_128(0x0, 0x55ff, q18);
    ASSERT_EQUAL_128(0x0, 0x55ff, q19);
    ASSERT_EQUAL_128(0x0, 0x66555555, q20);
  }
}
6418 
6419 
// Sminv: signed minimum across all lanes of the source vector, written to a
// scalar of the same lane size.
TEST(neon_acrosslanes_sminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());
  __ Sminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Everything above the scalar result is expected to be zero.
    ASSERT_EQUAL_128(0x0, 0xaa, q16);
    ASSERT_EQUAL_128(0x0, 0x80, q17);
    ASSERT_EQUAL_128(0x0, 0xffaa, q18);
    ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
    ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
  }
}
6447 
// Umaxv: unsigned maximum across all lanes of the source vector, written to
// a scalar of the same lane size.
TEST(neon_acrosslanes_umaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());
  __ Umaxv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Everything above the scalar result is expected to be zero.
    ASSERT_EQUAL_128(0x0, 0xfc, q16);
    ASSERT_EQUAL_128(0x0, 0xfe, q17);
    ASSERT_EQUAL_128(0x0, 0xffaa, q18);
    ASSERT_EQUAL_128(0x0, 0xffab, q19);
    ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
  }
}
6475 
6476 
// Uminv: unsigned minimum across all lanes of the source vector, written to
// a scalar of the same lane size.
TEST(neon_acrosslanes_uminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());
  __ Uminv(s20, v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Everything above the scalar result is expected to be zero.
    ASSERT_EQUAL_128(0x0, 0x01, q16);
    ASSERT_EQUAL_128(0x0, 0x00, q17);
    ASSERT_EQUAL_128(0x0, 0x0001, q18);
    ASSERT_EQUAL_128(0x0, 0x0000, q19);
    ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
  }
}
6504 
6505 
// Smin: lane-wise signed minimum of two vectors, for B, H and S lanes in
// both the 64-bit and 128-bit forms. The 64-bit forms are expected to leave
// the high 64 bits of the destination zero.
TEST(neon_3same_smin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit forms.
  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit forms.
  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  }
}
6534 
6535 
// Umax: lane-wise unsigned maximum of two vectors, for B, H and S lanes in
// both the 64-bit and 128-bit forms. The 64-bit forms are expected to leave
// the high 64 bits of the destination zero.
TEST(neon_3same_umax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit forms.
  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit forms.
  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
    ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
    ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  }
}
6564 
6565 
// Umin: lane-wise unsigned minimum of two vectors, for B, H and S lanes in
// both the 64-bit and 128-bit forms. The 64-bit forms are expected to leave
// the high 64 bits of the destination zero.
TEST(neon_3same_umin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  // 64-bit forms.
  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());

  // 128-bit forms.
  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
    ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
    ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
    ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  }
}
6594 
6595 
// Fcadd: floating-point complex add with the second operand rotated by 90 or
// 270 degrees before the addition. Complex numbers are stored as lane pairs
// with the real part in the lower lane and the imaginary part in the upper
// lane; the comments below write them as (imaginary i, real).
TEST(neon_3same_extra_fcadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (0i, 5) (d)
  __ Movi(v0.V2D(), 0x0, 0x4014000000000000);
  // (5i, 0) (d)
  __ Movi(v1.V2D(), 0x4014000000000000, 0x0);
  // (10i, 10) (d)
  __ Movi(v2.V2D(), 0x4024000000000000, 0x4024000000000000);
  // (5i, 5), (5i, 5) (f)
  __ Movi(v3.V2D(), 0x40A0000040A00000, 0x40A0000040A00000);
  // (5i, 5), (0i, 0) (f)
  __ Movi(v4.V2D(), 0x40A0000040A00000, 0x0);
  // 324567i, 16000 (f)
  __ Movi(v5.V2D(), 0x0, 0x489E7AE0467A0000);

  // Subtraction (10, 10) - (5, 5) == (5, 5)
  __ Fcadd(v31.V2D(), v2.V2D(), v1.V2D(), 90);
  __ Fcadd(v31.V2D(), v31.V2D(), v0.V2D(), 270);

  // Addition (10, 10) + (5, 5) == (15, 15)
  __ Fcadd(v30.V2D(), v2.V2D(), v1.V2D(), 270);
  __ Fcadd(v30.V2D(), v30.V2D(), v0.V2D(), 90);

  // 2S
  __ Fcadd(v29.V2S(), v4.V2S(), v5.V2S(), 90);
  __ Fcadd(v28.V2S(), v4.V2S(), v5.V2S(), 270);

  // 4S
  __ Fcadd(v27.V4S(), v3.V4S(), v4.V4S(), 90);
  __ Fcadd(v26.V4S(), v3.V4S(), v4.V4S(), 270);

  END();

  if (CAN_RUN()) {
    RUN();
    ASSERT_EQUAL_128(0x4014000000000000, 0x4014000000000000, q31);
    ASSERT_EQUAL_128(0x402E000000000000, 0x402E000000000000, q30);
    ASSERT_EQUAL_128(0x0, 0x467a0000c89e7ae0, q29);  // (16000i, -324567)
    ASSERT_EQUAL_128(0x0, 0xc67a0000489e7ae0, q28);  // (-16000i, 324567)
    ASSERT_EQUAL_128(0x4120000000000000, 0x40A0000040A00000, q27);
    ASSERT_EQUAL_128(0x0000000041200000, 0x40A0000040A00000, q26);
  }
}
6642 
6643 
TEST(neon_3same_extra_fcmla)6644 TEST(neon_3same_extra_fcmla) {
6645   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);
6646 
6647   START();
6648 
6649   __ Movi(v1.V2D(), 0x0, 0x40A0000040400000);  // (5i, 3) (f)
6650   __ Movi(v2.V2D(), 0x0, 0x4040000040A00000);  // (3i, 5) (f)
6651 
6652   __ Movi(v3.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
6653   __ Movi(v4.V2D(), 0x0, 0x40E000003F800000);  // (7i, 1) (f)
6654 
6655   __ Movi(v5.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
6656   __ Movi(v6.V2D(), 0x0, 0x408000003F800000);  // (4i, 1) (f)
6657 
6658   // (1.5i, 2.5), (31.5i, 1024) (f)
6659   __ Movi(v7.V2D(), 0x3FC0000040200000, 0x41FC000044800000);
6660   // (2048i, 412.75), (3645i, 0) (f)
6661   __ Movi(v8.V2D(), 0x4500000043CE6000, 0x4563D00000000000);
6662   // (2000i, 450,000) (d)
6663   __ Movi(v9.V2D(), 0x409F400000000000, 0x411B774000000000);
6664   // (30,000i, 1250) (d)
6665   __ Movi(v10.V2D(), 0x40DD4C0000000000, 0x4093880000000000);
6666 
6667   // DST
6668   __ Movi(v24.V2D(), 0x0, 0x0);
6669   __ Movi(v25.V2D(), 0x0, 0x0);
6670   __ Movi(v26.V2D(), 0x0, 0x0);
6671   __ Movi(v27.V2D(), 0x0, 0x0);
6672   __ Movi(v28.V2D(), 0x0, 0x0);
6673   __ Movi(v29.V2D(), 0x0, 0x0);
6674   __ Movi(v30.V2D(), 0x0, 0x0);
6675   __ Movi(v31.V2D(), 0x0, 0x0);
6676 
6677   // Full calculations
6678   __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 90);
6679   __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 0);
6680 
6681   __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 0);
6682   __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 90);
6683 
6684   __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 90);
6685   __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 0);
6686 
6687   __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 0);
6688   __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 90);
6689 
6690   // Partial checks
6691   __ Fcmla(v27.V2S(), v1.V2S(), v2.V2S(), 0);
6692   __ Fcmla(v26.V2S(), v2.V2S(), v1.V2S(), 0);
6693 
6694   __ Fcmla(v25.V4S(), v7.V4S(), v8.V4S(), 270);
6695   __ Fcmla(v24.V4S(), v7.V4S(), v8.V4S(), 180);
6696 
6697   END();
6698 
6699   if (CAN_RUN()) {
6700     RUN();
6701 
6702     ASSERT_EQUAL_128(0x0, 0x4208000000000000, q31);  // (34i, 0)
6703     ASSERT_EQUAL_128(0x0, 0x41B80000C1300000, q30);  // (23i, -11)
6704     ASSERT_EQUAL_128(0x0, 0x41600000C0A00000, q29);  // (14i, -5)
6705 
6706     // (13502500000i, 502500000)
6707     ASSERT_EQUAL_128(0x4209267E65000000, 0x41BDF38AA0000000, q28);
6708     ASSERT_EQUAL_128(0x0, 0x4110000041700000, q27);  //  (9i, 15)
6709     ASSERT_EQUAL_128(0x0, 0x41C8000041700000, q26);  // (25i, 15)
6710     // (512i, 1.031875E3), (373248i, 0)
6711     ASSERT_EQUAL_128(0xc41ac80045400000, 0x0000000047e040c0, q25);
6712     // (619.125i, -3072), (0i, -114817.5)
6713     ASSERT_EQUAL_128(0xc5a00000c480fc00, 0xca63d00000000000, q24);
6714   }
6715 }
6716 
6717 
TEST(neon_byelement_fcmla)6718 TEST(neon_byelement_fcmla) {
6719   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);
6720 
6721   START();
6722 
6723   // (5i, 3), (5i, 3) (f)
6724   __ Movi(v1.V2D(), 0x40A0000040400000, 0x40A0000040400000);
6725   // (3i, 5), (3i, 5) (f)
6726   __ Movi(v2.V2D(), 0x4040000040A00000, 0x4040000040A00000);
6727   // (7i, 1), (5i, 3) (f)
6728   __ Movi(v3.V2D(), 0x40E000003F800000, 0x40A0000040400000);
6729   // (4i, 1), (3i, 5) (f)
6730   __ Movi(v4.V2D(), 0x408000003F800000, 0x4040000040A00000);
6731   // (4i, 1), (7i, 1) (f)
6732   __ Movi(v5.V2D(), 0x408000003F800000, 0x40E000003F800000);
6733   // (2i, 3), (0, 0) (f)
6734   __ Movi(v6.V2D(), 0x4000000040400000, 0x0);
6735 
6736   // DST
6737   __ Movi(v22.V2D(), 0x0, 0x0);
6738   __ Movi(v23.V2D(), 0x0, 0x0);
6739   __ Movi(v24.V2D(), 0x0, 0x0);
6740   __ Movi(v25.V2D(), 0x0, 0x0);
6741   __ Movi(v26.V2D(), 0x0, 0x0);
6742   __ Movi(v27.V2D(), 0x0, 0x0);
6743   __ Movi(v28.V2D(), 0x0, 0x0);
6744   __ Movi(v29.V2D(), 0x0, 0x0);
6745   __ Movi(v30.V2D(), 0x0, 0x0);
6746   __ Movi(v31.V2D(), 0x0, 0x0);
6747 
6748   // Full calculation (pairs)
6749   __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 90);
6750   __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 0);
6751   __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 90);
6752   __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 0);
6753 
6754   // Rotations
6755   __ Fcmla(v29.V4S(), v3.V4S(), v4.S(), 1, 0);
6756   __ Fcmla(v28.V4S(), v3.V4S(), v4.S(), 1, 90);
6757   __ Fcmla(v27.V4S(), v3.V4S(), v4.S(), 1, 180);
6758   __ Fcmla(v26.V4S(), v3.V4S(), v4.S(), 1, 270);
6759   __ Fcmla(v25.V4S(), v3.V4S(), v4.S(), 0, 270);
6760   __ Fcmla(v24.V4S(), v3.V4S(), v4.S(), 0, 180);
6761   __ Fcmla(v23.V4S(), v3.V4S(), v4.S(), 0, 90);
6762   __ Fcmla(v22.V4S(), v3.V4S(), v4.S(), 0, 0);
6763 
6764   END();
6765 
6766   if (CAN_RUN()) {
6767     RUN();
6768     // (34i, 0), (34i, 0)
6769     ASSERT_EQUAL_128(0x4208000000000000, 0x4208000000000000, q31);
6770     // (14i, -5), (23i, -11)
6771     ASSERT_EQUAL_128(0x41600000C0A00000, 0x41B80000C1300000, q30);
6772     // (4i, 1), (12i, 3)
6773     ASSERT_EQUAL_128(0x408000003f800000, 0x4140000040400000, q29);
6774     // (7i, -28), (5i, -20)
6775     ASSERT_EQUAL_128(0x40e00000c1e00000, 0x40a00000c1a00000, q28);
6776     // (-4i, -1), (-12i, -3)
6777     ASSERT_EQUAL_128(0xc0800000bf800000, 0xc1400000c0400000, q27);
6778     // (-7i, 28), (-5i, 20)
6779     ASSERT_EQUAL_128(0xc0e0000041e00000, 0xc0a0000041a00000, q26);
6780     // (-35i, 21), (-25i, 15)
6781     ASSERT_EQUAL_128(0xc20c000041a80000, 0xc1c8000041700000, q25);
6782     // (-3i, -5), (-9i, -15)
6783     ASSERT_EQUAL_128(0xc0400000c0a00000, 0xc1100000c1700000, q24);
6784     // (35i, -21), (25i, -15)
6785     ASSERT_EQUAL_128(0x420c0000c1a80000, 0x41c80000c1700000, q23);
6786     // (3i, 5), (9i, 15)
6787     ASSERT_EQUAL_128(0x4040000040a00000, 0x4110000041700000, q22);
6788   }
6789 }
6790 
6791 
// Runs Mvn over every 128-bit and 64-bit arrangement of the same input. The
// expected values are the bitwise inverse of the input; for the 64-bit
// arrangements the upper half of the Q register is expected to be zero.
TEST(neon_2regmisc_mvn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kInputHigh = 0x00ff00ffff0055aa;
  const uint64_t kInputLow = 0x55aa55aa55aa55aa;
  const uint64_t kInvertedHigh = 0xff00ff0000ffaa55;
  const uint64_t kInvertedLow = 0xaa55aa55aa55aa55;

  START();

  __ Movi(v0.V2D(), kInputHigh, kInputLow);

  // 128-bit arrangements.
  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  // 64-bit arrangements.
  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(kInvertedHigh, kInvertedLow, q16);
    ASSERT_EQUAL_128(kInvertedHigh, kInvertedLow, q17);
    ASSERT_EQUAL_128(kInvertedHigh, kInvertedLow, q18);
    ASSERT_EQUAL_128(kInvertedHigh, kInvertedLow, q19);

    ASSERT_EQUAL_128(0x0, kInvertedLow, q24);
    ASSERT_EQUAL_128(0x0, kInvertedLow, q25);
    ASSERT_EQUAL_128(0x0, kInvertedLow, q26);
  }
}
6823 
6824 
// Runs Not on one 16B operand and one 8B operand and compares the results
// against the inverted input bytes.
TEST(neon_2regmisc_not) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kWideHigh = 0x00ff00ffff0055aa;
  const uint64_t kWideLow = 0x55aa55aa55aa55aa;
  const uint64_t kNarrowLow = 0x00ffff0000ffff00;

  START();

  __ Movi(v0.V2D(), kWideHigh, kWideLow);
  __ Movi(v1.V2D(), 0, kNarrowLow);

  __ Not(v16.V16B(), v0.V16B());
  __ Not(v17.V8B(), v1.V8B());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
    // The 8B form is expected to leave the upper 64 bits of q17 as zero.
    ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
  }
}
6844 
6845 
// Covers Cls, Clz and Cnt for each arrangement used below. Cls reads v1, Clz
// reads v0, and Cnt is run once on each source.
TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kAscendingHigh = 0x0001020304050607;
  const uint64_t kAscendingLow = 0x08090a0b0c0d0e0f;
  const uint64_t kMixedHigh = 0xfedcba9876543210;
  const uint64_t kMixedLow = 0x0123456789abcdef;

  START();

  __ Movi(v0.V2D(), kAscendingHigh, kAscendingLow);
  __ Movi(v1.V2D(), kMixedHigh, kMixedLow);

  // Cls: byte, halfword and word lanes.
  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  // Clz: byte, halfword and word lanes.
  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  // Cnt: byte lanes only.
  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Cls results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
    ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
    ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
    ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);

    // Clz results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
    ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
    ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
    ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);

    // Cnt results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
    ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);
  }
}
6894 
// Covers Rev16, Rev32 and Rev64 on an ascending byte pattern (v0), plus Rbit
// on a second pattern (v1).
TEST(neon_2regmisc_rev) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kAscendingHigh = 0x0001020304050607;
  const uint64_t kAscendingLow = 0x08090a0b0c0d0e0f;
  const uint64_t kMixedHigh = 0xfedcba9876543210;
  const uint64_t kMixedLow = 0x0123456789abcdef;

  START();

  __ Movi(v0.V2D(), kAscendingHigh, kAscendingLow);
  __ Movi(v1.V2D(), kMixedHigh, kMixedLow);

  // Rev16: byte lanes.
  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  // Rev32: byte and halfword lanes.
  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  // Rev64: byte, halfword and word lanes.
  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  // Rbit: byte lanes.
  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Rev16 results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
    ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);

    // Rev32 results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
    ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
    ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);

    // Rev64 results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
    ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
    ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
    ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);

    // Rbit results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
    ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);
  }
}
6945 
6946 
// Covers Sli for each vector arrangement and for the scalar D form. Every
// destination is first loaded with a copy of v0, and v1 supplies the value
// being inserted.
TEST(neon_sli) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Destinations that receive a copy of v0 before the insert is applied.
  const VRegister targets[] = {v16, v17, v18, v19, v20, v21, v22, v23};
  const size_t target_count = sizeof(targets) / sizeof(targets[0]);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  for (size_t i = 0; i < target_count; i++) {
    __ Mov(targets[i].V2D(), v0.V2D());
  }

  // Vector forms.
  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Sli(v17.V16B(), v1.V16B(), 7);
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Sli(v19.V8H(), v1.V8H(), 15);
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Sli(v21.V4S(), v1.V4S(), 31);
  __ Sli(v22.V2D(), v1.V2D(), 48);

  // Scalar form.
  __ Sli(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
    ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
    ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);

    ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);
  }
}
6990 
6991 
// Covers Sri for each vector arrangement and for the scalar D form. Every
// destination is first loaded with a copy of v0, and v1 supplies the value
// being inserted.
TEST(neon_sri) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Destinations that receive a copy of v0 before the insert is applied.
  const VRegister targets[] = {v16, v17, v18, v19, v20, v21, v22, v23};
  const size_t target_count = sizeof(targets) / sizeof(targets[0]);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  for (size_t i = 0; i < target_count; i++) {
    __ Mov(targets[i].V2D(), v0.V2D());
  }

  // Vector forms.
  __ Sri(v16.V8B(), v1.V8B(), 4);
  __ Sri(v17.V16B(), v1.V16B(), 7);
  __ Sri(v18.V4H(), v1.V4H(), 8);
  __ Sri(v19.V8H(), v1.V8H(), 15);
  __ Sri(v20.V2S(), v1.V2S(), 1);
  __ Sri(v21.V4S(), v1.V4S(), 31);
  __ Sri(v22.V2D(), v1.V2D(), 48);

  // Scalar form.
  __ Sri(d23, d1, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
    ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
    ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
    ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
    ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);

    ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);
  }
}
7035 
7036 
// Covers Shrn/Shrn2 for the 8H->8B/16B, 4S->4H/8H and 2D->2S/4S arrangement
// pairs. Each "2" variant targets the same destination as the plain form that
// precedes it, so one assertion per register checks both.
TEST(neon_shrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high 64 bits, low 64 bits} loaded into v0-v4, in that order. v4 is
  // initialised but not read by any instruction below.
  static const uint64_t kSourceBits[][2] =
      {{0x7f0100ff81807f01, 0x8081ff00017f8081},
       {0x80008001ffff0000, 0xffff000000017fff},
       {0x80000000ffffffff, 0x000000007fffffff},
       {0x8000000000000001, 0x7fffffffffffffff},
       {0x8000000000000000, 0x0000000000000000}};
  const VRegister sources[] = {v0, v1, v2, v3, v4};
  const size_t source_count = sizeof(sources) / sizeof(sources[0]);

  START();

  for (size_t i = 0; i < source_count; i++) {
    __ Movi(sources[i].V2D(), kSourceBits[i][0], kSourceBits[i][1]);
  }

  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
  }
}
7064 
7065 
// Covers Rshrn/Rshrn2 for the 8H->8B/16B, 4S->4H/8H and 2D->2S/4S arrangement
// pairs. Each "2" variant targets the same destination as the plain form that
// precedes it, so one assertion per register checks both.
TEST(neon_rshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high 64 bits, low 64 bits} loaded into v0-v4, in that order. v4 is
  // initialised but not read by any instruction below.
  static const uint64_t kSourceBits[][2] =
      {{0x7f0100ff81807f01, 0x8081ff00017f8081},
       {0x80008001ffff0000, 0xffff000000017fff},
       {0x80000000ffffffff, 0x000000007fffffff},
       {0x8000000000000001, 0x7fffffffffffffff},
       {0x8000000000000000, 0x0000000000000000}};
  const VRegister sources[] = {v0, v1, v2, v3, v4};
  const size_t source_count = sizeof(sources) / sizeof(sources[0]);

  START();

  for (size_t i = 0; i < source_count; i++) {
    __ Movi(sources[i].V2D(), kSourceBits[i][0], kSourceBits[i][1]);
  }

  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
  }
}
7093 
7094 
// Covers Uqshrn/Uqshrn2 in vector form (8H->8B/16B, 4S->4H/8H, 2D->2S/4S) and
// Uqshrn in scalar form (H->B, S->H, D->S).
TEST(neon_uqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high 64 bits, low 64 bits} loaded into v0-v4, in that order. v4 is
  // initialised but not read by any instruction below.
  static const uint64_t kSourceBits[][2] =
      {{0x7f0100ff81807f01, 0x8081ff00017f8081},
       {0x80008001ffff0000, 0xffff000000017fff},
       {0x80000000ffffffff, 0x000000007fffffff},
       {0x8000000000000001, 0x7fffffffffffffff},
       {0x8000000000000000, 0x0000000000000000}};
  const VRegister sources[] = {v0, v1, v2, v3, v4};
  const size_t source_count = sizeof(sources) / sizeof(sources[0]);

  START();

  for (size_t i = 0; i < source_count; i++) {
    __ Movi(sources[i].V2D(), kSourceBits[i][0], kSourceBits[i][1]);
  }

  // Vector forms; each "2" variant reuses the preceding destination.
  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Uqshrn(b19, h0, 8);
  __ Uqshrn(h20, s1, 16);
  __ Uqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results.
    ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7129 
7130 
// Covers Uqrshrn/Uqrshrn2 in vector form (8H->8B/16B, 4S->4H/8H, 2D->2S/4S)
// and Uqrshrn in scalar form (H->B, S->H, D->S).
TEST(neon_uqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high 64 bits, low 64 bits} loaded into v0-v4, in that order. v4 is
  // initialised but not read by any instruction below.
  static const uint64_t kSourceBits[][2] =
      {{0x7f0100ff81807f01, 0x8081ff00017f8081},
       {0x80008001ffff0000, 0xffff000000017fff},
       {0x80000000ffffffff, 0x000000007fffffff},
       {0x8000000000000001, 0x7fffffffffffffff},
       {0x8000000000000000, 0x0000000000000000}};
  const VRegister sources[] = {v0, v1, v2, v3, v4};
  const size_t source_count = sizeof(sources) / sizeof(sources[0]);

  START();

  for (size_t i = 0; i < source_count; i++) {
    __ Movi(sources[i].V2D(), kSourceBits[i][0], kSourceBits[i][1]);
  }

  // Vector forms; each "2" variant reuses the preceding destination.
  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Uqrshrn(b19, h0, 8);
  __ Uqrshrn(h20, s1, 16);
  __ Uqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results.
    ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7165 
7166 
// Covers Sqshrn/Sqshrn2 in vector form (8H->8B/16B, 4S->4H/8H, 2D->2S/4S) and
// Sqshrn in scalar form (H->B, S->H, D->S).
TEST(neon_sqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high 64 bits, low 64 bits} loaded into v0-v4, in that order. v4 is
  // initialised but not read by any instruction below.
  static const uint64_t kSourceBits[][2] =
      {{0x7f0100ff81807f01, 0x8081ff00017f8081},
       {0x80008001ffff0000, 0xffff000000017fff},
       {0x80000000ffffffff, 0x000000007fffffff},
       {0x8000000000000001, 0x7fffffffffffffff},
       {0x8000000000000000, 0x0000000000000000}};
  const VRegister sources[] = {v0, v1, v2, v3, v4};
  const size_t source_count = sizeof(sources) / sizeof(sources[0]);

  START();

  for (size_t i = 0; i < source_count; i++) {
    __ Movi(sources[i].V2D(), kSourceBits[i][0], kSourceBits[i][1]);
  }

  // Vector forms; each "2" variant reuses the preceding destination.
  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqshrn(b19, h0, 8);
  __ Sqshrn(h20, s1, 16);
  __ Sqshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results.
    ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
    ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7201 
7202 
// Covers Sqrshrn/Sqrshrn2 in vector form (8H->8B/16B, 4S->4H/8H, 2D->2S/4S)
// and Sqrshrn in scalar form (H->B, S->H, D->S).
TEST(neon_sqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high 64 bits, low 64 bits} loaded into v0-v4, in that order. v4 is
  // initialised but not read by any instruction below.
  static const uint64_t kSourceBits[][2] =
      {{0x7f0100ff81807f01, 0x8081ff00017f8081},
       {0x80008001ffff0000, 0xffff000000017fff},
       {0x80000000ffffffff, 0x000000007fffffff},
       {0x8000000000000001, 0x7fffffffffffffff},
       {0x8000000000000000, 0x0000000000000000}};
  const VRegister sources[] = {v0, v1, v2, v3, v4};
  const size_t source_count = sizeof(sources) / sizeof(sources[0]);

  START();

  for (size_t i = 0; i < source_count; i++) {
    __ Movi(sources[i].V2D(), kSourceBits[i][0], kSourceBits[i][1]);
  }

  // Vector forms; each "2" variant reuses the preceding destination.
  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqrshrn(b19, h0, 8);
  __ Sqrshrn(h20, s1, 16);
  __ Sqrshrn(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results.
    ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
    ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
    ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7237 
7238 
// Covers Sqshrun/Sqshrun2 in vector form (8H->8B/16B, 4S->4H/8H, 2D->2S/4S)
// and Sqshrun in scalar form (H->B, S->H, D->S).
TEST(neon_sqshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high 64 bits, low 64 bits} loaded into v0-v4, in that order. v4 is
  // initialised but not read by any instruction below.
  static const uint64_t kSourceBits[][2] =
      {{0x7f0100ff81807f01, 0x8081ff00017f8081},
       {0x80008001ffff0000, 0xffff000000017fff},
       {0x80000000ffffffff, 0x000000007fffffff},
       {0x8000000000000001, 0x7fffffffffffffff},
       {0x8000000000000000, 0x0000000000000000}};
  const VRegister sources[] = {v0, v1, v2, v3, v4};
  const size_t source_count = sizeof(sources) / sizeof(sources[0]);

  START();

  for (size_t i = 0; i < source_count; i++) {
    __ Movi(sources[i].V2D(), kSourceBits[i][0], kSourceBits[i][1]);
  }

  // Vector forms; each "2" variant reuses the preceding destination.
  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqshrun(b19, h0, 8);
  __ Sqshrun(h20, s1, 16);
  __ Sqshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results.
    ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  }
}
7273 
7274 
// Covers Sqrshrun/Sqrshrun2 in vector form (8H->8B/16B, 4S->4H/8H, 2D->2S/4S)
// and Sqrshrun in scalar form (H->B, S->H, D->S).
TEST(neon_sqrshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // {high 64 bits, low 64 bits} loaded into v0-v4, in that order. v4 is
  // initialised but not read by any instruction below.
  static const uint64_t kSourceBits[][2] =
      {{0x7f0100ff81807f01, 0x8081ff00017f8081},
       {0x80008001ffff0000, 0xffff000000017fff},
       {0x80000000ffffffff, 0x000000007fffffff},
       {0x8000000000000001, 0x7fffffffffffffff},
       {0x8000000000000000, 0x0000000000000000}};
  const VRegister sources[] = {v0, v1, v2, v3, v4};
  const size_t source_count = sizeof(sources) / sizeof(sources[0]);

  START();

  for (size_t i = 0; i < source_count; i++) {
    __ Movi(sources[i].V2D(), kSourceBits[i][0], kSourceBits[i][1]);
  }

  // Vector forms; each "2" variant reuses the preceding destination.
  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  // Scalar forms.
  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Vector results.
    ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
    ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
    ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);

    // Scalar results.
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  }
}
7309 
// Covers the immediate form of Bic for the 4H, 8H, 2S and 4S arrangements.
// Every destination starts from the same bit pattern; where the immediate is
// 0x00 the expected result is that pattern unchanged.
TEST(neon_modimm_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Starting contents shared by all destinations.
  const uint64_t kSeedHigh = 0x00aaff55ff0055aa;
  const uint64_t kSeedLow = 0x5555ffff0000aaaa;

  const VRegister targets[] =
      {v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27};
  const size_t target_count = sizeof(targets) / sizeof(targets[0]);

  START();

  for (size_t i = 0; i < target_count; i++) {
    __ Movi(targets[i].V2D(), kSeedHigh, kSeedLow);
  }

  // Halfword lanes.
  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  // Word lanes, 64-bit vectors.
  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  // Word lanes, 128-bit vectors.
  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, kSeedLow, q16);
    ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
    ASSERT_EQUAL_128(kSeedHigh, kSeedLow, q18);
    ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

    ASSERT_EQUAL_128(0x0, kSeedLow, q20);
    ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
    ASSERT_EQUAL_128(0x0, kSeedLow, q22);
    ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

    ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
    ASSERT_EQUAL_128(kSeedHigh, kSeedLow, q25);
    ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
    ASSERT_EQUAL_128(kSeedHigh, kSeedLow, q27);
  }
}
7364 
7365 
// Passes a range of 16-bit immediates to Movi with halfword arrangements and
// checks that the value is replicated into every halfword lane.
TEST(neon_modimm_movi_16bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // 64-bit vectors.
  __ Movi(v0.V4H(), 0xabab);
  __ Movi(v1.V4H(), 0xab00);
  __ Movi(v2.V4H(), 0xabff);

  // 128-bit vectors.
  __ Movi(v3.V8H(), 0x00ab);
  __ Movi(v4.V8H(), 0xffab);
  __ Movi(v5.V8H(), 0xabcd);

  END();

  if (CAN_RUN()) {
    RUN();

    // For 4H destinations the upper 64 bits are expected to be zero.
    ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
    ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
    ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);

    ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
    ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);
  }
}
7391 
7392 
// Passes a range of 32-bit immediates to Movi with word arrangements and
// checks that the value is replicated into every word lane.
TEST(neon_modimm_movi_32bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // A single non-zero byte, all other bytes zero.
  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  // A single non-0xff byte, all other bytes 0xff.
  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  // One 0xab byte with a mix of 0x00 and 0xff bytes around it.
  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  // Remaining patterns.
  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  if (CAN_RUN()) {
    RUN();

    // For 2S destinations the upper 64 bits are expected to be zero.
    ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
    ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
    ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
    ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

    ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
    ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
    ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
    ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

    ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
    ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
    ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
    ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
    ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  }
}
7444 
7445 
// Passes a range of 64-bit immediates to Movi with the 1D and 2D
// arrangements. 1D results are checked through the D register view; 2D
// results are expected to hold the immediate in both halves.
TEST(neon_modimm_movi_64bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  const uint64_t kByteMask = 0x00ffff0000ffffff;
  const uint64_t kRepeatedByte = 0xabababababababab;
  const uint64_t kRepeatedHalf = 0xabcdabcdabcdabcd;
  const uint64_t kRepeatedWord = 0xabcdef01abcdef01;
  const uint64_t kArbitrary = 0xabcdef0123456789;

  START();

  __ Movi(v0.V1D(), kByteMask);
  __ Movi(v1.V2D(), kRepeatedByte);
  __ Movi(v2.V2D(), kRepeatedHalf);
  __ Movi(v3.V2D(), kRepeatedWord);
  __ Movi(v4.V1D(), kArbitrary);
  __ Movi(v5.V2D(), kArbitrary);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_64(0x00ffff0000ffffff, d0);
    ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
    ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
    ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
    ASSERT_EQUAL_64(0xabcdef0123456789, d4);
    ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);
  }
}
7471 
7472 
TEST(neon_modimm_movi)7473 TEST(neon_modimm_movi) {
7474   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
7475 
7476   START();
7477 
7478   __ Movi(v0.V8B(), 0xaa);
7479   __ Movi(v1.V16B(), 0x55);
7480 
7481   __ Movi(d2, 0x00ffff0000ffffff);
7482   __ Movi(v3.V2D(), 0x00ffff0000ffffff);
7483 
7484   __ Movi(v16.V4H(), 0x00, LSL, 0);
7485   __ Movi(v17.V4H(), 0xff, LSL, 8);
7486   __ Movi(v18.V8H(), 0x00, LSL, 0);
7487   __ Movi(v19.V8H(), 0xff, LSL, 8);
7488 
7489   __ Movi(v20.V2S(), 0x00, LSL, 0);
7490   __ Movi(v21.V2S(), 0xff, LSL, 8);
7491   __ Movi(v22.V2S(), 0x00, LSL, 16);
7492   __ Movi(v23.V2S(), 0xff, LSL, 24);
7493 
7494   __ Movi(v24.V4S(), 0xff, LSL, 0);
7495   __ Movi(v25.V4S(), 0x00, LSL, 8);
7496   __ Movi(v26.V4S(), 0xff, LSL, 16);
7497   __ Movi(v27.V4S(), 0x00, LSL, 24);
7498 
7499   __ Movi(v28.V2S(), 0xaa, MSL, 8);
7500   __ Movi(v29.V2S(), 0x55, MSL, 16);
7501   __ Movi(v30.V4S(), 0xff, MSL, 8);
7502   __ Movi(v31.V4S(), 0x00, MSL, 16);
7503 
7504   END();
7505 
7506   if (CAN_RUN()) {
7507     RUN();
7508 
7509     ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
7510     ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);
7511 
7512     ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
7513     ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);
7514 
7515     ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
7516     ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
7517     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
7518     ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);
7519 
7520     ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
7521     ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
7522     ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
7523     ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);
7524 
7525     ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
7526     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
7527     ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
7528     ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
7529 
7530     ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
7531     ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
7532     ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
7533     ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);
7534   }
7535 }
7536 
7537 
// Covers Mvni in its LSL-shifted and MSL-shifted forms, using the same
// immediate/shift combinations as the shifted Movi cases.
TEST(neon_modimm_mvni) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // LSL, halfword lanes.
  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  // LSL, word lanes, 64-bit vectors.
  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  // LSL, word lanes, 128-bit vectors.
  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  // MSL, word lanes.
  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // LSL, halfword lanes.
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
    ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

    // LSL, word lanes, 64-bit vectors.
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
    ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
    ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

    // LSL, word lanes, 128-bit vectors.
    ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

    // MSL, word lanes.
    ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
    ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
    ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);
  }
}
7589 
7590 
// Covers the immediate form of Orr for the 4H, 8H, 2S and 4S arrangements.
// Every destination starts from the same bit pattern; where the immediate is
// 0x00 the expected result is that pattern unchanged.
TEST(neon_modimm_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Starting contents shared by all destinations.
  const uint64_t kSeedHigh = 0x00aaff55ff0055aa;
  const uint64_t kSeedLow = 0x5555ffff0000aaaa;

  const VRegister targets[] =
      {v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27};
  const size_t target_count = sizeof(targets) / sizeof(targets[0]);

  START();

  for (size_t i = 0; i < target_count; i++) {
    __ Movi(targets[i].V2D(), kSeedHigh, kSeedLow);
  }

  // Halfword lanes.
  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  // Word lanes, 64-bit vectors.
  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  // Word lanes, 128-bit vectors.
  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x0, kSeedLow, q16);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
    ASSERT_EQUAL_128(kSeedHigh, kSeedLow, q18);
    ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

    ASSERT_EQUAL_128(0x0, kSeedLow, q20);
    ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
    ASSERT_EQUAL_128(0x0, kSeedLow, q22);
    ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

    ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
    ASSERT_EQUAL_128(kSeedHigh, kSeedLow, q25);
    ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
    ASSERT_EQUAL_128(kSeedHigh, kSeedLow, q27);
  }
}
7645 
// Loads every (high, low) pairing of kHalfValues into a Q register through
// the literal form of Ldr, then checks both 64-bit halves against the same
// values materialised in general-purpose registers.
TEST(ldr_literal_values_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  static const uint64_t kHalfValues[] = {0x8000000000000000,
                                         0x7fffffffffffffff,
                                         0x0000000000000000,
                                         0xffffffffffffffff,
                                         0x00ff00ff00ff00ff,
                                         0x1234567890abcdef};
  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
  const Register& ref_low64 = x1;
  const Register& ref_high64 = x2;
  const Register& loaded_low64 = x3;
  const Register& loaded_high64 = x4;
  const VRegister& tgt = q0;

  START();
  // x0 counts the loads whose value did not match the reference.
  __ Mov(x0, 0);

  for (int i = 0; i < card; i++) {
    __ Mov(ref_low64, kHalfValues[i]);
    for (int j = 0; j < card; j++) {
      __ Mov(ref_high64, kHalfValues[j]);
      // The literal is passed as (high half, low half).
      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
      __ Mov(loaded_low64, tgt.V2D(), 0);
      __ Mov(loaded_high64, tgt.V2D(), 1);
      __ Cmp(loaded_low64, ref_low64);
      // The high halves are only compared when the low halves matched;
      // otherwise the flags are set to NoFlag, which reads as "ne".
      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
      // Accumulate instead of overwriting: a plain Cset here would let a
      // later matching pair erase the record of an earlier mismatch, so only
      // the final iteration would actually be checked.
      __ Cinc(x0, x0, ne);
    }
  }
  END();

  if (CAN_RUN()) {
    RUN();

    // If one of the values differs, the trace can be used to identify which
    // one.
    ASSERT_EQUAL_64(0, x0);
  }
}
7687 
// Checks vector Fmov with an immediate operand for single-precision (V2S,
// V4S) and half-precision (V4H, V8H) lane arrangements.
TEST(fmov_vec_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  // Single-precision lanes.
  __ Fmov(v0.V2S(), 20.0);
  __ Fmov(v1.V4S(), 1024.0);

  // Half-precision lanes; the immediates are given as raw bit patterns.
  __ Fmov(v2.V4H(), RawbitsToFloat16(0xC500U));
  __ Fmov(v3.V8H(), RawbitsToFloat16(0x4A80U));

  END();
  if (CAN_RUN()) {
    RUN();

    // Every lane is expected to hold the encoding of the immediate
    // (0x41A00000 is 20.0f, 0x44800000 is 1024.0f; the half-precision lanes
    // repeat the raw bits passed above).
    ASSERT_EQUAL_64(0x41A0000041A00000, d0);
    ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1);
    ASSERT_EQUAL_64(0xC500C500C500C500, d2);
    ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3);
  }
}
7711 
7712 // TODO: add arbitrary values once load literal to Q registers is supported.
// Checks the vector Fmov macro for values the instruction can encode as an
// immediate, for values it cannot, and for zero and positive infinity.
TEST(neon_modimm_fmov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Immediates which can be encoded in the instructions.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Immediates which cannot be encoded in the instructions.
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  // Encodable immediates.
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  // Non-encodable immediates.
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  // Zero.
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  // Positive infinity.
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  if (CAN_RUN()) {
    RUN();

    // Raw encodings of each value; the "pair" variants hold two identical
    // 32-bit lanes packed into one 64-bit word.
    const uint64_t one_bits = FloatToRawbits(kOne);
    const uint64_t one_pair = (one_bits << 32) | one_bits;
    const uint64_t half_bits = FloatToRawbits(kPointFive);
    const uint64_t half_pair = (half_bits << 32) | half_bits;
    const uint64_t minus_thirteen_bits = DoubleToRawbits(kMinusThirteen);
    const uint64_t non_imm32_bits = FloatToRawbits(kNonImmFP32);
    const uint64_t non_imm32_pair = (non_imm32_bits << 32) | non_imm32_bits;
    const uint64_t non_imm64_bits = DoubleToRawbits(kNonImmFP64);
    const uint64_t inf32_bits = FloatToRawbits(kFP32PositiveInfinity);
    const uint64_t inf32_pair = (inf32_bits << 32) | inf32_bits;
    const uint64_t inf64_bits = DoubleToRawbits(kFP64PositiveInfinity);

    // The V2S forms are expected to leave the upper 64 bits zero.
    ASSERT_EQUAL_128(0x0, one_pair, q11);
    ASSERT_EQUAL_128(half_pair, half_pair, q12);
    ASSERT_EQUAL_128(minus_thirteen_bits, minus_thirteen_bits, q22);
    ASSERT_EQUAL_128(0x0, non_imm32_pair, q13);
    ASSERT_EQUAL_128(non_imm32_pair, non_imm32_pair, q14);
    ASSERT_EQUAL_128(non_imm64_bits, non_imm64_bits, q23);
    ASSERT_EQUAL_128(0x0, 0x0, q1);
    ASSERT_EQUAL_128(0x0, 0x0, q2);
    ASSERT_EQUAL_128(0x0, 0x0, q3);
    ASSERT_EQUAL_128(0x0, inf32_pair, q4);
    ASSERT_EQUAL_128(inf32_pair, inf32_pair, q5);
    ASSERT_EQUAL_128(inf64_bits, inf64_bits, q6);
  }
}
7768 
7769 
// Checks the byte-lane permute instructions Trn1/Trn2, Zip1/Zip2 and
// Uzp1/Uzp2 on full 128-bit vectors.
TEST(neon_perm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Byte i of the first operand holds 0x0f - i and byte i of the second holds
  // 0x1f - i, so each result byte identifies the lane it was taken from.
  const uint64_t first_hi = 0x0001020304050607;
  const uint64_t first_lo = 0x08090a0b0c0d0e0f;
  const uint64_t second_hi = 0x1011121314151617;
  const uint64_t second_lo = 0x18191a1b1c1d1e1f;

  START();

  __ Movi(v0.V2D(), first_hi, first_lo);
  __ Movi(v1.V2D(), second_hi, second_lo);

  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    // Trn1 / Trn2.
    ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
    ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
    // Zip1 / Zip2.
    ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
    ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
    // Uzp1 / Uzp2.
    ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
    ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);
  }
}
7798 
7799 
// Checks Dup from a vector element into 128-bit vectors, 64-bit vectors and
// scalars, including the case where source and destination are the same
// register.
TEST(neon_copy_dup_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit words used to build the source registers.
  const uint64_t a_hi = 0x0011223344556677;
  const uint64_t a_lo = 0x8899aabbccddeeff;
  const uint64_t b_hi = 0xffeddccbbaae9988;
  const uint64_t b_lo = 0x7766554433221100;
  const uint64_t c_lo = 0x0123456789abcdef;

  START();

  __ Movi(v0.V2D(), a_hi, a_lo);
  __ Movi(v1.V2D(), b_hi, b_lo);
  __ Movi(v2.V2D(), b_hi, a_hi);
  __ Movi(v3.V2D(), b_lo, a_lo);
  __ Movi(v4.V2D(), b_lo, c_lo);
  __ Movi(v5.V2D(), a_hi, c_lo);

  // 128-bit destinations.
  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  // 64-bit destinations.
  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  // Scalar destinations.
  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  // Destination is also the source.
  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
    ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
    ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
    ASSERT_EQUAL_128(a_lo, a_lo, q19);

    // 64-bit and scalar forms are expected to leave the upper bits zero.
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
    ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
    ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

    ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
    ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
    ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
    ASSERT_EQUAL_128(0, a_lo, q26);

    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
    ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
    ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
    ASSERT_EQUAL_128(a_hi, a_hi, q5);
  }
}
7856 
7857 
// Checks Dup from a general-purpose register (including the zero registers)
// into 128-bit and 64-bit vectors.
TEST(neon_copy_dup_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Value placed in x0; the W-register forms read it through w0.
  const uint64_t scalar = 0x0011223344556677;

  START();

  __ Mov(x0, scalar);

  // 128-bit destinations.
  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  // 64-bit destinations.
  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  // Zero-register sources.
  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
    ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
    ASSERT_EQUAL_128(scalar, scalar, q19);

    // The 64-bit forms are expected to leave the upper half zero.
    ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
    ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
    ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

    ASSERT_EQUAL_128(0, 0, q2);
    ASSERT_EQUAL_128(0, 0, q3);
    ASSERT_EQUAL_128(0, 0, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7899 
7900 
// Checks Ins (element to element) for every lane size, both between distinct
// registers and within a single register.
TEST(neon_copy_ins_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit words used to build the source and destination registers.
  const uint64_t a_hi = 0x0011223344556677;
  const uint64_t a_lo = 0x8899aabbccddeeff;
  const uint64_t b_hi = 0xffeddccbbaae9988;
  const uint64_t b_lo = 0x7766554433221100;
  const uint64_t fwd_digits = 0x0123456789abcdef;
  const uint64_t rev_digits = 0xfedcba9876543210;

  START();

  __ Movi(v0.V2D(), a_hi, a_lo);
  __ Movi(v1.V2D(), b_hi, b_lo);
  __ Movi(v16.V2D(), fwd_digits, rev_digits);
  __ Movi(v17.V2D(), rev_digits, fwd_digits);
  __ Movi(v18.V2D(), a_hi, a_lo);
  __ Movi(v19.V2D(), a_hi, a_lo);

  __ Movi(v2.V2D(), 0, a_hi);
  __ Movi(v3.V2D(), 0, a_lo);
  __ Movi(v4.V2D(), 0, fwd_digits);
  __ Movi(v5.V2D(), 0, fwd_digits);

  // Insert from a different register.
  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  // Insert within the same register.
  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the addressed destination lane is expected to change.
    ASSERT_EQUAL_128(0xff23456789abcdef, rev_digits, q16);
    ASSERT_EQUAL_128(rev_digits, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, a_lo, q18);
    ASSERT_EQUAL_128(a_lo, a_lo, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, fwd_digits, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7944 
7945 
// Checks the element-to-element form of Mov for every lane size, both between
// distinct registers and within a single register. The inputs and expected
// results are the same as those used for the corresponding Ins test.
TEST(neon_copy_mov_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit words used to build the source and destination registers.
  const uint64_t a_hi = 0x0011223344556677;
  const uint64_t a_lo = 0x8899aabbccddeeff;
  const uint64_t b_hi = 0xffeddccbbaae9988;
  const uint64_t b_lo = 0x7766554433221100;
  const uint64_t fwd_digits = 0x0123456789abcdef;
  const uint64_t rev_digits = 0xfedcba9876543210;

  START();

  __ Movi(v0.V2D(), a_hi, a_lo);
  __ Movi(v1.V2D(), b_hi, b_lo);
  __ Movi(v16.V2D(), fwd_digits, rev_digits);
  __ Movi(v17.V2D(), rev_digits, fwd_digits);
  __ Movi(v18.V2D(), a_hi, a_lo);
  __ Movi(v19.V2D(), a_hi, a_lo);

  __ Movi(v2.V2D(), 0, a_hi);
  __ Movi(v3.V2D(), 0, a_lo);
  __ Movi(v4.V2D(), 0, fwd_digits);
  __ Movi(v5.V2D(), 0, fwd_digits);

  // Move from a different register.
  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
  __ Mov(v17.V8H(), 0, v1.V8H(), 7);
  __ Mov(v18.V4S(), 3, v1.V4S(), 0);
  __ Mov(v19.V2D(), 1, v0.V2D(), 0);

  // Move within the same register.
  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
  __ Mov(v3.V8H(), 0, v3.V8H(), 7);
  __ Mov(v4.V4S(), 3, v4.V4S(), 0);
  __ Mov(v5.V2D(), 0, v5.V2D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the addressed destination lane is expected to change.
    ASSERT_EQUAL_128(0xff23456789abcdef, rev_digits, q16);
    ASSERT_EQUAL_128(rev_digits, 0x0123456789abffed, q17);
    ASSERT_EQUAL_128(0x3322110044556677, a_lo, q18);
    ASSERT_EQUAL_128(a_lo, a_lo, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
    ASSERT_EQUAL_128(0x89abcdef00000000, fwd_digits, q4);
    ASSERT_EQUAL_128(0, 0, q5);
  }
}
7989 
7990 
// Checks Smov from byte, halfword and word lanes into W and X registers. The
// chosen lanes cover both a value with the top bit set and one with it clear
// for each lane size.
TEST(neon_copy_smov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  // Byte lanes into W registers.
  __ Smov(w0, v0.B(), 7);
  __ Smov(w1, v0.B(), 15);

  // Halfword lanes into W registers.
  __ Smov(w2, v0.H(), 0);
  __ Smov(w3, v0.H(), 3);

  // Byte lanes into X registers.
  __ Smov(x4, v0.B(), 7);
  __ Smov(x5, v0.B(), 15);

  // Halfword lanes into X registers.
  __ Smov(x6, v0.H(), 0);
  __ Smov(x7, v0.H(), 3);

  // Word lanes into X registers.
  __ Smov(x16, v0.S(), 0);
  __ Smov(x17, v0.S(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // Lanes whose top bit is set (0xfe, 0xfedc, 0xfedcba98) are expected to
    // be sign-extended to the full destination width.
    ASSERT_EQUAL_32(0xfffffffe, w0);
    ASSERT_EQUAL_32(0x00000001, w1);
    ASSERT_EQUAL_32(0x00003210, w2);
    ASSERT_EQUAL_32(0xfffffedc, w3);
    ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
    ASSERT_EQUAL_64(0x0000000000000001, x5);
    ASSERT_EQUAL_64(0x0000000000003210, x6);
    ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
    ASSERT_EQUAL_64(0x0000000076543210, x16);
    ASSERT_EQUAL_64(0xfffffffffedcba98, x17);
  }
}
8030 
8031 
// Checks Umov from each lane size into a general-purpose register, and the
// Mov (vector element to general-purpose register) forms for S and D lanes.
TEST(neon_copy_umov_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  __ Umov(w0, v0.B(), 15);
  __ Umov(w1, v0.H(), 0);
  __ Umov(w2, v0.S(), 3);
  __ Umov(x3, v0.D(), 1);

  // Same lanes as the S and D Umov calls above, read with Mov instead.
  __ Mov(w4, v0.S(), 3);
  __ Mov(x5, v0.D(), 1);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_32(0x00000001, w0);
    ASSERT_EQUAL_32(0x00003210, w1);
    ASSERT_EQUAL_32(0x01234567, w2);
    ASSERT_EQUAL_64(0x0123456789abcdef, x3);
    // The Mov forms are expected to match the equivalent Umov results.
    ASSERT_EQUAL_32(0x01234567, w4);
    ASSERT_EQUAL_64(0x0123456789abcdef, x5);
  }
}
8060 
8061 
// Checks Ins from a general-purpose register into a single vector lane, for
// every lane size.
TEST(neon_copy_ins_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Value held in x0 (and read through w0 by the narrower forms), plus the
  // 64-bit words used to build the destination registers.
  const uint64_t scalar = 0x0011223344556677;
  const uint64_t a_lo = 0x8899aabbccddeeff;
  const uint64_t fwd_digits = 0x0123456789abcdef;
  const uint64_t rev_digits = 0xfedcba9876543210;

  START();

  __ Mov(x0, scalar);
  __ Movi(v16.V2D(), fwd_digits, rev_digits);
  __ Movi(v17.V2D(), rev_digits, fwd_digits);
  __ Movi(v18.V2D(), scalar, a_lo);
  __ Movi(v19.V2D(), scalar, a_lo);

  __ Movi(v2.V2D(), 0, scalar);
  __ Movi(v3.V2D(), 0, a_lo);
  __ Movi(v4.V2D(), 0, fwd_digits);
  __ Movi(v5.V2D(), 0, fwd_digits);

  // Destinations with both halves populated.
  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  // Destinations whose upper half starts as zero.
  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Only the addressed destination lane is expected to change.
    ASSERT_EQUAL_128(0x7723456789abcdef, rev_digits, q16);
    ASSERT_EQUAL_128(rev_digits, 0x0123456789ab6677, q17);
    ASSERT_EQUAL_128(0x4455667744556677, a_lo, q18);
    ASSERT_EQUAL_128(scalar, scalar, q19);

    ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
    ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
    ASSERT_EQUAL_128(0x4455667700000000, fwd_digits, q4);
    ASSERT_EQUAL_128(scalar, fwd_digits, q5);
  }
}
8104 
8105 
// Checks Ext on 128-bit and 64-bit vectors, including the cases where the
// destination aliases one or both of the sources.
TEST(neon_extract_ext) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // 64-bit words used to build the source registers.
  const uint64_t a_hi = 0x0011223344556677;
  const uint64_t a_lo = 0x8899aabbccddeeff;
  const uint64_t b_hi = 0xffeddccbbaae9988;
  const uint64_t b_lo = 0x7766554433221100;

  START();

  __ Movi(v0.V2D(), a_hi, a_lo);
  __ Movi(v1.V2D(), b_hi, b_lo);

  __ Movi(v2.V2D(), 0, a_hi);
  __ Movi(v3.V2D(), 0, a_lo);

  // 128-bit vectors.
  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Dest is same as one Src
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All reg are the same

  // 64-bit vectors.
  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // Dest is same as one Src
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // All reg are the same

  END();

  if (CAN_RUN()) {
    RUN();

    // An index of 0 is expected to reproduce the first source unchanged.
    ASSERT_EQUAL_128(a_hi, a_lo, q16);
    ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
    ASSERT_EQUAL_128(b_lo, a_hi, q1);
    ASSERT_EQUAL_128(a_lo, a_hi, q0);

    // The 64-bit forms are expected to leave the upper half zero.
    ASSERT_EQUAL_128(0, a_hi, q18);
    ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
    ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
    ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
  }
}
8143 
8144 
// Checks Uaddl for each source lane size (8B -> 8H, 4H -> 4S, 2S -> 2D). The
// inputs include lanes at the maximum value so that the sum needs the wider
// destination lane.
TEST(neon_3different_uaddl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte-lane case: destination v0, sources v1 and v2.
  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
  __ Movi(v2.V2D(), 0, 0x0101010101010101);

  // Halfword-lane cases: destinations v3 and v4, sources v5/v6 and v7.
  __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v5.V2D(), 0, 0x0000000180008001);
  __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
  __ Movi(v7.V2D(), 0, 0x0001000100010001);

  // Word-lane cases: destinations v16 and v17, sources v18/v19 and v20.
  __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v18.V2D(), 0, 0x0000000000000001);
  __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
  __ Movi(v20.V2D(), 0, 0x0000000100000001);

  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());

  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());

  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());


  END();

  if (CAN_RUN()) {
    RUN();

    // Each result lane is twice the source width, so e.g. 0xff + 0x01 is
    // expected to give 0x0100 rather than wrapping.
    ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
    ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
    ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
    ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
    ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
  }
}
8187 
8188 
// Checks Addhn, Raddhn, Subhn and Rsubhn together with their "2" variants.
// Each pair targets the same destination register: the plain form is issued
// with the 8B arrangement and the "2" form with the 16B arrangement, and the
// expectations cover both 64-bit halves of the result.
TEST(neon_3different_addhn_subhn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  // NOTE(review): v4 is initialised here but not read by any instruction
  // below.
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
  __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
  __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
  __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());

  END();

  if (CAN_RUN()) {
    RUN();

    // Order: Addhn, Raddhn, Subhn, Rsubhn.
    ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
    ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
    ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
  }
}
8220 
// Checks scalar D-register forms of Add, Sub, Ushl, Sshl, Ushr, Sshr and Shl.
// The sources are loaded with a non-zero upper 64 bits; every expectation
// requires the upper 64 bits of the result register to be zero.
TEST(neon_d_only_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
  // d3 and d4 hold the register shift amounts (+2 and -2) used by Ushl/Sshl.
  __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
  __ Movi(v4.V2D(), 0xffffffffffffffff, -2);

  __ Add(d16, d0, d0);
  __ Add(d17, d1, d1);
  __ Add(d18, d2, d2);
  __ Sub(d19, d0, d0);
  __ Sub(d20, d0, d1);
  __ Sub(d21, d1, d0);
  __ Ushl(d22, d0, d3);
  __ Ushl(d23, d0, d4);
  __ Sshl(d24, d0, d3);
  __ Sshl(d25, d0, d4);
  __ Ushr(d26, d0, 1);
  __ Sshr(d27, d0, 3);
  __ Shl(d28, d0, 0);
  __ Shl(d29, d0, 16);

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
    ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
    // With a shift register of -2 the expected results are right shifts by
    // two: logical for Ushl (q23), arithmetic for Sshl (q25).
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
    ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
    ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
    ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
    ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
    ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
    ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);
  }
}
8268 
8269 
// Checks scalar Sqshl with an immediate shift of 1 for B, H, S and D sizes.
// For each size the inputs are the largest positive value, the value with only
// the top bit set, and 1. v0-v2 are reloaded before each size, so the order of
// the groups below matters.
TEST(neon_sqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshl(b16, b0, 1);
  __ Sqshl(b17, b1, 1);
  __ Sqshl(b18, b2, 1);

  // Halfword.
  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshl(h19, h0, 1);
  __ Sqshl(h20, h1, 1);
  __ Sqshl(h21, h2, 1);

  // Word.
  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshl(s22, s0, 1);
  __ Sqshl(s23, s1, 1);
  __ Sqshl(s24, s2, 1);

  // Doubleword.
  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshl(d25, d0, 1);
  __ Sqshl(d26, d1, 1);
  __ Sqshl(d27, d2, 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // The two extreme inputs are expected to come back unchanged (saturated),
    // while 1 is expected to become 2.
    ASSERT_EQUAL_128(0, 0x7f, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    ASSERT_EQUAL_128(0, 0x7fff, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    ASSERT_EQUAL_128(0, 0x7fffffff, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8325 
8326 
// Checks scalar Uqshl with an immediate shift of 1 for B, H, S and D sizes.
// For each size the inputs are the all-ones-below-the-top-bit value, the value
// with only the top bit set, and 1. v0-v2 are reloaded before each size, so
// the order of the groups below matters.
TEST(neon_uqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Uqshl(b16, b0, 1);
  __ Uqshl(b17, b1, 1);
  __ Uqshl(b18, b2, 1);

  // Halfword.
  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Uqshl(h19, h0, 1);
  __ Uqshl(h20, h1, 1);
  __ Uqshl(h21, h2, 1);

  // Word.
  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Uqshl(s22, s0, 1);
  __ Uqshl(s23, s1, 1);
  __ Uqshl(s24, s2, 1);

  // Doubleword.
  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Uqshl(d25, d0, 1);
  __ Uqshl(d26, d1, 1);
  __ Uqshl(d27, d2, 1);

  END();

  if (CAN_RUN()) {
    RUN();

    // The first input doubles without overflow, the second is expected to
    // saturate to all ones, and 1 is expected to become 2.
    ASSERT_EQUAL_128(0, 0xfe, q16);
    ASSERT_EQUAL_128(0, 0xff, q17);
    ASSERT_EQUAL_128(0, 0x02, q18);

    ASSERT_EQUAL_128(0, 0xfffe, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x0002, q21);

    ASSERT_EQUAL_128(0, 0xfffffffe, q22);
    ASSERT_EQUAL_128(0, 0xffffffff, q23);
    ASSERT_EQUAL_128(0, 0x00000002, q24);

    ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
  }
}
8382 
8383 
// Checks scalar Sqshlu with an immediate shift of 2 for B, H, S and D sizes.
// For each size the inputs are the largest positive signed value, the value
// with only the top bit set, and 1. v0-v2 are reloaded before each size, so
// the order of the groups below matters.
TEST(neon_sqshlu_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  // Byte.
  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshlu(b16, b0, 2);
  __ Sqshlu(b17, b1, 2);
  __ Sqshlu(b18, b2, 2);

  // Halfword.
  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshlu(h19, h0, 2);
  __ Sqshlu(h20, h1, 2);
  __ Sqshlu(h21, h2, 2);

  // Word.
  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshlu(s22, s0, 2);
  __ Sqshlu(s23, s1, 2);
  __ Sqshlu(s24, s2, 2);

  // Doubleword.
  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshlu(d25, d0, 2);
  __ Sqshlu(d26, d1, 2);
  __ Sqshlu(d27, d2, 2);

  END();

  if (CAN_RUN()) {
    RUN();

    // The positive input is expected to saturate to all ones, the input with
    // the top bit set to zero, and 1 is expected to become 4.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x00, q17);
    ASSERT_EQUAL_128(0, 0x04, q18);

    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0x0000, q20);
    ASSERT_EQUAL_128(0, 0x0004, q21);

    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0x00000000, q23);
    ASSERT_EQUAL_128(0, 0x00000004, q24);

    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x0000000000000004, q27);
  }
}
8439 
8440 
// Checks Sshll and Sshll2 for byte, halfword and word source lanes. The plain
// form is given the 64-bit source arrangement and the "2" form the 128-bit
// one; each pair shares a source register.
TEST(neon_sshll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords, shifted left by 4.
  __ Sshll(v16.V8H(), v0.V8B(), 4);
  __ Sshll2(v17.V8H(), v0.V16B(), 4);

  // Halfwords to words, shifted left by 8.
  __ Sshll(v18.V4S(), v1.V4H(), 8);
  __ Sshll2(v19.V4S(), v1.V8H(), 8);

  // Words to doublewords, shifted left by 16.
  __ Sshll(v20.V2D(), v2.V2S(), 16);
  __ Sshll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // Source lanes with the top bit set are expected to be sign-extended
    // before the shift (e.g. byte 0x81 becomes halfword 0xf810).
    ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
    ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
    ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
  }
}
8472 
// Checks Shll and Shll2 for byte, halfword and word source lanes. In every
// call the shift amount equals the source lane width in bits.
TEST(neon_shll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Shll(v16.V8H(), v0.V8B(), 8);
  __ Shll2(v17.V8H(), v0.V16B(), 8);

  // Halfwords to words.
  __ Shll(v18.V4S(), v1.V4H(), 16);
  __ Shll2(v19.V4S(), v1.V8H(), 16);

  // Words to doublewords.
  __ Shll(v20.V2D(), v2.V2S(), 32);
  __ Shll2(v21.V2D(), v2.V4S(), 32);

  END();

  if (CAN_RUN()) {
    RUN();

    // Each source lane is expected to land in the upper half of its
    // destination lane, with the lower half zero.
    ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
    ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
    ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
    ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
    ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
  }
}
8504 
// Checks Ushll and Ushll2 for byte, halfword and word source lanes. The plain
// form is given the 64-bit source arrangement and the "2" form the 128-bit
// one; each pair shares a source register.
TEST(neon_ushll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords, shifted left by 4.
  __ Ushll(v16.V8H(), v0.V8B(), 4);
  __ Ushll2(v17.V8H(), v0.V16B(), 4);

  // Halfwords to words, shifted left by 8.
  __ Ushll(v18.V4S(), v1.V4H(), 8);
  __ Ushll2(v19.V4S(), v1.V8H(), 8);

  // Words to doublewords, shifted left by 16.
  __ Ushll(v20.V2D(), v2.V2S(), 16);
  __ Ushll2(v21.V2D(), v2.V4S(), 16);

  END();

  if (CAN_RUN()) {
    RUN();

    // Source lanes are expected to be zero-extended before the shift (e.g.
    // byte 0x81 becomes halfword 0x0810).
    ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
    ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
    ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
    ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
    ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
  }
}
8536 
8537 
// Checks Sxtl and Sxtl2 for byte, halfword and word source lanes. The plain
// form is given the 64-bit source arrangement and the "2" form the 128-bit
// one; each pair shares a source register.
TEST(neon_sxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Sxtl(v16.V8H(), v0.V8B());
  __ Sxtl2(v17.V8H(), v0.V16B());

  // Halfwords to words.
  __ Sxtl(v18.V4S(), v1.V4H());
  __ Sxtl2(v19.V4S(), v1.V8H());

  // Words to doublewords.
  __ Sxtl(v20.V2D(), v2.V2S());
  __ Sxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Source lanes with the top bit set are expected to be sign-extended
    // (e.g. byte 0x81 becomes halfword 0xff81).
    ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
    ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
    ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
  }
}
8569 
8570 
// Checks Uxtl and Uxtl2 for byte, halfword and word source lanes. The plain
// form is given the 64-bit source arrangement and the "2" form the 128-bit
// one; each pair shares a source register.
TEST(neon_uxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  // Bytes to halfwords.
  __ Uxtl(v16.V8H(), v0.V8B());
  __ Uxtl2(v17.V8H(), v0.V16B());

  // Halfwords to words.
  __ Uxtl(v18.V4S(), v1.V4H());
  __ Uxtl2(v19.V4S(), v1.V8H());

  // Words to doublewords.
  __ Uxtl(v20.V2D(), v2.V2S());
  __ Uxtl2(v21.V2D(), v2.V4S());

  END();

  if (CAN_RUN()) {
    RUN();

    // Source lanes are expected to be zero-extended (e.g. byte 0x81 becomes
    // halfword 0x0081).
    ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
    ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
    ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
    ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
    ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
  }
}
8602 
8603 
// Signed shift right and accumulate, for every vector arrangement plus the
// scalar D form. Each destination starts as a copy of its own source, so the
// result is "source + (source >> shift)" per lane.
TEST(neon_ssra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source patterns for v0-v4.
  const uint64_t in0_hi = 0x7f0100ff81807f01;
  const uint64_t in0_lo = 0x8081ff00017f8081;
  const uint64_t in1_hi = 0x80008001ffff0000;
  const uint64_t in1_lo = 0xffff000000017fff;
  const uint64_t in2_hi = 0x80000000ffffffff;
  const uint64_t in2_lo = 0x000000007fffffff;
  const uint64_t in3_hi = 0x8000000000000001;
  const uint64_t in3_lo = 0x7fffffffffffffff;
  const uint64_t in4_hi = 0x8000000000000000;
  const uint64_t in4_lo = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), in0_hi, in0_lo);
  __ Movi(v1.V2D(), in1_hi, in1_lo);
  __ Movi(v2.V2D(), in2_hi, in2_lo);
  __ Movi(v3.V2D(), in3_hi, in3_lo);
  __ Movi(v4.V2D(), in4_hi, in4_lo);

  // Seed the accumulators.
  // NOTE(review): v25 is initialised here but nothing below reads or checks
  // it.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes, shifted by half the lane width.
  __ Ssra(v16.V8B(), v0.V8B(), 4);
  __ Ssra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes.
  __ Ssra(v18.V4H(), v1.V4H(), 8);
  __ Ssra(v19.V8H(), v1.V8H(), 8);

  // Word lanes.
  __ Ssra(v20.V2S(), v2.V2S(), 16);
  __ Ssra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes.
  __ Ssra(v22.V2D(), v3.V2D(), 32);
  __ Ssra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Ssra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the top half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
    ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
    ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8656 
// Signed rounding shift right and accumulate, for every vector arrangement
// plus the scalar D form. Each destination starts as a copy of its own source.
TEST(neon_srsra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source patterns for v0-v4.
  const uint64_t in0_hi = 0x7f0100ff81807f01;
  const uint64_t in0_lo = 0x8081ff00017f8081;
  const uint64_t in1_hi = 0x80008001ffff0000;
  const uint64_t in1_lo = 0xffff000000017fff;
  const uint64_t in2_hi = 0x80000000ffffffff;
  const uint64_t in2_lo = 0x000000007fffffff;
  const uint64_t in3_hi = 0x8000000000000001;
  const uint64_t in3_lo = 0x7fffffffffffffff;
  const uint64_t in4_hi = 0x8000000000000000;
  const uint64_t in4_lo = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), in0_hi, in0_lo);
  __ Movi(v1.V2D(), in1_hi, in1_lo);
  __ Movi(v2.V2D(), in2_hi, in2_lo);
  __ Movi(v3.V2D(), in3_hi, in3_lo);
  __ Movi(v4.V2D(), in4_hi, in4_lo);

  // Seed the accumulators.
  // NOTE(review): v25 is initialised here but nothing below reads or checks
  // it.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes, shifted by half the lane width.
  __ Srsra(v16.V8B(), v0.V8B(), 4);
  __ Srsra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes.
  __ Srsra(v18.V4H(), v1.V4H(), 8);
  __ Srsra(v19.V8H(), v1.V8H(), 8);

  // Word lanes.
  __ Srsra(v20.V2S(), v2.V2S(), 16);
  __ Srsra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes.
  __ Srsra(v22.V2D(), v3.V2D(), 32);
  __ Srsra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Srsra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the top half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
    ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
    ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8709 
// Unsigned shift right and accumulate, for every vector arrangement plus the
// scalar D form. Each destination starts as a copy of its own source.
TEST(neon_usra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source patterns for v0-v4.
  const uint64_t in0_hi = 0x7f0100ff81807f01;
  const uint64_t in0_lo = 0x8081ff00017f8081;
  const uint64_t in1_hi = 0x80008001ffff0000;
  const uint64_t in1_lo = 0xffff000000017fff;
  const uint64_t in2_hi = 0x80000000ffffffff;
  const uint64_t in2_lo = 0x000000007fffffff;
  const uint64_t in3_hi = 0x8000000000000001;
  const uint64_t in3_lo = 0x7fffffffffffffff;
  const uint64_t in4_hi = 0x8000000000000000;
  const uint64_t in4_lo = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), in0_hi, in0_lo);
  __ Movi(v1.V2D(), in1_hi, in1_lo);
  __ Movi(v2.V2D(), in2_hi, in2_lo);
  __ Movi(v3.V2D(), in3_hi, in3_lo);
  __ Movi(v4.V2D(), in4_hi, in4_lo);

  // Seed the accumulators.
  // NOTE(review): v25 is initialised here but nothing below reads or checks
  // it.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes, shifted by half the lane width.
  __ Usra(v16.V8B(), v0.V8B(), 4);
  __ Usra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes.
  __ Usra(v18.V4H(), v1.V4H(), 8);
  __ Usra(v19.V8H(), v1.V8H(), 8);

  // Word lanes.
  __ Usra(v20.V2S(), v2.V2S(), 16);
  __ Usra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes.
  __ Usra(v22.V2D(), v3.V2D(), 32);
  __ Usra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Usra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the top half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
    ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
    ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
    ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  }
}
8762 
// Unsigned rounding shift right and accumulate, for every vector arrangement
// plus the scalar D form. Each destination starts as a copy of its own source.
TEST(neon_ursra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Source patterns for v0-v4.
  const uint64_t in0_hi = 0x7f0100ff81807f01;
  const uint64_t in0_lo = 0x8081ff00017f8081;
  const uint64_t in1_hi = 0x80008001ffff0000;
  const uint64_t in1_lo = 0xffff000000017fff;
  const uint64_t in2_hi = 0x80000000ffffffff;
  const uint64_t in2_lo = 0x000000007fffffff;
  const uint64_t in3_hi = 0x8000000000000001;
  const uint64_t in3_lo = 0x7fffffffffffffff;
  const uint64_t in4_hi = 0x8000000000000000;
  const uint64_t in4_lo = 0x0000000000000000;

  START();

  __ Movi(v0.V2D(), in0_hi, in0_lo);
  __ Movi(v1.V2D(), in1_hi, in1_lo);
  __ Movi(v2.V2D(), in2_hi, in2_lo);
  __ Movi(v3.V2D(), in3_hi, in3_lo);
  __ Movi(v4.V2D(), in4_hi, in4_lo);

  // Seed the accumulators.
  // NOTE(review): v25 is initialised here but nothing below reads or checks
  // it.
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  // Byte lanes, shifted by half the lane width.
  __ Ursra(v16.V8B(), v0.V8B(), 4);
  __ Ursra(v17.V16B(), v0.V16B(), 4);

  // Halfword lanes.
  __ Ursra(v18.V4H(), v1.V4H(), 8);
  __ Ursra(v19.V8H(), v1.V8H(), 8);

  // Word lanes.
  __ Ursra(v20.V2S(), v2.V2S(), 16);
  __ Ursra(v21.V4S(), v2.V4S(), 16);

  // Doubleword lanes.
  __ Ursra(v22.V2D(), v3.V2D(), 32);
  __ Ursra(v23.V2D(), v4.V2D(), 32);

  // Scalar form.
  __ Ursra(d24, d3, 48);

  END();

  if (CAN_RUN()) {
    RUN();

    // The 64-bit vector forms and the scalar form leave the top half zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
    ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
    ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
    ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
    ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
    ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
    ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
    ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
    ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  }
}
8815 
8816 
// Scalar unsigned saturating shift left (register), for B, H, S and D sizes.
// Two values are each shifted by +1 (left by one) and by -1 (right by one).
TEST(neon_uqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t val0 = 0xf0000000f000f0f0;
  const uint64_t val1 = 0x7fffffff7fff7f7f;
  const uint64_t by_plus_one = 0x0000000000000001;
  const uint64_t by_minus_one = 0xffffffffffffffff;

  START();

  __ Movi(v0.V2D(), pad_a, val0);
  __ Movi(v1.V2D(), pad_5, val1);
  __ Movi(v2.V2D(), pad_a, by_plus_one);
  __ Movi(v3.V2D(), pad_a, by_minus_one);

  // For each size: val0 << 1, val0 >> 1, val1 << 1, val1 >> 1.
  __ Uqshl(b16, b0, b2);
  __ Uqshl(b17, b0, b3);
  __ Uqshl(b18, b1, b2);
  __ Uqshl(b19, b1, b3);
  __ Uqshl(h20, h0, h2);
  __ Uqshl(h21, h0, h3);
  __ Uqshl(h22, h1, h2);
  __ Uqshl(h23, h1, h3);
  __ Uqshl(s24, s0, s2);
  __ Uqshl(s25, s0, s3);
  __ Uqshl(s26, s1, s2);
  __ Uqshl(s27, s1, s3);
  __ Uqshl(d28, d0, d2);
  __ Uqshl(d29, d0, d3);
  __ Uqshl(d30, d1, d2);
  __ Uqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // val0 << 1 overflows and saturates to all ones; every bit above the
    // scalar result is zero.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x3f, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fbf, q23);
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);
  }
}
8867 
8868 
// Scalar signed saturating shift left (register), for B, H, S and D sizes.
// Two values are each shifted by +1 (left by one) and by -1 (right by one).
TEST(neon_sqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t val0 = 0xbfffffffbfffbfbf;
  const uint64_t val1 = 0x4000000040004040;
  const uint64_t by_plus_one = 0x0000000000000001;
  const uint64_t by_minus_one = 0xffffffffffffffff;

  START();

  __ Movi(v0.V2D(), pad_a, val0);
  __ Movi(v1.V2D(), pad_5, val1);
  __ Movi(v2.V2D(), pad_a, by_plus_one);
  __ Movi(v3.V2D(), pad_a, by_minus_one);

  // For each size: val0 << 1, val0 >> 1, val1 << 1, val1 >> 1.
  __ Sqshl(b16, b0, b2);
  __ Sqshl(b17, b0, b3);
  __ Sqshl(b18, b1, b2);
  __ Sqshl(b19, b1, b3);
  __ Sqshl(h20, h0, h2);
  __ Sqshl(h21, h0, h3);
  __ Sqshl(h22, h1, h2);
  __ Sqshl(h23, h1, h3);
  __ Sqshl(s24, s0, s2);
  __ Sqshl(s25, s0, s3);
  __ Sqshl(s26, s1, s2);
  __ Sqshl(s27, s1, s3);
  __ Sqshl(d28, d0, d2);
  __ Sqshl(d29, d0, d3);
  __ Sqshl(d30, d1, d2);
  __ Sqshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // Left shifts saturate to the most negative / most positive value of the
    // lane; every bit above the scalar result is zero.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xdf, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfdf, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8919 
8920 
// Scalar unsigned rounding shift left (register); only the D form is
// exercised. Two values are each shifted by +1 and by -1.
TEST(neon_urshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t val0 = 0xf0000000f000f0f0;
  const uint64_t val1 = 0x7fffffff7fff7f7f;
  const uint64_t by_plus_one = 0x0000000000000001;
  const uint64_t by_minus_one = 0xffffffffffffffff;

  START();

  __ Movi(v0.V2D(), pad_a, val0);
  __ Movi(v1.V2D(), pad_5, val1);
  __ Movi(v2.V2D(), pad_a, by_plus_one);
  __ Movi(v3.V2D(), pad_a, by_minus_one);

  __ Urshl(d28, d0, d2);
  __ Urshl(d29, d0, d3);
  __ Urshl(d30, d1, d2);
  __ Urshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // No saturation: val0 << 1 simply loses its top bit. The right shift of
    // val1 rounds up.
    ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
8947 
8948 
// Scalar signed rounding shift left (register); only the D form is exercised.
// Two values are each shifted by +1 and by -1.
TEST(neon_srshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t val0 = 0xbfffffffbfffbfbf;
  const uint64_t val1 = 0x4000000040004040;
  const uint64_t by_plus_one = 0x0000000000000001;
  const uint64_t by_minus_one = 0xffffffffffffffff;

  START();

  __ Movi(v0.V2D(), pad_a, val0);
  __ Movi(v1.V2D(), pad_5, val1);
  __ Movi(v2.V2D(), pad_a, by_plus_one);
  __ Movi(v3.V2D(), pad_a, by_minus_one);

  __ Srshl(d28, d0, d2);
  __ Srshl(d29, d0, d3);
  __ Srshl(d30, d1, d2);
  __ Srshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // No saturation: the left shifts wrap. The right shift of val0 rounds up.
    ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
8975 
8976 
// Scalar unsigned saturating rounding shift left (register), for B, H, S and
// D sizes. Two values are each shifted by +1 and by -1.
TEST(neon_uqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t val0 = 0xf0000000f000f0f0;
  const uint64_t val1 = 0x7fffffff7fff7f7f;
  const uint64_t by_plus_one = 0x0000000000000001;
  const uint64_t by_minus_one = 0xffffffffffffffff;

  START();

  __ Movi(v0.V2D(), pad_a, val0);
  __ Movi(v1.V2D(), pad_5, val1);
  __ Movi(v2.V2D(), pad_a, by_plus_one);
  __ Movi(v3.V2D(), pad_a, by_minus_one);

  // For each size: val0 << 1, val0 >> 1, val1 << 1, val1 >> 1.
  __ Uqrshl(b16, b0, b2);
  __ Uqrshl(b17, b0, b3);
  __ Uqrshl(b18, b1, b2);
  __ Uqrshl(b19, b1, b3);
  __ Uqrshl(h20, h0, h2);
  __ Uqrshl(h21, h0, h3);
  __ Uqrshl(h22, h1, h2);
  __ Uqrshl(h23, h1, h3);
  __ Uqrshl(s24, s0, s2);
  __ Uqrshl(s25, s0, s3);
  __ Uqrshl(s26, s1, s2);
  __ Uqrshl(s27, s1, s3);
  __ Uqrshl(d28, d0, d2);
  __ Uqrshl(d29, d0, d3);
  __ Uqrshl(d30, d1, d2);
  __ Uqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // val0 << 1 saturates to all ones; val1 >> 1 rounds up.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0x78, q17);
    ASSERT_EQUAL_128(0, 0xfe, q18);
    ASSERT_EQUAL_128(0, 0x40, q19);
    ASSERT_EQUAL_128(0, 0xffff, q20);
    ASSERT_EQUAL_128(0, 0x7878, q21);
    ASSERT_EQUAL_128(0, 0xfefe, q22);
    ASSERT_EQUAL_128(0, 0x3fc0, q23);
    ASSERT_EQUAL_128(0, 0xffffffff, q24);
    ASSERT_EQUAL_128(0, 0x78007878, q25);
    ASSERT_EQUAL_128(0, 0xfffefefe, q26);
    ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
    ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
    ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
  }
}
9027 
9028 
// Scalar signed saturating rounding shift left (register), for B, H, S and D
// sizes. Two values are each shifted by +1 and by -1.
TEST(neon_sqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t val0 = 0xbfffffffbfffbfbf;
  const uint64_t val1 = 0x4000000040004040;
  const uint64_t by_plus_one = 0x0000000000000001;
  const uint64_t by_minus_one = 0xffffffffffffffff;

  START();

  __ Movi(v0.V2D(), pad_a, val0);
  __ Movi(v1.V2D(), pad_5, val1);
  __ Movi(v2.V2D(), pad_a, by_plus_one);
  __ Movi(v3.V2D(), pad_a, by_minus_one);

  // For each size: val0 << 1, val0 >> 1, val1 << 1, val1 >> 1.
  __ Sqrshl(b16, b0, b2);
  __ Sqrshl(b17, b0, b3);
  __ Sqrshl(b18, b1, b2);
  __ Sqrshl(b19, b1, b3);
  __ Sqrshl(h20, h0, h2);
  __ Sqrshl(h21, h0, h3);
  __ Sqrshl(h22, h1, h2);
  __ Sqrshl(h23, h1, h3);
  __ Sqrshl(s24, s0, s2);
  __ Sqrshl(s25, s0, s3);
  __ Sqrshl(s26, s1, s2);
  __ Sqrshl(s27, s1, s3);
  __ Sqrshl(d28, d0, d2);
  __ Sqrshl(d29, d0, d3);
  __ Sqrshl(d30, d1, d2);
  __ Sqrshl(d31, d1, d3);

  END();

  if (CAN_RUN()) {
    RUN();

    // Left shifts saturate to the lane's signed limits; val0 >> 1 rounds up.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0xe0, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);
    ASSERT_EQUAL_128(0, 0x20, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0xdfe0, q21);
    ASSERT_EQUAL_128(0, 0x7fff, q22);
    ASSERT_EQUAL_128(0, 0x2020, q23);
    ASSERT_EQUAL_128(0, 0x80000000, q24);
    ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
    ASSERT_EQUAL_128(0, 0x7fffffff, q26);
    ASSERT_EQUAL_128(0, 0x20002020, q27);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
    ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
  }
}
9079 
9080 
// Scalar unsigned saturating add, for B, H, S and D sizes. Each input is
// added to itself.
TEST(neon_uqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t val0 = 0xf0000000f000f0f0;
  const uint64_t val1 = 0x7fffffff7fff7f7f;
  const uint64_t val2 = 0x1000000010001010;

  START();

  __ Movi(v0.V2D(), pad_a, val0);
  __ Movi(v1.V2D(), pad_5, val1);
  __ Movi(v2.V2D(), pad_a, val2);

  // For each size: val0 + val0, val1 + val1, val2 + val2.
  __ Uqadd(b16, b0, b0);
  __ Uqadd(b17, b1, b1);
  __ Uqadd(b18, b2, b2);
  __ Uqadd(h19, h0, h0);
  __ Uqadd(h20, h1, h1);
  __ Uqadd(h21, h2, h2);
  __ Uqadd(s22, s0, s0);
  __ Uqadd(s23, s1, s1);
  __ Uqadd(s24, s2, s2);
  __ Uqadd(d25, d0, d0);
  __ Uqadd(d26, d1, d1);
  __ Uqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    // Doubling val0 overflows and saturates to all ones; the other two sums
    // fit in the lane.
    ASSERT_EQUAL_128(0, 0xff, q16);
    ASSERT_EQUAL_128(0, 0xfe, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);
    ASSERT_EQUAL_128(0, 0xffff, q19);
    ASSERT_EQUAL_128(0, 0xfefe, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);
    ASSERT_EQUAL_128(0, 0xffffffff, q22);
    ASSERT_EQUAL_128(0, 0xfffefefe, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);
    ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
    ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9122 
9123 
// Scalar signed saturating add, for B, H, S and D sizes. Each input is added
// to itself.
TEST(neon_sqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t val0 = 0x8000000180018181;
  const uint64_t val1 = 0x7fffffff7fff7f7f;
  const uint64_t val2 = 0x1000000010001010;

  START();

  __ Movi(v0.V2D(), pad_a, val0);
  __ Movi(v1.V2D(), pad_5, val1);
  __ Movi(v2.V2D(), pad_a, val2);

  // For each size: val0 + val0, val1 + val1, val2 + val2.
  __ Sqadd(b16, b0, b0);
  __ Sqadd(b17, b1, b1);
  __ Sqadd(b18, b2, b2);
  __ Sqadd(h19, h0, h0);
  __ Sqadd(h20, h1, h1);
  __ Sqadd(h21, h2, h2);
  __ Sqadd(s22, s0, s0);
  __ Sqadd(s23, s1, s1);
  __ Sqadd(s24, s2, s2);
  __ Sqadd(d25, d0, d0);
  __ Sqadd(d26, d1, d1);
  __ Sqadd(d27, d2, d2);

  END();

  if (CAN_RUN()) {
    RUN();

    // Doubling val0 saturates to the most negative value, doubling val1 to
    // the most positive; val2 + val2 fits in the lane.
    ASSERT_EQUAL_128(0, 0x80, q16);
    ASSERT_EQUAL_128(0, 0x7f, q17);
    ASSERT_EQUAL_128(0, 0x20, q18);
    ASSERT_EQUAL_128(0, 0x8000, q19);
    ASSERT_EQUAL_128(0, 0x7fff, q20);
    ASSERT_EQUAL_128(0, 0x2020, q21);
    ASSERT_EQUAL_128(0, 0x80000000, q22);
    ASSERT_EQUAL_128(0, 0x7fffffff, q23);
    ASSERT_EQUAL_128(0, 0x20002020, q24);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
    ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
  }
}
9165 
9166 
// Scalar unsigned saturating subtract, for B, H, S and D sizes. Covers
// x - x, larger - smaller and smaller - larger.
TEST(neon_uqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t big = 0xf0000000f000f0f0;
  const uint64_t small = 0x7fffffff7fff7f7f;

  START();

  __ Movi(v0.V2D(), pad_a, big);
  __ Movi(v1.V2D(), pad_5, small);

  // For each size: big - big, big - small, small - big.
  __ Uqsub(b16, b0, b0);
  __ Uqsub(b17, b0, b1);
  __ Uqsub(b18, b1, b0);
  __ Uqsub(h19, h0, h0);
  __ Uqsub(h20, h0, h1);
  __ Uqsub(h21, h1, h0);
  __ Uqsub(s22, s0, s0);
  __ Uqsub(s23, s0, s1);
  __ Uqsub(s24, s1, s0);
  __ Uqsub(d25, d0, d0);
  __ Uqsub(d26, d0, d1);
  __ Uqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // small - big would be negative, so it saturates to zero.
    // B size.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x71, q17);
    ASSERT_EQUAL_128(0, 0, q18);

    // H size.
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x7171, q20);
    ASSERT_EQUAL_128(0, 0, q21);

    // S size.
    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x70017171, q23);
    ASSERT_EQUAL_128(0, 0, q24);

    // D size.
    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
    ASSERT_EQUAL_128(0, 0, q27);
  }
}
9210 
9211 
// Scalar signed saturating subtract, for B, H, S and D sizes. Covers x - x,
// negative - positive and positive - negative.
TEST(neon_sqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // The upper doublewords are padding; only the low lanes are operated on.
  const uint64_t pad_a = 0xaaaaaaaaaaaaaaaa;
  const uint64_t pad_5 = 0x5555555555555555;
  const uint64_t neg = 0xf0000000f000f0f0;
  const uint64_t pos = 0x7eeeeeee7eee7e7e;

  START();

  __ Movi(v0.V2D(), pad_a, neg);
  __ Movi(v1.V2D(), pad_5, pos);

  // For each size: neg - neg, neg - pos, pos - neg.
  __ Sqsub(b16, b0, b0);
  __ Sqsub(b17, b0, b1);
  __ Sqsub(b18, b1, b0);
  __ Sqsub(h19, h0, h0);
  __ Sqsub(h20, h0, h1);
  __ Sqsub(h21, h1, h0);
  __ Sqsub(s22, s0, s0);
  __ Sqsub(s23, s0, s1);
  __ Sqsub(s24, s1, s0);
  __ Sqsub(d25, d0, d0);
  __ Sqsub(d26, d0, d1);
  __ Sqsub(d27, d1, d0);

  END();

  if (CAN_RUN()) {
    RUN();

    // Both mixed-sign differences overflow and saturate to the lane's signed
    // limits.
    // B size.
    ASSERT_EQUAL_128(0, 0, q16);
    ASSERT_EQUAL_128(0, 0x80, q17);
    ASSERT_EQUAL_128(0, 0x7f, q18);

    // H size.
    ASSERT_EQUAL_128(0, 0, q19);
    ASSERT_EQUAL_128(0, 0x8000, q20);
    ASSERT_EQUAL_128(0, 0x7fff, q21);

    // S size.
    ASSERT_EQUAL_128(0, 0, q22);
    ASSERT_EQUAL_128(0, 0x80000000, q23);
    ASSERT_EQUAL_128(0, 0x7fffffff, q24);

    // D size.
    ASSERT_EQUAL_128(0, 0, q25);
    ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
    ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
  }
}
9255 
9256 
// Vector Fmla/Fmls for the 2S, 4S and 2D arrangements. Every accumulator
// starts as a copy of v0; v1 and v2 supply the multiplicands.
TEST(neon_fmla_fmls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  const uint64_t acc_hi = 0x3f80000040000000;
  const uint64_t acc_lo = 0x4100000000000000;
  const uint64_t lhs_hi = 0x400000003f800000;
  const uint64_t lhs_lo = 0x000000003f800000;
  const uint64_t rhs_hi = 0x3f800000ffffffff;
  const uint64_t rhs_lo = 0x7f800000ff800000;

  START();
  __ Movi(v0.V2D(), acc_hi, acc_lo);
  __ Movi(v1.V2D(), lhs_hi, lhs_lo);
  __ Movi(v2.V2D(), rhs_hi, rhs_lo);

  // Seed the six accumulators.
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V16B(), v0.V16B());
  __ Mov(v18.V16B(), v0.V16B());
  __ Mov(v19.V16B(), v0.V16B());
  __ Mov(v20.V16B(), v0.V16B());
  __ Mov(v21.V16B(), v0.V16B());

  // Multiply-add.
  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
  // Multiply-subtract.
  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 2S forms leave the top half of the destination zero.
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
    ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
    ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
    ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
    ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
    ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);
  }
}
9290 
9291 
// Half-precision vector Fmla for the 8H and 4H arrangements, covering a
// normal product, Inf * -Inf, and Inf * 0 on NaN accumulators.
TEST(neon_fmla_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each pattern repeats one half-precision value in all eight lanes.
  const uint64_t h_two = 0x4000400040004000;      // 2.0
  const uint64_t h_45 = 0x51a051a051a051a0;       // 45.0
  const uint64_t h_pos_inf = 0x7c007c007c007c00;  // +Inf
  const uint64_t h_neg_inf = 0xfc00fc00fc00fc00;  // -Inf
  const uint64_t h_qnan = 0x7e007e007e007e00;     // quiet NaN
  const uint64_t h_snan = 0x7c017c017c017c01;     // signalling NaN
  const uint64_t h_zero = 0x0000000000000000;     // +0.0

  START();
  __ Movi(v0.V2D(), h_two, h_two);
  __ Movi(v1.V2D(), h_45, h_45);
  __ Movi(v2.V2D(), h_pos_inf, h_pos_inf);
  __ Movi(v3.V2D(), h_neg_inf, h_neg_inf);
  __ Movi(v4.V2D(), h_qnan, h_qnan);
  __ Movi(v5.V2D(), h_snan, h_snan);
  __ Movi(v6.V2D(), h_zero, h_zero);

  // Accumulators: 2.0, 2.0, quiet NaN, signalling NaN (for 8H, then 4H).
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  // 8H forms.
  __ Fmla(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmla(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmla(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmla(v19.V8H(), v3.V8H(), v6.V8H());
  // 4H forms.
  __ Fmla(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmla(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmla(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmla(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 2 + 2 * 45 = 92 (0x55c0); 2 + Inf * -Inf = -Inf. The signalling NaN
    // accumulator comes back quietened (0x7e01).
    ASSERT_EQUAL_128(0x55c055c055c055c0, 0x55c055c055c055c0, v16);
    ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v17);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
    // The 4H forms leave the top half of the destination zero.
    ASSERT_EQUAL_128(0, 0x55c055c055c055c0, v20);
    ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v21);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
  }
}
9337 
9338 
// Half-precision vector Fmls for the 8H and 4H arrangements, covering a
// normal product, Inf * -Inf, and Inf * 0 on NaN accumulators.
TEST(neon_fmls_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each pattern repeats one half-precision value in all eight lanes.
  const uint64_t h_two = 0x4000400040004000;      // 2.0
  const uint64_t h_45 = 0x51a051a051a051a0;       // 45.0
  const uint64_t h_pos_inf = 0x7c007c007c007c00;  // +Inf
  const uint64_t h_neg_inf = 0xfc00fc00fc00fc00;  // -Inf
  const uint64_t h_qnan = 0x7e007e007e007e00;     // quiet NaN
  const uint64_t h_snan = 0x7c017c017c017c01;     // signalling NaN
  const uint64_t h_zero = 0x0000000000000000;     // +0.0

  START();
  __ Movi(v0.V2D(), h_two, h_two);
  __ Movi(v1.V2D(), h_45, h_45);
  __ Movi(v2.V2D(), h_pos_inf, h_pos_inf);
  __ Movi(v3.V2D(), h_neg_inf, h_neg_inf);
  __ Movi(v4.V2D(), h_qnan, h_qnan);
  __ Movi(v5.V2D(), h_snan, h_snan);
  __ Movi(v6.V2D(), h_zero, h_zero);

  // Accumulators: 2.0, 2.0, quiet NaN, signalling NaN (for 8H, then 4H).
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  // 8H forms.
  __ Fmls(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmls(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmls(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmls(v19.V8H(), v3.V8H(), v6.V8H());
  // 4H forms.
  __ Fmls(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmls(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmls(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmls(v23.V4H(), v3.V4H(), v6.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 2 - 2 * 45 = -88 (0xd580); 2 - Inf * -Inf = +Inf. The signalling NaN
    // accumulator comes back quietened (0x7e01).
    ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v16);
    ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v17);
    ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
    ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
    // The 4H forms leave the top half of the destination zero.
    ASSERT_EQUAL_128(0, 0xd580d580d580d580, v20);
    ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v21);
    ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
    ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
  }
}
9384 
9385 
TEST(neon_fhm)9386 TEST(neon_fhm) {
9387   // Test basic operation of fmlal{2} and fmlsl{2}. The simulator tests have
9388   // more comprehensive input sets.
9389   SETUP_WITH_FEATURES(CPUFeatures::kFP,
9390                       CPUFeatures::kNEON,
9391                       CPUFeatures::kNEONHalf,
9392                       CPUFeatures::kFHM);
9393 
9394   START();
9395   // Test multiplications:
9396   //        v30                               v31
9397   //  [0]   65504 (max normal)          *     65504 (max normal)
9398   //  [1]   -1                          *     0
9399   //  [2]   2^-24 (min subnormal)       *     2^-24 (min subnormal)
9400   //  [3]   -2^-24 (min subnormal)      *     65504 (max normal)
9401   //  [4]   6.10e-5 (min normal)        *     0.99...
9402   //  [5]   0                           *     -0
9403   //  [6]   -0                          *     0
9404   //  [7]   -Inf                        *     -Inf
9405   __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
9406   __ Movi(v31.V8H(), 0xfc00000080003bff, 0x7bff000100007bff);
9407 
9408   // Accumulators for use with Fmlal{2}:
9409   // v0.S[0] = 384
9410   // v0.S[1] = -0
9411   __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x8000000043c00000);
9412   // v1.S[0] = -(2^-48 + 2^-71)
9413   // v1.S[1] = 0
9414   __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7800001);
9415   // v2.S[0] = 128
9416   // v2.S[1] = 0
9417   // v2.S[2] = 1
9418   // v2.S[3] = 1
9419   __ Movi(v2.V4S(), 0x3f8000003f800000, 0x0000000043000000);
9420   // v3.S[0] = 0
9421   // v3.S[1] = -0
9422   // v3.S[2] = -0
9423   // v3.S[3] = 0
9424   __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
9425   // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
9426   // results are just the negation of the Fmlal{2} results.
9427   __ Fneg(v4.V4S(), v0.V4S());
9428   __ Fneg(v5.V4S(), v1.V4S());
9429   __ Fneg(v6.V4S(), v2.V4S());
9430   __ Fneg(v7.V4S(), v3.V4S());
9431 
9432   __ Fmlal(v0.V2S(), v30.V2H(), v31.V2H());
9433   __ Fmlal2(v1.V2S(), v30.V2H(), v31.V2H());
9434   __ Fmlal(v2.V4S(), v30.V4H(), v31.V4H());
9435   __ Fmlal2(v3.V4S(), v30.V4H(), v31.V4H());
9436 
9437   __ Fmlsl(v4.V2S(), v30.V2H(), v31.V2H());
9438   __ Fmlsl2(v5.V2S(), v30.V2H(), v31.V2H());
9439   __ Fmlsl(v6.V4S(), v30.V4H(), v31.V4H());
9440   __ Fmlsl2(v7.V4S(), v30.V4H(), v31.V4H());
9441   END();
9442 
9443   if (CAN_RUN()) {
9444     RUN();
9445 
9446     // Fmlal(2S)
9447     // v0.S[0] = 384 + (65504 * 65504) = 4290774528 (rounded from 4290774400)
9448     // v0.S[1] = -0 + (-1 * 0) = -0
9449     ASSERT_EQUAL_128(0x0000000000000000, 0x800000004f7fc006, v0);
9450     // Fmlal2(2S)
9451     // v1.S[0] = -(2^-48 + 2^-71) + (2^-24 * 2^-24) = -2^-71
9452     // v1.S[1] = 0 + (-2^-24 * 65504) = -0.003904...
9453     ASSERT_EQUAL_128(0x0000000000000000, 0xbb7fe0009c000000, v1);
9454     // Fmlal(4S)
9455     // v2.S[0] = 128 + (65504 * 65504) = 4290774016 (rounded from 4290774144)
9456     // v2.S[1] = 0 + (-1 * 0) = 0
9457     // v2.S[2] = 1 + (2^-24 * 2^-24) = 1 (rounded)
9458     // v2.S[3] = 1 + (-2^-24 * 65504) = 0.996...
9459     ASSERT_EQUAL_128(0x3f7f00203f800000, 0x000000004f7fc004, v2);
9460     // Fmlal2(4S)
9461     // v3.S[0] = 0 + (6.103516e-5 * 0.99...) = 6.100535e-5
9462     // v3.S[1] = -0 + (0 * -0) = -0
9463     // v3.S[2] = -0 + (-0 * 0) = -0
9464     // v3.S[3] = 0 + (-Inf * -Inf) = Inf
9465     ASSERT_EQUAL_128(0x7f80000080000000, 0x80000000387fe000, v3);
9466 
9467     // Fmlsl results are mostly the same, but negated.
9468     ASSERT_EQUAL_128(0x0000000000000000, 0x00000000cf7fc006, v4);
9469     ASSERT_EQUAL_128(0x0000000000000000, 0x3b7fe0001c000000, v5);
9470     // In this case: v6.S[1] = 0 - (0 * -0) = 0
9471     ASSERT_EQUAL_128(0xbf7f0020bf800000, 0x00000000cf7fc004, v6);
9472     ASSERT_EQUAL_128(0xff80000000000000, 0x00000000b87fe000, v7);
9473   }
9474 }
9475 
9476 
TEST(neon_byelement_fhm)9477 TEST(neon_byelement_fhm) {
9478   // Test basic operation of fmlal{2} and fmlsl{2} (by element). The simulator
9479   // tests have more comprehensive input sets.
9480   SETUP_WITH_FEATURES(CPUFeatures::kFP,
9481                       CPUFeatures::kNEON,
9482                       CPUFeatures::kNEONHalf,
9483                       CPUFeatures::kFHM);
9484 
9485   START();
9486   // Set up multiplication inputs.
9487   //
9488   // v30.H[0] = 65504 (max normal)
9489   // v30.H[1] = -1
9490   // v30.H[2] = 2^-24 (min subnormal)
9491   // v30.H[3] = -2^-24 (min subnormal)
9492   // v30.H[4] = 6.10e-5 (min normal)
9493   // v30.H[5] = 0
9494   // v30.H[6] = -0
9495   // v30.H[7] = -Inf
9496   __ Movi(v30.V8H(), 0xfc00800000000400, 0x80010001bc007bff);
9497 
9498   // Each test instruction should only use one lane of vm, so set up unique
9499   // registers with poison values in other lanes. The poison NaN avoids the
9500   // default NaN (so it shouldn't be encountered accidentally), but is otherwise
9501   // arbitrary.
9502   VRegister poison = v29;
9503   __ Movi(v29.V8H(), 0x7f417f417f417f41, 0x7f417f417f417f41);
9504   // v31.H[0,2,4,...]: 0.9995117 (the value just below 1)
9505   // v31.H[1,3,5,...]: 1.000977 (the value just above 1)
9506   __ Movi(v31.V8H(), 0x3bff3c013bff3c01, 0x3bff3c013bff3c01);
9507   // Set up [v8,v15] as vm inputs.
9508   for (int i = 0; i <= 7; i++) {
9509     VRegister vm(i + 8);
9510     __ Mov(vm, poison);
9511     __ Ins(vm.V8H(), i, v31.V8H(), i);
9512   }
9513 
9514   // Accumulators for use with Fmlal{2}:
9515   // v0.S[0] = 2^-8
9516   // v0.S[1] = 1
9517   __ Movi(v0.V4S(), 0xdeadbeefdeadbeef, 0x3f8000003b800000);
9518   // v1.S[0] = -1.5 * 2^-49
9519   // v1.S[1] = 0
9520   __ Movi(v1.V4S(), 0xdeadbeefdeadbeef, 0x00000000a7400000);
9521   // v2.S[0] = 0
9522   // v2.S[1] = 2^14
9523   // v2.S[2] = 1.5 * 2^-48
9524   // v2.S[3] = Inf
9525   __ Movi(v2.V4S(), 0x7f80000027c00000, 0xc680000000000000);
9526   // v3.S[0] = 0
9527   // v3.S[1] = -0
9528   // v3.S[2] = -0
9529   // v3.S[3] = 0
9530   __ Movi(v3.V4S(), 0x0000000080000000, 0x8000000000000000);
9531   // For Fmlsl{2}, we simply negate the accumulators above so that the Fmlsl{2}
9532   // results are just the negation of the Fmlal{2} results.
9533   __ Fneg(v4.V4S(), v0.V4S());
9534   __ Fneg(v5.V4S(), v1.V4S());
9535   __ Fneg(v6.V4S(), v2.V4S());
9536   __ Fneg(v7.V4S(), v3.V4S());
9537 
9538   __ Fmlal(v0.V2S(), v30.V2H(), v8.H(), 0);
9539   __ Fmlal2(v1.V2S(), v30.V2H(), v9.H(), 1);
9540   __ Fmlal(v2.V4S(), v30.V4H(), v10.H(), 2);
9541   __ Fmlal2(v3.V4S(), v30.V4H(), v11.H(), 3);
9542 
9543   __ Fmlsl(v4.V2S(), v30.V2H(), v12.H(), 4);
9544   __ Fmlsl2(v5.V2S(), v30.V2H(), v13.H(), 5);
9545   __ Fmlsl(v6.V4S(), v30.V4H(), v14.H(), 6);
9546   __ Fmlsl2(v7.V4S(), v30.V4H(), v15.H(), 7);
9547   END();
9548 
9549   if (CAN_RUN()) {
9550     RUN();
9551 
9552     // Fmlal(2S)
9553     // v0.S[0] = 2^-8 + (65504 * 1.000977) = 65567.96875 (rounded)
9554     // v0.S[1] = 1 + (-1 * 1.000977) = -0.000976...
9555     ASSERT_EQUAL_128(0x0000000000000000, 0xba80000047800ffc, v0);
9556     // Fmlal2(2S)
9557     // v1.S[0] = (-1.5 * 2^-49) + (2^-24 * 0.9995117) = 5.958e-8 (rounded)
9558     // v1.S[1] = 0 + (-2^-24 * 0.9995117) = -5.958e-8
9559     ASSERT_EQUAL_128(0x0000000000000000, 0xb37fe000337fdfff, v1);
9560     // Fmlal(4S)
9561     // v2.S[0] = 0 + (65504 * 1.000977) = 65566.96875
9562     // v2.S[1] = 2^14 + (-1 * 1.000977) = -16385 (rounded from -16385.5)
9563     // v2.S[2] = (1.5 * 2^-48) + (2^-24 * 1.000977) = 5.966e-8 (rounded up)
9564     // v2.S[3] = Inf + (-2^-24 * 1.000977) = Inf
9565     ASSERT_EQUAL_128(0x7f80000033802001, 0xc680020047800ffc, v2);
9566     // Fmlal2(4S)
9567     // v3.S[0] = 0 + (6.103516e-5 * 0.9995117) = 6.100535e-5
9568     // v3.S[1] = -0 + (0 * 0.9995117) = 0
9569     // v3.S[2] = -0 + (-0 * 0.9995117) = -0
9570     // v3.S[3] = 0 + (-Inf * 0.9995117) = -Inf
9571     ASSERT_EQUAL_128(0xff80000080000000, 0x00000000387fe000, v3);
9572 
9573     // Fmlsl results are mostly the same, but negated.
9574     ASSERT_EQUAL_128(0x0000000000000000, 0x3a800000c7800ffc, v4);
9575     ASSERT_EQUAL_128(0x0000000000000000, 0x337fe000b37fdfff, v5);
9576     ASSERT_EQUAL_128(0xff800000b3802001, 0x46800200c7800ffc, v6);
9577     // In this case: v7.S[2] = 0 - (-0 * 0.9995117) = 0
9578     ASSERT_EQUAL_128(0x7f80000000000000, 0x00000000b87fe000, v7);
9579   }
9580 }
9581 
9582 
TEST(neon_fmulx_scalar) {
  // Scalar Fmulx in single and double precision: one ordinary product,
  // followed by every sign combination of zero times infinity.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  const double two = 2.0;
  const double half = 0.5;
  const double pos_zero = 0.0;
  const double neg_zero = -0.0;

  START();
  // Single-precision inputs live in s0-s5, products in s16-s20.
  __ Fmov(s0, two);
  __ Fmov(s1, half);
  __ Fmov(s2, pos_zero);
  __ Fmov(s3, neg_zero);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmulx(s16, s0, s1);  // 2.0 * 0.5
  __ Fmulx(s17, s2, s4);  // +0 * +Inf
  __ Fmulx(s18, s2, s5);  // +0 * -Inf
  __ Fmulx(s19, s3, s4);  // -0 * +Inf
  __ Fmulx(s20, s3, s5);  // -0 * -Inf

  // Double-precision inputs live in d21-d26, products in d27-d31.
  __ Fmov(d21, two);
  __ Fmov(d22, half);
  __ Fmov(d23, pos_zero);
  __ Fmov(d24, neg_zero);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fmulx(d27, d21, d22);  // 2.0 * 0.5
  __ Fmulx(d28, d23, d25);  // +0 * +Inf
  __ Fmulx(d29, d23, d26);  // +0 * -Inf
  __ Fmulx(d30, d24, d25);  // -0 * +Inf
  __ Fmulx(d31, d24, d26);  // -0 * -Inf
  END();

  if (CAN_RUN()) {
    RUN();

    // Zero times infinity is expected to produce 2.0 whose sign is the
    // product of the operand signs.
    ASSERT_EQUAL_FP32(1.0, s16);
    ASSERT_EQUAL_FP32(2.0, s17);
    ASSERT_EQUAL_FP32(-2.0, s18);
    ASSERT_EQUAL_FP32(-2.0, s19);
    ASSERT_EQUAL_FP32(2.0, s20);
    ASSERT_EQUAL_FP64(1.0, d27);
    ASSERT_EQUAL_FP64(2.0, d28);
    ASSERT_EQUAL_FP64(-2.0, d29);
    ASSERT_EQUAL_FP64(-2.0, d30);
    ASSERT_EQUAL_FP64(2.0, d31);
  }
}
9627 
9628 
TEST(neon_fmulx_h) {
  // Vector half-precision Fmulx in the 8H and 4H arrangements: one ordinary
  // product, followed by every sign combination of zero times infinity.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each constant is one FP16 bit pattern repeated across four H lanes.
  const uint64_t two_4h = 0x4000400040004000;       // 2.0
  const uint64_t half_4h = 0x3800380038003800;      // 0.5
  const uint64_t pos_zero_4h = 0x0000000000000000;  // +0.0
  const uint64_t neg_zero_4h = 0x8000800080008000;  // -0.0
  const uint64_t pos_inf_4h = 0x7c007c007c007c00;   // +Inf
  const uint64_t neg_inf_4h = 0xfc00fc00fc00fc00;   // -Inf
  const uint64_t one_4h = 0x3c003c003c003c00;       // 1.0
  const uint64_t neg_two_4h = 0xc000c000c000c000;   // -2.0

  START();
  // Inputs: the same value in all eight lanes of each register.
  __ Movi(v0.V2D(), two_4h, two_4h);
  __ Movi(v1.V2D(), half_4h, half_4h);
  __ Movi(v2.V2D(), pos_zero_4h, pos_zero_4h);
  __ Movi(v3.V2D(), neg_zero_4h, neg_zero_4h);
  __ Movi(v4.V2D(), pos_inf_4h, pos_inf_4h);
  __ Movi(v5.V2D(), neg_inf_4h, neg_inf_4h);
  // Eight-lane products.
  __ Fmulx(v6.V8H(), v0.V8H(), v1.V8H());   // 2.0 * 0.5
  __ Fmulx(v7.V8H(), v2.V8H(), v4.V8H());   // +0 * +Inf
  __ Fmulx(v8.V8H(), v2.V8H(), v5.V8H());   // +0 * -Inf
  __ Fmulx(v9.V8H(), v3.V8H(), v4.V8H());   // -0 * +Inf
  __ Fmulx(v10.V8H(), v3.V8H(), v5.V8H());  // -0 * -Inf
  // The same products using only the low four lanes.
  __ Fmulx(v11.V4H(), v0.V4H(), v1.V4H());
  __ Fmulx(v12.V4H(), v2.V4H(), v4.V4H());
  __ Fmulx(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fmulx(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fmulx(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();
    // Zero times infinity is expected to give 2.0, signed by the product of
    // the operand signs.
    ASSERT_EQUAL_128(one_4h, one_4h, v6);
    ASSERT_EQUAL_128(two_4h, two_4h, v7);
    ASSERT_EQUAL_128(neg_two_4h, neg_two_4h, v8);
    ASSERT_EQUAL_128(neg_two_4h, neg_two_4h, v9);
    ASSERT_EQUAL_128(two_4h, two_4h, v10);
    // The 4H results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, one_4h, v11);
    ASSERT_EQUAL_128(0, two_4h, v12);
    ASSERT_EQUAL_128(0, neg_two_4h, v13);
    ASSERT_EQUAL_128(0, neg_two_4h, v14);
    ASSERT_EQUAL_128(0, two_4h, v15);
  }
}
9667 
9668 
TEST(neon_fmulx_h_scalar) {
  // Scalar half-precision Fmulx: one ordinary product, followed by every sign
  // combination of zero times infinity.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  const Float16 one(1.0);
  const Float16 two(2.0);
  const Float16 neg_two(-2.0);

  START();
  // Inputs in h0-h5.
  __ Fmov(h0, two);
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  // Products in h6-h10.
  __ Fmulx(h6, h0, h1);   // 2.0 * 0.5
  __ Fmulx(h7, h2, h4);   // +0 * +Inf
  __ Fmulx(h8, h2, h5);   // +0 * -Inf
  __ Fmulx(h9, h3, h4);   // -0 * +Inf
  __ Fmulx(h10, h3, h5);  // -0 * -Inf
  END();

  if (CAN_RUN()) {
    RUN();
    // Zero times infinity is expected to give 2.0, signed by the product of
    // the operand signs.
    ASSERT_EQUAL_FP16(one, h6);
    ASSERT_EQUAL_FP16(two, h7);
    ASSERT_EQUAL_FP16(neg_two, h8);
    ASSERT_EQUAL_FP16(neg_two, h9);
    ASSERT_EQUAL_FP16(two, h10);
  }
}
9698 
TEST(neon_fabd_h) {
  // Vector half-precision Fabd (absolute difference) in the 8H and 4H
  // arrangements, covering finite values, signed zeroes and infinities.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each constant is one FP16 bit pattern repeated across four H lanes.
  const uint64_t two_4h = 0x4000400040004000;        // 2.0
  const uint64_t half_4h = 0x3800380038003800;       // 0.5
  const uint64_t pos_zero_4h = 0x0000000000000000;   // +0.0
  const uint64_t neg_zero_4h = 0x8000800080008000;   // -0.0
  const uint64_t pos_inf_4h = 0x7c007c007c007c00;    // +Inf
  const uint64_t neg_inf_4h = 0xfc00fc00fc00fc00;    // -Inf
  const uint64_t one_half_4h = 0x3e003e003e003e00;   // 1.5

  START();
  // Inputs: the same value in all eight lanes of each register.
  __ Movi(v0.V2D(), two_4h, two_4h);
  __ Movi(v1.V2D(), half_4h, half_4h);
  __ Movi(v2.V2D(), pos_zero_4h, pos_zero_4h);
  __ Movi(v3.V2D(), neg_zero_4h, neg_zero_4h);
  __ Movi(v4.V2D(), pos_inf_4h, pos_inf_4h);
  __ Movi(v5.V2D(), neg_inf_4h, neg_inf_4h);

  // Eight-lane differences.
  __ Fabd(v6.V8H(), v1.V8H(), v0.V8H());   // |0.5 - 2.0|
  __ Fabd(v7.V8H(), v2.V8H(), v3.V8H());   // |+0 - -0|
  __ Fabd(v8.V8H(), v2.V8H(), v5.V8H());   // |+0 - -Inf|
  __ Fabd(v9.V8H(), v3.V8H(), v4.V8H());   // |-0 - +Inf|
  __ Fabd(v10.V8H(), v3.V8H(), v5.V8H());  // |-0 - -Inf|
  // The same differences using only the low four lanes.
  __ Fabd(v11.V4H(), v1.V4H(), v0.V4H());
  __ Fabd(v12.V4H(), v2.V4H(), v3.V4H());
  __ Fabd(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fabd(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fabd(v15.V4H(), v3.V4H(), v5.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Every result is expected to be non-negative: 1.5, +0 or +Inf.
    ASSERT_EQUAL_128(one_half_4h, one_half_4h, v6);
    ASSERT_EQUAL_128(pos_zero_4h, pos_zero_4h, v7);
    ASSERT_EQUAL_128(pos_inf_4h, pos_inf_4h, v8);
    ASSERT_EQUAL_128(pos_inf_4h, pos_inf_4h, v9);
    ASSERT_EQUAL_128(pos_inf_4h, pos_inf_4h, v10);
    // The 4H results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, one_half_4h, v11);
    ASSERT_EQUAL_128(0, pos_zero_4h, v12);
    ASSERT_EQUAL_128(0, pos_inf_4h, v13);
    ASSERT_EQUAL_128(0, pos_inf_4h, v14);
    ASSERT_EQUAL_128(0, pos_inf_4h, v15);
  }
}
9739 
9740 
TEST(neon_fabd_h_scalar) {
  // Scalar half-precision Fabd (absolute difference) on finite values, signed
  // zeroes and infinities.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  const Float16 pos_zero(0.0);

  START();
  // Inputs in h0-h5.
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, pos_zero);
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  // Differences in h16-h20.
  __ Fabd(h16, h1, h0);  // |0.5 - 2.0|
  __ Fabd(h17, h2, h3);  // |+0 - -0|
  __ Fabd(h18, h2, h5);  // |+0 - -Inf|
  __ Fabd(h19, h3, h4);  // |-0 - +Inf|
  __ Fabd(h20, h3, h5);  // |-0 - -Inf|
  END();

  if (CAN_RUN()) {
    RUN();
    // Every result is expected to be non-negative.
    ASSERT_EQUAL_FP16(Float16(1.5), h16);
    ASSERT_EQUAL_FP16(pos_zero, h17);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h18);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h19);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h20);
  }
}
9770 
9771 
TEST(neon_fabd_scalar) {
  // Scalar Fabd (absolute difference) in single and double precision on
  // finite values, signed zeroes and infinities.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  const double two = 2.0;
  const double half = 0.5;
  const double pos_zero = 0.0;
  const double neg_zero = -0.0;

  START();
  // Single-precision inputs live in s0-s5, differences in s16-s20.
  __ Fmov(s0, two);
  __ Fmov(s1, half);
  __ Fmov(s2, pos_zero);
  __ Fmov(s3, neg_zero);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fabd(s16, s1, s0);  // |0.5 - 2.0|
  __ Fabd(s17, s2, s3);  // |+0 - -0|
  __ Fabd(s18, s2, s5);  // |+0 - -Inf|
  __ Fabd(s19, s3, s4);  // |-0 - +Inf|
  __ Fabd(s20, s3, s5);  // |-0 - -Inf|

  // Double-precision inputs live in d21-d26, differences in d27-d31.
  __ Fmov(d21, two);
  __ Fmov(d22, half);
  __ Fmov(d23, pos_zero);
  __ Fmov(d24, neg_zero);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fabd(d27, d21, d22);  // |2.0 - 0.5|
  __ Fabd(d28, d23, d24);  // |+0 - -0|
  __ Fabd(d29, d23, d26);  // |+0 - -Inf|
  __ Fabd(d30, d24, d25);  // |-0 - +Inf|
  __ Fabd(d31, d24, d26);  // |-0 - -Inf|
  END();

  if (CAN_RUN()) {
    RUN();

    // Every result is expected to be non-negative, whatever the operand order.
    ASSERT_EQUAL_FP32(1.5, s16);
    ASSERT_EQUAL_FP32(0.0, s17);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
    ASSERT_EQUAL_FP64(1.5, d27);
    ASSERT_EQUAL_FP64(0.0, d28);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);
  }
}
9816 
9817 
TEST(neon_frecps_h) {
  // Vector half-precision Frecps in the 8H and 4H arrangements. The expected
  // values below correspond to 2 - (a * b) for operands a and b.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each constant is one FP16 bit pattern repeated across four H lanes.
  const uint64_t two_4h = 0x4000400040004000;         // 2.0
  const uint64_t neg_one_4h = 0xbc00bc00bc00bc00;     // -1.0
  const uint64_t forty_five_4h = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_4h = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_4h = 0xfc00fc00fc00fc00;     // -Inf
  const uint64_t neg_88_4h = 0xd580d580d580d580;      // -88.0
  const uint64_t pos_47_4h = 0x51e051e051e051e0;      // 47.0

  START();
  // Inputs: the same value in all eight lanes of each register.
  __ Movi(v0.V2D(), two_4h, two_4h);
  __ Movi(v1.V2D(), neg_one_4h, neg_one_4h);
  __ Movi(v2.V2D(), forty_five_4h, forty_five_4h);
  __ Movi(v3.V2D(), pos_inf_4h, pos_inf_4h);
  __ Movi(v4.V2D(), neg_inf_4h, neg_inf_4h);

  // Eight-lane steps.
  __ Frecps(v5.V8H(), v0.V8H(), v2.V8H());  // a = 2.0, b = 45.0
  __ Frecps(v6.V8H(), v1.V8H(), v2.V8H());  // a = -1.0, b = 45.0
  __ Frecps(v7.V8H(), v0.V8H(), v3.V8H());  // a = 2.0, b = +Inf
  __ Frecps(v8.V8H(), v0.V8H(), v4.V8H());  // a = 2.0, b = -Inf
  // The same steps using only the low four lanes.
  __ Frecps(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frecps(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frecps(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frecps(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(neg_88_4h, neg_88_4h, v5);
    ASSERT_EQUAL_128(pos_47_4h, pos_47_4h, v6);
    ASSERT_EQUAL_128(neg_inf_4h, neg_inf_4h, v7);
    ASSERT_EQUAL_128(pos_inf_4h, pos_inf_4h, v8);
    // The 4H results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, neg_88_4h, v9);
    ASSERT_EQUAL_128(0, pos_47_4h, v10);
    ASSERT_EQUAL_128(0, neg_inf_4h, v11);
    ASSERT_EQUAL_128(0, pos_inf_4h, v12);
  }
}
9853 
9854 
TEST(neon_frecps_h_scalar) {
  // Scalar half-precision Frecps. The expected values below correspond to
  // 2 - (a * b) for operands a and b.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  // Inputs in h0-h4.
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  // Results in h5-h8.
  __ Frecps(h5, h0, h2);  // a = 2.0, b = 45.0
  __ Frecps(h6, h1, h2);  // a = -1.0, b = 45.0
  __ Frecps(h7, h0, h3);  // a = 2.0, b = +Inf
  __ Frecps(h8, h0, h4);  // a = 2.0, b = -Inf
  END();

  if (CAN_RUN()) {
    RUN();

    const Float16 expected_h5(-88.0);
    const Float16 expected_h6(47.0);
    ASSERT_EQUAL_FP16(expected_h5, h5);
    ASSERT_EQUAL_FP16(expected_h6, h6);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9883 
9884 
TEST(neon_frsqrts_h) {
  // Vector half-precision Frsqrts in the 8H and 4H arrangements. The expected
  // values below correspond to (3 - (a * b)) / 2 for operands a and b.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each constant is one FP16 bit pattern repeated across four H lanes.
  const uint64_t two_4h = 0x4000400040004000;         // 2.0
  const uint64_t neg_one_4h = 0xbc00bc00bc00bc00;     // -1.0
  const uint64_t forty_five_4h = 0x51a051a051a051a0;  // 45.0
  const uint64_t pos_inf_4h = 0x7c007c007c007c00;     // +Inf
  const uint64_t neg_inf_4h = 0xfc00fc00fc00fc00;     // -Inf
  const uint64_t neg_43_5_4h = 0xd170d170d170d170;    // -43.5
  const uint64_t pos_24_4h = 0x4e004e004e004e00;      // 24.0

  START();
  // Inputs: the same value in all eight lanes of each register.
  __ Movi(v0.V2D(), two_4h, two_4h);
  __ Movi(v1.V2D(), neg_one_4h, neg_one_4h);
  __ Movi(v2.V2D(), forty_five_4h, forty_five_4h);
  __ Movi(v3.V2D(), pos_inf_4h, pos_inf_4h);
  __ Movi(v4.V2D(), neg_inf_4h, neg_inf_4h);

  // Eight-lane steps.
  __ Frsqrts(v5.V8H(), v0.V8H(), v2.V8H());  // a = 2.0, b = 45.0
  __ Frsqrts(v6.V8H(), v1.V8H(), v2.V8H());  // a = -1.0, b = 45.0
  __ Frsqrts(v7.V8H(), v0.V8H(), v3.V8H());  // a = 2.0, b = +Inf
  __ Frsqrts(v8.V8H(), v0.V8H(), v4.V8H());  // a = 2.0, b = -Inf
  // The same steps using only the low four lanes.
  __ Frsqrts(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frsqrts(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frsqrts(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frsqrts(v12.V4H(), v0.V4H(), v4.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(neg_43_5_4h, neg_43_5_4h, v5);
    ASSERT_EQUAL_128(pos_24_4h, pos_24_4h, v6);
    ASSERT_EQUAL_128(neg_inf_4h, neg_inf_4h, v7);
    ASSERT_EQUAL_128(pos_inf_4h, pos_inf_4h, v8);
    // The 4H results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, neg_43_5_4h, v9);
    ASSERT_EQUAL_128(0, pos_24_4h, v10);
    ASSERT_EQUAL_128(0, neg_inf_4h, v11);
    ASSERT_EQUAL_128(0, pos_inf_4h, v12);
  }
}
9920 
9921 
TEST(neon_frsqrts_h_scalar) {
  // Scalar half-precision Frsqrts. The expected values below correspond to
  // (3 - (a * b)) / 2 for operands a and b.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  // Inputs in h0-h4.
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  // Results in h5-h8.
  __ Frsqrts(h5, h0, h2);  // a = 2.0, b = 45.0
  __ Frsqrts(h6, h1, h2);  // a = -1.0, b = 45.0
  __ Frsqrts(h7, h0, h3);  // a = 2.0, b = +Inf
  __ Frsqrts(h8, h0, h4);  // a = 2.0, b = -Inf
  END();

  if (CAN_RUN()) {
    RUN();

    const Float16 expected_h5(-43.5);
    const Float16 expected_h6(24.0);
    ASSERT_EQUAL_FP16(expected_h5, h5);
    ASSERT_EQUAL_FP16(expected_h6, h6);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  }
}
9950 
9951 
TEST(neon_faddp_h) {
  // Vector half-precision pairwise add (Faddp) in the 8H and 4H arrangements.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Inputs hold repeated (even lane, odd lane) FP16 pairs.
  const uint64_t pairs_2_1 = 0x3c0040003c004000;       // (2.0, 1.0)
  const uint64_t pairs_pinf_ninf = 0xfc007c00fc007c00;  // (+Inf, -Inf)
  const uint64_t pairs_nzero_pzero = 0x0000800000008000;  // (-0.0, +0.0)
  // (0x7c01, 0x7e00): two NaN encodings, the first with the top fraction bit
  // clear and the second with it set.
  const uint64_t pairs_nans = 0x7e007c017e007c01;

  START();
  __ Movi(v0.V2D(), pairs_2_1, pairs_2_1);
  __ Movi(v1.V2D(), pairs_pinf_ninf, pairs_pinf_ninf);
  __ Movi(v2.V2D(), pairs_nzero_pzero, pairs_nzero_pzero);
  __ Movi(v3.V2D(), pairs_nans, pairs_nans);

  // Eight-lane forms, then the same operands with four lanes.
  __ Faddp(v4.V8H(), v1.V8H(), v0.V8H());
  __ Faddp(v5.V8H(), v3.V8H(), v2.V8H());
  __ Faddp(v6.V4H(), v1.V4H(), v0.V4H());
  __ Faddp(v7.V4H(), v3.V4H(), v2.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low result lanes are expected to hold the pair sums of the first
    // source and the high lanes those of the second source:
    //   +Inf + -Inf -> 0x7e00 (NaN), 2.0 + 1.0 -> 0x4200 (3.0),
    //   0x7c01 + 0x7e00 -> 0x7e01 (NaN), -0.0 + +0.0 -> 0x0000 (+0.0).
    const uint64_t three_4h = 0x4200420042004200;
    const uint64_t nan_7e00_4h = 0x7e007e007e007e00;
    const uint64_t nan_7e01_4h = 0x7e017e017e017e01;
    ASSERT_EQUAL_128(three_4h, nan_7e00_4h, v4);
    ASSERT_EQUAL_128(0x0000000000000000, nan_7e01_4h, v5);
    ASSERT_EQUAL_128(0, 0x420042007e007e00, v6);
    ASSERT_EQUAL_128(0, 0x000000007e017e01, v7);
  }
}
9978 
9979 
TEST(neon_faddp_scalar) {
  // Scalar pairwise add (Faddp) reducing a 2S or 2D vector to one element.
  // Each destination overlaps the register that supplies its source pair.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Single-precision pairs, written as (lane 0, lane 1).
  const uint64_t s_pair_2_1 = 0x3f80000040000000;          // (2.0, 1.0)
  const uint64_t s_pair_pinf_ninf = 0xff8000007f800000;     // (+Inf, -Inf)
  const uint64_t s_pair_nzero_pzero = 0x0000000080000000;   // (-0.0, +0.0)

  START();
  __ Movi(d0, s_pair_2_1);
  __ Movi(d1, s_pair_pinf_ninf);
  __ Movi(d2, s_pair_nzero_pzero);
  __ Faddp(s0, v0.V2S());
  __ Faddp(s1, v1.V2S());
  __ Faddp(s2, v2.V2S());

  // Double-precision pairs; Movi takes the high lane first.
  // v3 = (2.0, -2.0), v4 = (0x7ff8..., 0xfff8...) NaNs, v5 = (-0.0, +0.0).
  __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Faddp(d3, v3.V2D());
  __ Faddp(d4, v4.V2D());
  __ Faddp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // Opposite infinities and NaN pairs are expected to give the default NaN;
    // opposite zeroes and opposite finite values are expected to give +0.0.
    ASSERT_EQUAL_FP32(3.0, s0);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
    ASSERT_EQUAL_FP32(0.0, s2);
    ASSERT_EQUAL_FP64(0.0, d3);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
    ASSERT_EQUAL_FP64(0.0, d5);
  }
}
10010 
10011 
TEST(neon_faddp_h_scalar) {
  // Scalar half-precision pairwise add (Faddp) reducing a 2H vector to one
  // element. Each destination overlaps the register supplying its pair.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 pairs, written as (lane 0, lane 1).
  const uint64_t pair_2_1 = 0x3c004000;          // (2.0, 1.0)
  const uint64_t pair_pinf_ninf = 0xfc007c00;     // (+Inf, -Inf)
  const uint64_t pair_nzero_pzero = 0x00008000;   // (-0.0, +0.0)

  START();
  __ Movi(s0, pair_2_1);
  __ Movi(s1, pair_pinf_ninf);
  __ Movi(s2, pair_nzero_pzero);
  __ Faddp(h0, v0.V2H());
  __ Faddp(h1, v1.V2H());
  __ Faddp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Opposite infinities are expected to give the default NaN, and opposite
    // zeroes +0.0.
    ASSERT_EQUAL_FP16(Float16(3.0), h0);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h1);
    ASSERT_EQUAL_FP16(Float16(0.0), h2);
  }
}
10034 
10035 
TEST(neon_fmaxp_scalar) {
  // Scalar pairwise maximum (Fmaxp) reducing a 2S or 2D vector to one
  // element. Each destination overlaps the register supplying its pair.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Single-precision pairs, written as (lane 0, lane 1).
  const uint64_t s_pair_2_1 = 0x3f80000040000000;        // (2.0, 1.0)
  const uint64_t s_pair_pinf_ninf = 0xff8000007f800000;   // (+Inf, -Inf)
  const uint64_t s_pair_ninf_nan = 0x7fc00000ff800000;    // (-Inf, NaN)

  START();
  __ Movi(d0, s_pair_2_1);
  __ Movi(d1, s_pair_pinf_ninf);
  __ Movi(d2, s_pair_ninf_nan);
  __ Fmaxp(s0, v0.V2S());
  __ Fmaxp(s1, v1.V2S());
  __ Fmaxp(s2, v2.V2S());

  // Double-precision pairs; Movi takes the high lane first.
  // v3 = (2.0, 1.0), v4 = (+Inf, -Inf), v5 = (NaN, +Inf).
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fmaxp(d3, v3.V2D());
  __ Fmaxp(d4, v4.V2D());
  __ Fmaxp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // A NaN in either lane is expected to produce the default NaN.
    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10066 
10067 
TEST(neon_fmaxp_h_scalar) {
  // Scalar half-precision pairwise maximum (Fmaxp) reducing a 2H vector to
  // one element. Each destination overlaps the register supplying its pair.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 pairs, written as (lane 0, lane 1).
  const uint64_t pair_2_1 = 0x3c004000;        // (2.0, 1.0)
  const uint64_t pair_pinf_ninf = 0xfc007c00;   // (+Inf, -Inf)
  const uint64_t pair_ninf_nan = 0x7e00fc00;    // (-Inf, NaN)

  START();
  __ Movi(s0, pair_2_1);
  __ Movi(s1, pair_pinf_ninf);
  __ Movi(s2, pair_ninf_nan);
  __ Fmaxp(h0, v0.V2H());
  __ Fmaxp(h1, v1.V2H());
  __ Fmaxp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // A NaN in either lane is expected to produce the default NaN.
    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10090 
10091 
TEST(neon_fmax_h) {
  // Vector half-precision Fmax in the 8H and 4H arrangements, covering
  // finite values, infinities and two NaN encodings.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each constant is one FP16 bit pattern repeated across four H lanes.
  const uint64_t one_4h = 0x3c003c003c003c00;       // 1.0
  const uint64_t two_4h = 0x4000400040004000;       // 2.0
  const uint64_t neg_inf_4h = 0xfc00fc00fc00fc00;   // -Inf
  const uint64_t pos_inf_4h = 0x7c007c007c007c00;   // +Inf
  const uint64_t nan_7e00_4h = 0x7e007e007e007e00;  // NaN, top fraction bit set
  const uint64_t nan_7c01_4h = 0x7c017c017c017c01;  // NaN, top fraction bit clear
  const uint64_t nan_7e01_4h = 0x7e017e017e017e01;  // 0x7c01 with that bit set

  START();
  // Inputs: the same value in all eight lanes of each register.
  __ Movi(v0.V2D(), one_4h, one_4h);
  __ Movi(v1.V2D(), two_4h, two_4h);
  __ Movi(v2.V2D(), neg_inf_4h, neg_inf_4h);
  __ Movi(v3.V2D(), pos_inf_4h, pos_inf_4h);
  __ Movi(v4.V2D(), nan_7e00_4h, nan_7e00_4h);
  __ Movi(v5.V2D(), nan_7c01_4h, nan_7c01_4h);

  // Eight-lane maxima.
  __ Fmax(v6.V8H(), v0.V8H(), v1.V8H());  // 1.0 vs 2.0
  __ Fmax(v7.V8H(), v2.V8H(), v3.V8H());  // -Inf vs +Inf
  __ Fmax(v8.V8H(), v4.V8H(), v0.V8H());  // NaN (0x7e00) vs 1.0
  __ Fmax(v9.V8H(), v5.V8H(), v1.V8H());  // NaN (0x7c01) vs 2.0
  // The same maxima using only the low four lanes.
  __ Fmax(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmax(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmax(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Both NaN inputs are expected to win over the numeric operand.
    ASSERT_EQUAL_128(two_4h, two_4h, v6);
    ASSERT_EQUAL_128(pos_inf_4h, pos_inf_4h, v7);
    ASSERT_EQUAL_128(nan_7e00_4h, nan_7e00_4h, v8);
    ASSERT_EQUAL_128(nan_7e01_4h, nan_7e01_4h, v9);
    // The 4H results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, two_4h, v10);
    ASSERT_EQUAL_128(0, pos_inf_4h, v11);
    ASSERT_EQUAL_128(0, nan_7e00_4h, v12);
    ASSERT_EQUAL_128(0, nan_7e01_4h, v13);
  }
}
10128 
10129 
TEST(neon_fmaxp_h) {
  // Vector half-precision pairwise maximum (Fmaxp) in the 8H and 4H
  // arrangements.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Inputs hold repeated (even lane, odd lane) FP16 pairs.
  const uint64_t pairs_2_1 = 0x3c0040003c004000;        // (2.0, 1.0)
  const uint64_t pairs_pinf_ninf = 0xfc007c00fc007c00;   // (+Inf, -Inf)
  const uint64_t pairs_1_nan = 0x7e003c007e003c00;       // (1.0, NaN 0x7e00)
  const uint64_t pairs_2_nan = 0x7c0140007c014000;       // (2.0, NaN 0x7c01)

  START();
  __ Movi(v0.V2D(), pairs_2_1, pairs_2_1);
  __ Movi(v1.V2D(), pairs_pinf_ninf, pairs_pinf_ninf);
  __ Movi(v2.V2D(), pairs_1_nan, pairs_1_nan);
  __ Movi(v3.V2D(), pairs_2_nan, pairs_2_nan);

  // Eight-lane forms, then the same operands with four lanes.
  __ Fmaxp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low result lanes are expected to hold the pair maxima of the first
    // source and the high lanes those of the second source. Pairs containing
    // a NaN are expected to give a NaN (0x7e00, or 0x7e01 for input 0x7c01).
    const uint64_t two_4h = 0x4000400040004000;
    const uint64_t pos_inf_4h = 0x7c007c007c007c00;
    const uint64_t nan_7e00_4h = 0x7e007e007e007e00;
    const uint64_t nan_7e01_4h = 0x7e017e017e017e01;
    ASSERT_EQUAL_128(pos_inf_4h, two_4h, v6);
    ASSERT_EQUAL_128(nan_7e01_4h, nan_7e00_4h, v7);
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10156 
10157 
TEST(neon_fmaxnm_h) {
  // Vector half-precision Fmaxnm in the 8H and 4H arrangements, covering
  // finite values, infinities and two NaN encodings.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each constant is one FP16 bit pattern repeated across four H lanes.
  const uint64_t one_4h = 0x3c003c003c003c00;       // 1.0
  const uint64_t two_4h = 0x4000400040004000;       // 2.0
  const uint64_t neg_inf_4h = 0xfc00fc00fc00fc00;   // -Inf
  const uint64_t pos_inf_4h = 0x7c007c007c007c00;   // +Inf
  const uint64_t nan_7e00_4h = 0x7e007e007e007e00;  // NaN, top fraction bit set
  const uint64_t nan_7c01_4h = 0x7c017c017c017c01;  // NaN, top fraction bit clear
  const uint64_t nan_7e01_4h = 0x7e017e017e017e01;  // 0x7c01 with that bit set

  START();
  // Inputs: the same value in all eight lanes of each register.
  __ Movi(v0.V2D(), one_4h, one_4h);
  __ Movi(v1.V2D(), two_4h, two_4h);
  __ Movi(v2.V2D(), neg_inf_4h, neg_inf_4h);
  __ Movi(v3.V2D(), pos_inf_4h, pos_inf_4h);
  __ Movi(v4.V2D(), nan_7e00_4h, nan_7e00_4h);
  __ Movi(v5.V2D(), nan_7c01_4h, nan_7c01_4h);

  // Eight-lane maxima.
  __ Fmaxnm(v6.V8H(), v0.V8H(), v1.V8H());  // 1.0 vs 2.0
  __ Fmaxnm(v7.V8H(), v2.V8H(), v3.V8H());  // -Inf vs +Inf
  __ Fmaxnm(v8.V8H(), v4.V8H(), v0.V8H());  // NaN (0x7e00) vs 1.0
  __ Fmaxnm(v9.V8H(), v5.V8H(), v1.V8H());  // NaN (0x7c01) vs 2.0
  // The same maxima using only the low four lanes.
  __ Fmaxnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmaxnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmaxnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The 0x7e00 NaN is expected to lose to the numeric operand (1.0), while
    // the 0x7c01 NaN is expected to produce 0x7e01.
    ASSERT_EQUAL_128(two_4h, two_4h, v6);
    ASSERT_EQUAL_128(pos_inf_4h, pos_inf_4h, v7);
    ASSERT_EQUAL_128(one_4h, one_4h, v8);
    ASSERT_EQUAL_128(nan_7e01_4h, nan_7e01_4h, v9);
    // The 4H results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, two_4h, v10);
    ASSERT_EQUAL_128(0, pos_inf_4h, v11);
    ASSERT_EQUAL_128(0, one_4h, v12);
    ASSERT_EQUAL_128(0, nan_7e01_4h, v13);
  }
}
10194 
10195 
TEST(neon_fmaxnmp_h) {
  // Vector half-precision pairwise Fmaxnmp in the 8H and 4H arrangements.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Inputs hold repeated (even lane, odd lane) FP16 pairs.
  const uint64_t pairs_2_1 = 0x3c0040003c004000;        // (2.0, 1.0)
  const uint64_t pairs_pinf_ninf = 0xfc007c00fc007c00;   // (+Inf, -Inf)
  const uint64_t pairs_1_nan = 0x7e003c007e003c00;       // (1.0, NaN 0x7e00)
  const uint64_t pairs_2_nan = 0x7c0140007c014000;       // (2.0, NaN 0x7c01)

  START();
  __ Movi(v0.V2D(), pairs_2_1, pairs_2_1);
  __ Movi(v1.V2D(), pairs_pinf_ninf, pairs_pinf_ninf);
  __ Movi(v2.V2D(), pairs_1_nan, pairs_1_nan);
  __ Movi(v3.V2D(), pairs_2_nan, pairs_2_nan);

  // Eight-lane forms, then the same operands with four lanes.
  __ Fmaxnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // The low result lanes are expected to hold the pair results of the first
    // source and the high lanes those of the second source. The (1.0, 0x7e00)
    // pair is expected to give 1.0; the (2.0, 0x7c01) pair to give 0x7e01.
    const uint64_t one_4h = 0x3c003c003c003c00;
    const uint64_t two_4h = 0x4000400040004000;
    const uint64_t pos_inf_4h = 0x7c007c007c007c00;
    const uint64_t nan_7e01_4h = 0x7e017e017e017e01;
    ASSERT_EQUAL_128(pos_inf_4h, two_4h, v6);
    ASSERT_EQUAL_128(nan_7e01_4h, one_4h, v7);
    ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10222 
10223 
TEST(neon_fmaxnmp_scalar) {
  // Scalar pairwise Fmaxnmp reducing a 2S or 2D vector to one element. Each
  // destination overlaps the register supplying its pair.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Single-precision pairs, written as (lane 0, lane 1).
  const uint64_t s_pair_2_1 = 0x3f80000040000000;        // (2.0, 1.0)
  const uint64_t s_pair_pinf_ninf = 0xff8000007f800000;   // (+Inf, -Inf)
  const uint64_t s_pair_ninf_nan = 0x7fc00000ff800000;    // (-Inf, NaN)

  START();
  __ Movi(d0, s_pair_2_1);
  __ Movi(d1, s_pair_pinf_ninf);
  __ Movi(d2, s_pair_ninf_nan);
  __ Fmaxnmp(s0, v0.V2S());
  __ Fmaxnmp(s1, v1.V2S());
  __ Fmaxnmp(s2, v2.V2S());

  // Double-precision pairs; Movi takes the high lane first.
  // v3 = (2.0, 1.0), v4 = (+Inf, -Inf), v5 = (-Inf, NaN).
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fmaxnmp(d3, v3.V2D());
  __ Fmaxnmp(d4, v4.V2D());
  __ Fmaxnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // With a NaN in one lane, the other lane (-Inf) is the expected result.
    ASSERT_EQUAL_FP32(2.0, s0);
    ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(2.0, d3);
    ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10254 
10255 
TEST(neon_fmaxnmp_h_scalar) {
  // Scalar half-precision pairwise Fmaxnmp reducing a 2H vector to one
  // element. Each destination overlaps the register supplying its pair.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 pairs, written as (lane 0, lane 1).
  const uint64_t pair_2_1 = 0x3c004000;        // (2.0, 1.0)
  const uint64_t pair_pinf_ninf = 0xfc007c00;   // (+Inf, -Inf)
  const uint64_t pair_ninf_nan = 0x7e00fc00;    // (-Inf, NaN)

  START();
  __ Movi(s0, pair_2_1);
  __ Movi(s1, pair_pinf_ninf);
  __ Movi(s2, pair_ninf_nan);
  __ Fmaxnmp(h0, v0.V2H());
  __ Fmaxnmp(h1, v1.V2H());
  __ Fmaxnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // With a NaN in one lane, the other lane (-Inf) is the expected result.
    ASSERT_EQUAL_FP16(Float16(2.0), h0);
    ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10278 
10279 
TEST(neon_fminp_scalar) {
  // Scalar pairwise minimum (Fminp) reducing a 2S or 2D vector to one
  // element. Each destination overlaps the register supplying its pair.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Single-precision pairs, written as (lane 0, lane 1).
  const uint64_t s_pair_2_1 = 0x3f80000040000000;        // (2.0, 1.0)
  const uint64_t s_pair_pinf_ninf = 0xff8000007f800000;   // (+Inf, -Inf)
  const uint64_t s_pair_ninf_nan = 0x7fc00000ff800000;    // (-Inf, NaN)

  START();
  __ Movi(d0, s_pair_2_1);
  __ Movi(d1, s_pair_pinf_ninf);
  __ Movi(d2, s_pair_ninf_nan);
  __ Fminp(s0, v0.V2S());
  __ Fminp(s1, v1.V2S());
  __ Fminp(s2, v2.V2S());

  // Double-precision pairs; Movi takes the high lane first.
  // v3 = (2.0, 1.0), v4 = (+Inf, -Inf), v5 = (NaN, +Inf).
  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fminp(d3, v3.V2D());
  __ Fminp(d4, v4.V2D());
  __ Fminp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // A NaN in either lane is expected to produce the default NaN.
    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
  }
}
10310 
10311 
TEST(neon_fminp_h_scalar) {
  // Scalar half-precision pairwise minimum (Fminp) reducing a 2H vector to
  // one element. Each destination overlaps the register supplying its pair.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // FP16 pairs, written as (lane 0, lane 1).
  const uint64_t pair_2_1 = 0x3c004000;        // (2.0, 1.0)
  const uint64_t pair_pinf_ninf = 0xfc007c00;   // (+Inf, -Inf)
  const uint64_t pair_ninf_nan = 0x7e00fc00;    // (-Inf, NaN)

  START();
  __ Movi(s0, pair_2_1);
  __ Movi(s1, pair_pinf_ninf);
  __ Movi(s2, pair_ninf_nan);
  __ Fminp(h0, v0.V2H());
  __ Fminp(h1, v1.V2H());
  __ Fminp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // A NaN in either lane is expected to produce the default NaN.
    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
  }
}
10334 
10335 
TEST(neon_fmin_h) {
  // Vector half-precision Fmin in the 8H and 4H arrangements, covering
  // finite values, infinities and two NaN encodings.
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each constant is one FP16 bit pattern repeated across four H lanes.
  const uint64_t one_4h = 0x3c003c003c003c00;       // 1.0
  const uint64_t two_4h = 0x4000400040004000;       // 2.0
  const uint64_t neg_inf_4h = 0xfc00fc00fc00fc00;   // -Inf
  const uint64_t pos_inf_4h = 0x7c007c007c007c00;   // +Inf
  const uint64_t nan_7e00_4h = 0x7e007e007e007e00;  // NaN, top fraction bit set
  const uint64_t nan_7c01_4h = 0x7c017c017c017c01;  // NaN, top fraction bit clear
  const uint64_t nan_7e01_4h = 0x7e017e017e017e01;  // 0x7c01 with that bit set

  START();
  // Inputs: the same value in all eight lanes of each register.
  __ Movi(v0.V2D(), one_4h, one_4h);
  __ Movi(v1.V2D(), two_4h, two_4h);
  __ Movi(v2.V2D(), neg_inf_4h, neg_inf_4h);
  __ Movi(v3.V2D(), pos_inf_4h, pos_inf_4h);
  __ Movi(v4.V2D(), nan_7e00_4h, nan_7e00_4h);
  __ Movi(v5.V2D(), nan_7c01_4h, nan_7c01_4h);

  // Eight-lane minima.
  __ Fmin(v6.V8H(), v0.V8H(), v1.V8H());  // 1.0 vs 2.0
  __ Fmin(v7.V8H(), v2.V8H(), v3.V8H());  // -Inf vs +Inf
  __ Fmin(v8.V8H(), v4.V8H(), v0.V8H());  // NaN (0x7e00) vs 1.0
  __ Fmin(v9.V8H(), v5.V8H(), v1.V8H());  // NaN (0x7c01) vs 2.0
  // The same minima using only the low four lanes.
  __ Fmin(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmin(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmin(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // Both NaN inputs are expected to win over the numeric operand.
    ASSERT_EQUAL_128(one_4h, one_4h, v6);
    ASSERT_EQUAL_128(neg_inf_4h, neg_inf_4h, v7);
    ASSERT_EQUAL_128(nan_7e00_4h, nan_7e00_4h, v8);
    ASSERT_EQUAL_128(nan_7e01_4h, nan_7e01_4h, v9);
    // The 4H results are expected to have a zero upper half.
    ASSERT_EQUAL_128(0, one_4h, v10);
    ASSERT_EQUAL_128(0, neg_inf_4h, v11);
    ASSERT_EQUAL_128(0, nan_7e00_4h, v12);
    ASSERT_EQUAL_128(0, nan_7e01_4h, v13);
  }
}
10372 
10373 
// Checks the vector half-precision pairwise minimum (Fminp) in its 8H and 4H
// arrangements.
TEST(neon_fminp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Source patterns: each 64-bit value repeats one pair of half-precision
  // bit patterns twice, so every adjacent lane pair in a register is the same.
  const uint64_t pairs_one_two = 0x3c0040003c004000;
  const uint64_t pairs_neginf_posinf = 0xfc007c00fc007c00;
  const uint64_t pairs_qnan_one = 0x7e003c007e003c00;
  const uint64_t pairs_snan_two = 0x7c0140007c014000;

  // Expected per-pair results, replicated across four 16-bit lanes.
  const uint64_t one_x4 = 0x3c003c003c003c00;
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;
  const uint64_t quiet_nan_x4 = 0x7e007e007e007e00;
  const uint64_t quieted_nan_x4 = 0x7e017e017e017e01;

  START();
  __ Movi(v0.V2D(), pairs_one_two, pairs_one_two);
  __ Movi(v1.V2D(), pairs_neginf_posinf, pairs_neginf_posinf);
  __ Movi(v2.V2D(), pairs_qnan_one, pairs_qnan_one);
  __ Movi(v3.V2D(), pairs_snan_two, pairs_snan_two);

  __ Fminp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H: the low half holds the results for the first source's pairs, the
    // high half those for the second source's pairs.
    ASSERT_EQUAL_128(neg_inf_x4, one_x4, v6);
    ASSERT_EQUAL_128(quieted_nan_x4, quiet_nan_x4, v7);
    // 4H: two results per source, packed into the low 64 bits.
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
  }
}
10400 
10401 
// Checks the vector half-precision Fminnm (8H and 4H arrangements) on
// ordinary values, infinities, a quiet NaN and a signalling NaN.
TEST(neon_fminnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Each constant is one half-precision bit pattern replicated across four
  // 16-bit lanes.
  const uint64_t one_x4 = 0x3c003c003c003c00;
  const uint64_t two_x4 = 0x4000400040004000;
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;
  const uint64_t pos_inf_x4 = 0x7c007c007c007c00;
  const uint64_t quiet_nan_x4 = 0x7e007e007e007e00;
  const uint64_t signalling_nan_x4 = 0x7c017c017c017c01;
  // The signalling NaN pattern with bit 9 (0x0200) additionally set per lane.
  const uint64_t quieted_nan_x4 = 0x7e017e017e017e01;

  START();
  __ Movi(v0.V2D(), one_x4, one_x4);
  __ Movi(v1.V2D(), two_x4, two_x4);
  __ Movi(v2.V2D(), neg_inf_x4, neg_inf_x4);
  __ Movi(v3.V2D(), pos_inf_x4, pos_inf_x4);
  __ Movi(v4.V2D(), quiet_nan_x4, quiet_nan_x4);
  __ Movi(v5.V2D(), signalling_nan_x4, signalling_nan_x4);

  // Full-width (8H) forms.
  __ Fminnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fminnm(v9.V8H(), v5.V8H(), v1.V8H());
  // Half-width (4H) forms on the same operands.
  __ Fminnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fminnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fminnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // With a quiet NaN operand the numeric operand (1.0) is expected; a
    // signalling NaN operand still yields the quieted NaN.
    ASSERT_EQUAL_128(one_x4, one_x4, v6);
    ASSERT_EQUAL_128(neg_inf_x4, neg_inf_x4, v7);
    ASSERT_EQUAL_128(one_x4, one_x4, v8);
    ASSERT_EQUAL_128(quieted_nan_x4, quieted_nan_x4, v9);

    // The 4H forms produce the same lanes with a zero upper half.
    ASSERT_EQUAL_128(0, one_x4, v10);
    ASSERT_EQUAL_128(0, neg_inf_x4, v11);
    ASSERT_EQUAL_128(0, one_x4, v12);
    ASSERT_EQUAL_128(0, quieted_nan_x4, v13);
  }
}
10438 
10439 
// Checks the vector half-precision pairwise Fminnmp in its 8H and 4H
// arrangements.
TEST(neon_fminnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Source patterns: each 64-bit value repeats one pair of half-precision
  // bit patterns twice, so every adjacent lane pair in a register is the same.
  const uint64_t pairs_one_two = 0x3c0040003c004000;
  const uint64_t pairs_neginf_posinf = 0xfc007c00fc007c00;
  const uint64_t pairs_qnan_one = 0x7e003c007e003c00;
  const uint64_t pairs_snan_two = 0x7c0140007c014000;

  // Expected per-pair results, replicated across four 16-bit lanes.
  const uint64_t one_x4 = 0x3c003c003c003c00;
  const uint64_t neg_inf_x4 = 0xfc00fc00fc00fc00;
  const uint64_t quieted_nan_x4 = 0x7e017e017e017e01;

  START();
  __ Movi(v0.V2D(), pairs_one_two, pairs_one_two);
  __ Movi(v1.V2D(), pairs_neginf_posinf, pairs_neginf_posinf);
  __ Movi(v2.V2D(), pairs_qnan_one, pairs_qnan_one);
  __ Movi(v3.V2D(), pairs_snan_two, pairs_snan_two);

  __ Fminnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

  if (CAN_RUN()) {
    RUN();

    // 8H: the low half holds the results for the first source's pairs, the
    // high half those for the second source's pairs. The (quiet NaN, 1.0)
    // pair is expected to give 1.0; the (signalling NaN, 2.0) pair the
    // quieted NaN.
    ASSERT_EQUAL_128(neg_inf_x4, one_x4, v6);
    ASSERT_EQUAL_128(quieted_nan_x4, one_x4, v7);
    // 4H: two results per source, packed into the low 64 bits.
    ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
    ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
  }
}
10466 
10467 
// Checks the scalar pairwise Fminnmp for single precision (two S lanes
// reduced into an S register) and double precision (two D lanes reduced into
// a D register).
TEST(neon_fminnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Single-precision pairs, packed as two 32-bit lanes.
  const uint64_t s_pair_one_two = 0x3f80000040000000;
  const uint64_t s_pair_neginf_posinf = 0xff8000007f800000;
  const uint64_t s_pair_qnan_neginf = 0x7fc00000ff800000;

  // Double-precision bit patterns, one per 64-bit lane.
  const uint64_t d_one = 0x3ff0000000000000;
  const uint64_t d_two = 0x4000000000000000;
  const uint64_t d_neg_inf = 0xfff0000000000000;
  const uint64_t d_pos_inf = 0x7ff0000000000000;
  const uint64_t d_quiet_nan = 0x7ff8000000000000;

  START();
  __ Movi(d0, s_pair_one_two);
  __ Movi(d1, s_pair_neginf_posinf);
  __ Movi(d2, s_pair_qnan_neginf);
  __ Fminnmp(s0, v0.V2S());
  __ Fminnmp(s1, v1.V2S());
  __ Fminnmp(s2, v2.V2S());

  __ Movi(v3.V2D(), d_one, d_two);
  __ Movi(v4.V2D(), d_neg_inf, d_pos_inf);
  __ Movi(v5.V2D(), d_quiet_nan, d_neg_inf);
  __ Fminnmp(d3, v3.V2D());
  __ Fminnmp(d4, v4.V2D());
  __ Fminnmp(d5, v5.V2D());
  END();

  if (CAN_RUN()) {
    RUN();

    // A quiet NaN paired with -infinity is expected to give -infinity.
    ASSERT_EQUAL_FP32(1.0, s0);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
    ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
    ASSERT_EQUAL_FP64(1.0, d3);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
    ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
  }
}
10498 
10499 
// Checks the scalar half-precision pairwise Fminnmp, which reduces the two H
// lanes of a 2H source into an H register.
TEST(neon_fminnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  // Half-precision pairs, packed as two 16-bit lanes.
  const uint32_t h_pair_one_two = 0x3c004000;
  const uint32_t h_pair_neginf_posinf = 0xfc007c00;
  const uint32_t h_pair_qnan_neginf = 0x7e00fc00;

  START();
  __ Movi(s0, h_pair_one_two);
  __ Movi(s1, h_pair_neginf_posinf);
  __ Movi(s2, h_pair_qnan_neginf);
  __ Fminnmp(h0, v0.V2H());
  __ Fminnmp(h1, v1.V2H());
  __ Fminnmp(h2, v2.V2H());
  END();

  if (CAN_RUN()) {
    RUN();

    // A quiet NaN paired with -infinity is expected to give -infinity.
    ASSERT_EQUAL_FP16(Float16(1.0), h0);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
    ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
  }
}
10522 
Float16ToV4H(Float16 f)10523 static uint64_t Float16ToV4H(Float16 f) {
10524   uint64_t bits = static_cast<uint64_t>(Float16ToRawbits(f));
10525   return (bits << 48) | (bits << 32) | (bits << 16) | bits;
10526 }
10527 
10528 
FminFmaxFloat16Helper(Float16 n,Float16 m,Float16 min,Float16 max,Float16 minnm,Float16 maxnm)10529 static void FminFmaxFloat16Helper(Float16 n,
10530                                   Float16 m,
10531                                   Float16 min,
10532                                   Float16 max,
10533                                   Float16 minnm,
10534                                   Float16 maxnm) {
10535   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
10536                       CPUFeatures::kFP,
10537                       CPUFeatures::kNEONHalf,
10538                       CPUFeatures::kFPHalf);
10539 
10540   START();
10541   __ Fmov(h0, n);
10542   __ Fmov(h1, m);
10543   __ Fmov(v0.V8H(), n);
10544   __ Fmov(v1.V8H(), m);
10545   __ Fmin(h28, h0, h1);
10546   __ Fmin(v2.V4H(), v0.V4H(), v1.V4H());
10547   __ Fmin(v3.V8H(), v0.V8H(), v1.V8H());
10548   __ Fmax(h29, h0, h1);
10549   __ Fmax(v4.V4H(), v0.V4H(), v1.V4H());
10550   __ Fmax(v5.V8H(), v0.V8H(), v1.V8H());
10551   __ Fminnm(h30, h0, h1);
10552   __ Fminnm(v6.V4H(), v0.V4H(), v1.V4H());
10553   __ Fminnm(v7.V8H(), v0.V8H(), v1.V8H());
10554   __ Fmaxnm(h31, h0, h1);
10555   __ Fmaxnm(v8.V4H(), v0.V4H(), v1.V4H());
10556   __ Fmaxnm(v9.V8H(), v0.V8H(), v1.V8H());
10557   END();
10558 
10559   uint64_t min_vec = Float16ToV4H(min);
10560   uint64_t max_vec = Float16ToV4H(max);
10561   uint64_t minnm_vec = Float16ToV4H(minnm);
10562   uint64_t maxnm_vec = Float16ToV4H(maxnm);
10563 
10564   if (CAN_RUN()) {
10565     RUN();
10566 
10567     ASSERT_EQUAL_FP16(min, h28);
10568     ASSERT_EQUAL_FP16(max, h29);
10569     ASSERT_EQUAL_FP16(minnm, h30);
10570     ASSERT_EQUAL_FP16(maxnm, h31);
10571 
10572 
10573     ASSERT_EQUAL_128(0, min_vec, v2);
10574     ASSERT_EQUAL_128(min_vec, min_vec, v3);
10575     ASSERT_EQUAL_128(0, max_vec, v4);
10576     ASSERT_EQUAL_128(max_vec, max_vec, v5);
10577     ASSERT_EQUAL_128(0, minnm_vec, v6);
10578     ASSERT_EQUAL_128(minnm_vec, minnm_vec, v7);
10579     ASSERT_EQUAL_128(0, maxnm_vec, v8);
10580     ASSERT_EQUAL_128(maxnm_vec, maxnm_vec, v9);
10581   }
10582 }
10583 
MinMaxHelper(Float16 n,Float16 m,bool min,Float16 quiet_nan_substitute=Float16 (0.0))10584 static Float16 MinMaxHelper(Float16 n,
10585                             Float16 m,
10586                             bool min,
10587                             Float16 quiet_nan_substitute = Float16(0.0)) {
10588   const uint64_t kFP16QuietNaNMask = 0x0200;
10589   uint16_t raw_n = Float16ToRawbits(n);
10590   uint16_t raw_m = Float16ToRawbits(m);
10591 
10592   if (IsSignallingNaN(n)) {
10593     // n is signalling NaN.
10594     return RawbitsToFloat16(raw_n | kFP16QuietNaNMask);
10595   } else if (IsSignallingNaN(m)) {
10596     // m is signalling NaN.
10597     return RawbitsToFloat16(raw_m | kFP16QuietNaNMask);
10598   } else if (IsZero(quiet_nan_substitute)) {
10599     if (IsNaN(n)) {
10600       // n is quiet NaN.
10601       return n;
10602     } else if (IsNaN(m)) {
10603       // m is quiet NaN.
10604       return m;
10605     }
10606   } else {
10607     // Substitute n or m if one is quiet, but not both.
10608     if (IsNaN(n) && !IsNaN(m)) {
10609       // n is quiet NaN: replace with substitute.
10610       n = quiet_nan_substitute;
10611     } else if (!IsNaN(n) && IsNaN(m)) {
10612       // m is quiet NaN: replace with substitute.
10613       m = quiet_nan_substitute;
10614     }
10615   }
10616 
10617   uint16_t sign_mask = 0x8000;
10618   if (IsZero(n) && IsZero(m) && ((raw_n & sign_mask) != (raw_m & sign_mask))) {
10619     return min ? Float16(-0.0) : Float16(0.0);
10620   }
10621 
10622   if (FPToDouble(n, kIgnoreDefaultNaN) < FPToDouble(m, kIgnoreDefaultNaN)) {
10623     return min ? n : m;
10624   }
10625   return min ? m : n;
10626 }
10627 
TEST(fmax_fmin_h)10628 TEST(fmax_fmin_h) {
10629   // Use non-standard NaNs to check that the payload bits are preserved.
10630   Float16 snan = RawbitsToFloat16(0x7c12);
10631   Float16 qnan = RawbitsToFloat16(0x7e34);
10632 
10633   Float16 snan_processed = RawbitsToFloat16(0x7e12);
10634   Float16 qnan_processed = qnan;
10635 
10636   VIXL_ASSERT(IsSignallingNaN(snan));
10637   VIXL_ASSERT(IsQuietNaN(qnan));
10638   VIXL_ASSERT(IsQuietNaN(snan_processed));
10639   VIXL_ASSERT(IsQuietNaN(qnan_processed));
10640 
10641   // Bootstrap tests.
10642   FminFmaxFloat16Helper(Float16(0),
10643                         Float16(0),
10644                         Float16(0),
10645                         Float16(0),
10646                         Float16(0),
10647                         Float16(0));
10648   FminFmaxFloat16Helper(Float16(0),
10649                         Float16(1),
10650                         Float16(0),
10651                         Float16(1),
10652                         Float16(0),
10653                         Float16(1));
10654   FminFmaxFloat16Helper(kFP16PositiveInfinity,
10655                         kFP16NegativeInfinity,
10656                         kFP16NegativeInfinity,
10657                         kFP16PositiveInfinity,
10658                         kFP16NegativeInfinity,
10659                         kFP16PositiveInfinity);
10660   FminFmaxFloat16Helper(snan,
10661                         Float16(0),
10662                         snan_processed,
10663                         snan_processed,
10664                         snan_processed,
10665                         snan_processed);
10666   FminFmaxFloat16Helper(Float16(0),
10667                         snan,
10668                         snan_processed,
10669                         snan_processed,
10670                         snan_processed,
10671                         snan_processed);
10672   FminFmaxFloat16Helper(qnan,
10673                         Float16(0),
10674                         qnan_processed,
10675                         qnan_processed,
10676                         Float16(0),
10677                         Float16(0));
10678   FminFmaxFloat16Helper(Float16(0),
10679                         qnan,
10680                         qnan_processed,
10681                         qnan_processed,
10682                         Float16(0),
10683                         Float16(0));
10684   FminFmaxFloat16Helper(qnan,
10685                         snan,
10686                         snan_processed,
10687                         snan_processed,
10688                         snan_processed,
10689                         snan_processed);
10690   FminFmaxFloat16Helper(snan,
10691                         qnan,
10692                         snan_processed,
10693                         snan_processed,
10694                         snan_processed,
10695                         snan_processed);
10696 
10697   // Iterate over all combinations of inputs.
10698   Float16 inputs[] = {RawbitsToFloat16(0x7bff),
10699                       RawbitsToFloat16(0x0400),
10700                       Float16(1.0),
10701                       Float16(0.0),
10702                       RawbitsToFloat16(0xfbff),
10703                       RawbitsToFloat16(0x8400),
10704                       Float16(-1.0),
10705                       Float16(-0.0),
10706                       kFP16PositiveInfinity,
10707                       kFP16NegativeInfinity,
10708                       kFP16QuietNaN,
10709                       kFP16SignallingNaN};
10710 
10711   const int count = sizeof(inputs) / sizeof(inputs[0]);
10712 
10713   for (int in = 0; in < count; in++) {
10714     Float16 n = inputs[in];
10715     for (int im = 0; im < count; im++) {
10716       Float16 m = inputs[im];
10717       FminFmaxFloat16Helper(n,
10718                             m,
10719                             MinMaxHelper(n, m, true),
10720                             MinMaxHelper(n, m, false),
10721                             MinMaxHelper(n, m, true, kFP16PositiveInfinity),
10722                             MinMaxHelper(n, m, false, kFP16NegativeInfinity));
10723     }
10724   }
10725 }
10726 
// Checks the vector Frint32x/Frint64x/Frint32z/Frint64z instructions on
// in-range fractional values, small values of either sign, and infinities.
TEST(neon_frint_saturating) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kFrintToFixedSizedInt);

  // Expected results, named after the input register they derive from.
  // v0 (2S, low 64 bits only): both lanes become 0x40000000 for the x forms
  // and 0x3f800000 for the z forms.
  const uint64_t v0_frintx = 0x4000000040000000;
  const uint64_t v0_frintz = 0x3f8000003f800000;
  // v1 (4S): the two small inputs in the high half give zeroes of matching
  // sign; the infinities in the low half give 0xcf000000 (32-bit forms) or
  // 0xdf000000 (64-bit forms) in both lanes.
  const uint64_t v1_small = 0x0000000080000000;
  const uint64_t v1_inf_to_32 = 0xcf000000cf000000;
  const uint64_t v1_inf_to_64 = 0xdf000000df000000;
  // v2 (2D): both infinities give the same double-precision pattern.
  const uint64_t v2_inf_to_32 = 0xc1e0000000000000;
  const uint64_t v2_inf_to_64 = 0xc3e0000000000000;

  START();

  __ Movi(v0.V2D(), 0x3f8000003f8ccccd, 0x3fc000003ff33333);
  __ Movi(v1.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v2.V2D(), 0xfff0000000000000, 0x7ff0000000000000);

  __ Frint32x(v16.V2S(), v0.V2S());
  __ Frint32x(v17.V4S(), v1.V4S());
  __ Frint32x(v18.V2D(), v2.V2D());

  __ Frint64x(v19.V2S(), v0.V2S());
  __ Frint64x(v20.V4S(), v1.V4S());
  __ Frint64x(v21.V2D(), v2.V2D());

  __ Frint32z(v22.V2S(), v0.V2S());
  __ Frint32z(v23.V4S(), v1.V4S());
  __ Frint32z(v24.V2D(), v2.V2D());

  __ Frint64z(v25.V2S(), v0.V2S());
  __ Frint64z(v26.V4S(), v1.V4S());
  __ Frint64z(v27.V2D(), v2.V2D());

  END();

  if (CAN_RUN()) {
    RUN();

    // Frint32x.
    ASSERT_EQUAL_128(0x0000000000000000, v0_frintx, q16);
    ASSERT_EQUAL_128(v1_small, v1_inf_to_32, q17);
    ASSERT_EQUAL_128(v2_inf_to_32, v2_inf_to_32, q18);
    // Frint64x.
    ASSERT_EQUAL_128(0x0000000000000000, v0_frintx, q19);
    ASSERT_EQUAL_128(v1_small, v1_inf_to_64, q20);
    ASSERT_EQUAL_128(v2_inf_to_64, v2_inf_to_64, q21);
    // Frint32z.
    ASSERT_EQUAL_128(0x0000000000000000, v0_frintz, q22);
    ASSERT_EQUAL_128(v1_small, v1_inf_to_32, q23);
    ASSERT_EQUAL_128(v2_inf_to_32, v2_inf_to_32, q24);
    // Frint64z.
    ASSERT_EQUAL_128(0x0000000000000000, v0_frintz, q25);
    ASSERT_EQUAL_128(v1_small, v1_inf_to_64, q26);
    ASSERT_EQUAL_128(v2_inf_to_64, v2_inf_to_64, q27);
  }
}
10769 
10770 
TEST(neon_tbl)10771 TEST(neon_tbl) {
10772   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
10773 
10774   START();
10775   __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
10776   __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
10777   __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
10778   __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);
10779 
10780   __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
10781   __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
10782   __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
10783   __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);
10784 
10785   __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
10786   __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
10787   __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
10788   __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
10789   __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
10790   __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
10791   __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
10792   __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);
10793 
10794   __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
10795   __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
10796   __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
10797   __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
10798   __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
10799   __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
10800   __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
10801   __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
10802 
10803   __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
10804   __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
10805   __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
10806   __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
10807   __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
10808   __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
10809   __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
10810   __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);
10811 
10812   __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
10813   __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
10814   __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
10815   __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
10816   __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
10817   __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
10818   __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
10819   __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
10820   END();
10821 
10822   if (CAN_RUN()) {
10823     RUN();
10824 
10825     ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
10826     ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
10827     ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
10828     ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
10829     ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
10830     ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
10831     ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
10832     ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);
10833 
10834     ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
10835     ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
10836     ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
10837     ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
10838     ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
10839     ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
10840     ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
10841     ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);
10842   }
10843 }
10844 
// Checks the vector Usdot instruction in two ways: by comparing it with Udot
// and Sdot on inputs where the results should agree, and with operands chosen
// so that the correctly-signed dot product is zero.
TEST(neon_usdot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kDotProduct,
                      CPUFeatures::kI8MM);

  // Cmeq sets every bit of a lane whose operands compare equal.
  const uint64_t all_lanes_equal = ~static_cast<uint64_t>(0);

  START();
  __ Movi(v0.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);
  __ Movi(v1.V2D(), 0x7f7f7f7f7f7f7f7f, 0x7f7f7f7f7f7f7f7f);
  __ Movi(v2.V2D(), 0x8080808080808080, 0x8080808080808080);

  // Zero the accumulators q3-q11.
  __ Movi(v3.V2D(), 0, 0);
  __ Mov(q4, q3);
  __ Mov(q5, q3);
  __ Mov(q6, q3);
  __ Mov(q7, q3);
  __ Mov(q8, q3);
  __ Mov(q9, q3);
  __ Mov(q10, q3);
  __ Mov(q11, q3);

  // Test Usdot against Udot/Sdot over the range of inputs where they should be
  // equal. Each Cmeq overwrites the Usdot result with the comparison mask.

  // Usdot versus Udot, with 0x7f bytes as the second operand.
  __ Usdot(v3.V2S(), v0.V8B(), v1.V8B());
  __ Udot(v4.V2S(), v0.V8B(), v1.V8B());
  __ Cmeq(v3.V4S(), v3.V4S(), v4.V4S());
  __ Usdot(v5.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v6.V4S(), v0.V16B(), v1.V16B());
  __ Cmeq(v5.V4S(), v5.V4S(), v6.V4S());

  // Usdot versus Sdot, with 0x7f bytes as the first operand.
  __ Usdot(v7.V2S(), v1.V8B(), v2.V8B());
  __ Sdot(v8.V2S(), v1.V8B(), v2.V8B());
  __ Cmeq(v7.V4S(), v7.V4S(), v8.V4S());
  __ Usdot(v9.V4S(), v1.V16B(), v2.V16B());
  __ Sdot(v10.V4S(), v1.V16B(), v2.V16B());
  __ Cmeq(v9.V4S(), v9.V4S(), v10.V4S());

  // Construct values which, when interpreted correctly as signed/unsigned,
  // should give a zero result for dot product.
  __ Mov(w0, 0x8101ff40);  // [-127, 1, -1, 64] as signed bytes.
  __ Mov(w1, 0x02fe8002);  // [2, 254, 128, 2] as unsigned bytes.
  __ Dup(v0.V4S(), w0);
  __ Dup(v1.V4S(), w1);
  __ Usdot(v11.V4S(), v1.V16B(), v0.V16B());

  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(all_lanes_equal, all_lanes_equal, q3);
    ASSERT_EQUAL_128(all_lanes_equal, all_lanes_equal, q5);
    ASSERT_EQUAL_128(all_lanes_equal, all_lanes_equal, q7);
    ASSERT_EQUAL_128(all_lanes_equal, all_lanes_equal, q9);
    ASSERT_EQUAL_128(0, 0, q11);
  }
}
10900 
// Checks the by-element forms of Usdot and Sudot by comparing each with the
// vector form of Usdot applied to a register holding the selected element in
// every S lane.
TEST(neon_usdot_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kI8MM);

  // Cmeq sets every bit of a lane whose operands compare equal.
  const uint64_t all_lanes_equal = ~static_cast<uint64_t>(0);

  START();
  __ Movi(v0.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v1.V2D(), 0x4242424242424242, 0x5555aaaaaaaa5555);

  // Test element Usdot against vector variant.
  // v2, v3 and v4 hold S elements 0, 1 and 3 of v1 in every lane.
  __ Dup(v2.V4S(), v1.V4S(), 0);
  __ Dup(v3.V4S(), v1.V4S(), 1);
  __ Dup(v4.V4S(), v1.V4S(), 3);

  // Every accumulator starts as a copy of q1, so each pair being compared
  // begins from the same value.

  // 2S destination, element 0.
  __ Mov(q10, q1);
  __ Usdot(v10.V2S(), v0.V8B(), v2.V8B());
  __ Mov(q11, q1);
  __ Usdot(v11.V2S(), v0.V8B(), v1.S4B(), 0);
  __ Cmeq(v11.V4S(), v11.V4S(), v10.V4S());

  // 4S destination, element 1.
  __ Mov(q12, q1);
  __ Usdot(v12.V4S(), v0.V16B(), v3.V16B());
  __ Mov(q13, q1);
  __ Usdot(v13.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Cmeq(v13.V4S(), v13.V4S(), v12.V4S());

  // Sudot by element 3, compared against the vector Usdot with its source
  // operands in the opposite order.
  __ Mov(q14, q1);
  __ Usdot(v14.V4S(), v4.V16B(), v0.V16B());
  __ Mov(q15, q1);
  __ Sudot(v15.V4S(), v0.V16B(), v1.S4B(), 3);
  __ Cmeq(v15.V4S(), v15.V4S(), v14.V4S());
  END();

  if (CAN_RUN()) {
    RUN();

    ASSERT_EQUAL_128(all_lanes_equal, all_lanes_equal, q11);
    ASSERT_EQUAL_128(all_lanes_equal, all_lanes_equal, q13);
    ASSERT_EQUAL_128(all_lanes_equal, all_lanes_equal, q15);
  }
}
10940 
TEST(zero_high_b)10941 TEST(zero_high_b) {
10942   SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kRDM);
10943   START();
10944 
10945   __ Mov(x0, 0x55aa42ffaa42ff55);
10946   __ Mov(x1, 4);
10947   __ Movi(q30.V16B(), 0);
10948 
10949   // Iterate over the SISD instructions using different input values on each
10950   // loop.
10951   Label loop;
10952   __ Bind(&loop);
10953 
10954   __ Dup(q0.V16B(), w0);
10955   __ Ror(x0, x0, 8);
10956   __ Dup(q1.V16B(), w0);
10957   __ Ror(x0, x0, 8);
10958   __ Dup(q2.V16B(), w0);
10959   __ Ror(x0, x0, 8);
10960 
10961   {
10962     ExactAssemblyScope scope(&masm, 81 * kInstructionSize);
10963     __ movi(q9.V16B(), 0x55);
10964     __ dci(0x5e010409);  // mov b9, v0.b[0]
10965     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
10966 
10967     __ movi(q9.V16B(), 0x55);
10968     __ dci(0x5e207809);  // sqabs b9, b0
10969     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
10970 
10971     __ movi(q9.V16B(), 0x55);
10972     __ dci(0x5e200c29);  // sqadd b9, b1, b0
10973     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
10974 
10975     __ movi(q9.V16B(), 0x55);
10976     __ dci(0x7e207809);  // sqneg b9, b0
10977     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
10978 
10979     __ movi(q9.V16B(), 0x55);
10980     __ dci(0x7e008429);  // sqrdmlah b9, b1, b0
10981     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
10982 
10983     __ movi(q9.V16B(), 0x55);
10984     __ dci(0x7e008c29);  // sqrdmlsh b9, b1, b0
10985     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
10986 
10987     __ movi(q9.V16B(), 0x55);
10988     __ dci(0x5e205c29);  // sqrshl b9, b1, b0
10989     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
10990 
10991     __ movi(q9.V16B(), 0x55);
10992     __ dci(0x5f089c09);  // sqrshrn b9, h0, #8
10993     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
10994 
10995     __ movi(q9.V16B(), 0x55);
10996     __ dci(0x7f088c09);  // sqrshrun b9, h0, #8
10997     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
10998 
10999     __ movi(q9.V16B(), 0x55);
11000     __ dci(0x5e204c29);  // sqshl b9, b1, b0
11001     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11002 
11003     __ movi(q9.V16B(), 0x55);
11004     __ dci(0x5f087409);  // sqshl b9, b0, #0
11005     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11006 
11007     __ movi(q9.V16B(), 0x55);
11008     __ dci(0x7f086409);  // sqshlu b9, b0, #0
11009     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11010 
11011     __ movi(q9.V16B(), 0x55);
11012     __ dci(0x5f089409);  // sqshrn b9, h0, #8
11013     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11014 
11015     __ movi(q9.V16B(), 0x55);
11016     __ dci(0x7f088409);  // sqshrun b9, h0, #8
11017     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11018 
11019     __ movi(q9.V16B(), 0x55);
11020     __ dci(0x5e202c29);  // sqsub b9, b1, b0
11021     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11022 
11023     __ movi(q9.V16B(), 0x55);
11024     __ dci(0x5e214809);  // sqxtn b9, h0
11025     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11026 
11027     __ movi(q9.V16B(), 0x55);
11028     __ dci(0x7e212809);  // sqxtun b9, h0
11029     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11030 
11031     __ movi(q9.V16B(), 0x55);
11032     __ dci(0x5e203809);  // suqadd b9, b0
11033     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11034 
11035     __ movi(q9.V16B(), 0x55);
11036     __ dci(0x7e200c29);  // uqadd b9, b1, b0
11037     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11038 
11039     __ movi(q9.V16B(), 0x55);
11040     __ dci(0x7e205c29);  // uqrshl b9, b1, b0
11041     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11042 
11043     __ movi(q9.V16B(), 0x55);
11044     __ dci(0x7f089c09);  // uqrshrn b9, h0, #8
11045     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11046 
11047     __ movi(q9.V16B(), 0x55);
11048     __ dci(0x7e204c29);  // uqshl b9, b1, b0
11049     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11050 
11051     __ movi(q9.V16B(), 0x55);
11052     __ dci(0x7f087409);  // uqshl b9, b0, #0
11053     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11054 
11055     __ movi(q9.V16B(), 0x55);
11056     __ dci(0x7f089409);  // uqshrn b9, h0, #8
11057     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11058 
11059     __ movi(q9.V16B(), 0x55);
11060     __ dci(0x7e202c29);  // uqsub b9, b1, b0
11061     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11062 
11063     __ movi(q9.V16B(), 0x55);
11064     __ dci(0x7e214809);  // uqxtn b9, h0
11065     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11066 
11067     __ movi(q9.V16B(), 0x55);
11068     __ dci(0x7e203809);  // usqadd b9, b0
11069     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11070   }
11071   __ Sub(x1, x1, 1);
11072   __ Cbnz(x1, &loop);
11073 
11074   __ Ins(q30.V16B(), 0, wzr);
11075 
11076   END();
11077   if (CAN_RUN()) {
11078     RUN();
11079     ASSERT_EQUAL_128(0, 0, q30);
11080   }
11081 }
11082 
TEST(zero_high_h)11083 TEST(zero_high_h) {
11084   SETUP_WITH_FEATURES(CPUFeatures::kSVE,
11085                       CPUFeatures::kNEON,
11086                       CPUFeatures::kFP,
11087                       CPUFeatures::kNEONHalf,
11088                       CPUFeatures::kRDM);
11089   START();
11090 
11091   __ Mov(x0, 0x55aa42ffaa42ff55);
11092   __ Mov(x1, 4);
11093   __ Movi(q30.V16B(), 0);
11094 
11095   // Iterate over the SISD instructions using different input values on each
11096   // loop.
11097   Label loop;
11098   __ Bind(&loop);
11099 
11100   __ Dup(q0.V8H(), w0);
11101   __ Ror(x0, x0, 8);
11102   __ Dup(q1.V8H(), w0);
11103   __ Ror(x0, x0, 8);
11104   __ Dup(q2.V8H(), w0);
11105   __ Ror(x0, x0, 8);
11106 
11107   {
11108     ExactAssemblyScope scope(&masm, 225 * kInstructionSize);
11109     __ movi(q9.V16B(), 0x55);
11110     __ dci(0x5e020409);  // mov h9, v0.h[0]
11111     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11112 
11113     __ movi(q9.V16B(), 0x55);
11114     __ dci(0x7ec01429);  // fabd h9, h1, h0
11115     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11116 
11117     __ movi(q9.V16B(), 0x55);
11118     __ dci(0x7e402c29);  // facge h9, h1, h0
11119     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11120 
11121     __ movi(q9.V16B(), 0x55);
11122     __ dci(0x7ec02c29);  // facgt h9, h1, h0
11123     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11124 
11125     __ movi(q9.V16B(), 0x55);
11126     __ dci(0x5e30d809);  // faddp h9, v0.2h
11127     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11128 
11129     __ movi(q9.V16B(), 0x55);
11130     __ dci(0x5ef8d809);  // fcmeq h9, h0, #0.0
11131     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11132 
11133     __ movi(q9.V16B(), 0x55);
11134     __ dci(0x5e402429);  // fcmeq h9, h1, h0
11135     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11136 
11137     __ movi(q9.V16B(), 0x55);
11138     __ dci(0x7ef8c809);  // fcmge h9, h0, #0.0
11139     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11140 
11141     __ movi(q9.V16B(), 0x55);
11142     __ dci(0x7e402429);  // fcmge h9, h1, h0
11143     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11144 
11145     __ movi(q9.V16B(), 0x55);
11146     __ dci(0x5ef8c809);  // fcmgt h9, h0, #0.0
11147     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11148 
11149     __ movi(q9.V16B(), 0x55);
11150     __ dci(0x7ec02429);  // fcmgt h9, h1, h0
11151     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11152 
11153     __ movi(q9.V16B(), 0x55);
11154     __ dci(0x7ef8d809);  // fcmle h9, h0, #0.0
11155     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11156 
11157     __ movi(q9.V16B(), 0x55);
11158     __ dci(0x5ef8e809);  // fcmlt h9, h0, #0.0
11159     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11160 
11161     __ movi(q9.V16B(), 0x55);
11162     __ dci(0x5e79c809);  // fcvtas h9, h0
11163     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11164 
11165     __ movi(q9.V16B(), 0x55);
11166     __ dci(0x7e79c809);  // fcvtau h9, h0
11167     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11168 
11169     __ movi(q9.V16B(), 0x55);
11170     __ dci(0x5e79b809);  // fcvtms h9, h0
11171     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11172 
11173     __ movi(q9.V16B(), 0x55);
11174     __ dci(0x7e79b809);  // fcvtmu h9, h0
11175     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11176 
11177     __ movi(q9.V16B(), 0x55);
11178     __ dci(0x5e79a809);  // fcvtns h9, h0
11179     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11180 
11181     __ movi(q9.V16B(), 0x55);
11182     __ dci(0x7e79a809);  // fcvtnu h9, h0
11183     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11184 
11185     __ movi(q9.V16B(), 0x55);
11186     __ dci(0x5ef9a809);  // fcvtps h9, h0
11187     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11188 
11189     __ movi(q9.V16B(), 0x55);
11190     __ dci(0x7ef9a809);  // fcvtpu h9, h0
11191     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11192 
11193     __ movi(q9.V16B(), 0x55);
11194     __ dci(0x5ef9b809);  // fcvtzs h9, h0
11195     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11196 
11197     __ movi(q9.V16B(), 0x55);
11198     __ dci(0x5f10fc09);  // fcvtzs h9, h0, #16
11199     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11200 
11201     __ movi(q9.V16B(), 0x55);
11202     __ dci(0x7ef9b809);  // fcvtzu h9, h0
11203     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11204 
11205     __ movi(q9.V16B(), 0x55);
11206     __ dci(0x7f10fc09);  // fcvtzu h9, h0, #16
11207     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11208 
11209     __ movi(q9.V16B(), 0x55);
11210     __ dci(0x5e30c809);  // fmaxnmp h9, v0.2h
11211     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11212 
11213     __ movi(q9.V16B(), 0x55);
11214     __ dci(0x5e30f809);  // fmaxp h9, v0.2h
11215     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11216 
11217     __ movi(q9.V16B(), 0x55);
11218     __ dci(0x5eb0c809);  // fminnmp h9, v0.2h
11219     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11220 
11221     __ movi(q9.V16B(), 0x55);
11222     __ dci(0x5eb0f809);  // fminp h9, v0.2h
11223     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11224 
11225     __ movi(q9.V16B(), 0x55);
11226     __ dci(0x5f001029);  // fmla h9, h1, v0.h[0]
11227     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11228 
11229     __ movi(q9.V16B(), 0x55);
11230     __ dci(0x5f005029);  // fmls h9, h1, v0.h[0]
11231     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11232 
11233     __ movi(q9.V16B(), 0x55);
11234     __ dci(0x5f009029);  // fmul h9, h1, v0.h[0]
11235     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11236 
11237     __ movi(q9.V16B(), 0x55);
11238     __ dci(0x7f009029);  // fmulx h9, h1, v0.h[0]
11239     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11240 
11241     __ movi(q9.V16B(), 0x55);
11242     __ dci(0x5e401c29);  // fmulx h9, h1, h0
11243     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11244 
11245     __ movi(q9.V16B(), 0x55);
11246     __ dci(0x5ef9d809);  // frecpe h9, h0
11247     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11248 
11249     __ movi(q9.V16B(), 0x55);
11250     __ dci(0x5e403c29);  // frecps h9, h1, h0
11251     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11252 
11253     __ movi(q9.V16B(), 0x55);
11254     __ dci(0x5ef9f809);  // frecpx h9, h0
11255     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11256 
11257     __ movi(q9.V16B(), 0x55);
11258     __ dci(0x7ef9d809);  // frsqrte h9, h0
11259     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11260 
11261     __ movi(q9.V16B(), 0x55);
11262     __ dci(0x5ec03c29);  // frsqrts h9, h1, h0
11263     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11264 
11265     __ movi(q9.V16B(), 0x55);
11266     __ dci(0x5e79d809);  // scvtf h9, h0
11267     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11268 
11269     __ movi(q9.V16B(), 0x55);
11270     __ dci(0x5f10e409);  // scvtf h9, h0, #16
11271     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11272 
11273     __ movi(q9.V16B(), 0x55);
11274     __ dci(0x5e607809);  // sqabs h9, h0
11275     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11276 
11277     __ movi(q9.V16B(), 0x55);
11278     __ dci(0x5e600c29);  // sqadd h9, h1, h0
11279     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11280 
11281     __ movi(q9.V16B(), 0x55);
11282     __ dci(0x5f40c029);  // sqdmulh h9, h1, v0.h[0]
11283     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11284 
11285     __ movi(q9.V16B(), 0x55);
11286     __ dci(0x5e60b429);  // sqdmulh h9, h1, h0
11287     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11288 
11289     __ movi(q9.V16B(), 0x55);
11290     __ dci(0x7e607809);  // sqneg h9, h0
11291     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11292 
11293     __ movi(q9.V16B(), 0x55);
11294     __ dci(0x7f40d029);  // sqrdmlah h9, h1, v0.h[0]
11295     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11296 
11297     __ movi(q9.V16B(), 0x55);
11298     __ dci(0x7e408429);  // sqrdmlah h9, h1, h0
11299     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11300 
11301     __ movi(q9.V16B(), 0x55);
11302     __ dci(0x7f40f029);  // sqrdmlsh h9, h1, v0.h[0]
11303     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11304 
11305     __ movi(q9.V16B(), 0x55);
11306     __ dci(0x7e408c29);  // sqrdmlsh h9, h1, h0
11307     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11308 
11309     __ movi(q9.V16B(), 0x55);
11310     __ dci(0x5f40d029);  // sqrdmulh h9, h1, v0.h[0]
11311     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11312 
11313     __ movi(q9.V16B(), 0x55);
11314     __ dci(0x7e60b429);  // sqrdmulh h9, h1, h0
11315     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11316 
11317     __ movi(q9.V16B(), 0x55);
11318     __ dci(0x5e605c29);  // sqrshl h9, h1, h0
11319     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11320 
11321     __ movi(q9.V16B(), 0x55);
11322     __ dci(0x5f109c09);  // sqrshrn h9, s0, #16
11323     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11324 
11325     __ movi(q9.V16B(), 0x55);
11326     __ dci(0x7f108c09);  // sqrshrun h9, s0, #16
11327     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11328 
11329     __ movi(q9.V16B(), 0x55);
11330     __ dci(0x5e604c29);  // sqshl h9, h1, h0
11331     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11332 
11333     __ movi(q9.V16B(), 0x55);
11334     __ dci(0x5f107409);  // sqshl h9, h0, #0
11335     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11336 
11337     __ movi(q9.V16B(), 0x55);
11338     __ dci(0x7f106409);  // sqshlu h9, h0, #0
11339     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11340 
11341     __ movi(q9.V16B(), 0x55);
11342     __ dci(0x5f109409);  // sqshrn h9, s0, #16
11343     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11344 
11345     __ movi(q9.V16B(), 0x55);
11346     __ dci(0x7f108409);  // sqshrun h9, s0, #16
11347     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11348 
11349     __ movi(q9.V16B(), 0x55);
11350     __ dci(0x5e602c29);  // sqsub h9, h1, h0
11351     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11352 
11353     __ movi(q9.V16B(), 0x55);
11354     __ dci(0x5e614809);  // sqxtn h9, s0
11355     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11356 
11357     __ movi(q9.V16B(), 0x55);
11358     __ dci(0x7e612809);  // sqxtun h9, s0
11359     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11360 
11361     __ movi(q9.V16B(), 0x55);
11362     __ dci(0x5e603809);  // suqadd h9, h0
11363     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11364 
11365     __ movi(q9.V16B(), 0x55);
11366     __ dci(0x7e79d809);  // ucvtf h9, h0
11367     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11368 
11369     __ movi(q9.V16B(), 0x55);
11370     __ dci(0x7f10e409);  // ucvtf h9, h0, #16
11371     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11372 
11373     __ movi(q9.V16B(), 0x55);
11374     __ dci(0x7e600c29);  // uqadd h9, h1, h0
11375     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11376 
11377     __ movi(q9.V16B(), 0x55);
11378     __ dci(0x7e605c29);  // uqrshl h9, h1, h0
11379     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11380 
11381     __ movi(q9.V16B(), 0x55);
11382     __ dci(0x7f109c09);  // uqrshrn h9, s0, #16
11383     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11384 
11385     __ movi(q9.V16B(), 0x55);
11386     __ dci(0x7e604c29);  // uqshl h9, h1, h0
11387     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11388 
11389     __ movi(q9.V16B(), 0x55);
11390     __ dci(0x7f107409);  // uqshl h9, h0, #0
11391     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11392 
11393     __ movi(q9.V16B(), 0x55);
11394     __ dci(0x7f109409);  // uqshrn h9, s0, #16
11395     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11396 
11397     __ movi(q9.V16B(), 0x55);
11398     __ dci(0x7e602c29);  // uqsub h9, h1, h0
11399     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11400 
11401     __ movi(q9.V16B(), 0x55);
11402     __ dci(0x7e614809);  // uqxtn h9, s0
11403     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11404 
11405     __ movi(q9.V16B(), 0x55);
11406     __ dci(0x7e603809);  // usqadd h9, h0
11407     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11408   }
11409   __ Sub(x1, x1, 1);
11410   __ Cbnz(x1, &loop);
11411 
11412   __ Ins(q30.V8H(), 0, wzr);
11413 
11414   END();
11415   if (CAN_RUN()) {
11416     RUN();
11417     ASSERT_EQUAL_128(0, 0, q30);
11418   }
11419 }
11420 
TEST(zero_high_s)11421 TEST(zero_high_s) {
11422   SETUP_WITH_FEATURES(CPUFeatures::kSVE,
11423                       CPUFeatures::kNEON,
11424                       CPUFeatures::kFP,
11425                       CPUFeatures::kRDM);
11426   START();
11427 
11428   __ Mov(x0, 0x55aa42ffaa42ff55);
11429   __ Mov(x1, 4);
11430   __ Movi(q30.V16B(), 0);
11431 
11432   // Iterate over the SISD instructions using different input values on each
11433   // loop.
11434   Label loop;
11435   __ Bind(&loop);
11436 
11437   __ Dup(q0.V4S(), w0);
11438   __ Ror(x0, x0, 8);
11439   __ Dup(q1.V4S(), w0);
11440   __ Ror(x0, x0, 8);
11441   __ Dup(q2.V4S(), w0);
11442   __ Ror(x0, x0, 8);
11443 
11444   {
11445     ExactAssemblyScope scope(&masm, 246 * kInstructionSize);
11446     __ movi(q9.V16B(), 0x55);
11447     __ dci(0x5e040409);  // mov s9, v0.s[0]
11448     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11449 
11450     __ movi(q9.V16B(), 0x55);
11451     __ dci(0x7ea0d429);  // fabd s9, s1, s0
11452     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11453 
11454     __ movi(q9.V16B(), 0x55);
11455     __ dci(0x7e20ec29);  // facge s9, s1, s0
11456     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11457 
11458     __ movi(q9.V16B(), 0x55);
11459     __ dci(0x7ea0ec29);  // facgt s9, s1, s0
11460     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11461 
11462     __ movi(q9.V16B(), 0x55);
11463     __ dci(0x7e30d809);  // faddp s9, v0.2s
11464     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11465 
11466     __ movi(q9.V16B(), 0x55);
11467     __ dci(0x5ea0d809);  // fcmeq s9, s0, #0.0
11468     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11469 
11470     __ movi(q9.V16B(), 0x55);
11471     __ dci(0x5e20e429);  // fcmeq s9, s1, s0
11472     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11473 
11474     __ movi(q9.V16B(), 0x55);
11475     __ dci(0x7ea0c809);  // fcmge s9, s0, #0.0
11476     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11477 
11478     __ movi(q9.V16B(), 0x55);
11479     __ dci(0x7e20e429);  // fcmge s9, s1, s0
11480     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11481 
11482     __ movi(q9.V16B(), 0x55);
11483     __ dci(0x5ea0c809);  // fcmgt s9, s0, #0.0
11484     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11485 
11486     __ movi(q9.V16B(), 0x55);
11487     __ dci(0x7ea0e429);  // fcmgt s9, s1, s0
11488     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11489 
11490     __ movi(q9.V16B(), 0x55);
11491     __ dci(0x7ea0d809);  // fcmle s9, s0, #0.0
11492     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11493 
11494     __ movi(q9.V16B(), 0x55);
11495     __ dci(0x5ea0e809);  // fcmlt s9, s0, #0.0
11496     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11497 
11498     __ movi(q9.V16B(), 0x55);
11499     __ dci(0x5e21c809);  // fcvtas s9, s0
11500     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11501 
11502     __ movi(q9.V16B(), 0x55);
11503     __ dci(0x7e21c809);  // fcvtau s9, s0
11504     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11505 
11506     __ movi(q9.V16B(), 0x55);
11507     __ dci(0x5e21b809);  // fcvtms s9, s0
11508     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11509 
11510     __ movi(q9.V16B(), 0x55);
11511     __ dci(0x7e21b809);  // fcvtmu s9, s0
11512     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11513 
11514     __ movi(q9.V16B(), 0x55);
11515     __ dci(0x5e21a809);  // fcvtns s9, s0
11516     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11517 
11518     __ movi(q9.V16B(), 0x55);
11519     __ dci(0x7e21a809);  // fcvtnu s9, s0
11520     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11521 
11522     __ movi(q9.V16B(), 0x55);
11523     __ dci(0x5ea1a809);  // fcvtps s9, s0
11524     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11525 
11526     __ movi(q9.V16B(), 0x55);
11527     __ dci(0x7ea1a809);  // fcvtpu s9, s0
11528     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11529 
11530     __ movi(q9.V16B(), 0x55);
11531     __ dci(0x7e616809);  // fcvtxn s9, d0
11532     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11533 
11534     __ movi(q9.V16B(), 0x55);
11535     __ dci(0x5ea1b809);  // fcvtzs s9, s0
11536     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11537 
11538     __ movi(q9.V16B(), 0x55);
11539     __ dci(0x5f20fc09);  // fcvtzs s9, s0, #32
11540     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11541 
11542     __ movi(q9.V16B(), 0x55);
11543     __ dci(0x7ea1b809);  // fcvtzu s9, s0
11544     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11545 
11546     __ movi(q9.V16B(), 0x55);
11547     __ dci(0x7f20fc09);  // fcvtzu s9, s0, #32
11548     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11549 
11550     __ movi(q9.V16B(), 0x55);
11551     __ dci(0x7e30c809);  // fmaxnmp s9, v0.2s
11552     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11553 
11554     __ movi(q9.V16B(), 0x55);
11555     __ dci(0x7e30f809);  // fmaxp s9, v0.2s
11556     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11557 
11558     __ movi(q9.V16B(), 0x55);
11559     __ dci(0x7eb0c809);  // fminnmp s9, v0.2s
11560     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11561 
11562     __ movi(q9.V16B(), 0x55);
11563     __ dci(0x7eb0f809);  // fminp s9, v0.2s
11564     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11565 
11566     __ movi(q9.V16B(), 0x55);
11567     __ dci(0x5f801029);  // fmla s9, s1, v0.s[0]
11568     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11569 
11570     __ movi(q9.V16B(), 0x55);
11571     __ dci(0x5f805029);  // fmls s9, s1, v0.s[0]
11572     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11573 
11574     __ movi(q9.V16B(), 0x55);
11575     __ dci(0x5f809029);  // fmul s9, s1, v0.s[0]
11576     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11577 
11578     __ movi(q9.V16B(), 0x55);
11579     __ dci(0x7f809029);  // fmulx s9, s1, v0.s[0]
11580     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11581 
11582     __ movi(q9.V16B(), 0x55);
11583     __ dci(0x5e20dc29);  // fmulx s9, s1, s0
11584     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11585 
11586     __ movi(q9.V16B(), 0x55);
11587     __ dci(0x5ea1d809);  // frecpe s9, s0
11588     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11589 
11590     __ movi(q9.V16B(), 0x55);
11591     __ dci(0x5e20fc29);  // frecps s9, s1, s0
11592     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11593 
11594     __ movi(q9.V16B(), 0x55);
11595     __ dci(0x5ea1f809);  // frecpx s9, s0
11596     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11597 
11598     __ movi(q9.V16B(), 0x55);
11599     __ dci(0x7ea1d809);  // frsqrte s9, s0
11600     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11601 
11602     __ movi(q9.V16B(), 0x55);
11603     __ dci(0x5ea0fc29);  // frsqrts s9, s1, s0
11604     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11605 
11606     __ movi(q9.V16B(), 0x55);
11607     __ dci(0x5e21d809);  // scvtf s9, s0
11608     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11609 
11610     __ movi(q9.V16B(), 0x55);
11611     __ dci(0x5f20e409);  // scvtf s9, s0, #32
11612     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11613 
11614     __ movi(q9.V16B(), 0x55);
11615     __ dci(0x5ea07809);  // sqabs s9, s0
11616     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11617 
11618     __ movi(q9.V16B(), 0x55);
11619     __ dci(0x5ea00c29);  // sqadd s9, s1, s0
11620     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11621 
11622     __ movi(q9.V16B(), 0x55);
11623     __ dci(0x5e609029);  // sqdmlal s9, h1, h0
11624     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11625 
11626     __ movi(q9.V16B(), 0x55);
11627     __ dci(0x5f403029);  // sqdmlal s9, h1, v0.h[0]
11628     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11629 
11630     __ movi(q9.V16B(), 0x55);
11631     __ dci(0x5e60b029);  // sqdmlsl s9, h1, h0
11632     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11633 
11634     __ movi(q9.V16B(), 0x55);
11635     __ dci(0x5f407029);  // sqdmlsl s9, h1, v0.h[0]
11636     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11637 
11638     __ movi(q9.V16B(), 0x55);
11639     __ dci(0x5f80c029);  // sqdmulh s9, s1, v0.s[0]
11640     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11641 
11642     __ movi(q9.V16B(), 0x55);
11643     __ dci(0x5ea0b429);  // sqdmulh s9, s1, s0
11644     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11645 
11646     __ movi(q9.V16B(), 0x55);
11647     __ dci(0x5e60d029);  // sqdmull s9, h1, h0
11648     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11649 
11650     __ movi(q9.V16B(), 0x55);
11651     __ dci(0x5f40b029);  // sqdmull s9, h1, v0.h[0]
11652     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11653 
11654     __ movi(q9.V16B(), 0x55);
11655     __ dci(0x7ea07809);  // sqneg s9, s0
11656     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11657 
11658     __ movi(q9.V16B(), 0x55);
11659     __ dci(0x7f80d029);  // sqrdmlah s9, s1, v0.s[0]
11660     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11661 
11662     __ movi(q9.V16B(), 0x55);
11663     __ dci(0x7e808429);  // sqrdmlah s9, s1, s0
11664     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11665 
11666     __ movi(q9.V16B(), 0x55);
11667     __ dci(0x7f80f029);  // sqrdmlsh s9, s1, v0.s[0]
11668     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11669 
11670     __ movi(q9.V16B(), 0x55);
11671     __ dci(0x7e808c29);  // sqrdmlsh s9, s1, s0
11672     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11673 
11674     __ movi(q9.V16B(), 0x55);
11675     __ dci(0x5f80d029);  // sqrdmulh s9, s1, v0.s[0]
11676     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11677 
11678     __ movi(q9.V16B(), 0x55);
11679     __ dci(0x7ea0b429);  // sqrdmulh s9, s1, s0
11680     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11681 
11682     __ movi(q9.V16B(), 0x55);
11683     __ dci(0x5ea05c29);  // sqrshl s9, s1, s0
11684     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11685 
11686     __ movi(q9.V16B(), 0x55);
11687     __ dci(0x5f209c09);  // sqrshrn s9, d0, #32
11688     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11689 
11690     __ movi(q9.V16B(), 0x55);
11691     __ dci(0x7f208c09);  // sqrshrun s9, d0, #32
11692     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11693 
11694     __ movi(q9.V16B(), 0x55);
11695     __ dci(0x5ea04c29);  // sqshl s9, s1, s0
11696     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11697 
11698     __ movi(q9.V16B(), 0x55);
11699     __ dci(0x5f207409);  // sqshl s9, s0, #0
11700     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11701 
11702     __ movi(q9.V16B(), 0x55);
11703     __ dci(0x7f206409);  // sqshlu s9, s0, #0
11704     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11705 
11706     __ movi(q9.V16B(), 0x55);
11707     __ dci(0x5f209409);  // sqshrn s9, d0, #32
11708     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11709 
11710     __ movi(q9.V16B(), 0x55);
11711     __ dci(0x7f208409);  // sqshrun s9, d0, #32
11712     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11713 
11714     __ movi(q9.V16B(), 0x55);
11715     __ dci(0x5ea02c29);  // sqsub s9, s1, s0
11716     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11717 
11718     __ movi(q9.V16B(), 0x55);
11719     __ dci(0x5ea14809);  // sqxtn s9, d0
11720     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11721 
11722     __ movi(q9.V16B(), 0x55);
11723     __ dci(0x7ea12809);  // sqxtun s9, d0
11724     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11725 
11726     __ movi(q9.V16B(), 0x55);
11727     __ dci(0x5ea03809);  // suqadd s9, s0
11728     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11729 
11730     __ movi(q9.V16B(), 0x55);
11731     __ dci(0x7e21d809);  // ucvtf s9, s0
11732     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11733 
11734     __ movi(q9.V16B(), 0x55);
11735     __ dci(0x7f20e409);  // ucvtf s9, s0, #32
11736     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11737 
11738     __ movi(q9.V16B(), 0x55);
11739     __ dci(0x7ea00c29);  // uqadd s9, s1, s0
11740     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11741 
11742     __ movi(q9.V16B(), 0x55);
11743     __ dci(0x7ea05c29);  // uqrshl s9, s1, s0
11744     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11745 
11746     __ movi(q9.V16B(), 0x55);
11747     __ dci(0x7f209c09);  // uqrshrn s9, d0, #32
11748     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11749 
11750     __ movi(q9.V16B(), 0x55);
11751     __ dci(0x7ea04c29);  // uqshl s9, s1, s0
11752     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11753 
11754     __ movi(q9.V16B(), 0x55);
11755     __ dci(0x7f207409);  // uqshl s9, s0, #0
11756     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11757 
11758     __ movi(q9.V16B(), 0x55);
11759     __ dci(0x7f209409);  // uqshrn s9, d0, #32
11760     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11761 
11762     __ movi(q9.V16B(), 0x55);
11763     __ dci(0x7ea02c29);  // uqsub s9, s1, s0
11764     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11765 
11766     __ movi(q9.V16B(), 0x55);
11767     __ dci(0x7ea14809);  // uqxtn s9, d0
11768     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11769 
11770     __ movi(q9.V16B(), 0x55);
11771     __ dci(0x7ea03809);  // usqadd s9, s0
11772     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11773   }
11774   __ Sub(x1, x1, 1);
11775   __ Cbnz(x1, &loop);
11776 
11777   __ Ins(q30.V4S(), 0, wzr);
11778 
11779   END();
11780   if (CAN_RUN()) {
11781     RUN();
11782     ASSERT_EQUAL_128(0, 0, q30);
11783   }
11784 }
11785 
TEST(zero_high_d)11786 TEST(zero_high_d) {
11787   SETUP_WITH_FEATURES(CPUFeatures::kSVE,
11788                       CPUFeatures::kNEON,
11789                       CPUFeatures::kFP,
11790                       CPUFeatures::kRDM);
11791   START();
11792 
11793   __ Mov(x0, 0x55aa42ffaa42ff55);
11794   __ Mov(x1, 4);
11795   __ Movi(q30.V16B(), 0);
11796 
11797   // Iterate over the SISD instructions using different input values on each
11798   // loop.
11799   Label loop;
11800   __ Bind(&loop);
11801 
11802   __ Dup(q0.V2D(), x0);
11803   __ Ror(x0, x0, 8);
11804   __ Dup(q1.V2D(), x0);
11805   __ Ror(x0, x0, 8);
11806   __ Dup(q2.V2D(), x0);
11807   __ Ror(x0, x0, 8);
11808 
11809   {
11810     ExactAssemblyScope scope(&masm, 291 * kInstructionSize);
11811     __ movi(q9.V16B(), 0x55);
11812     __ dci(0x5ee0b809);  // abs d9, d0
11813     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11814 
11815     __ movi(q9.V16B(), 0x55);
11816     __ dci(0x5ee08429);  // add d9, d1, d0
11817     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11818 
11819     __ movi(q9.V16B(), 0x55);
11820     __ dci(0x5ef1b809);  // addp d9, v0.2d
11821     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11822 
11823     __ movi(q9.V16B(), 0x55);
11824     __ dci(0x5ee09809);  // cmeq d9, d0, #0
11825     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11826 
11827     __ movi(q9.V16B(), 0x55);
11828     __ dci(0x7ee08c29);  // cmeq d9, d1, d0
11829     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11830 
11831     __ movi(q9.V16B(), 0x55);
11832     __ dci(0x7ee08809);  // cmge d9, d0, #0
11833     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11834 
11835     __ movi(q9.V16B(), 0x55);
11836     __ dci(0x5ee03c29);  // cmge d9, d1, d0
11837     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11838 
11839     __ movi(q9.V16B(), 0x55);
11840     __ dci(0x5ee08809);  // cmgt d9, d0, #0
11841     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11842 
11843     __ movi(q9.V16B(), 0x55);
11844     __ dci(0x5ee03429);  // cmgt d9, d1, d0
11845     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11846 
11847     __ movi(q9.V16B(), 0x55);
11848     __ dci(0x7ee03429);  // cmhi d9, d1, d0
11849     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11850 
11851     __ movi(q9.V16B(), 0x55);
11852     __ dci(0x7ee03c29);  // cmhs d9, d1, d0
11853     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11854 
11855     __ movi(q9.V16B(), 0x55);
11856     __ dci(0x7ee09809);  // cmle d9, d0, #0
11857     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11858 
11859     __ movi(q9.V16B(), 0x55);
11860     __ dci(0x5ee0a809);  // cmlt d9, d0, #0
11861     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11862 
11863     __ movi(q9.V16B(), 0x55);
11864     __ dci(0x5ee08c29);  // cmtst d9, d1, d0
11865     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11866 
11867     __ movi(q9.V16B(), 0x55);
11868     __ dci(0x5e080409);  // mov d9, v0.d[0]
11869     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11870 
11871     __ movi(q9.V16B(), 0x55);
11872     __ dci(0x7ee0d429);  // fabd d9, d1, d0
11873     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11874 
11875     __ movi(q9.V16B(), 0x55);
11876     __ dci(0x7e60ec29);  // facge d9, d1, d0
11877     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11878 
11879     __ movi(q9.V16B(), 0x55);
11880     __ dci(0x7ee0ec29);  // facgt d9, d1, d0
11881     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11882 
11883     __ movi(q9.V16B(), 0x55);
11884     __ dci(0x7e70d809);  // faddp d9, v0.2d
11885     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11886 
11887     __ movi(q9.V16B(), 0x55);
11888     __ dci(0x5ee0d809);  // fcmeq d9, d0, #0.0
11889     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11890 
11891     __ movi(q9.V16B(), 0x55);
11892     __ dci(0x5e60e429);  // fcmeq d9, d1, d0
11893     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11894 
11895     __ movi(q9.V16B(), 0x55);
11896     __ dci(0x7ee0c809);  // fcmge d9, d0, #0.0
11897     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11898 
11899     __ movi(q9.V16B(), 0x55);
11900     __ dci(0x7e60e429);  // fcmge d9, d1, d0
11901     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11902 
11903     __ movi(q9.V16B(), 0x55);
11904     __ dci(0x5ee0c809);  // fcmgt d9, d0, #0.0
11905     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11906 
11907     __ movi(q9.V16B(), 0x55);
11908     __ dci(0x7ee0e429);  // fcmgt d9, d1, d0
11909     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11910 
11911     __ movi(q9.V16B(), 0x55);
11912     __ dci(0x7ee0d809);  // fcmle d9, d0, #0.0
11913     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11914 
11915     __ movi(q9.V16B(), 0x55);
11916     __ dci(0x5ee0e809);  // fcmlt d9, d0, #0.0
11917     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11918 
11919     __ movi(q9.V16B(), 0x55);
11920     __ dci(0x5e61c809);  // fcvtas d9, d0
11921     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11922 
11923     __ movi(q9.V16B(), 0x55);
11924     __ dci(0x7e61c809);  // fcvtau d9, d0
11925     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11926 
11927     __ movi(q9.V16B(), 0x55);
11928     __ dci(0x5e61b809);  // fcvtms d9, d0
11929     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11930 
11931     __ movi(q9.V16B(), 0x55);
11932     __ dci(0x7e61b809);  // fcvtmu d9, d0
11933     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11934 
11935     __ movi(q9.V16B(), 0x55);
11936     __ dci(0x5e61a809);  // fcvtns d9, d0
11937     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11938 
11939     __ movi(q9.V16B(), 0x55);
11940     __ dci(0x7e61a809);  // fcvtnu d9, d0
11941     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11942 
11943     __ movi(q9.V16B(), 0x55);
11944     __ dci(0x5ee1a809);  // fcvtps d9, d0
11945     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11946 
11947     __ movi(q9.V16B(), 0x55);
11948     __ dci(0x7ee1a809);  // fcvtpu d9, d0
11949     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11950 
11951     __ movi(q9.V16B(), 0x55);
11952     __ dci(0x5ee1b809);  // fcvtzs d9, d0
11953     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11954 
11955     __ movi(q9.V16B(), 0x55);
11956     __ dci(0x5f40fc09);  // fcvtzs d9, d0, #64
11957     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11958 
11959     __ movi(q9.V16B(), 0x55);
11960     __ dci(0x7ee1b809);  // fcvtzu d9, d0
11961     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11962 
11963     __ movi(q9.V16B(), 0x55);
11964     __ dci(0x7f40fc09);  // fcvtzu d9, d0, #64
11965     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11966 
11967     __ movi(q9.V16B(), 0x55);
11968     __ dci(0x7e70c809);  // fmaxnmp d9, v0.2d
11969     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11970 
11971     __ movi(q9.V16B(), 0x55);
11972     __ dci(0x7e70f809);  // fmaxp d9, v0.2d
11973     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11974 
11975     __ movi(q9.V16B(), 0x55);
11976     __ dci(0x7ef0c809);  // fminnmp d9, v0.2d
11977     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11978 
11979     __ movi(q9.V16B(), 0x55);
11980     __ dci(0x7ef0f809);  // fminp d9, v0.2d
11981     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11982 
11983     __ movi(q9.V16B(), 0x55);
11984     __ dci(0x5fc01029);  // fmla d9, d1, v0.d[0]
11985     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11986 
11987     __ movi(q9.V16B(), 0x55);
11988     __ dci(0x5fc05029);  // fmls d9, d1, v0.d[0]
11989     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11990 
11991     __ movi(q9.V16B(), 0x55);
11992     __ dci(0x5fc09029);  // fmul d9, d1, v0.d[0]
11993     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11994 
11995     __ movi(q9.V16B(), 0x55);
11996     __ dci(0x7fc09029);  // fmulx d9, d1, v0.d[0]
11997     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
11998 
11999     __ movi(q9.V16B(), 0x55);
12000     __ dci(0x5e60dc29);  // fmulx d9, d1, d0
12001     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12002 
12003     __ movi(q9.V16B(), 0x55);
12004     __ dci(0x5ee1d809);  // frecpe d9, d0
12005     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12006 
12007     __ movi(q9.V16B(), 0x55);
12008     __ dci(0x5e60fc29);  // frecps d9, d1, d0
12009     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12010 
12011     __ movi(q9.V16B(), 0x55);
12012     __ dci(0x5ee1f809);  // frecpx d9, d0
12013     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12014 
12015     __ movi(q9.V16B(), 0x55);
12016     __ dci(0x7ee1d809);  // frsqrte d9, d0
12017     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12018 
12019     __ movi(q9.V16B(), 0x55);
12020     __ dci(0x5ee0fc29);  // frsqrts d9, d1, d0
12021     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12022 
12023     __ movi(q9.V16B(), 0x55);
12024     __ dci(0x7ee0b809);  // neg d9, d0
12025     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12026 
12027     __ movi(q9.V16B(), 0x55);
12028     __ dci(0x5e61d809);  // scvtf d9, d0
12029     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12030 
12031     __ movi(q9.V16B(), 0x55);
12032     __ dci(0x5f40e409);  // scvtf d9, d0, #64
12033     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12034 
12035     __ movi(q9.V16B(), 0x55);
12036     __ dci(0x5f405409);  // shl d9, d0, #0
12037     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12038 
12039     __ movi(q9.V16B(), 0x55);
12040     __ dci(0x7f405409);  // sli d9, d0, #0
12041     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12042 
12043     __ movi(q9.V16B(), 0x55);
12044     __ dci(0x5ee07809);  // sqabs d9, d0
12045     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12046 
12047     __ movi(q9.V16B(), 0x55);
12048     __ dci(0x5ee00c29);  // sqadd d9, d1, d0
12049     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12050 
12051     __ movi(q9.V16B(), 0x55);
12052     __ dci(0x5ea09029);  // sqdmlal d9, s1, s0
12053     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12054 
12055     __ movi(q9.V16B(), 0x55);
12056     __ dci(0x5f803029);  // sqdmlal d9, s1, v0.s[0]
12057     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12058 
12059     __ movi(q9.V16B(), 0x55);
12060     __ dci(0x5ea0b029);  // sqdmlsl d9, s1, s0
12061     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12062 
12063     __ movi(q9.V16B(), 0x55);
12064     __ dci(0x5f807029);  // sqdmlsl d9, s1, v0.s[0]
12065     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12066 
12067     __ movi(q9.V16B(), 0x55);
12068     __ dci(0x5ea0d029);  // sqdmull d9, s1, s0
12069     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12070 
12071     __ movi(q9.V16B(), 0x55);
12072     __ dci(0x5f80b029);  // sqdmull d9, s1, v0.s[0]
12073     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12074 
12075     __ movi(q9.V16B(), 0x55);
12076     __ dci(0x7ee07809);  // sqneg d9, d0
12077     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12078 
12079     __ movi(q9.V16B(), 0x55);
12080     __ dci(0x7ec08429);  // sqrdmlah d9, d1, d0
12081     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12082 
12083     __ movi(q9.V16B(), 0x55);
12084     __ dci(0x7ec08c29);  // sqrdmlsh d9, d1, d0
12085     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12086 
12087     __ movi(q9.V16B(), 0x55);
12088     __ dci(0x5ee05c29);  // sqrshl d9, d1, d0
12089     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12090 
12091     __ movi(q9.V16B(), 0x55);
12092     __ dci(0x5ee04c29);  // sqshl d9, d1, d0
12093     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12094 
12095     __ movi(q9.V16B(), 0x55);
12096     __ dci(0x5f407409);  // sqshl d9, d0, #0
12097     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12098 
12099     __ movi(q9.V16B(), 0x55);
12100     __ dci(0x7f406409);  // sqshlu d9, d0, #0
12101     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12102 
12103     __ movi(q9.V16B(), 0x55);
12104     __ dci(0x5ee02c29);  // sqsub d9, d1, d0
12105     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12106 
12107     __ movi(q9.V16B(), 0x55);
12108     __ dci(0x7f404409);  // sri d9, d0, #64
12109     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12110 
12111     __ movi(q9.V16B(), 0x55);
12112     __ dci(0x5ee05429);  // srshl d9, d1, d0
12113     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12114 
12115     __ movi(q9.V16B(), 0x55);
12116     __ dci(0x5f402409);  // srshr d9, d0, #64
12117     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12118 
12119     __ movi(q9.V16B(), 0x55);
12120     __ dci(0x5f403409);  // srsra d9, d0, #64
12121     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12122 
12123     __ movi(q9.V16B(), 0x55);
12124     __ dci(0x5ee04429);  // sshl d9, d1, d0
12125     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12126 
12127     __ movi(q9.V16B(), 0x55);
12128     __ dci(0x5f400409);  // sshr d9, d0, #64
12129     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12130 
12131     __ movi(q9.V16B(), 0x55);
12132     __ dci(0x5f401409);  // ssra d9, d0, #64
12133     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12134 
12135     __ movi(q9.V16B(), 0x55);
12136     __ dci(0x7ee08429);  // sub d9, d1, d0
12137     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12138 
12139     __ movi(q9.V16B(), 0x55);
12140     __ dci(0x5ee03809);  // suqadd d9, d0
12141     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12142 
12143     __ movi(q9.V16B(), 0x55);
12144     __ dci(0x7e61d809);  // ucvtf d9, d0
12145     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12146 
12147     __ movi(q9.V16B(), 0x55);
12148     __ dci(0x7f40e409);  // ucvtf d9, d0, #64
12149     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12150 
12151     __ movi(q9.V16B(), 0x55);
12152     __ dci(0x7ee00c29);  // uqadd d9, d1, d0
12153     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12154 
12155     __ movi(q9.V16B(), 0x55);
12156     __ dci(0x7ee05c29);  // uqrshl d9, d1, d0
12157     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12158 
12159     __ movi(q9.V16B(), 0x55);
12160     __ dci(0x7ee04c29);  // uqshl d9, d1, d0
12161     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12162 
12163     __ movi(q9.V16B(), 0x55);
12164     __ dci(0x7f407409);  // uqshl d9, d0, #0
12165     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12166 
12167     __ movi(q9.V16B(), 0x55);
12168     __ dci(0x7ee02c29);  // uqsub d9, d1, d0
12169     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12170 
12171     __ movi(q9.V16B(), 0x55);
12172     __ dci(0x7ee05429);  // urshl d9, d1, d0
12173     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12174 
12175     __ movi(q9.V16B(), 0x55);
12176     __ dci(0x7f402409);  // urshr d9, d0, #64
12177     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12178 
12179     __ movi(q9.V16B(), 0x55);
12180     __ dci(0x7f403409);  // ursra d9, d0, #64
12181     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12182 
12183     __ movi(q9.V16B(), 0x55);
12184     __ dci(0x7ee04429);  // ushl d9, d1, d0
12185     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12186 
12187     __ movi(q9.V16B(), 0x55);
12188     __ dci(0x7f400409);  // ushr d9, d0, #64
12189     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12190 
12191     __ movi(q9.V16B(), 0x55);
12192     __ dci(0x7ee03809);  // usqadd d9, d0
12193     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12194 
12195     __ movi(q9.V16B(), 0x55);
12196     __ dci(0x7f401409);  // usra d9, d0, #64
12197     __ orr(q30.V16B(), q30.V16B(), q9.V16B());
12198   }
12199   __ Sub(x1, x1, 1);
12200   __ Cbnz(x1, &loop);
12201 
12202   __ Ins(q30.V2D(), 0, xzr);
12203 
12204   END();
12205   if (CAN_RUN()) {
12206     RUN();
12207     ASSERT_EQUAL_128(0, 0, q30);
12208   }
12209 }
12210 
12211 }  // namespace aarch64
12212 }  // namespace vixl
12213