1 // Copyright 2016, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <cfloat>
28 #include <cmath>
29 #include <cstdio>
30 #include <cstdlib>
31 #include <cstring>
32
33 #include "test-runner.h"
34 #include "test-utils-aarch64.h"
35
36 #include "aarch64/cpu-aarch64.h"
37 #include "aarch64/disasm-aarch64.h"
38 #include "aarch64/macro-assembler-aarch64.h"
39 #include "aarch64/simulator-aarch64.h"
40
41 namespace vixl {
42 namespace aarch64 {
43
// Shorthand that lets instruction emission read like an assembly listing:
// `__ add(...)` expands to `masm->add(...)`, so a variable named `masm` must be
// in scope wherever it is used.
#define __ masm->
// Declares a test whose name is given the TRACE_ prefix. TEST_ itself is not
// defined in this file (presumably it comes from test-runner.h).
#define TEST(name) TEST_(TRACE_##name)

// Builds the path of a reference file by concatenating the reference directory
// with `name`, which therefore has to be a string literal.
#define REF(name) "test/test-trace-reference/" name
48
GenerateTestSequenceBase(MacroAssembler * masm)49 static void GenerateTestSequenceBase(MacroAssembler* masm) {
50 ExactAssemblyScope guard(masm,
51 masm->GetBuffer()->GetRemainingBytes(),
52 ExactAssemblyScope::kMaximumSize);
53
54 __ adc(w3, w4, w5);
55 __ adc(x6, x7, x8);
56 __ adcs(w9, w10, w11);
57 __ adcs(x12, x13, x14);
58 __ add(w15, w16, w17);
59 __ add(x18, x19, x20);
60 __ adds(w21, w22, w23);
61 __ adds(x24, x25, x26);
62 __ and_(w27, w28, w29);
63 __ and_(x2, x3, x4);
64 __ ands(w5, w6, w7);
65 __ ands(x8, x9, x10);
66 __ asr(w11, w12, 0);
67 __ asr(x13, x14, 1);
68 __ asrv(w15, w16, w17);
69 __ asrv(x18, x19, x20);
70 __ bfm(w21, w22, 5, 6);
71 __ bfm(x23, x24, 7, 8);
72 __ bic(w25, w26, w27);
73 __ bic(x28, x29, x2);
74 __ bics(w3, w4, w5);
75 __ bics(x6, x7, x8);
76 __ ccmn(w9, w10, NoFlag, al);
77 __ ccmn(w9, w10, NoFlag, eq);
78 __ ccmn(w9, w10, NoFlag, ne);
79 __ ccmn(x11, x12, CFlag, al);
80 __ ccmn(x11, x12, CFlag, cc);
81 __ ccmn(x11, x12, CFlag, cs);
82 __ ccmp(w13, w14, VFlag, al);
83 __ ccmp(w13, w14, VFlag, hi);
84 __ ccmp(w13, w14, VFlag, ls);
85 __ ccmp(x15, x16, CVFlag, al);
86 __ ccmp(x15, x16, CVFlag, eq);
87 __ ccmp(x15, x16, CVFlag, ne);
88 __ cinc(w17, w18, cc);
89 __ cinc(w17, w18, cs);
90 __ cinc(x19, x20, hi);
91 __ cinc(x19, x20, ls);
92 __ cinv(w21, w22, eq);
93 __ cinv(w21, w22, ne);
94 __ cinv(x23, x24, cc);
95 __ cinv(x23, x24, cs);
96 __ clrex();
97 __ cls(w25, w26);
98 __ cls(x27, x28);
99 __ clz(w29, w2);
100 __ clz(x3, x4);
101 __ cmn(w5, w6);
102 __ cmn(x7, x8);
103 __ cmp(w9, w10);
104 __ cmp(x11, x12);
105 __ cneg(w13, w14, hi);
106 __ cneg(w13, w14, ls);
107 __ cneg(x15, x16, eq);
108 __ cneg(x15, x16, ne);
109 __ crc32b(w17, w18, w19);
110 __ crc32cb(w20, w21, w22);
111 __ crc32ch(w23, w24, w25);
112 __ crc32cw(w26, w27, w28);
113 __ crc32h(w4, w5, w6);
114 __ crc32w(w7, w8, w9);
115 __ csel(w13, w14, w15, cc);
116 __ csel(w13, w14, w15, cs);
117 __ csel(x16, x17, x18, hi);
118 __ csel(x16, x17, x18, ls);
119 __ cset(w19, eq);
120 __ cset(w19, ne);
121 __ cset(x20, cc);
122 __ cset(x20, cs);
123 __ csetm(w21, hi);
124 __ csetm(w21, ls);
125 __ csetm(x22, eq);
126 __ csetm(x22, ne);
127 __ csinc(w23, w24, w25, cc);
128 __ csinc(w23, w24, w25, cs);
129 __ csinc(x26, x27, x28, hi);
130 __ csinc(x26, x27, x28, ls);
131 __ csinv(w29, w2, w3, eq);
132 __ csinv(w29, w2, w3, ne);
133 __ csinv(x4, x5, x6, cc);
134 __ csinv(x4, x5, x6, cs);
135 __ csneg(w7, w8, w9, hi);
136 __ csneg(w7, w8, w9, ls);
137 __ csneg(x10, x11, x12, eq);
138 __ csneg(x10, x11, x12, ne);
139 __ dc(CVAC, x0);
140 __ dmb(InnerShareable, BarrierAll);
141 __ dsb(InnerShareable, BarrierAll);
142 __ eon(w13, w14, w15);
143 __ eon(x16, x17, x18);
144 __ eor(w19, w20, w21);
145 __ eor(x22, x23, x24);
146 __ extr(w25, w26, w27, 9);
147 __ extr(x28, x29, x2, 10);
148 __ hint(NOP);
149 __ ic(IVAU, x0);
150 __ isb();
151 __ ldar(w3, MemOperand(x0));
152 __ ldar(x4, MemOperand(x0));
153 __ ldarb(w5, MemOperand(x0));
154 __ ldarb(x6, MemOperand(x0));
155 __ ldarh(w7, MemOperand(x0));
156 __ ldarh(x8, MemOperand(x0));
157 __ ldaxp(w9, w10, MemOperand(x0));
158 __ ldaxp(x11, x12, MemOperand(x0));
159 __ ldaxr(w13, MemOperand(x0));
160 __ ldaxr(x14, MemOperand(x0));
161 __ ldaxrb(w15, MemOperand(x0));
162 __ ldaxrb(x16, MemOperand(x0));
163 __ ldaxrh(w17, MemOperand(x0));
164 __ ldaxrh(x18, MemOperand(x0));
165 __ ldnp(w19, w20, MemOperand(x0));
166 __ ldnp(x21, x22, MemOperand(x0));
167 __ ldp(w23, w24, MemOperand(x0));
168 __ ldp(w23, w24, MemOperand(x1, 8, PostIndex));
169 __ ldp(w23, w24, MemOperand(x1, 8, PreIndex));
170 __ ldp(x25, x26, MemOperand(x0));
171 __ ldp(x25, x26, MemOperand(x1, 16, PostIndex));
172 __ ldp(x25, x26, MemOperand(x1, 16, PreIndex));
173 __ ldpsw(x27, x28, MemOperand(x0));
174 __ ldpsw(x27, x28, MemOperand(x1, 8, PostIndex));
175 __ ldpsw(x27, x28, MemOperand(x1, 8, PreIndex));
176 __ ldr(w29, MemOperand(x0));
177 __ ldr(w29, MemOperand(x1, 4, PostIndex));
178 __ ldr(w29, MemOperand(x1, 4, PreIndex));
179 __ ldr(x2, MemOperand(x0));
180 __ ldr(x2, MemOperand(x1, 8, PostIndex));
181 __ ldr(x2, MemOperand(x1, 8, PreIndex));
182 __ ldrb(w3, MemOperand(x0));
183 __ ldrb(w3, MemOperand(x1, 1, PostIndex));
184 __ ldrb(w3, MemOperand(x1, 1, PreIndex));
185 __ ldrb(x4, MemOperand(x0));
186 __ ldrb(x4, MemOperand(x1, 1, PostIndex));
187 __ ldrb(x4, MemOperand(x1, 1, PreIndex));
188 __ ldrh(w5, MemOperand(x0));
189 __ ldrh(w5, MemOperand(x1, 2, PostIndex));
190 __ ldrh(w5, MemOperand(x1, 2, PreIndex));
191 __ ldrh(x6, MemOperand(x0));
192 __ ldrh(x6, MemOperand(x1, 2, PostIndex));
193 __ ldrh(x6, MemOperand(x1, 2, PreIndex));
194 __ ldrsb(w7, MemOperand(x0));
195 __ ldrsb(w7, MemOperand(x1, 1, PostIndex));
196 __ ldrsb(w7, MemOperand(x1, 1, PreIndex));
197 __ ldrsb(x8, MemOperand(x0));
198 __ ldrsb(x8, MemOperand(x1, 1, PostIndex));
199 __ ldrsb(x8, MemOperand(x1, 1, PreIndex));
200 __ ldrsh(w9, MemOperand(x0));
201 __ ldrsh(w9, MemOperand(x1, 2, PostIndex));
202 __ ldrsh(w9, MemOperand(x1, 2, PreIndex));
203 __ ldrsh(x10, MemOperand(x0));
204 __ ldrsh(x10, MemOperand(x1, 2, PostIndex));
205 __ ldrsh(x10, MemOperand(x1, 2, PreIndex));
206 __ ldrsw(x11, MemOperand(x0));
207 __ ldrsw(x11, MemOperand(x1, 4, PostIndex));
208 __ ldrsw(x11, MemOperand(x1, 4, PreIndex));
209 __ ldur(w12, MemOperand(x0, 7));
210 __ ldur(x13, MemOperand(x0, 15));
211 __ ldurb(w14, MemOperand(x0, 1));
212 __ ldurb(x15, MemOperand(x0, 1));
213 __ ldurh(w16, MemOperand(x0, 3));
214 __ ldurh(x17, MemOperand(x0, 3));
215 __ ldursb(w18, MemOperand(x0, 1));
216 __ ldursb(x19, MemOperand(x0, 1));
217 __ ldursh(w20, MemOperand(x0, 3));
218 __ ldursh(x21, MemOperand(x0, 3));
219 __ ldursw(x22, MemOperand(x0, 7));
220 __ ldxp(w23, w24, MemOperand(x0));
221 __ ldxp(x25, x26, MemOperand(x0));
222 __ ldxr(w27, MemOperand(x0));
223 __ ldxr(x28, MemOperand(x0));
224 __ ldxrb(w29, MemOperand(x0));
225 __ ldxrb(x2, MemOperand(x0));
226 __ ldxrh(w3, MemOperand(x0));
227 __ ldxrh(x4, MemOperand(x0));
228 __ lsl(w5, w6, 2);
229 __ lsl(x7, x8, 3);
230 __ lslv(w9, w10, w11);
231 __ lslv(x12, x13, x14);
232 __ lsr(w15, w16, 4);
233 __ lsr(x17, x18, 5);
234 __ lsrv(w19, w20, w21);
235 __ lsrv(x22, x23, x24);
236 __ madd(w25, w26, w27, w28);
237 __ madd(x29, x2, x3, x4);
238 __ mneg(w5, w6, w7);
239 __ mneg(x8, x9, x10);
240 __ mov(w11, w12);
241 __ mov(x13, x14);
242 __ movk(w15, 130);
243 __ movk(x16, 131);
244 __ movn(w17, 132);
245 __ movn(x18, 133);
246 __ movz(w19, 134);
247 __ movz(x20, 135);
248 __ msub(w22, w23, w24, w25);
249 __ msub(x26, x27, x28, x29);
250 __ mul(w2, w3, w4);
251 __ mul(x5, x6, x7);
252 __ mvn(w8, w9);
253 __ mvn(x10, x11);
254 __ neg(w12, w13);
255 __ neg(x14, x15);
256 __ negs(w16, w17);
257 __ negs(x18, x19);
258 __ ngc(w20, w21);
259 __ ngc(x22, x23);
260 __ ngcs(w24, w25);
261 __ ngcs(x26, x27);
262 __ nop();
263 __ orn(w28, w29, w2);
264 __ orn(x3, x4, x5);
265 __ orr(w6, w7, w8);
266 __ orr(x9, x10, x11);
267 __ prfm(PLDL1KEEP, MemOperand(x0, 4));
268 __ prfum(PLDL1KEEP, MemOperand(x0, 1));
269 __ rbit(w12, w13);
270 __ rbit(x14, x15);
271 __ rev(w16, w17);
272 __ rev(x18, x19);
273 __ rev16(w20, w21);
274 __ rev16(x22, x23);
275 __ rev32(x24, x25);
276 __ rorv(w26, w27, w28);
277 __ rorv(x29, x2, x3);
278 __ sbc(w4, w5, w6);
279 __ sbc(x7, x8, x9);
280 __ sbcs(w10, w11, w12);
281 __ sbcs(x13, x14, x15);
282 __ sbfiz(w16, w17, 2, 3);
283 __ sbfiz(x18, x19, 4, 5);
284 __ sbfx(w22, w23, 6, 7);
285 __ sbfx(x24, x25, 8, 9);
286 __ sdiv(w26, w27, w28);
287 __ sdiv(x29, x2, x3);
288 __ smulh(x12, x13, x14);
289 __ stlr(w18, MemOperand(x0));
290 __ stlr(x19, MemOperand(x0));
291 __ stlrb(w20, MemOperand(x0));
292 __ stlrb(x21, MemOperand(x0));
293 __ stlrh(w22, MemOperand(x0));
294 __ stlrh(x23, MemOperand(x0));
295 __ stlxp(w24, w25, w26, MemOperand(x0));
296 __ stlxp(x27, x28, x29, MemOperand(x0));
297 __ stlxr(w2, w3, MemOperand(x0));
298 __ stlxr(x4, x5, MemOperand(x0));
299 __ stlxrb(w6, w7, MemOperand(x0));
300 __ stlxrb(x8, x9, MemOperand(x0));
301 __ stlxrh(w10, w11, MemOperand(x0));
302 __ stlxrh(x12, x13, MemOperand(x0));
303 __ stnp(w14, w15, MemOperand(x0));
304 __ stnp(x16, x17, MemOperand(x0));
305 __ stp(w18, w19, MemOperand(x0));
306 __ stp(w18, w19, MemOperand(x1, 8, PostIndex));
307 __ stp(w18, w19, MemOperand(x1, 8, PreIndex));
308 __ stp(x20, x21, MemOperand(x0));
309 __ stp(x20, x21, MemOperand(x1, 16, PostIndex));
310 __ stp(x20, x21, MemOperand(x1, 16, PreIndex));
311 __ str(w22, MemOperand(x0));
312 __ str(w22, MemOperand(x1, 4, PostIndex));
313 __ str(w22, MemOperand(x1, 4, PreIndex));
314 __ str(x23, MemOperand(x0));
315 __ str(x23, MemOperand(x1, 8, PostIndex));
316 __ str(x23, MemOperand(x1, 8, PreIndex));
317 __ strb(w24, MemOperand(x0));
318 __ strb(w24, MemOperand(x1, 1, PostIndex));
319 __ strb(w24, MemOperand(x1, 1, PreIndex));
320 __ strb(x25, MemOperand(x0));
321 __ strb(x25, MemOperand(x1, 1, PostIndex));
322 __ strb(x25, MemOperand(x1, 1, PreIndex));
323 __ strh(w26, MemOperand(x0));
324 __ strh(w26, MemOperand(x1, 2, PostIndex));
325 __ strh(w26, MemOperand(x1, 2, PreIndex));
326 __ strh(x27, MemOperand(x0));
327 __ strh(x27, MemOperand(x1, 2, PostIndex));
328 __ strh(x27, MemOperand(x1, 2, PreIndex));
329 __ stur(w28, MemOperand(x0, 7));
330 __ stur(x29, MemOperand(x0, 15));
331 __ sturb(w2, MemOperand(x0, 1));
332 __ sturb(x3, MemOperand(x0, 1));
333 __ sturh(w4, MemOperand(x0, 3));
334 __ sturh(x5, MemOperand(x0, 3));
335 __ stxp(w6, w7, w8, MemOperand(x0));
336 __ stxp(x9, x10, x11, MemOperand(x0));
337 __ stxr(w12, w13, MemOperand(x0));
338 __ stxr(x14, x15, MemOperand(x0));
339 __ stxrb(w16, w17, MemOperand(x0));
340 __ stxrb(x18, x19, MemOperand(x0));
341 __ stxrh(w20, w21, MemOperand(x0));
342 __ stxrh(x22, x23, MemOperand(x0));
343 __ sub(w24, w25, w26);
344 __ sub(x27, x28, x29);
345 __ subs(w2, w3, w4);
346 __ subs(x5, x6, x7);
347 __ sxtb(w8, w9);
348 __ sxtb(x10, x11);
349 __ sxth(w12, w13);
350 __ sxth(x14, x15);
351 __ sxtw(w16, w17);
352 __ sxtw(x18, x19);
353 __ tst(w20, w21);
354 __ tst(x22, x23);
355 __ ubfiz(w24, w25, 10, 11);
356 __ ubfiz(x26, x27, 12, 13);
357 __ ubfm(w28, w29, 14, 15);
358 __ ubfm(x2, x3, 1, 2);
359 __ ubfx(w4, w5, 3, 4);
360 __ ubfx(x6, x7, 5, 6);
361 __ udiv(w8, w9, w10);
362 __ udiv(x11, x12, x13);
363 __ umulh(x22, x23, x24);
364 __ uxtb(w28, w29);
365 __ uxtb(x2, x3);
366 __ uxth(w4, w5);
367 __ uxth(x6, x7);
368 __ uxtw(w8, w9);
369 __ uxtw(x10, x11);
370
371 // Branch tests.
372 {
373 Label end;
374 // Branch to the next instruction.
375 __ b(&end);
376 __ bind(&end);
377 }
378 {
379 Label loop, end;
380 __ subs(x3, x3, x3);
381 __ bind(&loop);
382 // Not-taken branch (the first time).
383 // Taken branch (the second time).
384 __ b(&end, ne);
385 __ cmp(x3, 1);
386 // Backwards branch.
387 __ b(&loop);
388 __ bind(&end);
389 }
390 }
391
392
GenerateTestSequenceFP(MacroAssembler * masm)393 static void GenerateTestSequenceFP(MacroAssembler* masm) {
394 ExactAssemblyScope guard(masm,
395 masm->GetBuffer()->GetRemainingBytes(),
396 ExactAssemblyScope::kMaximumSize);
397
398 // Scalar floating point instructions.
399 __ fabd(d13, d2, d19);
400 __ fabd(s8, s10, s30);
401 __ fabs(d1, d1);
402 __ fabs(s25, s7);
403 __ facge(d1, d23, d16);
404 __ facge(s4, s17, s1);
405 __ facgt(d2, d21, d24);
406 __ facgt(s12, s26, s12);
407 __ fadd(d13, d11, d22);
408 __ fadd(s27, s19, s8);
409 __ fccmp(d6, d10, NoFlag, hs);
410 __ fccmp(s29, s20, NZVFlag, ne);
411 __ fccmpe(d10, d2, NZCFlag, al);
412 __ fccmpe(s3, s3, NZVFlag, pl);
413 __ fcmeq(d19, d8, d10);
414 __ fcmeq(d0, d18, 0.0);
415 __ fcmeq(s1, s4, s30);
416 __ fcmeq(s22, s29, 0.0);
417 __ fcmge(d27, d18, d1);
418 __ fcmge(d31, d28, 0.0);
419 __ fcmge(s31, s19, s9);
420 __ fcmge(s1, s25, 0.0);
421 __ fcmgt(d18, d1, d15);
422 __ fcmgt(d3, d31, 0.0);
423 __ fcmgt(s11, s25, s2);
424 __ fcmgt(s17, s16, 0.0);
425 __ fcmle(d24, d17, 0.0);
426 __ fcmle(s11, s8, 0.0);
427 __ fcmlt(d5, d31, 0.0);
428 __ fcmlt(s18, s23, 0.0);
429 __ fcmp(d10, d24);
430 __ fcmp(d13, 0.0);
431 __ fcmp(s18, s6);
432 __ fcmp(s16, 0.0);
433 __ fcmpe(d9, d17);
434 __ fcmpe(d29, 0.0);
435 __ fcmpe(s16, s17);
436 __ fcmpe(s22, 0.0);
437 __ fcsel(d10, d14, d19, gt);
438 __ fcsel(s22, s18, s2, ge);
439 __ fcvt(d4, h24);
440 __ fcvt(d11, s2);
441 __ fcvt(h8, d9);
442 __ fcvt(h12, s1);
443 __ fcvt(s12, d31);
444 __ fcvt(s27, h25);
445 __ fcvtas(d28, d16);
446 __ fcvtas(s3, s5);
447 __ fcvtas(w18, d31);
448 __ fcvtas(w29, s24);
449 __ fcvtas(x9, d1);
450 __ fcvtas(x30, s2);
451 __ fcvtau(d14, d0);
452 __ fcvtau(s31, s14);
453 __ fcvtau(w16, d2);
454 __ fcvtau(w18, s0);
455 __ fcvtau(x26, d7);
456 __ fcvtau(x25, s19);
457 __ fcvtms(d30, d25);
458 __ fcvtms(s12, s15);
459 __ fcvtms(w9, d7);
460 __ fcvtms(w19, s6);
461 __ fcvtms(x6, d6);
462 __ fcvtms(x22, s7);
463 __ fcvtmu(d27, d0);
464 __ fcvtmu(s8, s22);
465 __ fcvtmu(w29, d19);
466 __ fcvtmu(w26, s0);
467 __ fcvtmu(x13, d5);
468 __ fcvtmu(x5, s18);
469 __ fcvtns(d30, d15);
470 __ fcvtns(s10, s11);
471 __ fcvtns(w21, d15);
472 __ fcvtns(w18, s10);
473 __ fcvtns(x8, d17);
474 __ fcvtns(x17, s12);
475 __ fcvtnu(d0, d21);
476 __ fcvtnu(s6, s25);
477 __ fcvtnu(w29, d11);
478 __ fcvtnu(w25, s31);
479 __ fcvtnu(x30, d11);
480 __ fcvtnu(x27, s18);
481 __ fcvtps(d11, d22);
482 __ fcvtps(s29, s20);
483 __ fcvtps(w15, d25);
484 __ fcvtps(w16, s7);
485 __ fcvtps(x13, d20);
486 __ fcvtps(x3, s23);
487 __ fcvtpu(d24, d1);
488 __ fcvtpu(s14, s24);
489 __ fcvtpu(w26, d29);
490 __ fcvtpu(wzr, s26);
491 __ fcvtpu(x27, d6);
492 __ fcvtpu(x29, s14);
493 __ fcvtxn(s12, d12);
494 __ fcvtzs(d15, d0);
495 __ fcvtzs(d13, d4, 42);
496 __ fcvtzs(s8, s11);
497 __ fcvtzs(s31, s6, 25);
498 __ fcvtzs(w6, d9);
499 __ fcvtzs(w25, d10, 20);
500 __ fcvtzs(w9, s1);
501 __ fcvtzs(w17, s29, 30);
502 __ fcvtzs(x19, d2);
503 __ fcvtzs(x22, d14, 1);
504 __ fcvtzs(x14, s20);
505 __ fcvtzs(x3, s30, 33);
506 __ fcvtzu(d28, d15);
507 __ fcvtzu(d0, d4, 3);
508 __ fcvtzu(s2, s5);
509 __ fcvtzu(s4, s0, 30);
510 __ fcvtzu(w11, d4);
511 __ fcvtzu(w7, d24, 32);
512 __ fcvtzu(w18, s24);
513 __ fcvtzu(w14, s27, 4);
514 __ fcvtzu(x22, d11);
515 __ fcvtzu(x8, d27, 52);
516 __ fcvtzu(x7, s20);
517 __ fcvtzu(x22, s7, 44);
518 __ fdiv(d6, d14, d15);
519 __ fdiv(s26, s5, s25);
520 __ fmadd(d18, d26, d12, d30);
521 __ fmadd(s13, s9, s28, s4);
522 __ fmax(d12, d5, d5);
523 __ fmax(s12, s28, s6);
524 __ fmaxnm(d28, d4, d2);
525 __ fmaxnm(s6, s10, s8);
526 __ fmin(d20, d20, d18);
527 __ fmin(s7, s13, s16);
528 __ fminnm(d19, d14, d30);
529 __ fminnm(s0, s1, s1);
530 __ fmov(d13, d6);
531 __ fmov(d2, x17);
532 __ fmov(d8, -2.5000);
533 __ fmov(s5, s3);
534 __ fmov(s25, w20);
535 __ fmov(s21, 2.8750f);
536 __ fmov(w18, s24);
537 __ fmov(x18, d2);
538 __ fmsub(d20, d30, d3, d19);
539 __ fmsub(s5, s19, s4, s12);
540 __ fmul(d30, d27, d23);
541 __ fmul(s25, s17, s15);
542 __ fmulx(d4, d17, d1);
543 __ fmulx(s14, s25, s4);
544 __ fneg(d15, d0);
545 __ fneg(s14, s15);
546 __ fnmadd(d0, d16, d22, d31);
547 __ fnmadd(s0, s18, s26, s18);
548 __ fnmsub(d19, d12, d15, d21);
549 __ fnmsub(s29, s0, s11, s26);
550 __ fnmul(d31, d19, d1);
551 __ fnmul(s18, s3, s17);
552 __ frecpe(d7, d21);
553 __ frecpe(s29, s17);
554 __ frecps(d11, d26, d17);
555 __ frecps(s18, s27, s1);
556 __ frecpx(d15, d18);
557 __ frecpx(s5, s10);
558 __ frinta(d16, d30);
559 __ frinta(s1, s22);
560 __ frinti(d19, d29);
561 __ frinti(s14, s21);
562 __ frintm(d20, d30);
563 __ frintm(s1, s16);
564 __ frintn(d30, d1);
565 __ frintn(s24, s10);
566 __ frintp(d4, d20);
567 __ frintp(s13, s3);
568 __ frintx(d13, d20);
569 __ frintx(s17, s7);
570 __ frintz(d0, d8);
571 __ frintz(s15, s29);
572 __ frsqrte(d21, d10);
573 __ frsqrte(s17, s25);
574 __ frsqrts(d4, d29, d17);
575 __ frsqrts(s14, s3, s24);
576 __ fsqrt(d14, d17);
577 __ fsqrt(s4, s14);
578 __ fsub(d13, d19, d7);
579 __ fsub(s3, s21, s27);
580 __ scvtf(d31, d16);
581 __ scvtf(d26, d31, 24);
582 __ scvtf(d6, w16);
583 __ scvtf(d5, w20, 6);
584 __ scvtf(d16, x8);
585 __ scvtf(d15, x8, 10);
586 __ scvtf(s7, s4);
587 __ scvtf(s8, s15, 14);
588 __ scvtf(s29, w10);
589 __ scvtf(s15, w21, 11);
590 __ scvtf(s27, x26);
591 __ scvtf(s26, x12, 38);
592 __ ucvtf(d0, d9);
593 __ ucvtf(d5, d22, 47);
594 __ ucvtf(d30, w27);
595 __ ucvtf(d3, w19, 1);
596 __ ucvtf(d28, x21);
597 __ ucvtf(d27, x30, 35);
598 __ ucvtf(s11, s5);
599 __ ucvtf(s0, s23, 14);
600 __ ucvtf(s20, w19);
601 __ ucvtf(s21, w22, 18);
602 __ ucvtf(s6, x13);
603 __ ucvtf(s7, x2, 21);
604 }
605
606
GenerateTestSequenceNEON(MacroAssembler * masm)607 static void GenerateTestSequenceNEON(MacroAssembler* masm) {
608 ExactAssemblyScope guard(masm,
609 masm->GetBuffer()->GetRemainingBytes(),
610 ExactAssemblyScope::kMaximumSize);
611
612 // NEON integer instructions.
613 __ abs(d19, d0);
614 __ abs(v16.V16B(), v11.V16B());
615 __ abs(v0.V2D(), v31.V2D());
616 __ abs(v27.V2S(), v25.V2S());
617 __ abs(v21.V4H(), v27.V4H());
618 __ abs(v16.V4S(), v1.V4S());
619 __ abs(v31.V8B(), v5.V8B());
620 __ abs(v29.V8H(), v13.V8H());
621 __ add(d10, d5, d17);
622 __ add(v31.V16B(), v15.V16B(), v23.V16B());
623 __ add(v10.V2D(), v31.V2D(), v14.V2D());
624 __ add(v15.V2S(), v14.V2S(), v19.V2S());
625 __ add(v27.V4H(), v23.V4H(), v17.V4H());
626 __ add(v25.V4S(), v28.V4S(), v29.V4S());
627 __ add(v13.V8B(), v7.V8B(), v18.V8B());
628 __ add(v4.V8H(), v2.V8H(), v1.V8H());
629 __ addhn(v10.V2S(), v14.V2D(), v15.V2D());
630 __ addhn(v10.V4H(), v30.V4S(), v26.V4S());
631 __ addhn(v31.V8B(), v12.V8H(), v22.V8H());
632 __ addhn2(v16.V16B(), v21.V8H(), v20.V8H());
633 __ addhn2(v0.V4S(), v2.V2D(), v17.V2D());
634 __ addhn2(v31.V8H(), v7.V4S(), v17.V4S());
635 __ addp(d14, v19.V2D());
636 __ addp(v3.V16B(), v8.V16B(), v28.V16B());
637 __ addp(v8.V2D(), v5.V2D(), v17.V2D());
638 __ addp(v22.V2S(), v30.V2S(), v26.V2S());
639 __ addp(v29.V4H(), v24.V4H(), v14.V4H());
640 __ addp(v30.V4S(), v26.V4S(), v24.V4S());
641 __ addp(v12.V8B(), v26.V8B(), v7.V8B());
642 __ addp(v17.V8H(), v8.V8H(), v12.V8H());
643 __ addv(b27, v23.V16B());
644 __ addv(b12, v20.V8B());
645 __ addv(h27, v30.V4H());
646 __ addv(h19, v14.V8H());
647 __ addv(s14, v27.V4S());
648 __ and_(v10.V16B(), v8.V16B(), v27.V16B());
649 __ and_(v5.V8B(), v1.V8B(), v16.V8B());
650 __ bic(v26.V16B(), v3.V16B(), v24.V16B());
651 __ bic(v7.V2S(), 0xe4, 16);
652 __ bic(v28.V4H(), 0x23, 8);
653 __ bic(v29.V4S(), 0xac);
654 __ bic(v12.V8B(), v31.V8B(), v21.V8B());
655 __ bic(v18.V8H(), 0x98);
656 __ bif(v12.V16B(), v26.V16B(), v8.V16B());
657 __ bif(v2.V8B(), v23.V8B(), v27.V8B());
658 __ bit(v8.V16B(), v3.V16B(), v13.V16B());
659 __ bit(v5.V8B(), v5.V8B(), v23.V8B());
660 __ bsl(v9.V16B(), v31.V16B(), v23.V16B());
661 __ bsl(v14.V8B(), v7.V8B(), v3.V8B());
662 __ cls(v29.V16B(), v5.V16B());
663 __ cls(v21.V2S(), v0.V2S());
664 __ cls(v1.V4H(), v12.V4H());
665 __ cls(v27.V4S(), v10.V4S());
666 __ cls(v19.V8B(), v4.V8B());
667 __ cls(v15.V8H(), v14.V8H());
668 __ clz(v1.V16B(), v4.V16B());
669 __ clz(v27.V2S(), v17.V2S());
670 __ clz(v9.V4H(), v9.V4H());
671 __ clz(v31.V4S(), v15.V4S());
672 __ clz(v14.V8B(), v19.V8B());
673 __ clz(v6.V8H(), v11.V8H());
674 __ cmeq(d18, d5, d29);
675 __ cmeq(d14, d31, 0);
676 __ cmeq(v19.V16B(), v3.V16B(), v22.V16B());
677 __ cmeq(v15.V16B(), v9.V16B(), 0);
678 __ cmeq(v12.V2D(), v16.V2D(), v10.V2D());
679 __ cmeq(v8.V2D(), v22.V2D(), 0);
680 __ cmeq(v2.V2S(), v3.V2S(), v9.V2S());
681 __ cmeq(v16.V2S(), v25.V2S(), 0);
682 __ cmeq(v6.V4H(), v23.V4H(), v20.V4H());
683 __ cmeq(v16.V4H(), v13.V4H(), 0);
684 __ cmeq(v21.V4S(), v17.V4S(), v2.V4S());
685 __ cmeq(v6.V4S(), v25.V4S(), 0);
686 __ cmeq(v16.V8B(), v13.V8B(), v2.V8B());
687 __ cmeq(v21.V8B(), v16.V8B(), 0);
688 __ cmeq(v20.V8H(), v7.V8H(), v25.V8H());
689 __ cmeq(v26.V8H(), v8.V8H(), 0);
690 __ cmge(d16, d13, d31);
691 __ cmge(d25, d24, 0);
692 __ cmge(v17.V16B(), v19.V16B(), v17.V16B());
693 __ cmge(v22.V16B(), v30.V16B(), 0);
694 __ cmge(v28.V2D(), v20.V2D(), v26.V2D());
695 __ cmge(v6.V2D(), v23.V2D(), 0);
696 __ cmge(v25.V2S(), v22.V2S(), v3.V2S());
697 __ cmge(v21.V2S(), v11.V2S(), 0);
698 __ cmge(v16.V4H(), v3.V4H(), v12.V4H());
699 __ cmge(v23.V4H(), v9.V4H(), 0);
700 __ cmge(v7.V4S(), v2.V4S(), v11.V4S());
701 __ cmge(v0.V4S(), v22.V4S(), 0);
702 __ cmge(v10.V8B(), v30.V8B(), v9.V8B());
703 __ cmge(v21.V8B(), v8.V8B(), 0);
704 __ cmge(v2.V8H(), v7.V8H(), v26.V8H());
705 __ cmge(v19.V8H(), v10.V8H(), 0);
706 __ cmgt(d6, d13, d1);
707 __ cmgt(d30, d24, 0);
708 __ cmgt(v20.V16B(), v25.V16B(), v27.V16B());
709 __ cmgt(v0.V16B(), v25.V16B(), 0);
710 __ cmgt(v22.V2D(), v25.V2D(), v1.V2D());
711 __ cmgt(v16.V2D(), v16.V2D(), 0);
712 __ cmgt(v5.V2S(), v9.V2S(), v15.V2S());
713 __ cmgt(v12.V2S(), v18.V2S(), 0);
714 __ cmgt(v28.V4H(), v18.V4H(), v11.V4H());
715 __ cmgt(v22.V4H(), v3.V4H(), 0);
716 __ cmgt(v5.V4S(), v11.V4S(), v27.V4S());
717 __ cmgt(v13.V4S(), v20.V4S(), 0);
718 __ cmgt(v27.V8B(), v31.V8B(), v7.V8B());
719 __ cmgt(v5.V8B(), v0.V8B(), 0);
720 __ cmgt(v22.V8H(), v28.V8H(), v13.V8H());
721 __ cmgt(v6.V8H(), v2.V8H(), 0);
722 __ cmhi(d21, d8, d22);
723 __ cmhi(v18.V16B(), v19.V16B(), v19.V16B());
724 __ cmhi(v7.V2D(), v0.V2D(), v21.V2D());
725 __ cmhi(v15.V2S(), v19.V2S(), v0.V2S());
726 __ cmhi(v31.V4H(), v7.V4H(), v12.V4H());
727 __ cmhi(v9.V4S(), v16.V4S(), v22.V4S());
728 __ cmhi(v7.V8B(), v24.V8B(), v28.V8B());
729 __ cmhi(v11.V8H(), v10.V8H(), v25.V8H());
730 __ cmhs(d1, d12, d17);
731 __ cmhs(v21.V16B(), v25.V16B(), v30.V16B());
732 __ cmhs(v8.V2D(), v2.V2D(), v26.V2D());
733 __ cmhs(v1.V2S(), v22.V2S(), v29.V2S());
734 __ cmhs(v26.V4H(), v30.V4H(), v30.V4H());
735 __ cmhs(v19.V4S(), v20.V4S(), v16.V4S());
736 __ cmhs(v1.V8B(), v3.V8B(), v26.V8B());
737 __ cmhs(v20.V8H(), v28.V8H(), v8.V8H());
738 __ cmle(d30, d24, 0);
739 __ cmle(v0.V16B(), v3.V16B(), 0);
740 __ cmle(v2.V2D(), v30.V2D(), 0);
741 __ cmle(v7.V2S(), v10.V2S(), 0);
742 __ cmle(v9.V4H(), v31.V4H(), 0);
743 __ cmle(v9.V4S(), v18.V4S(), 0);
744 __ cmle(v21.V8B(), v31.V8B(), 0);
745 __ cmle(v29.V8H(), v21.V8H(), 0);
746 __ cmlt(d25, d23, 0);
747 __ cmlt(v7.V16B(), v21.V16B(), 0);
748 __ cmlt(v7.V2D(), v30.V2D(), 0);
749 __ cmlt(v25.V2S(), v28.V2S(), 0);
750 __ cmlt(v0.V4H(), v11.V4H(), 0);
751 __ cmlt(v24.V4S(), v5.V4S(), 0);
752 __ cmlt(v26.V8B(), v11.V8B(), 0);
753 __ cmlt(v1.V8H(), v21.V8H(), 0);
754 __ cmtst(d28, d23, d30);
755 __ cmtst(v26.V16B(), v6.V16B(), v31.V16B());
756 __ cmtst(v1.V2D(), v21.V2D(), v4.V2D());
757 __ cmtst(v27.V2S(), v26.V2S(), v20.V2S());
758 __ cmtst(v26.V4H(), v0.V4H(), v18.V4H());
759 __ cmtst(v25.V4S(), v16.V4S(), v4.V4S());
760 __ cmtst(v11.V8B(), v10.V8B(), v9.V8B());
761 __ cmtst(v0.V8H(), v2.V8H(), v1.V8H());
762 __ cnt(v25.V16B(), v15.V16B());
763 __ cnt(v28.V8B(), v6.V8B());
764 __ dup(v6.V16B(), v7.B(), 7);
765 __ dup(v9.V16B(), w20);
766 __ dup(v12.V2D(), v13.D(), 1);
767 __ dup(v9.V2D(), xzr);
768 __ dup(v4.V2S(), v26.S(), 2);
769 __ dup(v3.V2S(), w12);
770 __ dup(v22.V4H(), v5.H(), 7);
771 __ dup(v16.V4H(), w25);
772 __ dup(v20.V4S(), v10.S(), 2);
773 __ dup(v10.V4S(), w7);
774 __ dup(v30.V8B(), v30.B(), 2);
775 __ dup(v31.V8B(), w15);
776 __ dup(v28.V8H(), v17.H(), 4);
777 __ dup(v2.V8H(), w3);
778 __ eor(v29.V16B(), v25.V16B(), v3.V16B());
779 __ eor(v3.V8B(), v16.V8B(), v28.V8B());
780 __ ext(v1.V16B(), v26.V16B(), v6.V16B(), 1);
781 __ ext(v2.V8B(), v30.V8B(), v1.V8B(), 1);
782 __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0));
783 __ ld1(v23.V16B(),
784 v24.V16B(),
785 v25.V16B(),
786 v26.V16B(),
787 MemOperand(x1, x2, PostIndex));
788 __ ld1(v5.V16B(),
789 v6.V16B(),
790 v7.V16B(),
791 v8.V16B(),
792 MemOperand(x1, 64, PostIndex));
793 __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), MemOperand(x0));
794 __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), MemOperand(x1, x2, PostIndex));
795 __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x1, 48, PostIndex));
796 __ ld1(v17.V16B(), v18.V16B(), MemOperand(x0));
797 __ ld1(v20.V16B(), v21.V16B(), MemOperand(x1, x2, PostIndex));
798 __ ld1(v28.V16B(), v29.V16B(), MemOperand(x1, 32, PostIndex));
799 __ ld1(v29.V16B(), MemOperand(x0));
800 __ ld1(v21.V16B(), MemOperand(x1, x2, PostIndex));
801 __ ld1(v4.V16B(), MemOperand(x1, 16, PostIndex));
802 __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), MemOperand(x0));
803 __ ld1(v17.V1D(),
804 v18.V1D(),
805 v19.V1D(),
806 v20.V1D(),
807 MemOperand(x1, x2, PostIndex));
808 __ ld1(v28.V1D(),
809 v29.V1D(),
810 v30.V1D(),
811 v31.V1D(),
812 MemOperand(x1, 32, PostIndex));
813 __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), MemOperand(x0));
814 __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), MemOperand(x1, x2, PostIndex));
815 __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), MemOperand(x1, 24, PostIndex));
816 __ ld1(v29.V1D(), v30.V1D(), MemOperand(x0));
817 __ ld1(v31.V1D(), v0.V1D(), MemOperand(x1, x2, PostIndex));
818 __ ld1(v3.V1D(), v4.V1D(), MemOperand(x1, 16, PostIndex));
819 __ ld1(v28.V1D(), MemOperand(x0));
820 __ ld1(v11.V1D(), MemOperand(x1, x2, PostIndex));
821 __ ld1(v29.V1D(), MemOperand(x1, 8, PostIndex));
822 __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), MemOperand(x0));
823 __ ld1(v8.V2D(),
824 v9.V2D(),
825 v10.V2D(),
826 v11.V2D(),
827 MemOperand(x1, x2, PostIndex));
828 __ ld1(v14.V2D(),
829 v15.V2D(),
830 v16.V2D(),
831 v17.V2D(),
832 MemOperand(x1, 64, PostIndex));
833 __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x0));
834 __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex));
835 __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x1, 48, PostIndex));
836 __ ld1(v18.V2D(), v19.V2D(), MemOperand(x0));
837 __ ld1(v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex));
838 __ ld1(v17.V2D(), v18.V2D(), MemOperand(x1, 32, PostIndex));
839 __ ld1(v5.V2D(), MemOperand(x0));
840 __ ld1(v6.V2D(), MemOperand(x1, x2, PostIndex));
841 __ ld1(v15.V2D(), MemOperand(x1, 16, PostIndex));
842 __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x0));
843 __ ld1(v24.V2S(),
844 v25.V2S(),
845 v26.V2S(),
846 v27.V2S(),
847 MemOperand(x1, x2, PostIndex));
848 __ ld1(v27.V2S(),
849 v28.V2S(),
850 v29.V2S(),
851 v30.V2S(),
852 MemOperand(x1, 32, PostIndex));
853 __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x0));
854 __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), MemOperand(x1, x2, PostIndex));
855 __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x1, 24, PostIndex));
856 __ ld1(v0.V2S(), v1.V2S(), MemOperand(x0));
857 __ ld1(v13.V2S(), v14.V2S(), MemOperand(x1, x2, PostIndex));
858 __ ld1(v3.V2S(), v4.V2S(), MemOperand(x1, 16, PostIndex));
859 __ ld1(v26.V2S(), MemOperand(x0));
860 __ ld1(v0.V2S(), MemOperand(x1, x2, PostIndex));
861 __ ld1(v11.V2S(), MemOperand(x1, 8, PostIndex));
862 __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0));
863 __ ld1(v24.V4H(),
864 v25.V4H(),
865 v26.V4H(),
866 v27.V4H(),
867 MemOperand(x1, x2, PostIndex));
868 __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex));
869 __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), MemOperand(x0));
870 __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), MemOperand(x1, x2, PostIndex));
871 __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 24, PostIndex));
872 __ ld1(v3.V4H(), v4.V4H(), MemOperand(x0));
873 __ ld1(v3.V4H(), v4.V4H(), MemOperand(x1, x2, PostIndex));
874 __ ld1(v23.V4H(), v24.V4H(), MemOperand(x1, 16, PostIndex));
875 __ ld1(v26.V4H(), MemOperand(x0));
876 __ ld1(v1.V4H(), MemOperand(x1, x2, PostIndex));
877 __ ld1(v14.V4H(), MemOperand(x1, 8, PostIndex));
878 __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), MemOperand(x0));
879 __ ld1(v28.V4S(),
880 v29.V4S(),
881 v30.V4S(),
882 v31.V4S(),
883 MemOperand(x1, x2, PostIndex));
884 __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), MemOperand(x1, 64, PostIndex));
885 __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0));
886 __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), MemOperand(x1, x2, PostIndex));
887 __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x1, 48, PostIndex));
888 __ ld1(v20.V4S(), v21.V4S(), MemOperand(x0));
889 __ ld1(v30.V4S(), v31.V4S(), MemOperand(x1, x2, PostIndex));
890 __ ld1(v11.V4S(), v12.V4S(), MemOperand(x1, 32, PostIndex));
891 __ ld1(v15.V4S(), MemOperand(x0));
892 __ ld1(v12.V4S(), MemOperand(x1, x2, PostIndex));
893 __ ld1(v0.V4S(), MemOperand(x1, 16, PostIndex));
894 __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), MemOperand(x0));
895 __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, x2, PostIndex));
896 __ ld1(v9.V8B(),
897 v10.V8B(),
898 v11.V8B(),
899 v12.V8B(),
900 MemOperand(x1, 32, PostIndex));
901 __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), MemOperand(x0));
902 __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x1, x2, PostIndex));
903 __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex));
904 __ ld1(v10.V8B(), v11.V8B(), MemOperand(x0));
905 __ ld1(v11.V8B(), v12.V8B(), MemOperand(x1, x2, PostIndex));
906 __ ld1(v27.V8B(), v28.V8B(), MemOperand(x1, 16, PostIndex));
907 __ ld1(v31.V8B(), MemOperand(x0));
908 __ ld1(v10.V8B(), MemOperand(x1, x2, PostIndex));
909 __ ld1(v28.V8B(), MemOperand(x1, 8, PostIndex));
910 __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0));
911 __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
912 __ ld1(v10.V8H(),
913 v11.V8H(),
914 v12.V8H(),
915 v13.V8H(),
916 MemOperand(x1, 64, PostIndex));
917 __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0));
918 __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
919 __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), MemOperand(x1, 48, PostIndex));
920 __ ld1(v4.V8H(), v5.V8H(), MemOperand(x0));
921 __ ld1(v21.V8H(), v22.V8H(), MemOperand(x1, x2, PostIndex));
922 __ ld1(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex));
923 __ ld1(v9.V8H(), MemOperand(x0));
924 __ ld1(v27.V8H(), MemOperand(x1, x2, PostIndex));
925 __ ld1(v26.V8H(), MemOperand(x1, 16, PostIndex));
926 __ ld1(v19.B(), 1, MemOperand(x0));
927 __ ld1(v12.B(), 3, MemOperand(x1, x2, PostIndex));
928 __ ld1(v27.B(), 12, MemOperand(x1, 1, PostIndex));
929 __ ld1(v10.D(), 1, MemOperand(x0));
930 __ ld1(v26.D(), 1, MemOperand(x1, x2, PostIndex));
931 __ ld1(v7.D(), 1, MemOperand(x1, 8, PostIndex));
932 __ ld1(v19.H(), 5, MemOperand(x0));
933 __ ld1(v10.H(), 1, MemOperand(x1, x2, PostIndex));
934 __ ld1(v5.H(), 4, MemOperand(x1, 2, PostIndex));
935 __ ld1(v21.S(), 2, MemOperand(x0));
936 __ ld1(v13.S(), 2, MemOperand(x1, x2, PostIndex));
937 __ ld1(v1.S(), 2, MemOperand(x1, 4, PostIndex));
938 __ ld1r(v2.V16B(), MemOperand(x0));
939 __ ld1r(v2.V16B(), MemOperand(x1, x2, PostIndex));
940 __ ld1r(v22.V16B(), MemOperand(x1, 1, PostIndex));
941 __ ld1r(v25.V1D(), MemOperand(x0));
942 __ ld1r(v9.V1D(), MemOperand(x1, x2, PostIndex));
943 __ ld1r(v23.V1D(), MemOperand(x1, 8, PostIndex));
944 __ ld1r(v19.V2D(), MemOperand(x0));
945 __ ld1r(v21.V2D(), MemOperand(x1, x2, PostIndex));
946 __ ld1r(v30.V2D(), MemOperand(x1, 8, PostIndex));
947 __ ld1r(v24.V2S(), MemOperand(x0));
948 __ ld1r(v26.V2S(), MemOperand(x1, x2, PostIndex));
949 __ ld1r(v28.V2S(), MemOperand(x1, 4, PostIndex));
950 __ ld1r(v19.V4H(), MemOperand(x0));
951 __ ld1r(v1.V4H(), MemOperand(x1, x2, PostIndex));
952 __ ld1r(v21.V4H(), MemOperand(x1, 2, PostIndex));
953 __ ld1r(v15.V4S(), MemOperand(x0));
954 __ ld1r(v21.V4S(), MemOperand(x1, x2, PostIndex));
955 __ ld1r(v23.V4S(), MemOperand(x1, 4, PostIndex));
956 __ ld1r(v26.V8B(), MemOperand(x0));
957 __ ld1r(v14.V8B(), MemOperand(x1, x2, PostIndex));
958 __ ld1r(v19.V8B(), MemOperand(x1, 1, PostIndex));
959 __ ld1r(v13.V8H(), MemOperand(x0));
960 __ ld1r(v30.V8H(), MemOperand(x1, x2, PostIndex));
961 __ ld1r(v27.V8H(), MemOperand(x1, 2, PostIndex));
962 __ ld2(v21.V16B(), v22.V16B(), MemOperand(x0));
963 __ ld2(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex));
964 __ ld2(v12.V16B(), v13.V16B(), MemOperand(x1, 32, PostIndex));
965 __ ld2(v14.V2D(), v15.V2D(), MemOperand(x0));
966 __ ld2(v0.V2D(), v1.V2D(), MemOperand(x1, x2, PostIndex));
967 __ ld2(v12.V2D(), v13.V2D(), MemOperand(x1, 32, PostIndex));
968 __ ld2(v27.V2S(), v28.V2S(), MemOperand(x0));
969 __ ld2(v2.V2S(), v3.V2S(), MemOperand(x1, x2, PostIndex));
970 __ ld2(v12.V2S(), v13.V2S(), MemOperand(x1, 16, PostIndex));
971 __ ld2(v9.V4H(), v10.V4H(), MemOperand(x0));
972 __ ld2(v23.V4H(), v24.V4H(), MemOperand(x1, x2, PostIndex));
973 __ ld2(v1.V4H(), v2.V4H(), MemOperand(x1, 16, PostIndex));
974 __ ld2(v20.V4S(), v21.V4S(), MemOperand(x0));
975 __ ld2(v10.V4S(), v11.V4S(), MemOperand(x1, x2, PostIndex));
976 __ ld2(v24.V4S(), v25.V4S(), MemOperand(x1, 32, PostIndex));
977 __ ld2(v17.V8B(), v18.V8B(), MemOperand(x0));
978 __ ld2(v13.V8B(), v14.V8B(), MemOperand(x1, x2, PostIndex));
979 __ ld2(v7.V8B(), v8.V8B(), MemOperand(x1, 16, PostIndex));
980 __ ld2(v30.V8H(), v31.V8H(), MemOperand(x0));
981 __ ld2(v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
982 __ ld2(v13.V8H(), v14.V8H(), MemOperand(x1, 32, PostIndex));
983 __ ld2(v5.B(), v6.B(), 12, MemOperand(x0));
984 __ ld2(v16.B(), v17.B(), 7, MemOperand(x1, x2, PostIndex));
985 __ ld2(v29.B(), v30.B(), 2, MemOperand(x1, 2, PostIndex));
986 __ ld2(v11.D(), v12.D(), 1, MemOperand(x0));
987 __ ld2(v26.D(), v27.D(), 0, MemOperand(x1, x2, PostIndex));
988 __ ld2(v25.D(), v26.D(), 0, MemOperand(x1, 16, PostIndex));
989 __ ld2(v18.H(), v19.H(), 7, MemOperand(x0));
990 __ ld2(v17.H(), v18.H(), 5, MemOperand(x1, x2, PostIndex));
991 __ ld2(v30.H(), v31.H(), 2, MemOperand(x1, 4, PostIndex));
992 __ ld2(v29.S(), v30.S(), 3, MemOperand(x0));
993 __ ld2(v28.S(), v29.S(), 0, MemOperand(x1, x2, PostIndex));
994 __ ld2(v6.S(), v7.S(), 1, MemOperand(x1, 8, PostIndex));
995 __ ld2r(v26.V16B(), v27.V16B(), MemOperand(x0));
996 __ ld2r(v21.V16B(), v22.V16B(), MemOperand(x1, x2, PostIndex));
997 __ ld2r(v5.V16B(), v6.V16B(), MemOperand(x1, 2, PostIndex));
998 __ ld2r(v26.V1D(), v27.V1D(), MemOperand(x0));
999 __ ld2r(v14.V1D(), v15.V1D(), MemOperand(x1, x2, PostIndex));
1000 __ ld2r(v23.V1D(), v24.V1D(), MemOperand(x1, 16, PostIndex));
1001 __ ld2r(v11.V2D(), v12.V2D(), MemOperand(x0));
1002 __ ld2r(v29.V2D(), v30.V2D(), MemOperand(x1, x2, PostIndex));
1003 __ ld2r(v15.V2D(), v16.V2D(), MemOperand(x1, 16, PostIndex));
1004 __ ld2r(v26.V2S(), v27.V2S(), MemOperand(x0));
1005 __ ld2r(v22.V2S(), v23.V2S(), MemOperand(x1, x2, PostIndex));
1006 __ ld2r(v2.V2S(), v3.V2S(), MemOperand(x1, 8, PostIndex));
1007 __ ld2r(v2.V4H(), v3.V4H(), MemOperand(x0));
1008 __ ld2r(v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex));
1009 __ ld2r(v6.V4H(), v7.V4H(), MemOperand(x1, 4, PostIndex));
1010 __ ld2r(v7.V4S(), v8.V4S(), MemOperand(x0));
1011 __ ld2r(v19.V4S(), v20.V4S(), MemOperand(x1, x2, PostIndex));
1012 __ ld2r(v21.V4S(), v22.V4S(), MemOperand(x1, 8, PostIndex));
1013 __ ld2r(v26.V8B(), v27.V8B(), MemOperand(x0));
1014 __ ld2r(v20.V8B(), v21.V8B(), MemOperand(x1, x2, PostIndex));
1015 __ ld2r(v11.V8B(), v12.V8B(), MemOperand(x1, 2, PostIndex));
1016 __ ld2r(v12.V8H(), v13.V8H(), MemOperand(x0));
1017 __ ld2r(v6.V8H(), v7.V8H(), MemOperand(x1, x2, PostIndex));
1018 __ ld2r(v25.V8H(), v26.V8H(), MemOperand(x1, 4, PostIndex));
1019 __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x0));
1020 __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), MemOperand(x1, x2, PostIndex));
1021 __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), MemOperand(x1, 48, PostIndex));
1022 __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), MemOperand(x0));
1023 __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x1, x2, PostIndex));
1024 __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), MemOperand(x1, 48, PostIndex));
1025 __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x0));
1026 __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), MemOperand(x1, x2, PostIndex));
1027 __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), MemOperand(x1, 24, PostIndex));
1028 __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), MemOperand(x0));
1029 __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), MemOperand(x1, x2, PostIndex));
1030 __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 24, PostIndex));
1031 __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0));
1032 __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), MemOperand(x1, x2, PostIndex));
1033 __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x1, 48, PostIndex));
1034 __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x0));
1035 __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex));
1036 __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), MemOperand(x1, 24, PostIndex));
1037 __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), MemOperand(x0));
1038 __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x1, x2, PostIndex));
1039 __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), MemOperand(x1, 48, PostIndex));
1040 __ ld3(v21.B(), v22.B(), v23.B(), 11, MemOperand(x0));
1041 __ ld3(v5.B(), v6.B(), v7.B(), 9, MemOperand(x1, x2, PostIndex));
1042 __ ld3(v23.B(), v24.B(), v25.B(), 0, MemOperand(x1, 3, PostIndex));
1043 __ ld3(v16.D(), v17.D(), v18.D(), 0, MemOperand(x0));
1044 __ ld3(v30.D(), v31.D(), v0.D(), 0, MemOperand(x1, x2, PostIndex));
1045 __ ld3(v28.D(), v29.D(), v30.D(), 1, MemOperand(x1, 24, PostIndex));
1046 __ ld3(v13.H(), v14.H(), v15.H(), 2, MemOperand(x0));
1047 __ ld3(v22.H(), v23.H(), v24.H(), 7, MemOperand(x1, x2, PostIndex));
1048 __ ld3(v14.H(), v15.H(), v16.H(), 3, MemOperand(x1, 6, PostIndex));
1049 __ ld3(v22.S(), v23.S(), v24.S(), 3, MemOperand(x0));
1050 __ ld3(v30.S(), v31.S(), v0.S(), 2, MemOperand(x1, x2, PostIndex));
1051 __ ld3(v12.S(), v13.S(), v14.S(), 1, MemOperand(x1, 12, PostIndex));
1052 __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x0));
1053 __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, x2, PostIndex));
1054 __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x1, 3, PostIndex));
1055 __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), MemOperand(x0));
1056 __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), MemOperand(x1, x2, PostIndex));
1057 __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), MemOperand(x1, 24, PostIndex));
1058 __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x0));
1059 __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x1, x2, PostIndex));
1060 __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), MemOperand(x1, 24, PostIndex));
1061 __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), MemOperand(x0));
1062 __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x1, x2, PostIndex));
1063 __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, 12, PostIndex));
1064 __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), MemOperand(x0));
1065 __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x1, x2, PostIndex));
1066 __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x1, 6, PostIndex));
1067 __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), MemOperand(x0));
1068 __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x1, x2, PostIndex));
1069 __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, 12, PostIndex));
1070 __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x0));
1071 __ ld3r(v10.V8B(), v11.V8B(), v12.V8B(), MemOperand(x1, x2, PostIndex));
1072 __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), MemOperand(x1, 3, PostIndex));
1073 __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0));
1074 __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x1, x2, PostIndex));
1075 __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), MemOperand(x1, 6, PostIndex));
1076 __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), MemOperand(x0));
1077 __ ld4(v2.V16B(),
1078 v3.V16B(),
1079 v4.V16B(),
1080 v5.V16B(),
1081 MemOperand(x1, x2, PostIndex));
1082 __ ld4(v5.V16B(),
1083 v6.V16B(),
1084 v7.V16B(),
1085 v8.V16B(),
1086 MemOperand(x1, 64, PostIndex));
1087 __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x0));
1088 __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex));
1089 __ ld4(v29.V2D(),
1090 v30.V2D(),
1091 v31.V2D(),
1092 v0.V2D(),
1093 MemOperand(x1, 64, PostIndex));
1094 __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), MemOperand(x0));
1095 __ ld4(v24.V2S(),
1096 v25.V2S(),
1097 v26.V2S(),
1098 v27.V2S(),
1099 MemOperand(x1, x2, PostIndex));
1100 __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), MemOperand(x1, 32, PostIndex));
1101 __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0));
1102 __ ld4(v23.V4H(),
1103 v24.V4H(),
1104 v25.V4H(),
1105 v26.V4H(),
1106 MemOperand(x1, x2, PostIndex));
1107 __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 32, PostIndex));
1108 __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), MemOperand(x0));
1109 __ ld4(v28.V4S(),
1110 v29.V4S(),
1111 v30.V4S(),
1112 v31.V4S(),
1113 MemOperand(x1, x2, PostIndex));
1114 __ ld4(v29.V4S(),
1115 v30.V4S(),
1116 v31.V4S(),
1117 v0.V4S(),
1118 MemOperand(x1, 64, PostIndex));
1119 __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), MemOperand(x0));
1120 __ ld4(v27.V8B(),
1121 v28.V8B(),
1122 v29.V8B(),
1123 v30.V8B(),
1124 MemOperand(x1, x2, PostIndex));
1125 __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, 32, PostIndex));
1126 __ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0));
1127 __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex));
1128 __ ld4(v20.V8H(),
1129 v21.V8H(),
1130 v22.V8H(),
1131 v23.V8H(),
1132 MemOperand(x1, 64, PostIndex));
1133 __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, MemOperand(x0));
1134 __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, MemOperand(x1, x2, PostIndex));
1135 __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, MemOperand(x1, 4, PostIndex));
1136 __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x0));
1137 __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex));
1138 __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, MemOperand(x1, 32, PostIndex));
1139 __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, MemOperand(x0));
1140 __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, MemOperand(x1, x2, PostIndex));
1141 __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, MemOperand(x1, 8, PostIndex));
1142 __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, MemOperand(x0));
1143 __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, MemOperand(x1, x2, PostIndex));
1144 __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, MemOperand(x1, 16, PostIndex));
1145 __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), MemOperand(x0));
1146 __ ld4r(v13.V16B(),
1147 v14.V16B(),
1148 v15.V16B(),
1149 v16.V16B(),
1150 MemOperand(x1, x2, PostIndex));
1151 __ ld4r(v9.V16B(),
1152 v10.V16B(),
1153 v11.V16B(),
1154 v12.V16B(),
1155 MemOperand(x1, 4, PostIndex));
1156 __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), MemOperand(x0));
1157 __ ld4r(v4.V1D(),
1158 v5.V1D(),
1159 v6.V1D(),
1160 v7.V1D(),
1161 MemOperand(x1, x2, PostIndex));
1162 __ ld4r(v26.V1D(),
1163 v27.V1D(),
1164 v28.V1D(),
1165 v29.V1D(),
1166 MemOperand(x1, 32, PostIndex));
1167 __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x0));
1168 __ ld4r(v28.V2D(),
1169 v29.V2D(),
1170 v30.V2D(),
1171 v31.V2D(),
1172 MemOperand(x1, x2, PostIndex));
1173 __ ld4r(v15.V2D(),
1174 v16.V2D(),
1175 v17.V2D(),
1176 v18.V2D(),
1177 MemOperand(x1, 32, PostIndex));
1178 __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x0));
1179 __ ld4r(v28.V2S(),
1180 v29.V2S(),
1181 v30.V2S(),
1182 v31.V2S(),
1183 MemOperand(x1, x2, PostIndex));
1184 __ ld4r(v11.V2S(),
1185 v12.V2S(),
1186 v13.V2S(),
1187 v14.V2S(),
1188 MemOperand(x1, 16, PostIndex));
1189 __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), MemOperand(x0));
1190 __ ld4r(v22.V4H(),
1191 v23.V4H(),
1192 v24.V4H(),
1193 v25.V4H(),
1194 MemOperand(x1, x2, PostIndex));
1195 __ ld4r(v20.V4H(),
1196 v21.V4H(),
1197 v22.V4H(),
1198 v23.V4H(),
1199 MemOperand(x1, 8, PostIndex));
1200 __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x0));
1201 __ ld4r(v25.V4S(),
1202 v26.V4S(),
1203 v27.V4S(),
1204 v28.V4S(),
1205 MemOperand(x1, x2, PostIndex));
1206 __ ld4r(v23.V4S(),
1207 v24.V4S(),
1208 v25.V4S(),
1209 v26.V4S(),
1210 MemOperand(x1, 16, PostIndex));
1211 __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), MemOperand(x0));
1212 __ ld4r(v27.V8B(),
1213 v28.V8B(),
1214 v29.V8B(),
1215 v30.V8B(),
1216 MemOperand(x1, x2, PostIndex));
1217 __ ld4r(v29.V8B(),
1218 v30.V8B(),
1219 v31.V8B(),
1220 v0.V8B(),
1221 MemOperand(x1, 4, PostIndex));
1222 __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x0));
1223 __ ld4r(v25.V8H(),
1224 v26.V8H(),
1225 v27.V8H(),
1226 v28.V8H(),
1227 MemOperand(x1, x2, PostIndex));
1228 __ ld4r(v22.V8H(),
1229 v23.V8H(),
1230 v24.V8H(),
1231 v25.V8H(),
1232 MemOperand(x1, 8, PostIndex));
1233 __ mla(v29.V16B(), v7.V16B(), v26.V16B());
1234 __ mla(v6.V2S(), v4.V2S(), v14.V2S());
1235 __ mla(v9.V2S(), v11.V2S(), v0.S(), 2);
1236 __ mla(v5.V4H(), v17.V4H(), v25.V4H());
1237 __ mla(v24.V4H(), v7.V4H(), v11.H(), 3);
1238 __ mla(v12.V4S(), v3.V4S(), v4.V4S());
1239 __ mla(v10.V4S(), v7.V4S(), v7.S(), 3);
1240 __ mla(v3.V8B(), v16.V8B(), v9.V8B());
1241 __ mla(v19.V8H(), v22.V8H(), v18.V8H());
1242 __ mla(v6.V8H(), v2.V8H(), v0.H(), 0);
1243 __ mls(v23.V16B(), v10.V16B(), v11.V16B());
1244 __ mls(v14.V2S(), v31.V2S(), v22.V2S());
1245 __ mls(v28.V2S(), v13.V2S(), v1.S(), 3);
1246 __ mls(v2.V4H(), v19.V4H(), v13.V4H());
1247 __ mls(v18.V4H(), v15.V4H(), v12.H(), 6);
1248 __ mls(v6.V4S(), v11.V4S(), v16.V4S());
1249 __ mls(v23.V4S(), v16.V4S(), v10.S(), 2);
1250 __ mls(v26.V8B(), v13.V8B(), v23.V8B());
1251 __ mls(v10.V8H(), v10.V8H(), v12.V8H());
1252 __ mls(v14.V8H(), v0.V8H(), v14.H(), 7);
1253 __ mov(b22, v1.B(), 3);
1254 __ mov(d7, v13.D(), 1);
1255 __ mov(h26, v21.H(), 2);
1256 __ mov(s26, v19.S(), 0);
1257 __ mov(v26.V16B(), v11.V16B());
1258 __ mov(v20.V8B(), v0.V8B());
1259 __ mov(v19.B(), 13, v6.B(), 4);
1260 __ mov(v4.B(), 13, w19);
1261 __ mov(v11.D(), 1, v8.D(), 0);
1262 __ mov(v3.D(), 0, x30);
1263 __ mov(v29.H(), 4, v11.H(), 7);
1264 __ mov(v2.H(), 6, w6);
1265 __ mov(v22.S(), 0, v5.S(), 2);
1266 __ mov(v24.S(), 3, w8);
1267 __ mov(w18, v1.S(), 3);
1268 __ mov(x28, v21.D(), 0);
1269 __ movi(d24, 0xffff0000ffffff);
1270 __ movi(v29.V16B(), 0x80);
1271 __ movi(v12.V2D(), 0xffff00ff00ffff00);
1272 __ movi(v12.V2S(), 0xec, LSL, 24);
1273 __ movi(v10.V2S(), 0x4c, MSL, 16);
1274 __ movi(v26.V4H(), 0xc0, LSL);
1275 __ movi(v24.V4S(), 0x98, LSL, 16);
1276 __ movi(v1.V4S(), 0xde, MSL, 16);
1277 __ movi(v21.V8B(), 0x4d);
1278 __ movi(v29.V8H(), 0x69, LSL);
1279 __ mul(v1.V16B(), v15.V16B(), v17.V16B());
1280 __ mul(v21.V2S(), v19.V2S(), v29.V2S());
1281 __ mul(v19.V2S(), v5.V2S(), v3.S(), 0);
1282 __ mul(v29.V4H(), v11.V4H(), v2.V4H());
1283 __ mul(v2.V4H(), v7.V4H(), v0.H(), 0);
1284 __ mul(v25.V4S(), v26.V4S(), v16.V4S());
1285 __ mul(v26.V4S(), v6.V4S(), v15.S(), 2);
1286 __ mul(v11.V8B(), v15.V8B(), v31.V8B());
1287 __ mul(v20.V8H(), v31.V8H(), v15.V8H());
1288 __ mul(v29.V8H(), v5.V8H(), v9.H(), 4);
1289 __ mvn(v13.V16B(), v21.V16B());
1290 __ mvn(v28.V8B(), v19.V8B());
1291 __ mvni(v25.V2S(), 0xb8, LSL, 8);
1292 __ mvni(v17.V2S(), 0x6c, MSL, 16);
1293 __ mvni(v29.V4H(), 0x48, LSL);
1294 __ mvni(v20.V4S(), 0x7a, LSL, 16);
1295 __ mvni(v0.V4S(), 0x1e, MSL, 8);
1296 __ mvni(v31.V8H(), 0x3e, LSL);
1297 __ neg(d25, d11);
1298 __ neg(v4.V16B(), v9.V16B());
1299 __ neg(v11.V2D(), v25.V2D());
1300 __ neg(v7.V2S(), v18.V2S());
1301 __ neg(v7.V4H(), v15.V4H());
1302 __ neg(v17.V4S(), v18.V4S());
1303 __ neg(v20.V8B(), v17.V8B());
1304 __ neg(v0.V8H(), v11.V8H());
1305 __ orn(v13.V16B(), v11.V16B(), v31.V16B());
1306 __ orn(v22.V8B(), v16.V8B(), v22.V8B());
1307 __ orr(v17.V16B(), v17.V16B(), v23.V16B());
1308 __ orr(v8.V2S(), 0xe3);
1309 __ orr(v11.V4H(), 0x97, 8);
1310 __ orr(v7.V4S(), 0xab);
1311 __ orr(v8.V8B(), v4.V8B(), v3.V8B());
1312 __ orr(v31.V8H(), 0xb0, 8);
1313 __ pmul(v11.V16B(), v18.V16B(), v23.V16B());
1314 __ pmul(v8.V8B(), v24.V8B(), v5.V8B());
1315 __ pmull(v24.V8H(), v18.V8B(), v22.V8B());
1316 __ pmull2(v13.V8H(), v3.V16B(), v21.V16B());
1317 __ raddhn(v22.V2S(), v10.V2D(), v21.V2D());
1318 __ raddhn(v5.V4H(), v13.V4S(), v13.V4S());
1319 __ raddhn(v10.V8B(), v17.V8H(), v26.V8H());
1320 __ raddhn2(v9.V16B(), v29.V8H(), v13.V8H());
1321 __ raddhn2(v27.V4S(), v23.V2D(), v26.V2D());
1322 __ raddhn2(v0.V8H(), v29.V4S(), v7.V4S());
1323 __ rbit(v22.V16B(), v15.V16B());
1324 __ rbit(v30.V8B(), v3.V8B());
1325 __ rev16(v31.V16B(), v27.V16B());
1326 __ rev16(v12.V8B(), v26.V8B());
1327 __ rev32(v5.V16B(), v4.V16B());
1328 __ rev32(v16.V4H(), v26.V4H());
1329 __ rev32(v20.V8B(), v3.V8B());
1330 __ rev32(v20.V8H(), v28.V8H());
1331 __ rev64(v9.V16B(), v19.V16B());
1332 __ rev64(v5.V2S(), v16.V2S());
1333 __ rev64(v7.V4H(), v31.V4H());
1334 __ rev64(v15.V4S(), v26.V4S());
1335 __ rev64(v25.V8B(), v9.V8B());
1336 __ rev64(v11.V8H(), v5.V8H());
1337 __ rshrn(v18.V2S(), v13.V2D(), 1);
1338 __ rshrn(v25.V4H(), v30.V4S(), 2);
1339 __ rshrn(v13.V8B(), v9.V8H(), 8);
1340 __ rshrn2(v3.V16B(), v6.V8H(), 8);
1341 __ rshrn2(v0.V4S(), v29.V2D(), 25);
1342 __ rshrn2(v27.V8H(), v26.V4S(), 15);
1343 __ rsubhn(v15.V2S(), v25.V2D(), v4.V2D());
1344 __ rsubhn(v23.V4H(), v9.V4S(), v3.V4S());
1345 __ rsubhn(v6.V8B(), v30.V8H(), v24.V8H());
1346 __ rsubhn2(v4.V16B(), v24.V8H(), v20.V8H());
1347 __ rsubhn2(v1.V4S(), v23.V2D(), v22.V2D());
1348 __ rsubhn2(v19.V8H(), v2.V4S(), v20.V4S());
1349 __ saba(v28.V16B(), v9.V16B(), v25.V16B());
1350 __ saba(v9.V2S(), v28.V2S(), v20.V2S());
1351 __ saba(v17.V4H(), v22.V4H(), v22.V4H());
1352 __ saba(v29.V4S(), v5.V4S(), v27.V4S());
1353 __ saba(v20.V8B(), v21.V8B(), v18.V8B());
1354 __ saba(v27.V8H(), v17.V8H(), v30.V8H());
1355 __ sabal(v20.V2D(), v13.V2S(), v7.V2S());
1356 __ sabal(v4.V4S(), v12.V4H(), v4.V4H());
1357 __ sabal(v23.V8H(), v24.V8B(), v20.V8B());
1358 __ sabal2(v26.V2D(), v21.V4S(), v18.V4S());
1359 __ sabal2(v27.V4S(), v28.V8H(), v8.V8H());
1360 __ sabal2(v12.V8H(), v16.V16B(), v21.V16B());
1361 __ sabd(v0.V16B(), v15.V16B(), v13.V16B());
1362 __ sabd(v15.V2S(), v7.V2S(), v30.V2S());
1363 __ sabd(v17.V4H(), v17.V4H(), v12.V4H());
1364 __ sabd(v7.V4S(), v4.V4S(), v22.V4S());
1365 __ sabd(v23.V8B(), v3.V8B(), v26.V8B());
1366 __ sabd(v20.V8H(), v28.V8H(), v5.V8H());
1367 __ sabdl(v27.V2D(), v22.V2S(), v20.V2S());
1368 __ sabdl(v31.V4S(), v20.V4H(), v23.V4H());
1369 __ sabdl(v0.V8H(), v20.V8B(), v27.V8B());
1370 __ sabdl2(v31.V2D(), v11.V4S(), v3.V4S());
1371 __ sabdl2(v26.V4S(), v11.V8H(), v27.V8H());
1372 __ sabdl2(v6.V8H(), v8.V16B(), v18.V16B());
1373 __ sadalp(v8.V1D(), v26.V2S());
1374 __ sadalp(v12.V2D(), v26.V4S());
1375 __ sadalp(v12.V2S(), v26.V4H());
1376 __ sadalp(v4.V4H(), v1.V8B());
1377 __ sadalp(v15.V4S(), v17.V8H());
1378 __ sadalp(v21.V8H(), v25.V16B());
1379 __ saddl(v5.V2D(), v10.V2S(), v14.V2S());
1380 __ saddl(v18.V4S(), v3.V4H(), v15.V4H());
1381 __ saddl(v15.V8H(), v2.V8B(), v23.V8B());
1382 __ saddl2(v16.V2D(), v16.V4S(), v27.V4S());
1383 __ saddl2(v6.V4S(), v24.V8H(), v0.V8H());
1384 __ saddl2(v7.V8H(), v20.V16B(), v28.V16B());
1385 __ saddlp(v10.V1D(), v25.V2S());
1386 __ saddlp(v15.V2D(), v16.V4S());
1387 __ saddlp(v18.V2S(), v10.V4H());
1388 __ saddlp(v29.V4H(), v26.V8B());
1389 __ saddlp(v10.V4S(), v1.V8H());
1390 __ saddlp(v0.V8H(), v21.V16B());
1391 __ saddlv(d12, v7.V4S());
1392 __ saddlv(h14, v28.V16B());
1393 __ saddlv(h30, v30.V8B());
1394 __ saddlv(s27, v3.V4H());
1395 __ saddlv(s16, v16.V8H());
1396 __ saddw(v24.V2D(), v11.V2D(), v18.V2S());
1397 __ saddw(v13.V4S(), v12.V4S(), v6.V4H());
1398 __ saddw(v19.V8H(), v19.V8H(), v7.V8B());
1399 __ saddw2(v27.V2D(), v9.V2D(), v26.V4S());
1400 __ saddw2(v19.V4S(), v23.V4S(), v21.V8H());
1401 __ saddw2(v15.V8H(), v25.V8H(), v30.V16B());
1402 __ shadd(v7.V16B(), v4.V16B(), v9.V16B());
1403 __ shadd(v29.V2S(), v25.V2S(), v24.V2S());
1404 __ shadd(v31.V4H(), v10.V4H(), v13.V4H());
1405 __ shadd(v21.V4S(), v16.V4S(), v8.V4S());
1406 __ shadd(v14.V8B(), v29.V8B(), v22.V8B());
1407 __ shadd(v19.V8H(), v24.V8H(), v20.V8H());
1408 __ shl(d22, d25, 23);
1409 __ shl(v5.V16B(), v17.V16B(), 7);
1410 __ shl(v2.V2D(), v4.V2D(), 21);
1411 __ shl(v4.V2S(), v3.V2S(), 26);
1412 __ shl(v3.V4H(), v28.V4H(), 8);
1413 __ shl(v4.V4S(), v31.V4S(), 24);
1414 __ shl(v18.V8B(), v16.V8B(), 2);
1415 __ shl(v0.V8H(), v11.V8H(), 3);
1416 __ shll(v5.V2D(), v24.V2S(), 32);
1417 __ shll(v26.V4S(), v20.V4H(), 16);
1418 __ shll(v5.V8H(), v9.V8B(), 8);
1419 __ shll2(v21.V2D(), v28.V4S(), 32);
1420 __ shll2(v22.V4S(), v1.V8H(), 16);
1421 __ shll2(v30.V8H(), v25.V16B(), 8);
1422 __ shrn(v5.V2S(), v1.V2D(), 28);
1423 __ shrn(v29.V4H(), v18.V4S(), 7);
1424 __ shrn(v17.V8B(), v29.V8H(), 2);
1425 __ shrn2(v5.V16B(), v30.V8H(), 3);
1426 __ shrn2(v24.V4S(), v1.V2D(), 1);
1427 __ shrn2(v5.V8H(), v14.V4S(), 16);
1428 __ shsub(v30.V16B(), v22.V16B(), v23.V16B());
1429 __ shsub(v22.V2S(), v27.V2S(), v25.V2S());
1430 __ shsub(v13.V4H(), v22.V4H(), v1.V4H());
1431 __ shsub(v10.V4S(), v8.V4S(), v23.V4S());
1432 __ shsub(v6.V8B(), v9.V8B(), v31.V8B());
1433 __ shsub(v8.V8H(), v31.V8H(), v8.V8H());
1434 __ sli(d19, d29, 20);
1435 __ sli(v9.V16B(), v24.V16B(), 0);
1436 __ sli(v22.V2D(), v9.V2D(), 10);
1437 __ sli(v11.V2S(), v27.V2S(), 20);
1438 __ sli(v16.V4H(), v15.V4H(), 5);
1439 __ sli(v8.V4S(), v8.V4S(), 25);
1440 __ sli(v10.V8B(), v30.V8B(), 0);
1441 __ sli(v7.V8H(), v28.V8H(), 6);
1442 __ smax(v18.V16B(), v8.V16B(), v1.V16B());
1443 __ smax(v30.V2S(), v5.V2S(), v1.V2S());
1444 __ smax(v17.V4H(), v25.V4H(), v19.V4H());
1445 __ smax(v1.V4S(), v24.V4S(), v31.V4S());
1446 __ smax(v17.V8B(), v24.V8B(), v24.V8B());
1447 __ smax(v11.V8H(), v26.V8H(), v10.V8H());
1448 __ smaxp(v12.V16B(), v14.V16B(), v7.V16B());
1449 __ smaxp(v31.V2S(), v24.V2S(), v6.V2S());
1450 __ smaxp(v10.V4H(), v29.V4H(), v10.V4H());
1451 __ smaxp(v18.V4S(), v11.V4S(), v7.V4S());
1452 __ smaxp(v21.V8B(), v0.V8B(), v18.V8B());
1453 __ smaxp(v26.V8H(), v8.V8H(), v15.V8H());
1454 __ smaxv(b4, v5.V16B());
1455 __ smaxv(b23, v0.V8B());
1456 __ smaxv(h6, v0.V4H());
1457 __ smaxv(h24, v8.V8H());
1458 __ smaxv(s3, v16.V4S());
1459 __ smin(v24.V16B(), v8.V16B(), v18.V16B());
1460 __ smin(v29.V2S(), v8.V2S(), v23.V2S());
1461 __ smin(v6.V4H(), v11.V4H(), v21.V4H());
1462 __ smin(v24.V4S(), v23.V4S(), v15.V4S());
1463 __ smin(v8.V8B(), v16.V8B(), v4.V8B());
1464 __ smin(v12.V8H(), v1.V8H(), v10.V8H());
1465 __ sminp(v13.V16B(), v18.V16B(), v28.V16B());
1466 __ sminp(v22.V2S(), v28.V2S(), v16.V2S());
1467 __ sminp(v15.V4H(), v12.V4H(), v5.V4H());
1468 __ sminp(v15.V4S(), v17.V4S(), v8.V4S());
1469 __ sminp(v21.V8B(), v2.V8B(), v6.V8B());
1470 __ sminp(v21.V8H(), v12.V8H(), v6.V8H());
1471 __ sminv(b8, v6.V16B());
1472 __ sminv(b6, v18.V8B());
1473 __ sminv(h20, v1.V4H());
1474 __ sminv(h7, v17.V8H());
1475 __ sminv(s21, v4.V4S());
1476 __ smlal(v24.V2D(), v14.V2S(), v21.V2S());
1477 __ smlal(v31.V2D(), v3.V2S(), v14.S(), 2);
1478 __ smlal(v7.V4S(), v20.V4H(), v21.V4H());
1479 __ smlal(v19.V4S(), v16.V4H(), v9.H(), 3);
1480 __ smlal(v29.V8H(), v14.V8B(), v1.V8B());
1481 __ smlal2(v30.V2D(), v26.V4S(), v16.V4S());
1482 __ smlal2(v31.V2D(), v30.V4S(), v1.S(), 0);
1483 __ smlal2(v17.V4S(), v6.V8H(), v3.V8H());
1484 __ smlal2(v11.V4S(), v31.V8H(), v5.H(), 7);
1485 __ smlal2(v30.V8H(), v16.V16B(), v29.V16B());
1486 __ smlsl(v1.V2D(), v20.V2S(), v17.V2S());
1487 __ smlsl(v29.V2D(), v12.V2S(), v5.S(), 3);
1488 __ smlsl(v0.V4S(), v26.V4H(), v1.V4H());
1489 __ smlsl(v3.V4S(), v5.V4H(), v6.H(), 5);
1490 __ smlsl(v4.V8H(), v0.V8B(), v26.V8B());
1491 __ smlsl2(v14.V2D(), v14.V4S(), v5.V4S());
1492 __ smlsl2(v15.V2D(), v5.V4S(), v0.S(), 1);
1493 __ smlsl2(v29.V4S(), v17.V8H(), v31.V8H());
1494 __ smlsl2(v6.V4S(), v15.V8H(), v9.H(), 6);
1495 __ smlsl2(v30.V8H(), v15.V16B(), v15.V16B());
1496 __ smov(w21, v6.B(), 3);
1497 __ smov(w13, v26.H(), 7);
1498 __ smov(x24, v16.B(), 7);
1499 __ smov(x7, v4.H(), 3);
1500 __ smov(x29, v7.S(), 1);
1501 __ smull(v4.V2D(), v29.V2S(), v17.V2S());
1502 __ smull(v30.V2D(), v21.V2S(), v6.S(), 2);
1503 __ smull(v23.V4S(), v5.V4H(), v23.V4H());
1504 __ smull(v8.V4S(), v9.V4H(), v2.H(), 1);
1505 __ smull(v31.V8H(), v17.V8B(), v1.V8B());
1506 __ smull2(v3.V2D(), v3.V4S(), v23.V4S());
1507 __ smull2(v15.V2D(), v29.V4S(), v6.S(), 1);
1508 __ smull2(v19.V4S(), v20.V8H(), v30.V8H());
1509 __ smull2(v6.V4S(), v10.V8H(), v7.H(), 4);
1510 __ smull2(v25.V8H(), v8.V16B(), v27.V16B());
1511 __ sqabs(b3, b15);
1512 __ sqabs(d14, d9);
1513 __ sqabs(h31, h28);
1514 __ sqabs(s8, s0);
1515 __ sqabs(v14.V16B(), v7.V16B());
1516 __ sqabs(v23.V2D(), v19.V2D());
1517 __ sqabs(v10.V2S(), v24.V2S());
1518 __ sqabs(v31.V4H(), v19.V4H());
1519 __ sqabs(v23.V4S(), v0.V4S());
1520 __ sqabs(v29.V8B(), v23.V8B());
1521 __ sqabs(v17.V8H(), v21.V8H());
1522 __ sqadd(b9, b23, b13);
1523 __ sqadd(d2, d25, d26);
1524 __ sqadd(h7, h29, h25);
1525 __ sqadd(s11, s7, s24);
1526 __ sqadd(v20.V16B(), v16.V16B(), v29.V16B());
1527 __ sqadd(v23.V2D(), v30.V2D(), v28.V2D());
1528 __ sqadd(v8.V2S(), v19.V2S(), v2.V2S());
1529 __ sqadd(v20.V4H(), v12.V4H(), v31.V4H());
1530 __ sqadd(v14.V4S(), v15.V4S(), v17.V4S());
1531 __ sqadd(v2.V8B(), v29.V8B(), v13.V8B());
1532 __ sqadd(v7.V8H(), v19.V8H(), v14.V8H());
1533 __ sqdmlal(d15, s5, s30);
1534 __ sqdmlal(d24, s10, v2.S(), 3);
1535 __ sqdmlal(s9, h19, h8);
1536 __ sqdmlal(s14, h1, v12.H(), 3);
1537 __ sqdmlal(v30.V2D(), v5.V2S(), v31.V2S());
1538 __ sqdmlal(v25.V2D(), v14.V2S(), v10.S(), 1);
1539 __ sqdmlal(v19.V4S(), v17.V4H(), v16.V4H());
1540 __ sqdmlal(v8.V4S(), v5.V4H(), v8.H(), 1);
1541 __ sqdmlal2(v1.V2D(), v23.V4S(), v3.V4S());
1542 __ sqdmlal2(v19.V2D(), v0.V4S(), v9.S(), 0);
1543 __ sqdmlal2(v26.V4S(), v22.V8H(), v11.V8H());
1544 __ sqdmlal2(v6.V4S(), v28.V8H(), v13.H(), 4);
1545 __ sqdmlsl(d10, s29, s20);
1546 __ sqdmlsl(d10, s9, v10.S(), 1);
1547 __ sqdmlsl(s30, h9, h24);
1548 __ sqdmlsl(s13, h24, v6.H(), 1);
1549 __ sqdmlsl(v27.V2D(), v10.V2S(), v20.V2S());
1550 __ sqdmlsl(v23.V2D(), v23.V2S(), v3.S(), 3);
1551 __ sqdmlsl(v7.V4S(), v17.V4H(), v29.V4H());
1552 __ sqdmlsl(v22.V4S(), v21.V4H(), v3.H(), 4);
1553 __ sqdmlsl2(v12.V2D(), v7.V4S(), v22.V4S());
1554 __ sqdmlsl2(v20.V2D(), v25.V4S(), v8.S(), 0);
1555 __ sqdmlsl2(v25.V4S(), v26.V8H(), v18.V8H());
1556 __ sqdmlsl2(v25.V4S(), v19.V8H(), v5.H(), 0);
1557 __ sqdmulh(h17, h27, h12);
1558 __ sqdmulh(h16, h5, v11.H(), 0);
1559 __ sqdmulh(s1, s19, s16);
1560 __ sqdmulh(s1, s16, v2.S(), 0);
1561 __ sqdmulh(v28.V2S(), v1.V2S(), v8.V2S());
1562 __ sqdmulh(v28.V2S(), v8.V2S(), v3.S(), 0);
1563 __ sqdmulh(v11.V4H(), v25.V4H(), v5.V4H());
1564 __ sqdmulh(v30.V4H(), v14.V4H(), v8.H(), 5);
1565 __ sqdmulh(v25.V4S(), v21.V4S(), v13.V4S());
1566 __ sqdmulh(v23.V4S(), v2.V4S(), v10.S(), 3);
1567 __ sqdmulh(v26.V8H(), v5.V8H(), v23.V8H());
1568 __ sqdmulh(v4.V8H(), v22.V8H(), v4.H(), 3);
1569 __ sqdmull(d25, s2, s26);
1570 __ sqdmull(d30, s14, v5.S(), 1);
1571 __ sqdmull(s29, h18, h11);
1572 __ sqdmull(s11, h13, v7.H(), 6);
1573 __ sqdmull(v23.V2D(), v9.V2S(), v8.V2S());
1574 __ sqdmull(v18.V2D(), v29.V2S(), v4.S(), 1);
1575 __ sqdmull(v17.V4S(), v24.V4H(), v7.V4H());
1576 __ sqdmull(v8.V4S(), v15.V4H(), v5.H(), 1);
1577 __ sqdmull2(v28.V2D(), v14.V4S(), v2.V4S());
1578 __ sqdmull2(v1.V2D(), v24.V4S(), v13.S(), 2);
1579 __ sqdmull2(v11.V4S(), v17.V8H(), v31.V8H());
1580 __ sqdmull2(v1.V4S(), v20.V8H(), v11.H(), 3);
1581 __ sqneg(b2, b0);
1582 __ sqneg(d24, d2);
1583 __ sqneg(h29, h3);
1584 __ sqneg(s4, s9);
1585 __ sqneg(v14.V16B(), v29.V16B());
1586 __ sqneg(v30.V2D(), v12.V2D());
1587 __ sqneg(v28.V2S(), v26.V2S());
1588 __ sqneg(v4.V4H(), v4.V4H());
1589 __ sqneg(v9.V4S(), v8.V4S());
1590 __ sqneg(v20.V8B(), v20.V8B());
1591 __ sqneg(v27.V8H(), v10.V8H());
1592 __ sqrdmulh(h7, h24, h0);
1593 __ sqrdmulh(h14, h3, v4.H(), 6);
1594 __ sqrdmulh(s27, s19, s24);
1595 __ sqrdmulh(s31, s21, v4.S(), 0);
1596 __ sqrdmulh(v18.V2S(), v25.V2S(), v1.V2S());
1597 __ sqrdmulh(v22.V2S(), v5.V2S(), v13.S(), 0);
1598 __ sqrdmulh(v22.V4H(), v24.V4H(), v9.V4H());
1599 __ sqrdmulh(v13.V4H(), v2.V4H(), v12.H(), 6);
1600 __ sqrdmulh(v9.V4S(), v27.V4S(), v2.V4S());
1601 __ sqrdmulh(v3.V4S(), v23.V4S(), v7.S(), 1);
1602 __ sqrdmulh(v2.V8H(), v0.V8H(), v7.V8H());
1603 __ sqrdmulh(v16.V8H(), v9.V8H(), v8.H(), 2);
1604 __ sqrshl(b8, b21, b13);
1605 __ sqrshl(d29, d7, d20);
1606 __ sqrshl(h28, h14, h10);
1607 __ sqrshl(s26, s18, s2);
1608 __ sqrshl(v18.V16B(), v31.V16B(), v26.V16B());
1609 __ sqrshl(v28.V2D(), v4.V2D(), v0.V2D());
1610 __ sqrshl(v3.V2S(), v6.V2S(), v0.V2S());
1611 __ sqrshl(v1.V4H(), v18.V4H(), v22.V4H());
1612 __ sqrshl(v16.V4S(), v25.V4S(), v7.V4S());
1613 __ sqrshl(v0.V8B(), v21.V8B(), v5.V8B());
1614 __ sqrshl(v30.V8H(), v19.V8H(), v8.V8H());
1615 __ sqrshrn(b6, h21, 4);
1616 __ sqrshrn(h14, s17, 11);
1617 __ sqrshrn(s25, d27, 10);
1618 __ sqrshrn(v6.V2S(), v13.V2D(), 18);
1619 __ sqrshrn(v5.V4H(), v9.V4S(), 15);
1620 __ sqrshrn(v19.V8B(), v12.V8H(), 1);
1621 __ sqrshrn2(v19.V16B(), v21.V8H(), 7);
1622 __ sqrshrn2(v29.V4S(), v24.V2D(), 13);
1623 __ sqrshrn2(v12.V8H(), v2.V4S(), 10);
1624 __ sqrshrun(b16, h9, 5);
1625 __ sqrshrun(h3, s24, 15);
1626 __ sqrshrun(s16, d18, 8);
1627 __ sqrshrun(v28.V2S(), v23.V2D(), 8);
1628 __ sqrshrun(v31.V4H(), v25.V4S(), 10);
1629 __ sqrshrun(v19.V8B(), v23.V8H(), 2);
1630 __ sqrshrun2(v24.V16B(), v0.V8H(), 8);
1631 __ sqrshrun2(v22.V4S(), v1.V2D(), 23);
1632 __ sqrshrun2(v28.V8H(), v21.V4S(), 13);
1633 __ sqshl(b6, b21, b8);
1634 __ sqshl(b11, b26, 2);
1635 __ sqshl(d29, d0, d4);
1636 __ sqshl(d21, d7, 35);
1637 __ sqshl(h20, h25, h17);
1638 __ sqshl(h20, h0, 8);
1639 __ sqshl(s29, s13, s4);
1640 __ sqshl(s10, s11, 20);
1641 __ sqshl(v8.V16B(), v18.V16B(), v28.V16B());
1642 __ sqshl(v29.V16B(), v29.V16B(), 2);
1643 __ sqshl(v8.V2D(), v31.V2D(), v16.V2D());
1644 __ sqshl(v7.V2D(), v14.V2D(), 37);
1645 __ sqshl(v0.V2S(), v26.V2S(), v7.V2S());
1646 __ sqshl(v5.V2S(), v11.V2S(), 19);
1647 __ sqshl(v11.V4H(), v30.V4H(), v0.V4H());
1648 __ sqshl(v1.V4H(), v18.V4H(), 7);
1649 __ sqshl(v22.V4S(), v3.V4S(), v30.V4S());
1650 __ sqshl(v16.V4S(), v15.V4S(), 28);
1651 __ sqshl(v6.V8B(), v28.V8B(), v25.V8B());
1652 __ sqshl(v0.V8B(), v15.V8B(), 0);
1653 __ sqshl(v6.V8H(), v16.V8H(), v30.V8H());
1654 __ sqshl(v3.V8H(), v20.V8H(), 14);
1655 __ sqshlu(b13, b14, 6);
1656 __ sqshlu(d0, d16, 44);
1657 __ sqshlu(h5, h29, 15);
1658 __ sqshlu(s29, s8, 13);
1659 __ sqshlu(v27.V16B(), v20.V16B(), 2);
1660 __ sqshlu(v24.V2D(), v12.V2D(), 11);
1661 __ sqshlu(v12.V2S(), v19.V2S(), 22);
1662 __ sqshlu(v8.V4H(), v12.V4H(), 11);
1663 __ sqshlu(v18.V4S(), v3.V4S(), 8);
1664 __ sqshlu(v3.V8B(), v10.V8B(), 1);
1665 __ sqshlu(v30.V8H(), v24.V8H(), 4);
1666 __ sqshrn(b1, h28, 1);
1667 __ sqshrn(h31, s7, 10);
1668 __ sqshrn(s4, d10, 24);
1669 __ sqshrn(v10.V2S(), v1.V2D(), 29);
1670 __ sqshrn(v3.V4H(), v13.V4S(), 14);
1671 __ sqshrn(v27.V8B(), v6.V8H(), 7);
1672 __ sqshrn2(v14.V16B(), v23.V8H(), 1);
1673 __ sqshrn2(v25.V4S(), v22.V2D(), 27);
1674 __ sqshrn2(v31.V8H(), v12.V4S(), 10);
1675 __ sqshrun(b9, h0, 1);
1676 __ sqshrun(h11, s6, 7);
1677 __ sqshrun(s13, d12, 13);
1678 __ sqshrun(v10.V2S(), v30.V2D(), 1);
1679 __ sqshrun(v31.V4H(), v3.V4S(), 11);
1680 __ sqshrun(v28.V8B(), v30.V8H(), 8);
1681 __ sqshrun2(v16.V16B(), v27.V8H(), 3);
1682 __ sqshrun2(v27.V4S(), v14.V2D(), 18);
1683 __ sqshrun2(v23.V8H(), v14.V4S(), 1);
1684 __ sqsub(b19, b29, b11);
1685 __ sqsub(d21, d31, d6);
1686 __ sqsub(h18, h10, h19);
1687 __ sqsub(s6, s5, s0);
1688 __ sqsub(v21.V16B(), v22.V16B(), v0.V16B());
1689 __ sqsub(v22.V2D(), v10.V2D(), v17.V2D());
1690 __ sqsub(v8.V2S(), v21.V2S(), v2.V2S());
1691 __ sqsub(v18.V4H(), v25.V4H(), v27.V4H());
1692 __ sqsub(v13.V4S(), v3.V4S(), v6.V4S());
1693 __ sqsub(v28.V8B(), v29.V8B(), v16.V8B());
1694 __ sqsub(v17.V8H(), v6.V8H(), v10.V8H());
1695 __ sqxtn(b27, h26);
1696 __ sqxtn(h17, s11);
1697 __ sqxtn(s22, d31);
1698 __ sqxtn(v26.V2S(), v5.V2D());
1699 __ sqxtn(v13.V4H(), v7.V4S());
1700 __ sqxtn(v19.V8B(), v19.V8H());
1701 __ sqxtn2(v19.V16B(), v3.V8H());
1702 __ sqxtn2(v23.V4S(), v1.V2D());
1703 __ sqxtn2(v13.V8H(), v3.V4S());
1704 __ sqxtun(b26, h9);
1705 __ sqxtun(h19, s12);
1706 __ sqxtun(s3, d6);
1707 __ sqxtun(v29.V2S(), v26.V2D());
1708 __ sqxtun(v26.V4H(), v10.V4S());
1709 __ sqxtun(v7.V8B(), v29.V8H());
1710 __ sqxtun2(v21.V16B(), v14.V8H());
1711 __ sqxtun2(v24.V4S(), v15.V2D());
1712 __ sqxtun2(v30.V8H(), v1.V4S());
1713 __ srhadd(v21.V16B(), v17.V16B(), v15.V16B());
1714 __ srhadd(v28.V2S(), v21.V2S(), v29.V2S());
1715 __ srhadd(v9.V4H(), v1.V4H(), v30.V4H());
1716 __ srhadd(v24.V4S(), v0.V4S(), v2.V4S());
1717 __ srhadd(v6.V8B(), v17.V8B(), v15.V8B());
1718 __ srhadd(v5.V8H(), v7.V8H(), v21.V8H());
1719 __ sri(d14, d14, 49);
1720 __ sri(v23.V16B(), v8.V16B(), 4);
1721 __ sri(v20.V2D(), v13.V2D(), 20);
1722 __ sri(v16.V2S(), v2.V2S(), 24);
1723 __ sri(v5.V4H(), v23.V4H(), 11);
1724 __ sri(v27.V4S(), v15.V4S(), 23);
1725 __ sri(v19.V8B(), v29.V8B(), 4);
1726 __ sri(v7.V8H(), v29.V8H(), 3);
1727 __ srshl(d2, d9, d26);
1728 __ srshl(v29.V16B(), v17.V16B(), v11.V16B());
1729 __ srshl(v8.V2D(), v15.V2D(), v4.V2D());
1730 __ srshl(v25.V2S(), v17.V2S(), v8.V2S());
1731 __ srshl(v19.V4H(), v7.V4H(), v7.V4H());
1732 __ srshl(v13.V4S(), v2.V4S(), v17.V4S());
1733 __ srshl(v22.V8B(), v6.V8B(), v21.V8B());
1734 __ srshl(v10.V8H(), v17.V8H(), v4.V8H());
1735 __ srshr(d21, d18, 45);
1736 __ srshr(v3.V16B(), v11.V16B(), 7);
1737 __ srshr(v21.V2D(), v26.V2D(), 53);
1738 __ srshr(v11.V2S(), v5.V2S(), 28);
1739 __ srshr(v7.V4H(), v18.V4H(), 12);
1740 __ srshr(v7.V4S(), v3.V4S(), 30);
1741 __ srshr(v14.V8B(), v2.V8B(), 6);
1742 __ srshr(v21.V8H(), v20.V8H(), 3);
1743 __ srsra(d21, d30, 63);
1744 __ srsra(v27.V16B(), v30.V16B(), 6);
1745 __ srsra(v20.V2D(), v12.V2D(), 27);
1746 __ srsra(v0.V2S(), v17.V2S(), 5);
1747 __ srsra(v14.V4H(), v16.V4H(), 15);
1748 __ srsra(v18.V4S(), v3.V4S(), 20);
1749 __ srsra(v21.V8B(), v1.V8B(), 1);
1750 __ srsra(v31.V8H(), v25.V8H(), 2);
1751 __ sshl(d1, d13, d9);
1752 __ sshl(v17.V16B(), v31.V16B(), v15.V16B());
1753 __ sshl(v13.V2D(), v16.V2D(), v0.V2D());
1754 __ sshl(v0.V2S(), v7.V2S(), v22.V2S());
1755 __ sshl(v23.V4H(), v19.V4H(), v4.V4H());
1756 __ sshl(v5.V4S(), v5.V4S(), v11.V4S());
1757 __ sshl(v23.V8B(), v27.V8B(), v7.V8B());
1758 __ sshl(v29.V8H(), v10.V8H(), v5.V8H());
1759 __ sshll(v0.V2D(), v2.V2S(), 23);
1760 __ sshll(v11.V4S(), v8.V4H(), 8);
1761 __ sshll(v4.V8H(), v29.V8B(), 1);
1762 __ sshll2(v10.V2D(), v4.V4S(), 14);
1763 __ sshll2(v26.V4S(), v31.V8H(), 6);
1764 __ sshll2(v3.V8H(), v26.V16B(), 4);
1765 __ sshr(d19, d21, 20);
1766 __ sshr(v15.V16B(), v23.V16B(), 5);
1767 __ sshr(v17.V2D(), v14.V2D(), 38);
1768 __ sshr(v3.V2S(), v29.V2S(), 23);
1769 __ sshr(v23.V4H(), v27.V4H(), 4);
1770 __ sshr(v28.V4S(), v3.V4S(), 4);
1771 __ sshr(v14.V8B(), v2.V8B(), 6);
1772 __ sshr(v3.V8H(), v8.V8H(), 6);
1773 __ ssra(d12, d28, 44);
1774 __ ssra(v29.V16B(), v31.V16B(), 4);
1775 __ ssra(v3.V2D(), v0.V2D(), 24);
1776 __ ssra(v14.V2S(), v28.V2S(), 6);
1777 __ ssra(v18.V4H(), v8.V4H(), 7);
1778 __ ssra(v31.V4S(), v14.V4S(), 24);
1779 __ ssra(v28.V8B(), v26.V8B(), 5);
1780 __ ssra(v9.V8H(), v9.V8H(), 14);
1781 __ ssubl(v13.V2D(), v14.V2S(), v3.V2S());
1782 __ ssubl(v5.V4S(), v16.V4H(), v8.V4H());
1783 __ ssubl(v0.V8H(), v28.V8B(), v6.V8B());
1784 __ ssubl2(v5.V2D(), v13.V4S(), v25.V4S());
1785 __ ssubl2(v3.V4S(), v15.V8H(), v17.V8H());
1786 __ ssubl2(v15.V8H(), v15.V16B(), v14.V16B());
1787 __ ssubw(v25.V2D(), v23.V2D(), v26.V2S());
1788 __ ssubw(v21.V4S(), v18.V4S(), v24.V4H());
1789 __ ssubw(v30.V8H(), v22.V8H(), v3.V8B());
1790 __ ssubw2(v16.V2D(), v24.V2D(), v28.V4S());
1791 __ ssubw2(v31.V4S(), v11.V4S(), v15.V8H());
1792 __ ssubw2(v4.V8H(), v8.V8H(), v16.V16B());
1793 __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0));
1794 __ st1(v10.V16B(),
1795 v11.V16B(),
1796 v12.V16B(),
1797 v13.V16B(),
1798 MemOperand(x1, x2, PostIndex));
1799 __ st1(v27.V16B(),
1800 v28.V16B(),
1801 v29.V16B(),
1802 v30.V16B(),
1803 MemOperand(x1, 64, PostIndex));
1804 __ st1(v16.V16B(), v17.V16B(), v18.V16B(), MemOperand(x0));
1805 __ st1(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex));
1806 __ st1(v9.V16B(), v10.V16B(), v11.V16B(), MemOperand(x1, 48, PostIndex));
1807 __ st1(v7.V16B(), v8.V16B(), MemOperand(x0));
1808 __ st1(v26.V16B(), v27.V16B(), MemOperand(x1, x2, PostIndex));
1809 __ st1(v22.V16B(), v23.V16B(), MemOperand(x1, 32, PostIndex));
1810 __ st1(v23.V16B(), MemOperand(x0));
1811 __ st1(v28.V16B(), MemOperand(x1, x2, PostIndex));
1812 __ st1(v2.V16B(), MemOperand(x1, 16, PostIndex));
1813 __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), MemOperand(x0));
1814 __ st1(v12.V1D(),
1815 v13.V1D(),
1816 v14.V1D(),
1817 v15.V1D(),
1818 MemOperand(x1, x2, PostIndex));
1819 __ st1(v30.V1D(),
1820 v31.V1D(),
1821 v0.V1D(),
1822 v1.V1D(),
1823 MemOperand(x1, 32, PostIndex));
1824 __ st1(v16.V1D(), v17.V1D(), v18.V1D(), MemOperand(x0));
1825 __ st1(v3.V1D(), v4.V1D(), v5.V1D(), MemOperand(x1, x2, PostIndex));
1826 __ st1(v14.V1D(), v15.V1D(), v16.V1D(), MemOperand(x1, 24, PostIndex));
1827 __ st1(v18.V1D(), v19.V1D(), MemOperand(x0));
1828 __ st1(v5.V1D(), v6.V1D(), MemOperand(x1, x2, PostIndex));
1829 __ st1(v2.V1D(), v3.V1D(), MemOperand(x1, 16, PostIndex));
1830 __ st1(v4.V1D(), MemOperand(x0));
1831 __ st1(v27.V1D(), MemOperand(x1, x2, PostIndex));
1832 __ st1(v23.V1D(), MemOperand(x1, 8, PostIndex));
1833 __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), MemOperand(x0));
1834 __ st1(v22.V2D(),
1835 v23.V2D(),
1836 v24.V2D(),
1837 v25.V2D(),
1838 MemOperand(x1, x2, PostIndex));
1839 __ st1(v28.V2D(),
1840 v29.V2D(),
1841 v30.V2D(),
1842 v31.V2D(),
1843 MemOperand(x1, 64, PostIndex));
1844 __ st1(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0));
1845 __ st1(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x1, x2, PostIndex));
1846 __ st1(v22.V2D(), v23.V2D(), v24.V2D(), MemOperand(x1, 48, PostIndex));
1847 __ st1(v21.V2D(), v22.V2D(), MemOperand(x0));
1848 __ st1(v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex));
1849 __ st1(v27.V2D(), v28.V2D(), MemOperand(x1, 32, PostIndex));
1850 __ st1(v21.V2D(), MemOperand(x0));
1851 __ st1(v29.V2D(), MemOperand(x1, x2, PostIndex));
1852 __ st1(v20.V2D(), MemOperand(x1, 16, PostIndex));
1853 __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x0));
1854 __ st1(v8.V2S(),
1855 v9.V2S(),
1856 v10.V2S(),
1857 v11.V2S(),
1858 MemOperand(x1, x2, PostIndex));
1859 __ st1(v15.V2S(),
1860 v16.V2S(),
1861 v17.V2S(),
1862 v18.V2S(),
1863 MemOperand(x1, 32, PostIndex));
1864 __ st1(v2.V2S(), v3.V2S(), v4.V2S(), MemOperand(x0));
1865 __ st1(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, x2, PostIndex));
1866 __ st1(v7.V2S(), v8.V2S(), v9.V2S(), MemOperand(x1, 24, PostIndex));
1867 __ st1(v28.V2S(), v29.V2S(), MemOperand(x0));
1868 __ st1(v29.V2S(), v30.V2S(), MemOperand(x1, x2, PostIndex));
1869 __ st1(v23.V2S(), v24.V2S(), MemOperand(x1, 16, PostIndex));
1870 __ st1(v6.V2S(), MemOperand(x0));
1871 __ st1(v11.V2S(), MemOperand(x1, x2, PostIndex));
1872 __ st1(v17.V2S(), MemOperand(x1, 8, PostIndex));
1873 __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x0));
1874 __ st1(v9.V4H(),
1875 v10.V4H(),
1876 v11.V4H(),
1877 v12.V4H(),
1878 MemOperand(x1, x2, PostIndex));
1879 __ st1(v25.V4H(),
1880 v26.V4H(),
1881 v27.V4H(),
1882 v28.V4H(),
1883 MemOperand(x1, 32, PostIndex));
1884 __ st1(v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x0));
1885 __ st1(v10.V4H(), v11.V4H(), v12.V4H(), MemOperand(x1, x2, PostIndex));
1886 __ st1(v12.V4H(), v13.V4H(), v14.V4H(), MemOperand(x1, 24, PostIndex));
1887 __ st1(v13.V4H(), v14.V4H(), MemOperand(x0));
1888 __ st1(v15.V4H(), v16.V4H(), MemOperand(x1, x2, PostIndex));
1889 __ st1(v21.V4H(), v22.V4H(), MemOperand(x1, 16, PostIndex));
1890 __ st1(v16.V4H(), MemOperand(x0));
1891 __ st1(v8.V4H(), MemOperand(x1, x2, PostIndex));
1892 __ st1(v30.V4H(), MemOperand(x1, 8, PostIndex));
1893 __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), MemOperand(x0));
1894 __ st1(v25.V4S(),
1895 v26.V4S(),
1896 v27.V4S(),
1897 v28.V4S(),
1898 MemOperand(x1, x2, PostIndex));
1899 __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 64, PostIndex));
1900 __ st1(v31.V4S(), v0.V4S(), v1.V4S(), MemOperand(x0));
1901 __ st1(v30.V4S(), v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex));
1902 __ st1(v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 48, PostIndex));
1903 __ st1(v17.V4S(), v18.V4S(), MemOperand(x0));
1904 __ st1(v31.V4S(), v0.V4S(), MemOperand(x1, x2, PostIndex));
1905 __ st1(v1.V4S(), v2.V4S(), MemOperand(x1, 32, PostIndex));
1906 __ st1(v26.V4S(), MemOperand(x0));
1907 __ st1(v15.V4S(), MemOperand(x1, x2, PostIndex));
1908 __ st1(v13.V4S(), MemOperand(x1, 16, PostIndex));
1909 __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0));
1910 __ st1(v10.V8B(),
1911 v11.V8B(),
1912 v12.V8B(),
1913 v13.V8B(),
1914 MemOperand(x1, x2, PostIndex));
1915 __ st1(v15.V8B(),
1916 v16.V8B(),
1917 v17.V8B(),
1918 v18.V8B(),
1919 MemOperand(x1, 32, PostIndex));
1920 __ st1(v19.V8B(), v20.V8B(), v21.V8B(), MemOperand(x0));
1921 __ st1(v31.V8B(), v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex));
1922 __ st1(v9.V8B(), v10.V8B(), v11.V8B(), MemOperand(x1, 24, PostIndex));
1923 __ st1(v12.V8B(), v13.V8B(), MemOperand(x0));
1924 __ st1(v2.V8B(), v3.V8B(), MemOperand(x1, x2, PostIndex));
1925 __ st1(v0.V8B(), v1.V8B(), MemOperand(x1, 16, PostIndex));
1926 __ st1(v16.V8B(), MemOperand(x0));
1927 __ st1(v25.V8B(), MemOperand(x1, x2, PostIndex));
1928 __ st1(v31.V8B(), MemOperand(x1, 8, PostIndex));
1929 __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x0));
1930 __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), MemOperand(x1, x2, PostIndex));
1931 __ st1(v26.V8H(),
1932 v27.V8H(),
1933 v28.V8H(),
1934 v29.V8H(),
1935 MemOperand(x1, 64, PostIndex));
1936 __ st1(v10.V8H(), v11.V8H(), v12.V8H(), MemOperand(x0));
1937 __ st1(v21.V8H(), v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex));
1938 __ st1(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex));
1939 __ st1(v26.V8H(), v27.V8H(), MemOperand(x0));
1940 __ st1(v24.V8H(), v25.V8H(), MemOperand(x1, x2, PostIndex));
1941 __ st1(v17.V8H(), v18.V8H(), MemOperand(x1, 32, PostIndex));
1942 __ st1(v29.V8H(), MemOperand(x0));
1943 __ st1(v19.V8H(), MemOperand(x1, x2, PostIndex));
1944 __ st1(v23.V8H(), MemOperand(x1, 16, PostIndex));
1945 __ st1(v19.B(), 15, MemOperand(x0));
1946 __ st1(v25.B(), 9, MemOperand(x1, x2, PostIndex));
1947 __ st1(v4.B(), 8, MemOperand(x1, 1, PostIndex));
1948 __ st1(v13.D(), 0, MemOperand(x0));
1949 __ st1(v30.D(), 0, MemOperand(x1, x2, PostIndex));
1950 __ st1(v3.D(), 0, MemOperand(x1, 8, PostIndex));
1951 __ st1(v22.H(), 0, MemOperand(x0));
1952 __ st1(v31.H(), 7, MemOperand(x1, x2, PostIndex));
1953 __ st1(v23.H(), 3, MemOperand(x1, 2, PostIndex));
1954 __ st1(v0.S(), 0, MemOperand(x0));
1955 __ st1(v11.S(), 3, MemOperand(x1, x2, PostIndex));
1956 __ st1(v24.S(), 3, MemOperand(x1, 4, PostIndex));
1957 __ st2(v7.V16B(), v8.V16B(), MemOperand(x0));
1958 __ st2(v5.V16B(), v6.V16B(), MemOperand(x1, x2, PostIndex));
1959 __ st2(v18.V16B(), v19.V16B(), MemOperand(x1, 32, PostIndex));
1960 __ st2(v14.V2D(), v15.V2D(), MemOperand(x0));
1961 __ st2(v7.V2D(), v8.V2D(), MemOperand(x1, x2, PostIndex));
1962 __ st2(v24.V2D(), v25.V2D(), MemOperand(x1, 32, PostIndex));
1963 __ st2(v22.V2S(), v23.V2S(), MemOperand(x0));
1964 __ st2(v4.V2S(), v5.V2S(), MemOperand(x1, x2, PostIndex));
1965 __ st2(v2.V2S(), v3.V2S(), MemOperand(x1, 16, PostIndex));
1966 __ st2(v23.V4H(), v24.V4H(), MemOperand(x0));
1967 __ st2(v8.V4H(), v9.V4H(), MemOperand(x1, x2, PostIndex));
1968 __ st2(v7.V4H(), v8.V4H(), MemOperand(x1, 16, PostIndex));
1969 __ st2(v17.V4S(), v18.V4S(), MemOperand(x0));
1970 __ st2(v6.V4S(), v7.V4S(), MemOperand(x1, x2, PostIndex));
1971 __ st2(v26.V4S(), v27.V4S(), MemOperand(x1, 32, PostIndex));
1972 __ st2(v31.V8B(), v0.V8B(), MemOperand(x0));
1973 __ st2(v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex));
1974 __ st2(v21.V8B(), v22.V8B(), MemOperand(x1, 16, PostIndex));
1975 __ st2(v7.V8H(), v8.V8H(), MemOperand(x0));
1976 __ st2(v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex));
1977 __ st2(v4.V8H(), v5.V8H(), MemOperand(x1, 32, PostIndex));
1978 __ st2(v8.B(), v9.B(), 15, MemOperand(x0));
1979 __ st2(v8.B(), v9.B(), 15, MemOperand(x1, x2, PostIndex));
1980 __ st2(v7.B(), v8.B(), 4, MemOperand(x1, 2, PostIndex));
1981 __ st2(v25.D(), v26.D(), 0, MemOperand(x0));
1982 __ st2(v17.D(), v18.D(), 1, MemOperand(x1, x2, PostIndex));
1983 __ st2(v3.D(), v4.D(), 1, MemOperand(x1, 16, PostIndex));
1984 __ st2(v4.H(), v5.H(), 3, MemOperand(x0));
1985 __ st2(v0.H(), v1.H(), 5, MemOperand(x1, x2, PostIndex));
1986 __ st2(v22.H(), v23.H(), 2, MemOperand(x1, 4, PostIndex));
1987 __ st2(v14.S(), v15.S(), 3, MemOperand(x0));
1988 __ st2(v23.S(), v24.S(), 3, MemOperand(x1, x2, PostIndex));
1989 __ st2(v0.S(), v1.S(), 2, MemOperand(x1, 8, PostIndex));
1990 __ st3(v26.V16B(), v27.V16B(), v28.V16B(), MemOperand(x0));
1991 __ st3(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex));
1992 __ st3(v24.V16B(), v25.V16B(), v26.V16B(), MemOperand(x1, 48, PostIndex));
1993 __ st3(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0));
1994 __ st3(v23.V2D(), v24.V2D(), v25.V2D(), MemOperand(x1, x2, PostIndex));
1995 __ st3(v10.V2D(), v11.V2D(), v12.V2D(), MemOperand(x1, 48, PostIndex));
1996 __ st3(v9.V2S(), v10.V2S(), v11.V2S(), MemOperand(x0));
1997 __ st3(v13.V2S(), v14.V2S(), v15.V2S(), MemOperand(x1, x2, PostIndex));
1998 __ st3(v22.V2S(), v23.V2S(), v24.V2S(), MemOperand(x1, 24, PostIndex));
1999 __ st3(v31.V4H(), v0.V4H(), v1.V4H(), MemOperand(x0));
2000 __ st3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x1, x2, PostIndex));
2001 __ st3(v19.V4H(), v20.V4H(), v21.V4H(), MemOperand(x1, 24, PostIndex));
2002 __ st3(v18.V4S(), v19.V4S(), v20.V4S(), MemOperand(x0));
2003 __ st3(v25.V4S(), v26.V4S(), v27.V4S(), MemOperand(x1, x2, PostIndex));
2004 __ st3(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x1, 48, PostIndex));
2005 __ st3(v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0));
2006 __ st3(v29.V8B(), v30.V8B(), v31.V8B(), MemOperand(x1, x2, PostIndex));
2007 __ st3(v30.V8B(), v31.V8B(), v0.V8B(), MemOperand(x1, 24, PostIndex));
2008 __ st3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x0));
2009 __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, x2, PostIndex));
2010 __ st3(v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x1, 48, PostIndex));
2011 __ st3(v31.B(), v0.B(), v1.B(), 10, MemOperand(x0));
2012 __ st3(v4.B(), v5.B(), v6.B(), 5, MemOperand(x1, x2, PostIndex));
2013 __ st3(v5.B(), v6.B(), v7.B(), 1, MemOperand(x1, 3, PostIndex));
2014 __ st3(v5.D(), v6.D(), v7.D(), 0, MemOperand(x0));
2015 __ st3(v6.D(), v7.D(), v8.D(), 0, MemOperand(x1, x2, PostIndex));
2016 __ st3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x1, 24, PostIndex));
2017 __ st3(v31.H(), v0.H(), v1.H(), 2, MemOperand(x0));
2018 __ st3(v14.H(), v15.H(), v16.H(), 5, MemOperand(x1, x2, PostIndex));
2019 __ st3(v21.H(), v22.H(), v23.H(), 6, MemOperand(x1, 6, PostIndex));
2020 __ st3(v21.S(), v22.S(), v23.S(), 0, MemOperand(x0));
2021 __ st3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x1, x2, PostIndex));
2022 __ st3(v15.S(), v16.S(), v17.S(), 0, MemOperand(x1, 12, PostIndex));
2023 __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), MemOperand(x0));
2024 __ st4(v24.V16B(),
2025 v25.V16B(),
2026 v26.V16B(),
2027 v27.V16B(),
2028 MemOperand(x1, x2, PostIndex));
2029 __ st4(v15.V16B(),
2030 v16.V16B(),
2031 v17.V16B(),
2032 v18.V16B(),
2033 MemOperand(x1, 64, PostIndex));
2034 __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0));
2035 __ st4(v17.V2D(),
2036 v18.V2D(),
2037 v19.V2D(),
2038 v20.V2D(),
2039 MemOperand(x1, x2, PostIndex));
2040 __ st4(v9.V2D(),
2041 v10.V2D(),
2042 v11.V2D(),
2043 v12.V2D(),
2044 MemOperand(x1, 64, PostIndex));
2045 __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), MemOperand(x0));
2046 __ st4(v15.V2S(),
2047 v16.V2S(),
2048 v17.V2S(),
2049 v18.V2S(),
2050 MemOperand(x1, x2, PostIndex));
2051 __ st4(v24.V2S(),
2052 v25.V2S(),
2053 v26.V2S(),
2054 v27.V2S(),
2055 MemOperand(x1, 32, PostIndex));
2056 __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), MemOperand(x0));
2057 __ st4(v18.V4H(),
2058 v19.V4H(),
2059 v20.V4H(),
2060 v21.V4H(),
2061 MemOperand(x1, x2, PostIndex));
2062 __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex));
2063 __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), MemOperand(x0));
2064 __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), MemOperand(x1, x2, PostIndex));
2065 __ st4(v15.V4S(),
2066 v16.V4S(),
2067 v17.V4S(),
2068 v18.V4S(),
2069 MemOperand(x1, 64, PostIndex));
2070 __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0));
2071 __ st4(v25.V8B(),
2072 v26.V8B(),
2073 v27.V8B(),
2074 v28.V8B(),
2075 MemOperand(x1, x2, PostIndex));
2076 __ st4(v19.V8B(),
2077 v20.V8B(),
2078 v21.V8B(),
2079 v22.V8B(),
2080 MemOperand(x1, 32, PostIndex));
2081 __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), MemOperand(x0));
2082 __ st4(v15.V8H(),
2083 v16.V8H(),
2084 v17.V8H(),
2085 v18.V8H(),
2086 MemOperand(x1, x2, PostIndex));
2087 __ st4(v31.V8H(),
2088 v0.V8H(),
2089 v1.V8H(),
2090 v2.V8H(),
2091 MemOperand(x1, 64, PostIndex));
2092 __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, MemOperand(x0));
2093 __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, MemOperand(x1, x2, PostIndex));
2094 __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, MemOperand(x1, 4, PostIndex));
2095 __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, MemOperand(x0));
2096 __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, MemOperand(x1, x2, PostIndex));
2097 __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, MemOperand(x1, 32, PostIndex));
2098 __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, MemOperand(x0));
2099 __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, MemOperand(x1, x2, PostIndex));
2100 __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, MemOperand(x1, 8, PostIndex));
2101 __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, MemOperand(x0));
2102 __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, MemOperand(x1, x2, PostIndex));
2103 __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, MemOperand(x1, 16, PostIndex));
2104 __ sub(d12, d17, d2);
2105 __ sub(v20.V16B(), v24.V16B(), v8.V16B());
2106 __ sub(v8.V2D(), v29.V2D(), v5.V2D());
2107 __ sub(v2.V2S(), v28.V2S(), v24.V2S());
2108 __ sub(v24.V4H(), v10.V4H(), v4.V4H());
2109 __ sub(v28.V4S(), v4.V4S(), v17.V4S());
2110 __ sub(v16.V8B(), v27.V8B(), v2.V8B());
2111 __ sub(v20.V8H(), v10.V8H(), v13.V8H());
2112 __ subhn(v5.V2S(), v14.V2D(), v13.V2D());
2113 __ subhn(v10.V4H(), v5.V4S(), v8.V4S());
2114 __ subhn(v6.V8B(), v10.V8H(), v22.V8H());
2115 __ subhn2(v11.V16B(), v6.V8H(), v9.V8H());
2116 __ subhn2(v25.V4S(), v18.V2D(), v24.V2D());
2117 __ subhn2(v20.V8H(), v21.V4S(), v1.V4S());
2118 __ suqadd(b25, b11);
2119 __ suqadd(d13, d1);
2120 __ suqadd(h0, h9);
2121 __ suqadd(s22, s8);
2122 __ suqadd(v24.V16B(), v27.V16B());
2123 __ suqadd(v26.V2D(), v14.V2D());
2124 __ suqadd(v7.V2S(), v10.V2S());
2125 __ suqadd(v25.V4H(), v12.V4H());
2126 __ suqadd(v4.V4S(), v3.V4S());
2127 __ suqadd(v14.V8B(), v18.V8B());
2128 __ suqadd(v31.V8H(), v8.V8H());
2129 __ sxtl(v16.V2D(), v20.V2S());
2130 __ sxtl(v27.V4S(), v28.V4H());
2131 __ sxtl(v0.V8H(), v22.V8B());
2132 __ sxtl2(v6.V2D(), v7.V4S());
2133 __ sxtl2(v9.V4S(), v27.V8H());
2134 __ sxtl2(v16.V8H(), v16.V16B());
2135 __ tbl(v25.V16B(),
2136 v17.V16B(),
2137 v18.V16B(),
2138 v19.V16B(),
2139 v20.V16B(),
2140 v22.V16B());
2141 __ tbl(v28.V16B(), v13.V16B(), v14.V16B(), v15.V16B(), v4.V16B());
2142 __ tbl(v3.V16B(), v0.V16B(), v1.V16B(), v2.V16B());
2143 __ tbl(v20.V16B(), v15.V16B(), v4.V16B());
2144 __ tbl(v7.V8B(), v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), v20.V8B());
2145 __ tbl(v8.V8B(), v1.V16B(), v2.V16B(), v3.V16B(), v31.V8B());
2146 __ tbl(v8.V8B(), v25.V16B(), v26.V16B(), v16.V8B());
2147 __ tbl(v11.V8B(), v19.V16B(), v30.V8B());
2148 __ tbx(v25.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v5.V16B());
2149 __ tbx(v21.V16B(), v29.V16B(), v30.V16B(), v31.V16B(), v24.V16B());
2150 __ tbx(v6.V16B(), v16.V16B(), v17.V16B(), v1.V16B());
2151 __ tbx(v13.V16B(), v3.V16B(), v20.V16B());
2152 __ tbx(v24.V8B(), v29.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v9.V8B());
2153 __ tbx(v17.V8B(), v9.V16B(), v10.V16B(), v11.V16B(), v26.V8B());
2154 __ tbx(v5.V8B(), v3.V16B(), v4.V16B(), v21.V8B());
2155 __ tbx(v16.V8B(), v11.V16B(), v29.V8B());
2156 __ trn1(v19.V16B(), v24.V16B(), v12.V16B());
2157 __ trn1(v2.V2D(), v7.V2D(), v10.V2D());
2158 __ trn1(v22.V2S(), v0.V2S(), v21.V2S());
2159 __ trn1(v12.V4H(), v15.V4H(), v20.V4H());
2160 __ trn1(v30.V4S(), v17.V4S(), v9.V4S());
2161 __ trn1(v12.V8B(), v19.V8B(), v29.V8B());
2162 __ trn1(v23.V8H(), v8.V8H(), v9.V8H());
2163 __ trn2(v28.V16B(), v30.V16B(), v25.V16B());
2164 __ trn2(v7.V2D(), v27.V2D(), v7.V2D());
2165 __ trn2(v30.V2S(), v16.V2S(), v19.V2S());
2166 __ trn2(v24.V4H(), v6.V4H(), v25.V4H());
2167 __ trn2(v2.V4S(), v19.V4S(), v11.V4S());
2168 __ trn2(v25.V8B(), v27.V8B(), v18.V8B());
2169 __ trn2(v12.V8H(), v4.V8H(), v15.V8H());
2170 __ uaba(v31.V16B(), v12.V16B(), v28.V16B());
2171 __ uaba(v18.V2S(), v5.V2S(), v14.V2S());
2172 __ uaba(v9.V4H(), v20.V4H(), v21.V4H());
2173 __ uaba(v6.V4S(), v20.V4S(), v2.V4S());
2174 __ uaba(v16.V8B(), v12.V8B(), v5.V8B());
2175 __ uaba(v15.V8H(), v26.V8H(), v30.V8H());
2176 __ uabal(v10.V2D(), v18.V2S(), v15.V2S());
2177 __ uabal(v30.V4S(), v19.V4H(), v7.V4H());
2178 __ uabal(v4.V8H(), v27.V8B(), v0.V8B());
2179 __ uabal2(v19.V2D(), v12.V4S(), v2.V4S());
2180 __ uabal2(v26.V4S(), v5.V8H(), v12.V8H());
2181 __ uabal2(v19.V8H(), v20.V16B(), v28.V16B());
2182 __ uabd(v18.V16B(), v4.V16B(), v21.V16B());
2183 __ uabd(v30.V2S(), v21.V2S(), v16.V2S());
2184 __ uabd(v8.V4H(), v28.V4H(), v25.V4H());
2185 __ uabd(v28.V4S(), v12.V4S(), v21.V4S());
2186 __ uabd(v19.V8B(), v16.V8B(), v28.V8B());
2187 __ uabd(v9.V8H(), v12.V8H(), v29.V8H());
2188 __ uabdl(v26.V2D(), v0.V2S(), v8.V2S());
2189 __ uabdl(v29.V4S(), v31.V4H(), v25.V4H());
2190 __ uabdl(v27.V8H(), v29.V8B(), v14.V8B());
2191 __ uabdl2(v20.V2D(), v20.V4S(), v8.V4S());
2192 __ uabdl2(v22.V4S(), v15.V8H(), v18.V8H());
2193 __ uabdl2(v9.V8H(), v18.V16B(), v23.V16B());
2194 __ uadalp(v9.V1D(), v15.V2S());
2195 __ uadalp(v14.V2D(), v12.V4S());
2196 __ uadalp(v28.V2S(), v12.V4H());
2197 __ uadalp(v0.V4H(), v17.V8B());
2198 __ uadalp(v1.V4S(), v29.V8H());
2199 __ uadalp(v15.V8H(), v22.V16B());
2200 __ uaddl(v1.V2D(), v20.V2S(), v27.V2S());
2201 __ uaddl(v31.V4S(), v25.V4H(), v5.V4H());
2202 __ uaddl(v12.V8H(), v3.V8B(), v3.V8B());
2203 __ uaddl2(v5.V2D(), v23.V4S(), v6.V4S());
2204 __ uaddl2(v1.V4S(), v5.V8H(), v25.V8H());
2205 __ uaddl2(v22.V8H(), v30.V16B(), v28.V16B());
2206 __ uaddlp(v7.V1D(), v9.V2S());
2207 __ uaddlp(v26.V2D(), v4.V4S());
2208 __ uaddlp(v28.V2S(), v1.V4H());
2209 __ uaddlp(v20.V4H(), v31.V8B());
2210 __ uaddlp(v16.V4S(), v17.V8H());
2211 __ uaddlp(v6.V8H(), v2.V16B());
2212 __ uaddlv(d28, v22.V4S());
2213 __ uaddlv(h0, v19.V16B());
2214 __ uaddlv(h30, v30.V8B());
2215 __ uaddlv(s24, v18.V4H());
2216 __ uaddlv(s10, v0.V8H());
2217 __ uaddw(v9.V2D(), v17.V2D(), v14.V2S());
2218 __ uaddw(v9.V4S(), v25.V4S(), v3.V4H());
2219 __ uaddw(v18.V8H(), v1.V8H(), v0.V8B());
2220 __ uaddw2(v18.V2D(), v5.V2D(), v6.V4S());
2221 __ uaddw2(v17.V4S(), v15.V4S(), v11.V8H());
2222 __ uaddw2(v29.V8H(), v11.V8H(), v7.V16B());
2223 __ uhadd(v13.V16B(), v9.V16B(), v3.V16B());
2224 __ uhadd(v17.V2S(), v25.V2S(), v24.V2S());
2225 __ uhadd(v25.V4H(), v23.V4H(), v13.V4H());
2226 __ uhadd(v0.V4S(), v20.V4S(), v16.V4S());
2227 __ uhadd(v5.V8B(), v5.V8B(), v25.V8B());
2228 __ uhadd(v3.V8H(), v29.V8H(), v18.V8H());
2229 __ uhsub(v1.V16B(), v22.V16B(), v13.V16B());
2230 __ uhsub(v14.V2S(), v30.V2S(), v30.V2S());
2231 __ uhsub(v29.V4H(), v14.V4H(), v17.V4H());
2232 __ uhsub(v26.V4S(), v5.V4S(), v18.V4S());
2233 __ uhsub(v3.V8B(), v7.V8B(), v12.V8B());
2234 __ uhsub(v25.V8H(), v21.V8H(), v5.V8H());
2235 __ umax(v28.V16B(), v12.V16B(), v6.V16B());
2236 __ umax(v20.V2S(), v19.V2S(), v26.V2S());
2237 __ umax(v0.V4H(), v31.V4H(), v18.V4H());
2238 __ umax(v6.V4S(), v21.V4S(), v28.V4S());
2239 __ umax(v0.V8B(), v2.V8B(), v20.V8B());
2240 __ umax(v4.V8H(), v11.V8H(), v22.V8H());
2241 __ umaxp(v1.V16B(), v6.V16B(), v29.V16B());
2242 __ umaxp(v19.V2S(), v17.V2S(), v27.V2S());
2243 __ umaxp(v21.V4H(), v16.V4H(), v7.V4H());
2244 __ umaxp(v9.V4S(), v20.V4S(), v29.V4S());
2245 __ umaxp(v13.V8B(), v1.V8B(), v16.V8B());
2246 __ umaxp(v19.V8H(), v23.V8H(), v26.V8H());
2247 __ umaxv(b17, v30.V16B());
2248 __ umaxv(b23, v12.V8B());
2249 __ umaxv(h31, v15.V4H());
2250 __ umaxv(h15, v25.V8H());
2251 __ umaxv(s18, v21.V4S());
2252 __ umin(v22.V16B(), v0.V16B(), v18.V16B());
2253 __ umin(v1.V2S(), v21.V2S(), v16.V2S());
2254 __ umin(v17.V4H(), v4.V4H(), v25.V4H());
2255 __ umin(v24.V4S(), v26.V4S(), v13.V4S());
2256 __ umin(v20.V8B(), v1.V8B(), v5.V8B());
2257 __ umin(v26.V8H(), v25.V8H(), v23.V8H());
2258 __ uminp(v5.V16B(), v1.V16B(), v23.V16B());
2259 __ uminp(v7.V2S(), v26.V2S(), v30.V2S());
2260 __ uminp(v9.V4H(), v5.V4H(), v25.V4H());
2261 __ uminp(v23.V4S(), v10.V4S(), v1.V4S());
2262 __ uminp(v4.V8B(), v29.V8B(), v14.V8B());
2263 __ uminp(v21.V8H(), v0.V8H(), v14.V8H());
2264 __ uminv(b0, v17.V16B());
2265 __ uminv(b0, v31.V8B());
2266 __ uminv(h24, v0.V4H());
2267 __ uminv(h29, v14.V8H());
2268 __ uminv(s30, v3.V4S());
2269 __ umlal(v11.V2D(), v11.V2S(), v24.V2S());
2270 __ umlal(v30.V2D(), v16.V2S(), v11.S(), 3);
2271 __ umlal(v0.V4S(), v9.V4H(), v26.V4H());
2272 __ umlal(v20.V4S(), v24.V4H(), v12.H(), 4);
2273 __ umlal(v16.V8H(), v21.V8B(), v6.V8B());
2274 __ umlal2(v17.V2D(), v19.V4S(), v23.V4S());
2275 __ umlal2(v5.V2D(), v30.V4S(), v8.S(), 0);
2276 __ umlal2(v16.V4S(), v8.V8H(), v15.V8H());
2277 __ umlal2(v15.V4S(), v26.V8H(), v1.H(), 5);
2278 __ umlal2(v30.V8H(), v1.V16B(), v17.V16B());
2279 __ umlsl(v18.V2D(), v19.V2S(), v28.V2S());
2280 __ umlsl(v7.V2D(), v7.V2S(), v8.S(), 0);
2281 __ umlsl(v24.V4S(), v8.V4H(), v4.V4H());
2282 __ umlsl(v18.V4S(), v22.V4H(), v12.H(), 4);
2283 __ umlsl(v28.V8H(), v14.V8B(), v20.V8B());
2284 __ umlsl2(v11.V2D(), v0.V4S(), v9.V4S());
2285 __ umlsl2(v26.V2D(), v16.V4S(), v9.S(), 2);
2286 __ umlsl2(v3.V4S(), v11.V8H(), v9.V8H());
2287 __ umlsl2(v10.V4S(), v25.V8H(), v9.H(), 4);
2288 __ umlsl2(v24.V8H(), v16.V16B(), v28.V16B());
2289 __ umov(x30, v25.D(), 1);
2290 __ umull(v12.V2D(), v10.V2S(), v29.V2S());
2291 __ umull(v22.V2D(), v30.V2S(), v5.S(), 3);
2292 __ umull(v7.V4S(), v0.V4H(), v25.V4H());
2293 __ umull(v11.V4S(), v13.V4H(), v3.H(), 2);
2294 __ umull(v25.V8H(), v16.V8B(), v10.V8B());
2295 __ umull2(v17.V2D(), v3.V4S(), v26.V4S());
2296 __ umull2(v26.V2D(), v11.V4S(), v2.S(), 3);
2297 __ umull2(v12.V4S(), v17.V8H(), v23.V8H());
2298 __ umull2(v4.V4S(), v31.V8H(), v1.H(), 2);
2299 __ umull2(v5.V8H(), v12.V16B(), v17.V16B());
2300 __ uqadd(b30, b4, b28);
2301 __ uqadd(d27, d20, d16);
2302 __ uqadd(h7, h14, h28);
2303 __ uqadd(s28, s17, s4);
2304 __ uqadd(v19.V16B(), v22.V16B(), v21.V16B());
2305 __ uqadd(v16.V2D(), v4.V2D(), v11.V2D());
2306 __ uqadd(v20.V2S(), v14.V2S(), v4.V2S());
2307 __ uqadd(v5.V4H(), v0.V4H(), v16.V4H());
2308 __ uqadd(v21.V4S(), v31.V4S(), v9.V4S());
2309 __ uqadd(v23.V8B(), v24.V8B(), v3.V8B());
2310 __ uqadd(v17.V8H(), v27.V8H(), v11.V8H());
2311 __ uqrshl(b10, b22, b10);
2312 __ uqrshl(d29, d5, d11);
2313 __ uqrshl(h27, h24, h30);
2314 __ uqrshl(s10, s13, s8);
2315 __ uqrshl(v9.V16B(), v18.V16B(), v14.V16B());
2316 __ uqrshl(v24.V2D(), v15.V2D(), v17.V2D());
2317 __ uqrshl(v4.V2S(), v14.V2S(), v27.V2S());
2318 __ uqrshl(v15.V4H(), v5.V4H(), v8.V4H());
2319 __ uqrshl(v21.V4S(), v29.V4S(), v0.V4S());
2320 __ uqrshl(v16.V8B(), v24.V8B(), v9.V8B());
2321 __ uqrshl(v2.V8H(), v0.V8H(), v15.V8H());
2322 __ uqrshrn(b11, h26, 4);
2323 __ uqrshrn(h7, s30, 5);
2324 __ uqrshrn(s10, d8, 21);
2325 __ uqrshrn(v15.V2S(), v6.V2D(), 11);
2326 __ uqrshrn(v5.V4H(), v26.V4S(), 12);
2327 __ uqrshrn(v28.V8B(), v25.V8H(), 5);
2328 __ uqrshrn2(v25.V16B(), v30.V8H(), 2);
2329 __ uqrshrn2(v21.V4S(), v14.V2D(), 32);
2330 __ uqrshrn2(v13.V8H(), v7.V4S(), 2);
2331 __ uqshl(b13, b0, b23);
2332 __ uqshl(b9, b17, 4);
2333 __ uqshl(d23, d6, d4);
2334 __ uqshl(d8, d11, 44);
2335 __ uqshl(h19, h13, h15);
2336 __ uqshl(h25, h26, 6);
2337 __ uqshl(s4, s24, s10);
2338 __ uqshl(s19, s14, 1);
2339 __ uqshl(v14.V16B(), v30.V16B(), v25.V16B());
2340 __ uqshl(v6.V16B(), v10.V16B(), 5);
2341 __ uqshl(v18.V2D(), v8.V2D(), v7.V2D());
2342 __ uqshl(v25.V2D(), v14.V2D(), 18);
2343 __ uqshl(v25.V2S(), v16.V2S(), v23.V2S());
2344 __ uqshl(v13.V2S(), v15.V2S(), 31);
2345 __ uqshl(v28.V4H(), v24.V4H(), v15.V4H());
2346 __ uqshl(v4.V4H(), v17.V4H(), 1);
2347 __ uqshl(v9.V4S(), v31.V4S(), v23.V4S());
2348 __ uqshl(v18.V4S(), v28.V4S(), 31);
2349 __ uqshl(v31.V8B(), v21.V8B(), v15.V8B());
2350 __ uqshl(v6.V8B(), v21.V8B(), 1);
2351 __ uqshl(v28.V8H(), v2.V8H(), v17.V8H());
2352 __ uqshl(v24.V8H(), v8.V8H(), 14);
2353 __ uqshrn(b21, h27, 7);
2354 __ uqshrn(h28, s26, 11);
2355 __ uqshrn(s13, d31, 17);
2356 __ uqshrn(v21.V2S(), v16.V2D(), 8);
2357 __ uqshrn(v24.V4H(), v24.V4S(), 2);
2358 __ uqshrn(v5.V8B(), v1.V8H(), 8);
2359 __ uqshrn2(v16.V16B(), v29.V8H(), 6);
2360 __ uqshrn2(v2.V4S(), v6.V2D(), 1);
2361 __ uqshrn2(v16.V8H(), v10.V4S(), 14);
2362 __ uqsub(b28, b20, b26);
2363 __ uqsub(d0, d7, d10);
2364 __ uqsub(h26, h24, h7);
2365 __ uqsub(s23, s23, s16);
2366 __ uqsub(v14.V16B(), v16.V16B(), v24.V16B());
2367 __ uqsub(v11.V2D(), v17.V2D(), v6.V2D());
2368 __ uqsub(v10.V2S(), v10.V2S(), v8.V2S());
2369 __ uqsub(v9.V4H(), v15.V4H(), v12.V4H());
2370 __ uqsub(v23.V4S(), v18.V4S(), v7.V4S());
2371 __ uqsub(v9.V8B(), v19.V8B(), v17.V8B());
2372 __ uqsub(v20.V8H(), v2.V8H(), v6.V8H());
2373 __ uqxtn(b29, h19);
2374 __ uqxtn(h0, s13);
2375 __ uqxtn(s26, d22);
2376 __ uqxtn(v5.V2S(), v31.V2D());
2377 __ uqxtn(v30.V4H(), v19.V4S());
2378 __ uqxtn(v15.V8B(), v2.V8H());
2379 __ uqxtn2(v29.V16B(), v3.V8H());
2380 __ uqxtn2(v13.V4S(), v17.V2D());
2381 __ uqxtn2(v28.V8H(), v11.V4S());
2382 __ urecpe(v23.V2S(), v15.V2S());
2383 __ urecpe(v27.V4S(), v7.V4S());
2384 __ urhadd(v2.V16B(), v15.V16B(), v27.V16B());
2385 __ urhadd(v15.V2S(), v1.V2S(), v18.V2S());
2386 __ urhadd(v17.V4H(), v4.V4H(), v26.V4H());
2387 __ urhadd(v2.V4S(), v27.V4S(), v14.V4S());
2388 __ urhadd(v5.V8B(), v17.V8B(), v14.V8B());
2389 __ urhadd(v30.V8H(), v2.V8H(), v25.V8H());
2390 __ urshl(d4, d28, d30);
2391 __ urshl(v13.V16B(), v31.V16B(), v19.V16B());
2392 __ urshl(v14.V2D(), v23.V2D(), v21.V2D());
2393 __ urshl(v10.V2S(), v7.V2S(), v8.V2S());
2394 __ urshl(v15.V4H(), v21.V4H(), v28.V4H());
2395 __ urshl(v30.V4S(), v8.V4S(), v23.V4S());
2396 __ urshl(v31.V8B(), v20.V8B(), v5.V8B());
2397 __ urshl(v30.V8H(), v27.V8H(), v30.V8H());
2398 __ urshr(d4, d13, 49);
2399 __ urshr(v2.V16B(), v20.V16B(), 1);
2400 __ urshr(v13.V2D(), v11.V2D(), 51);
2401 __ urshr(v21.V2S(), v31.V2S(), 10);
2402 __ urshr(v21.V4H(), v17.V4H(), 11);
2403 __ urshr(v4.V4S(), v22.V4S(), 1);
2404 __ urshr(v0.V8B(), v1.V8B(), 7);
2405 __ urshr(v13.V8H(), v20.V8H(), 1);
2406 __ ursqrte(v20.V2S(), v16.V2S());
2407 __ ursqrte(v28.V4S(), v8.V4S());
2408 __ ursra(d27, d16, 45);
2409 __ ursra(v18.V16B(), v17.V16B(), 3);
2410 __ ursra(v26.V2D(), v28.V2D(), 58);
2411 __ ursra(v8.V2S(), v22.V2S(), 31);
2412 __ ursra(v31.V4H(), v4.V4H(), 7);
2413 __ ursra(v31.V4S(), v15.V4S(), 2);
2414 __ ursra(v3.V8B(), v1.V8B(), 5);
2415 __ ursra(v18.V8H(), v14.V8H(), 13);
2416 __ ushl(d31, d0, d16);
2417 __ ushl(v0.V16B(), v6.V16B(), v2.V16B());
2418 __ ushl(v18.V2D(), v1.V2D(), v18.V2D());
2419 __ ushl(v27.V2S(), v7.V2S(), v29.V2S());
2420 __ ushl(v14.V4H(), v14.V4H(), v13.V4H());
2421 __ ushl(v22.V4S(), v4.V4S(), v9.V4S());
2422 __ ushl(v23.V8B(), v22.V8B(), v27.V8B());
2423 __ ushl(v21.V8H(), v25.V8H(), v8.V8H());
2424 __ ushll(v11.V2D(), v0.V2S(), 21);
2425 __ ushll(v2.V4S(), v17.V4H(), 8);
2426 __ ushll(v11.V8H(), v14.V8B(), 1);
2427 __ ushll2(v8.V2D(), v29.V4S(), 7);
2428 __ ushll2(v29.V4S(), v9.V8H(), 2);
2429 __ ushll2(v5.V8H(), v24.V16B(), 6);
2430 __ ushr(d28, d27, 53);
2431 __ ushr(v1.V16B(), v9.V16B(), 7);
2432 __ ushr(v2.V2D(), v24.V2D(), 43);
2433 __ ushr(v30.V2S(), v25.V2S(), 11);
2434 __ ushr(v10.V4H(), v26.V4H(), 12);
2435 __ ushr(v4.V4S(), v5.V4S(), 30);
2436 __ ushr(v30.V8B(), v2.V8B(), 1);
2437 __ ushr(v6.V8H(), v12.V8H(), 2);
2438 __ usqadd(b19, b5);
2439 __ usqadd(d9, d2);
2440 __ usqadd(h2, h16);
2441 __ usqadd(s16, s3);
2442 __ usqadd(v31.V16B(), v29.V16B());
2443 __ usqadd(v8.V2D(), v10.V2D());
2444 __ usqadd(v18.V2S(), v9.V2S());
2445 __ usqadd(v24.V4H(), v14.V4H());
2446 __ usqadd(v10.V4S(), v30.V4S());
2447 __ usqadd(v16.V8B(), v20.V8B());
2448 __ usqadd(v12.V8H(), v16.V8H());
2449 __ usra(d28, d27, 37);
2450 __ usra(v5.V16B(), v22.V16B(), 5);
2451 __ usra(v2.V2D(), v19.V2D(), 33);
2452 __ usra(v0.V2S(), v0.V2S(), 21);
2453 __ usra(v7.V4H(), v6.V4H(), 12);
2454 __ usra(v4.V4S(), v17.V4S(), 9);
2455 __ usra(v9.V8B(), v12.V8B(), 7);
2456 __ usra(v3.V8H(), v27.V8H(), 14);
2457 __ usubl(v29.V2D(), v12.V2S(), v30.V2S());
2458 __ usubl(v29.V4S(), v28.V4H(), v6.V4H());
2459 __ usubl(v12.V8H(), v4.V8B(), v14.V8B());
2460 __ usubl2(v1.V2D(), v24.V4S(), v17.V4S());
2461 __ usubl2(v4.V4S(), v1.V8H(), v3.V8H());
2462 __ usubl2(v23.V8H(), v4.V16B(), v7.V16B());
2463 __ usubw(v9.V2D(), v20.V2D(), v30.V2S());
2464 __ usubw(v20.V4S(), v16.V4S(), v23.V4H());
2465 __ usubw(v25.V8H(), v8.V8H(), v29.V8B());
2466 __ usubw2(v18.V2D(), v29.V2D(), v6.V4S());
2467 __ usubw2(v6.V4S(), v6.V4S(), v20.V8H());
2468 __ usubw2(v18.V8H(), v4.V8H(), v16.V16B());
2469 __ uxtl(v27.V2D(), v21.V2S());
2470 __ uxtl(v0.V4S(), v31.V4H());
2471 __ uxtl(v27.V8H(), v10.V8B());
2472 __ uxtl2(v6.V2D(), v16.V4S());
2473 __ uxtl2(v22.V4S(), v20.V8H());
2474 __ uxtl2(v20.V8H(), v21.V16B());
2475 __ uzp1(v30.V16B(), v9.V16B(), v17.V16B());
2476 __ uzp1(v7.V2D(), v26.V2D(), v28.V2D());
2477 __ uzp1(v26.V2S(), v16.V2S(), v22.V2S());
2478 __ uzp1(v14.V4H(), v19.V4H(), v6.V4H());
2479 __ uzp1(v17.V4S(), v23.V4S(), v30.V4S());
2480 __ uzp1(v28.V8B(), v27.V8B(), v13.V8B());
2481 __ uzp1(v17.V8H(), v1.V8H(), v12.V8H());
2482 __ uzp2(v8.V16B(), v18.V16B(), v26.V16B());
2483 __ uzp2(v21.V2D(), v22.V2D(), v24.V2D());
2484 __ uzp2(v20.V2S(), v21.V2S(), v2.V2S());
2485 __ uzp2(v16.V4H(), v31.V4H(), v6.V4H());
2486 __ uzp2(v25.V4S(), v11.V4S(), v8.V4S());
2487 __ uzp2(v31.V8B(), v31.V8B(), v13.V8B());
2488 __ uzp2(v8.V8H(), v17.V8H(), v1.V8H());
2489 __ xtn(v17.V2S(), v26.V2D());
2490 __ xtn(v3.V4H(), v0.V4S());
2491 __ xtn(v18.V8B(), v8.V8H());
2492 __ xtn2(v0.V16B(), v0.V8H());
2493 __ xtn2(v15.V4S(), v4.V2D());
2494 __ xtn2(v31.V8H(), v18.V4S());
2495 __ zip1(v22.V16B(), v9.V16B(), v6.V16B());
2496 __ zip1(v23.V2D(), v11.V2D(), v2.V2D());
2497 __ zip1(v26.V2S(), v16.V2S(), v9.V2S());
2498 __ zip1(v1.V4H(), v9.V4H(), v7.V4H());
2499 __ zip1(v0.V4S(), v30.V4S(), v20.V4S());
2500 __ zip1(v30.V8B(), v17.V8B(), v15.V8B());
2501 __ zip1(v17.V8H(), v8.V8H(), v2.V8H());
2502 __ zip2(v23.V16B(), v10.V16B(), v11.V16B());
2503 __ zip2(v30.V2D(), v6.V2D(), v14.V2D());
2504 __ zip2(v9.V2S(), v10.V2S(), v21.V2S());
2505 __ zip2(v8.V4H(), v24.V4H(), v29.V4H());
2506 __ zip2(v0.V4S(), v21.V4S(), v23.V4S());
2507 __ zip2(v25.V8B(), v23.V8B(), v30.V8B());
2508 __ zip2(v7.V8H(), v10.V8H(), v30.V8H());
2509 } // NOLINT(readability/fn_size)
2510
2511
// Emit the NEON floating-point part of the trace-test instruction sequence
// into `masm`.
//
// `__` expands to `masm->`, so every line below calls the corresponding
// assembler method directly. All instructions are emitted inside a single
// ExactAssemblyScope that is given the number of bytes remaining in the buffer
// together with the kMaximumSize policy.
//
// TraceTestHelper and PrintDisassemblerTestHelper both call this function and
// then diff the resulting output against reference files, so reordering or
// editing the instructions below presumably requires regenerating those
// references.
static void GenerateTestSequenceNEONFP(MacroAssembler* masm) {
  ExactAssemblyScope guard(masm,
                           masm->GetBuffer()->GetRemainingBytes(),
                           ExactAssemblyScope::kMaximumSize);

  // NEON floating point instructions.
  __ fabd(v3.V2D(), v25.V2D(), v8.V2D());
  __ fabd(v14.V2S(), v27.V2S(), v11.V2S());
  __ fabd(v9.V4S(), v22.V4S(), v18.V4S());
  __ fabs(v1.V2D(), v29.V2D());
  __ fabs(v6.V2S(), v21.V2S());
  __ fabs(v12.V4S(), v25.V4S());
  __ facge(v18.V2D(), v5.V2D(), v0.V2D());
  __ facge(v15.V2S(), v11.V2S(), v6.V2S());
  __ facge(v30.V4S(), v10.V4S(), v25.V4S());
  __ facgt(v28.V2D(), v16.V2D(), v31.V2D());
  __ facgt(v15.V2S(), v1.V2S(), v4.V2S());
  __ facgt(v22.V4S(), v3.V4S(), v10.V4S());
  __ fadd(v7.V2D(), v10.V2D(), v24.V2D());
  __ fadd(v10.V2S(), v23.V2S(), v7.V2S());
  __ fadd(v16.V4S(), v22.V4S(), v11.V4S());
  __ faddp(d27, v28.V2D());
  __ faddp(s20, v23.V2S());
  __ faddp(v21.V2D(), v4.V2D(), v11.V2D());
  __ faddp(v31.V2S(), v26.V2S(), v1.V2S());
  __ faddp(v13.V4S(), v27.V4S(), v28.V4S());
  __ fcmeq(v17.V2D(), v13.V2D(), v20.V2D());
  __ fcmeq(v24.V2D(), v16.V2D(), 0.0);
  __ fcmeq(v26.V2S(), v17.V2S(), v10.V2S());
  __ fcmeq(v24.V2S(), v4.V2S(), 0.0);
  __ fcmeq(v8.V4S(), v4.V4S(), v14.V4S());
  __ fcmeq(v26.V4S(), v25.V4S(), 0.0);
  __ fcmge(v27.V2D(), v0.V2D(), v0.V2D());
  __ fcmge(v22.V2D(), v30.V2D(), 0.0);
  __ fcmge(v7.V2S(), v21.V2S(), v25.V2S());
  __ fcmge(v15.V2S(), v15.V2S(), 0.0);
  __ fcmge(v29.V4S(), v4.V4S(), v27.V4S());
  __ fcmge(v22.V4S(), v21.V4S(), 0.0);
  __ fcmgt(v1.V2D(), v26.V2D(), v15.V2D());
  __ fcmgt(v15.V2D(), v23.V2D(), 0.0);
  __ fcmgt(v21.V2S(), v16.V2S(), v6.V2S());
  __ fcmgt(v1.V2S(), v13.V2S(), 0.0);
  __ fcmgt(v14.V4S(), v0.V4S(), v25.V4S());
  __ fcmgt(v13.V4S(), v8.V4S(), 0.0);
  __ fcmle(v4.V2D(), v6.V2D(), 0.0);
  __ fcmle(v24.V2S(), v31.V2S(), 0.0);
  __ fcmle(v8.V4S(), v23.V4S(), 0.0);
  __ fcmlt(v7.V2D(), v3.V2D(), 0.0);
  __ fcmlt(v15.V2S(), v21.V2S(), 0.0);
  __ fcmlt(v1.V4S(), v2.V4S(), 0.0);
  __ fcvtas(v6.V2D(), v8.V2D());
  __ fcvtas(v1.V2S(), v9.V2S());
  __ fcvtas(v8.V4S(), v19.V4S());
  __ fcvtau(v5.V2D(), v31.V2D());
  __ fcvtau(v28.V2S(), v29.V2S());
  __ fcvtau(v11.V4S(), v26.V4S());
  __ fcvtl(v8.V2D(), v25.V2S());
  __ fcvtl(v27.V4S(), v14.V4H());
  __ fcvtl2(v1.V2D(), v6.V4S());
  __ fcvtl2(v24.V4S(), v9.V8H());
  __ fcvtms(v9.V2D(), v24.V2D());
  __ fcvtms(v7.V2S(), v11.V2S());
  __ fcvtms(v23.V4S(), v21.V4S());
  __ fcvtmu(v13.V2D(), v1.V2D());
  __ fcvtmu(v26.V2S(), v12.V2S());
  __ fcvtmu(v21.V4S(), v21.V4S());
  __ fcvtn(v11.V2S(), v1.V2D());
  __ fcvtn(v8.V4H(), v2.V4S());
  __ fcvtn2(v24.V4S(), v29.V2D());
  __ fcvtn2(v4.V8H(), v10.V4S());
  __ fcvtns(v25.V2D(), v10.V2D());
  __ fcvtns(v4.V2S(), v8.V2S());
  __ fcvtns(v29.V4S(), v27.V4S());
  __ fcvtnu(v18.V2D(), v27.V2D());
  __ fcvtnu(v11.V2S(), v14.V2S());
  __ fcvtnu(v27.V4S(), v21.V4S());
  __ fcvtps(v23.V2D(), v5.V2D());
  __ fcvtps(v24.V2S(), v15.V2S());
  __ fcvtps(v5.V4S(), v19.V4S());
  __ fcvtpu(v3.V2D(), v21.V2D());
  __ fcvtpu(v3.V2S(), v21.V2S());
  __ fcvtpu(v0.V4S(), v7.V4S());
  __ fcvtxn(v29.V2S(), v11.V2D());
  __ fcvtxn2(v31.V4S(), v25.V2D());
  __ fcvtzs(v19.V2D(), v17.V2D());
  __ fcvtzs(v12.V2D(), v24.V2D(), 64);
  __ fcvtzs(v9.V2S(), v2.V2S());
  __ fcvtzs(v5.V2S(), v20.V2S(), 29);
  __ fcvtzs(v21.V4S(), v25.V4S());
  __ fcvtzs(v26.V4S(), v1.V4S(), 6);
  __ fcvtzu(v13.V2D(), v25.V2D());
  __ fcvtzu(v28.V2D(), v13.V2D(), 32);
  __ fcvtzu(v26.V2S(), v6.V2S());
  __ fcvtzu(v9.V2S(), v10.V2S(), 15);
  __ fcvtzu(v30.V4S(), v6.V4S());
  __ fcvtzu(v19.V4S(), v22.V4S(), 18);
  __ fdiv(v15.V2D(), v8.V2D(), v15.V2D());
  __ fdiv(v12.V2S(), v9.V2S(), v26.V2S());
  __ fdiv(v19.V4S(), v22.V4S(), v19.V4S());
  __ fmax(v19.V2D(), v7.V2D(), v8.V2D());
  __ fmax(v25.V2S(), v12.V2S(), v29.V2S());
  __ fmax(v6.V4S(), v15.V4S(), v5.V4S());
  __ fmaxnm(v16.V2D(), v8.V2D(), v20.V2D());
  __ fmaxnm(v15.V2S(), v26.V2S(), v25.V2S());
  __ fmaxnm(v23.V4S(), v14.V4S(), v16.V4S());
  __ fmaxnmp(d6, v19.V2D());
  __ fmaxnmp(s27, v26.V2S());
  __ fmaxnmp(v8.V2D(), v12.V2D(), v23.V2D());
  __ fmaxnmp(v13.V2S(), v25.V2S(), v22.V2S());
  __ fmaxnmp(v15.V4S(), v11.V4S(), v17.V4S());
  __ fmaxnmv(s27, v19.V4S());
  __ fmaxp(d20, v14.V2D());
  __ fmaxp(s18, v2.V2S());
  __ fmaxp(v9.V2D(), v23.V2D(), v31.V2D());
  __ fmaxp(v7.V2S(), v22.V2S(), v31.V2S());
  __ fmaxp(v18.V4S(), v7.V4S(), v29.V4S());
  __ fmaxv(s31, v29.V4S());
  __ fmin(v2.V2D(), v5.V2D(), v2.V2D());
  __ fmin(v31.V2S(), v17.V2S(), v10.V2S());
  __ fmin(v10.V4S(), v4.V4S(), v16.V4S());
  __ fminnm(v21.V2D(), v6.V2D(), v5.V2D());
  __ fminnm(v22.V2S(), v18.V2S(), v14.V2S());
  __ fminnm(v25.V4S(), v31.V4S(), v3.V4S());
  __ fminnmp(d9, v1.V2D());
  __ fminnmp(s21, v20.V2S());
  __ fminnmp(v16.V2D(), v21.V2D(), v19.V2D());
  __ fminnmp(v16.V2S(), v31.V2S(), v25.V2S());
  __ fminnmp(v26.V4S(), v16.V4S(), v15.V4S());
  __ fminnmv(s3, v4.V4S());
  __ fminp(d24, v26.V2D());
  __ fminp(s7, v17.V2S());
  __ fminp(v23.V2D(), v19.V2D(), v3.V2D());
  __ fminp(v29.V2S(), v21.V2S(), v9.V2S());
  __ fminp(v0.V4S(), v24.V4S(), v21.V4S());
  __ fminv(s25, v8.V4S());
  __ fmla(d23, d0, v9.D(), 1);
  __ fmla(s23, s15, v7.S(), 0);
  __ fmla(v17.V2D(), v11.V2D(), v6.V2D());
  __ fmla(v30.V2D(), v30.V2D(), v11.D(), 0);
  __ fmla(v19.V2S(), v12.V2S(), v6.V2S());
  __ fmla(v24.V2S(), v17.V2S(), v9.S(), 0);
  __ fmla(v16.V4S(), v11.V4S(), v11.V4S());
  __ fmla(v27.V4S(), v23.V4S(), v9.S(), 2);
  __ fmls(d27, d30, v6.D(), 0);
  __ fmls(s21, s16, v2.S(), 0);
  __ fmls(v5.V2D(), v19.V2D(), v21.V2D());
  __ fmls(v18.V2D(), v30.V2D(), v12.D(), 0);
  __ fmls(v5.V2S(), v16.V2S(), v7.V2S());
  __ fmls(v3.V2S(), v18.V2S(), v11.S(), 1);
  __ fmls(v27.V4S(), v5.V4S(), v30.V4S());
  __ fmls(v26.V4S(), v20.V4S(), v4.S(), 3);
  __ fmov(v14.V2D(), -0.34375);
  __ fmov(v26.V2S(), 0.90625f);
  __ fmov(v31.V4S(), -5.0000f);
  __ fmov(v28.D(), 1, x25);
  __ fmov(x18, v2.D(), 1);
  __ fmul(d12, d4, v1.D(), 1);
  __ fmul(s30, s1, v15.S(), 3);
  __ fmul(v25.V2D(), v0.V2D(), v21.V2D());
  __ fmul(v10.V2D(), v24.V2D(), v10.D(), 1);
  __ fmul(v7.V2S(), v24.V2S(), v16.V2S());
  __ fmul(v1.V2S(), v16.V2S(), v4.S(), 2);
  __ fmul(v5.V4S(), v28.V4S(), v25.V4S());
  __ fmul(v11.V4S(), v3.V4S(), v8.S(), 0);
  __ fmulx(d28, d9, v3.D(), 1);
  __ fmulx(s25, s21, v15.S(), 1);
  __ fmulx(v31.V2D(), v28.V2D(), v8.V2D());
  __ fmulx(v3.V2D(), v21.V2D(), v6.D(), 0);
  __ fmulx(v9.V2S(), v1.V2S(), v0.V2S());
  __ fmulx(v16.V2S(), v27.V2S(), v6.S(), 0);
  __ fmulx(v2.V4S(), v4.V4S(), v5.V4S());
  __ fmulx(v18.V4S(), v7.V4S(), v4.S(), 0);
  __ fneg(v1.V2D(), v25.V2D());
  __ fneg(v14.V2S(), v31.V2S());
  __ fneg(v5.V4S(), v4.V4S());
  __ frecpe(v18.V2D(), v12.V2D());
  __ frecpe(v10.V2S(), v22.V2S());
  __ frecpe(v5.V4S(), v6.V4S());
  __ frecps(v22.V2D(), v7.V2D(), v26.V2D());
  __ frecps(v31.V2S(), v27.V2S(), v2.V2S());
  __ frecps(v18.V4S(), v6.V4S(), v27.V4S());
  __ frinta(v26.V2D(), v13.V2D());
  __ frinta(v15.V2S(), v26.V2S());
  __ frinta(v13.V4S(), v16.V4S());
  __ frinti(v9.V2D(), v12.V2D());
  __ frinti(v5.V2S(), v19.V2S());
  __ frinti(v15.V4S(), v11.V4S());
  __ frintm(v17.V2D(), v29.V2D());
  __ frintm(v30.V2S(), v11.V2S());
  __ frintm(v1.V4S(), v20.V4S());
  __ frintn(v24.V2D(), v6.V2D());
  __ frintn(v12.V2S(), v17.V2S());
  __ frintn(v29.V4S(), v11.V4S());
  __ frintp(v10.V2D(), v7.V2D());
  __ frintp(v12.V2S(), v18.V2S());
  __ frintp(v26.V4S(), v31.V4S());
  __ frintx(v24.V2D(), v13.V2D());
  __ frintx(v7.V2S(), v9.V2S());
  __ frintx(v18.V4S(), v21.V4S());
  __ frintz(v19.V2D(), v25.V2D());
  __ frintz(v15.V2S(), v8.V2S());
  __ frintz(v20.V4S(), v3.V4S());
  __ frsqrte(v23.V2D(), v5.V2D());
  __ frsqrte(v9.V2S(), v7.V2S());
  __ frsqrte(v3.V4S(), v9.V4S());
  __ frsqrts(v25.V2D(), v28.V2D(), v15.V2D());
  __ frsqrts(v9.V2S(), v26.V2S(), v10.V2S());
  __ frsqrts(v5.V4S(), v1.V4S(), v10.V4S());
  __ fsqrt(v6.V2D(), v18.V2D());
  __ fsqrt(v6.V2S(), v18.V2S());
  __ fsqrt(v0.V4S(), v31.V4S());
  __ fsub(v31.V2D(), v30.V2D(), v31.V2D());
  __ fsub(v11.V2S(), v8.V2S(), v6.V2S());
  __ fsub(v16.V4S(), v0.V4S(), v31.V4S());
  __ scvtf(v25.V2D(), v31.V2D());
  __ scvtf(v10.V2D(), v13.V2D(), 45);
  __ scvtf(v10.V2S(), v15.V2S());
  __ scvtf(v18.V2S(), v4.V2S(), 27);
  __ scvtf(v17.V4S(), v5.V4S());
  __ scvtf(v11.V4S(), v25.V4S(), 24);
  __ ucvtf(v9.V2D(), v3.V2D());
  __ ucvtf(v26.V2D(), v30.V2D(), 46);
  __ ucvtf(v11.V2S(), v4.V2S());
  __ ucvtf(v29.V2S(), v3.V2S(), 25);
  __ ucvtf(v22.V4S(), v23.V4S());
  __ ucvtf(v18.V4S(), v9.V4S(), 25);
}
2739
2740
MaskAddresses(const char * trace)2741 static void MaskAddresses(const char* trace) {
2742 #ifdef __APPLE__
2743 #define ESCAPE(c) "\\\\" #c
2744 const char* sed_options = "-i \"\" -E";
2745 #else
2746 #define ESCAPE(c) "\\" #c
2747 const char* sed_options = "-i -E";
2748 #endif
2749 #define COLOUR "(." ESCAPE([) "[01];([0-9][0-9])?m)?"
2750 struct {
2751 const char* search;
2752 const char* replace;
2753 } patterns[] =
2754 {// Mask registers that hold addresses that change from run to run.
2755 {"((x0|x1|x2|sp): " COLOUR "0x)[0-9a-f]{16}",
2756 ESCAPE(1) "~~~~~~~~~~~~~~~~"},
2757 // Mask accessed memory addresses.
2758 {"((<-|->) " COLOUR "0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"},
2759 // Mask instruction addresses.
2760 {"^0x[0-9a-f]{16}", "0x~~~~~~~~~~~~~~~~"},
2761 // Mask branch targets.
2762 {"(Branch" COLOUR " to 0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"},
2763 {"addr 0x[0-9a-f]+", "addr 0x~~~~~~~~~~~~~~~~"}};
2764 const size_t patterns_length = sizeof(patterns) / sizeof(patterns[0]);
2765 // Rewrite `trace`, masking addresses and other values that legitimately vary
2766 // from run to run.
2767 char command[1024];
2768 for (size_t i = 0; i < patterns_length; i++) {
2769 size_t length = snprintf(command,
2770 sizeof(command),
2771 "sed %s 's/%s/%s/' '%s'",
2772 sed_options,
2773 patterns[i].search,
2774 patterns[i].replace,
2775 trace);
2776 VIXL_CHECK(length < sizeof(command));
2777 VIXL_CHECK(system(command) == 0);
2778 }
2779 }
2780
2781
CheckOrGenerateTrace(const char * filename,const char * ref_file)2782 static bool CheckOrGenerateTrace(const char* filename, const char* ref_file) {
2783 bool trace_matched_reference;
2784 if (Test::generate_test_trace()) {
2785 // Copy trace_stream to stdout.
2786 FILE* trace_stream = fopen(filename, "r");
2787 VIXL_ASSERT(trace_stream != NULL);
2788 fseek(trace_stream, 0, SEEK_SET);
2789 int c;
2790 while (1) {
2791 c = getc(trace_stream);
2792 if (c == EOF) break;
2793 putc(c, stdout);
2794 }
2795 fclose(trace_stream);
2796 trace_matched_reference = true;
2797 } else {
2798 // Check trace_stream against ref_file.
2799 char command[1024];
2800 size_t length =
2801 snprintf(command, sizeof(command), "diff -u %s %s", ref_file, filename);
2802 VIXL_CHECK(length < sizeof(command));
2803 trace_matched_reference = (system(command) == 0);
2804 }
2805 return trace_matched_reference;
2806 }
2807
2808
2809 // Trace tests can only work with the simulator.
2810 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
2811
TraceTestHelper(bool coloured_trace,TraceParameters trace_parameters,const char * ref_file)2812 static void TraceTestHelper(bool coloured_trace,
2813 TraceParameters trace_parameters,
2814 const char* ref_file) {
2815 MacroAssembler masm(12 * KBytes);
2816
2817 char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX";
2818 FILE* trace_stream = fdopen(mkstemp(trace_stream_filename), "w");
2819
2820 Decoder decoder;
2821 Simulator simulator(&decoder, trace_stream);
2822 simulator.SetColouredTrace(coloured_trace);
2823 simulator.SetTraceParameters(trace_parameters);
2824 simulator.SilenceExclusiveAccessWarning();
2825
2826 // Set up a scratch buffer so we can test loads and stores.
2827 const int kScratchSize = 64 * KBytes;
2828 const int kScratchGuardSize = 128;
2829 char scratch_buffer[kScratchSize + kScratchGuardSize];
2830 for (size_t i = 0; i < (sizeof(scratch_buffer) / sizeof(scratch_buffer[0]));
2831 i++) {
2832 scratch_buffer[i] = i & 0xff;
2833 }
2834 // Used for offset addressing.
2835 simulator.WriteRegister(0, scratch_buffer);
2836 // Used for pre-/post-index addressing.
2837 simulator.WriteRegister(1, scratch_buffer);
2838
2839 const int kPostIndexRegisterStep = 13; // Arbitrary interesting value.
2840 // Used for post-index offsets.
2841 simulator.WriteRegister(2, kPostIndexRegisterStep);
2842
2843 // Initialize the other registers with unique values.
2844 uint64_t initial_base_u64 = 0x0100001000100101;
2845 for (unsigned i = 3; i < kNumberOfRegisters; i++) {
2846 if (i == kLinkRegCode) continue;
2847 if (i == kZeroRegCode) continue;
2848 // NoRegLog suppresses the log now, but the registers will still be logged
2849 // before the first instruction is executed since they have been written but
2850 // not printed.
2851 simulator.WriteRegister(i, initial_base_u64 * i, Simulator::NoRegLog);
2852 }
2853 float initial_base_f32 = 1.2345f;
2854 double initial_base_f64 = 1.3456f;
2855 for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
2856 // Try to initialise V registers with reasonable FP values.
2857 uint64_t low = (DoubleToRawbits(initial_base_f64 * i) & ~kSRegMask) |
2858 FloatToRawbits(initial_base_f32 * i);
2859 uint64_t high = low ^ 0x0005555500555555;
2860 LogicVRegister reg(simulator.ReadVRegister(i));
2861 reg.SetUint(kFormat2D, 0, low);
2862 reg.SetUint(kFormat2D, 1, high);
2863 }
2864
2865 GenerateTestSequenceBase(&masm);
2866 GenerateTestSequenceFP(&masm);
2867 GenerateTestSequenceNEON(&masm);
2868 GenerateTestSequenceNEONFP(&masm);
2869 masm.Ret();
2870 masm.FinalizeCode();
2871
2872 simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>());
2873
2874 fclose(trace_stream);
2875 MaskAddresses(trace_stream_filename);
2876
2877 bool trace_matched_reference =
2878 CheckOrGenerateTrace(trace_stream_filename, ref_file);
2879 remove(trace_stream_filename); // Clean up before checking the result.
2880 VIXL_CHECK(trace_matched_reference);
2881
2882 uint64_t offset_base = simulator.ReadRegister<uint64_t>(0);
2883 uint64_t index_base = simulator.ReadRegister<uint64_t>(1);
2884
2885 VIXL_CHECK(index_base >= offset_base);
2886 VIXL_CHECK((index_base - offset_base) <= kScratchSize);
2887 }
2888
2889
2890 // Test individual options.
// Uncoloured trace with LOG_DISASM, checked against REF("log-disasm").
TEST(disasm) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_DISASM, REF("log-disasm"));
}
// Uncoloured trace with LOG_REGS, checked against REF("log-regs").
TEST(regs) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_REGS, REF("log-regs"));
}
// Uncoloured trace with LOG_VREGS, checked against REF("log-vregs").
TEST(vregs) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_VREGS, REF("log-vregs"));
}
// Uncoloured trace with LOG_SYSREGS, checked against REF("log-sysregs").
TEST(sysregs) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_SYSREGS, REF("log-sysregs"));
}
// Uncoloured trace with LOG_WRITE, checked against REF("log-write").
TEST(write) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_WRITE, REF("log-write"));
}
// Uncoloured trace checked against REF("log-branch").
// NOTE(review): this passes LOG_WRITE, exactly like TEST(write); a
// branch-specific trace flag was presumably intended. Changing the flag would
// require regenerating the "log-branch" reference, so it is left as is.
TEST(branch) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_WRITE, REF("log-branch"));
}
2897
2898 // Test standard combinations.
// Uncoloured trace with LOG_NONE, checked against REF("log-none").
TEST(none) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_NONE, REF("log-none"));
}
// Uncoloured trace with LOG_STATE, checked against REF("log-state").
TEST(state) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_STATE, REF("log-state"));
}
// Uncoloured trace with LOG_ALL, checked against REF("log-all").
TEST(all) {
  const bool coloured_trace = false;
  TraceTestHelper(coloured_trace, LOG_ALL, REF("log-all"));
}
2902
2903
2904 // Test individual options (with colour).
// Coloured trace with LOG_DISASM, checked against REF("log-disasm-colour").
TEST(disasm_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_DISASM, REF("log-disasm-colour"));
}
// Coloured trace with LOG_REGS, checked against REF("log-regs-colour").
TEST(regs_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_REGS, REF("log-regs-colour"));
}
// Coloured trace with LOG_VREGS, checked against REF("log-vregs-colour").
TEST(vregs_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_VREGS, REF("log-vregs-colour"));
}
// Coloured trace with LOG_SYSREGS, checked against REF("log-sysregs-colour").
TEST(sysregs_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_SYSREGS, REF("log-sysregs-colour"));
}
// Coloured trace with LOG_WRITE, checked against REF("log-write-colour").
TEST(write_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_WRITE, REF("log-write-colour"));
}
// Coloured trace checked against REF("log-branch-colour").
// NOTE(review): this passes LOG_WRITE, exactly like TEST(write_colour); a
// branch-specific trace flag was presumably intended. Changing the flag would
// require regenerating the "log-branch-colour" reference, so it is left as is.
TEST(branch_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_WRITE, REF("log-branch-colour"));
}
2921
2922 // Test standard combinations (with colour).
// Coloured trace with LOG_NONE, checked against REF("log-none-colour").
TEST(none_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_NONE, REF("log-none-colour"));
}
// Coloured trace with LOG_STATE, checked against REF("log-state-colour").
TEST(state_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_STATE, REF("log-state-colour"));
}
// Coloured trace with LOG_ALL, checked against REF("log-all-colour").
TEST(all_colour) {
  const bool coloured_trace = true;
  TraceTestHelper(coloured_trace, LOG_ALL, REF("log-all-colour"));
}
2928
2929 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
2930
PrintDisassemblerTestHelper(const char * prefix,const char * suffix,const char * ref_file)2931 static void PrintDisassemblerTestHelper(const char* prefix,
2932 const char* suffix,
2933 const char* ref_file) {
2934 MacroAssembler masm(12 * KBytes);
2935
2936 char trace_stream_filename[] = "/tmp/vixl-test-trace-XXXXXX";
2937 FILE* trace_stream = fdopen(mkstemp(trace_stream_filename), "w");
2938
2939 // We don't need to execute this code so there's no need for the execution
2940 // environment setup from TraceTestHelper.
2941
2942 GenerateTestSequenceBase(&masm);
2943 GenerateTestSequenceFP(&masm);
2944 GenerateTestSequenceNEON(&masm);
2945 GenerateTestSequenceNEONFP(&masm);
2946 masm.FinalizeCode();
2947
2948 Decoder decoder;
2949 CPUFeaturesAuditor auditor(&decoder);
2950 PrintDisassembler disasm(trace_stream);
2951 if (prefix != NULL) disasm.SetCPUFeaturesPrefix(prefix);
2952 if (suffix != NULL) disasm.SetCPUFeaturesSuffix(suffix);
2953 disasm.RegisterCPUFeaturesAuditor(&auditor);
2954 decoder.AppendVisitor(&disasm);
2955
2956 Instruction* instruction = masm.GetBuffer()->GetStartAddress<Instruction*>();
2957 Instruction* end = masm.GetCursorAddress<Instruction*>();
2958 while (instruction != end) {
2959 decoder.Decode(instruction);
2960 instruction += kInstructionSize;
2961 }
2962
2963 fclose(trace_stream);
2964 MaskAddresses(trace_stream_filename);
2965
2966 bool trace_matched_reference =
2967 CheckOrGenerateTrace(trace_stream_filename, ref_file);
2968 remove(trace_stream_filename); // Clean up before checking the result.
2969 VIXL_CHECK(trace_matched_reference);
2970 }
2971
2972
2973 // Test CPUFeatures disassembly annotations.
// No custom prefix or suffix is set; checked against REF("log-cpufeatures").
TEST(cpufeatures) {
  const char* const no_prefix = NULL;
  const char* const no_suffix = NULL;
  PrintDisassemblerTestHelper(no_prefix, no_suffix, REF("log-cpufeatures"));
}
// Custom textual prefix and suffix; checked against
// REF("log-cpufeatures-custom").
TEST(cpufeatures_custom) {
  const char* const custom_prefix = "### {";
  const char* const custom_suffix = "} ###";
  PrintDisassemblerTestHelper(custom_prefix,
                              custom_suffix,
                              REF("log-cpufeatures-custom"));
}
// Colour escape sequences as prefix and suffix; checked against
// REF("log-cpufeatures-colour").
TEST(cpufeatures_colour) {
  // The colour chosen is arbitrary.
  const char* const bold_magenta = "\033[1;35m";  // Prefix.
  const char* const reset_colour = "\033[0;m";    // Suffix.
  PrintDisassemblerTestHelper(bold_magenta,
                              reset_colour,
                              REF("log-cpufeatures-colour"));
}
2986 } // namespace aarch64
2987 } // namespace vixl
2988