1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29 #include <cmath>
30
31 #include "simulator-aarch64.h"
32
33 namespace vixl {
34 namespace aarch64 {
35
36 using vixl::internal::SimFloat16;
37
// Compile-time predicate: true only when T is exactly `double`.
// The primary template answers "no" for every type; the explicit
// specialisation below overrides that answer for `double`.
template <typename T>
constexpr bool IsFloat64() { return false; }

template <>
constexpr bool IsFloat64<double>() { return true; }
46
// Compile-time predicate: true only when T is exactly `float`.
// The primary template answers "no" for every type; the explicit
// specialisation below overrides that answer for `float`.
template <typename T>
constexpr bool IsFloat32() { return false; }

template <>
constexpr bool IsFloat32<float>() { return true; }
55
56 template <typename T>
IsFloat16()57 constexpr bool IsFloat16() {
58 return false;
59 }
60 template <>
IsFloat16()61 constexpr bool IsFloat16<Float16>() {
62 return true;
63 }
64 template <>
IsFloat16()65 constexpr bool IsFloat16<SimFloat16>() {
66 return true;
67 }
68
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71 return kFP64DefaultNaN;
72 }
73
74
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77 return kFP32DefaultNaN;
78 }
79
80
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83 return SimFloat16(kFP16DefaultNaN);
84 }
85
86
FixedToDouble(int64_t src,int fbits,FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88 if (src >= 0) {
89 return UFixedToDouble(src, fbits, round);
90 } else if (src == INT64_MIN) {
91 return -UFixedToDouble(src, fbits, round);
92 } else {
93 return -UFixedToDouble(-src, fbits, round);
94 }
95 }
96
97
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99 // An input of 0 is a special case because the result is effectively
100 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101 if (src == 0) {
102 return 0.0;
103 }
104
105 // Calculate the exponent. The highest significant bit will have the value
106 // 2^exponent.
107 const int highest_significant_bit = 63 - CountLeadingZeros(src);
108 const int64_t exponent = highest_significant_bit - fbits;
109
110 return FPRoundToDouble(0, exponent, src, round);
111 }
112
113
FixedToFloat(int64_t src,int fbits,FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115 if (src >= 0) {
116 return UFixedToFloat(src, fbits, round);
117 } else if (src == INT64_MIN) {
118 return -UFixedToFloat(src, fbits, round);
119 } else {
120 return -UFixedToFloat(-src, fbits, round);
121 }
122 }
123
124
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126 // An input of 0 is a special case because the result is effectively
127 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128 if (src == 0) {
129 return 0.0f;
130 }
131
132 // Calculate the exponent. The highest significant bit will have the value
133 // 2^exponent.
134 const int highest_significant_bit = 63 - CountLeadingZeros(src);
135 const int32_t exponent = highest_significant_bit - fbits;
136
137 return FPRoundToFloat(0, exponent, src, round);
138 }
139
140
FixedToFloat16(int64_t src,int fbits,FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142 if (src >= 0) {
143 return UFixedToFloat16(src, fbits, round);
144 } else if (src == INT64_MIN) {
145 return -UFixedToFloat16(src, fbits, round);
146 } else {
147 return -UFixedToFloat16(-src, fbits, round);
148 }
149 }
150
151
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153 int fbits,
154 FPRounding round) {
155 // An input of 0 is a special case because the result is effectively
156 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157 if (src == 0) {
158 return 0.0f;
159 }
160
161 // Calculate the exponent. The highest significant bit will have the value
162 // 2^exponent.
163 const int highest_significant_bit = 63 - CountLeadingZeros(src);
164 const int16_t exponent = highest_significant_bit - fbits;
165
166 return FPRoundToFloat16(0, exponent, src, round);
167 }
168
169
GenerateRandomTag(uint16_t exclude)170 uint64_t Simulator::GenerateRandomTag(uint16_t exclude) {
171 // Generate a 4 bit integer from a 48bit random number
172 uint64_t rtag = rand_gen_() >> 44;
173 VIXL_ASSERT(IsUint4(rtag));
174
175 if (exclude == 0) {
176 exclude = static_cast<uint16_t>(rand_gen_() >> 44);
177 }
178
179 // TODO: implement this to better match the specification, which calls for a
180 // true random mode, and a pseudo-random mode with state (EL1.TAG) modified by
181 // PRNG.
182 return ChooseNonExcludedTag(rtag, 0, exclude);
183 }
184
185
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)186 bool Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
187 dst.ClearForWrite(vform);
188 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
189 if (!LoadLane(dst, vform, i, addr)) {
190 return false;
191 }
192 addr += LaneSizeInBytesFromFormat(vform);
193 }
194 return true;
195 }
196
197
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)198 bool Simulator::ld1(VectorFormat vform,
199 LogicVRegister dst,
200 int index,
201 uint64_t addr) {
202 dst.ClearForWrite(vform);
203 return LoadLane(dst, vform, index, addr);
204 }
205
206
ld1r(VectorFormat vform,VectorFormat unpack_vform,LogicVRegister dst,uint64_t addr,bool is_signed)207 bool Simulator::ld1r(VectorFormat vform,
208 VectorFormat unpack_vform,
209 LogicVRegister dst,
210 uint64_t addr,
211 bool is_signed) {
212 unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
213 dst.ClearForWrite(vform);
214 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
215 if (is_signed) {
216 if (!LoadIntToLane(dst, vform, unpack_size, i, addr)) {
217 return false;
218 }
219 } else {
220 if (!LoadUintToLane(dst, vform, unpack_size, i, addr)) {
221 return false;
222 }
223 }
224 }
225 return true;
226 }
227
228
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)229 bool Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
230 return ld1r(vform, vform, dst, addr);
231 }
232
233
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)234 bool Simulator::ld2(VectorFormat vform,
235 LogicVRegister dst1,
236 LogicVRegister dst2,
237 uint64_t addr1) {
238 dst1.ClearForWrite(vform);
239 dst2.ClearForWrite(vform);
240 int esize = LaneSizeInBytesFromFormat(vform);
241 uint64_t addr2 = addr1 + esize;
242 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
243 if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2)) {
244 return false;
245 }
246 addr1 += 2 * esize;
247 addr2 += 2 * esize;
248 }
249 return true;
250 }
251
252
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)253 bool Simulator::ld2(VectorFormat vform,
254 LogicVRegister dst1,
255 LogicVRegister dst2,
256 int index,
257 uint64_t addr1) {
258 dst1.ClearForWrite(vform);
259 dst2.ClearForWrite(vform);
260 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
261 return (LoadLane(dst1, vform, index, addr1) &&
262 LoadLane(dst2, vform, index, addr2));
263 }
264
265
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)266 bool Simulator::ld2r(VectorFormat vform,
267 LogicVRegister dst1,
268 LogicVRegister dst2,
269 uint64_t addr) {
270 dst1.ClearForWrite(vform);
271 dst2.ClearForWrite(vform);
272 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
273 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
274 if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2)) {
275 return false;
276 }
277 }
278 return true;
279 }
280
281
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)282 bool Simulator::ld3(VectorFormat vform,
283 LogicVRegister dst1,
284 LogicVRegister dst2,
285 LogicVRegister dst3,
286 uint64_t addr1) {
287 dst1.ClearForWrite(vform);
288 dst2.ClearForWrite(vform);
289 dst3.ClearForWrite(vform);
290 int esize = LaneSizeInBytesFromFormat(vform);
291 uint64_t addr2 = addr1 + esize;
292 uint64_t addr3 = addr2 + esize;
293 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
294 if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
295 !LoadLane(dst3, vform, i, addr3)) {
296 return false;
297 }
298 addr1 += 3 * esize;
299 addr2 += 3 * esize;
300 addr3 += 3 * esize;
301 }
302 return true;
303 }
304
305
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)306 bool Simulator::ld3(VectorFormat vform,
307 LogicVRegister dst1,
308 LogicVRegister dst2,
309 LogicVRegister dst3,
310 int index,
311 uint64_t addr1) {
312 dst1.ClearForWrite(vform);
313 dst2.ClearForWrite(vform);
314 dst3.ClearForWrite(vform);
315 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
316 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
317 return (LoadLane(dst1, vform, index, addr1) &&
318 LoadLane(dst2, vform, index, addr2) &&
319 LoadLane(dst3, vform, index, addr3));
320 }
321
322
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)323 bool Simulator::ld3r(VectorFormat vform,
324 LogicVRegister dst1,
325 LogicVRegister dst2,
326 LogicVRegister dst3,
327 uint64_t addr) {
328 dst1.ClearForWrite(vform);
329 dst2.ClearForWrite(vform);
330 dst3.ClearForWrite(vform);
331 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
332 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
333 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
334 if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
335 !LoadLane(dst3, vform, i, addr3)) {
336 return false;
337 }
338 }
339 return true;
340 }
341
342
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)343 bool Simulator::ld4(VectorFormat vform,
344 LogicVRegister dst1,
345 LogicVRegister dst2,
346 LogicVRegister dst3,
347 LogicVRegister dst4,
348 uint64_t addr1) {
349 dst1.ClearForWrite(vform);
350 dst2.ClearForWrite(vform);
351 dst3.ClearForWrite(vform);
352 dst4.ClearForWrite(vform);
353 int esize = LaneSizeInBytesFromFormat(vform);
354 uint64_t addr2 = addr1 + esize;
355 uint64_t addr3 = addr2 + esize;
356 uint64_t addr4 = addr3 + esize;
357 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
358 if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
359 !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
360 return false;
361 }
362 addr1 += 4 * esize;
363 addr2 += 4 * esize;
364 addr3 += 4 * esize;
365 addr4 += 4 * esize;
366 }
367 return true;
368 }
369
370
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)371 bool Simulator::ld4(VectorFormat vform,
372 LogicVRegister dst1,
373 LogicVRegister dst2,
374 LogicVRegister dst3,
375 LogicVRegister dst4,
376 int index,
377 uint64_t addr1) {
378 dst1.ClearForWrite(vform);
379 dst2.ClearForWrite(vform);
380 dst3.ClearForWrite(vform);
381 dst4.ClearForWrite(vform);
382 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
383 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
384 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
385 return (LoadLane(dst1, vform, index, addr1) &&
386 LoadLane(dst2, vform, index, addr2) &&
387 LoadLane(dst3, vform, index, addr3) &&
388 LoadLane(dst4, vform, index, addr4));
389 }
390
391
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)392 bool Simulator::ld4r(VectorFormat vform,
393 LogicVRegister dst1,
394 LogicVRegister dst2,
395 LogicVRegister dst3,
396 LogicVRegister dst4,
397 uint64_t addr) {
398 dst1.ClearForWrite(vform);
399 dst2.ClearForWrite(vform);
400 dst3.ClearForWrite(vform);
401 dst4.ClearForWrite(vform);
402 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
403 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
404 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
405 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
406 if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
407 !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
408 return false;
409 }
410 }
411 return true;
412 }
413
414
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)415 bool Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
416 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
417 if (!StoreLane(src, vform, i, addr)) return false;
418 addr += LaneSizeInBytesFromFormat(vform);
419 }
420 return true;
421 }
422
423
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)424 bool Simulator::st1(VectorFormat vform,
425 LogicVRegister src,
426 int index,
427 uint64_t addr) {
428 return StoreLane(src, vform, index, addr);
429 }
430
431
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,uint64_t addr)432 bool Simulator::st2(VectorFormat vform,
433 LogicVRegister src,
434 LogicVRegister src2,
435 uint64_t addr) {
436 int esize = LaneSizeInBytesFromFormat(vform);
437 uint64_t addr2 = addr + esize;
438 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
439 if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2)) {
440 return false;
441 }
442 addr += 2 * esize;
443 addr2 += 2 * esize;
444 }
445 return true;
446 }
447
448
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,int index,uint64_t addr)449 bool Simulator::st2(VectorFormat vform,
450 LogicVRegister src,
451 LogicVRegister src2,
452 int index,
453 uint64_t addr) {
454 int esize = LaneSizeInBytesFromFormat(vform);
455 return (StoreLane(src, vform, index, addr) &&
456 StoreLane(src2, vform, index, addr + 1 * esize));
457 }
458
459
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,uint64_t addr)460 bool Simulator::st3(VectorFormat vform,
461 LogicVRegister src,
462 LogicVRegister src2,
463 LogicVRegister src3,
464 uint64_t addr) {
465 int esize = LaneSizeInBytesFromFormat(vform);
466 uint64_t addr2 = addr + esize;
467 uint64_t addr3 = addr2 + esize;
468 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
469 if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
470 !StoreLane(src3, vform, i, addr3)) {
471 return false;
472 }
473 addr += 3 * esize;
474 addr2 += 3 * esize;
475 addr3 += 3 * esize;
476 }
477 return true;
478 }
479
480
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,int index,uint64_t addr)481 bool Simulator::st3(VectorFormat vform,
482 LogicVRegister src,
483 LogicVRegister src2,
484 LogicVRegister src3,
485 int index,
486 uint64_t addr) {
487 int esize = LaneSizeInBytesFromFormat(vform);
488 return (StoreLane(src, vform, index, addr) &&
489 StoreLane(src2, vform, index, addr + 1 * esize) &&
490 StoreLane(src3, vform, index, addr + 2 * esize));
491 }
492
493
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,uint64_t addr)494 bool Simulator::st4(VectorFormat vform,
495 LogicVRegister src,
496 LogicVRegister src2,
497 LogicVRegister src3,
498 LogicVRegister src4,
499 uint64_t addr) {
500 int esize = LaneSizeInBytesFromFormat(vform);
501 uint64_t addr2 = addr + esize;
502 uint64_t addr3 = addr2 + esize;
503 uint64_t addr4 = addr3 + esize;
504 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
505 if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
506 !StoreLane(src3, vform, i, addr3) ||
507 !StoreLane(src4, vform, i, addr4)) {
508 return false;
509 }
510 addr += 4 * esize;
511 addr2 += 4 * esize;
512 addr3 += 4 * esize;
513 addr4 += 4 * esize;
514 }
515 return true;
516 }
517
518
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,int index,uint64_t addr)519 bool Simulator::st4(VectorFormat vform,
520 LogicVRegister src,
521 LogicVRegister src2,
522 LogicVRegister src3,
523 LogicVRegister src4,
524 int index,
525 uint64_t addr) {
526 int esize = LaneSizeInBytesFromFormat(vform);
527 return (StoreLane(src, vform, index, addr) &&
528 StoreLane(src2, vform, index, addr + 1 * esize) &&
529 StoreLane(src3, vform, index, addr + 2 * esize) &&
530 StoreLane(src4, vform, index, addr + 3 * esize));
531 }
532
533
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)534 LogicVRegister Simulator::cmp(VectorFormat vform,
535 LogicVRegister dst,
536 const LogicVRegister& src1,
537 const LogicVRegister& src2,
538 Condition cond) {
539 dst.ClearForWrite(vform);
540 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
541 int64_t sa = src1.Int(vform, i);
542 int64_t sb = src2.Int(vform, i);
543 uint64_t ua = src1.Uint(vform, i);
544 uint64_t ub = src2.Uint(vform, i);
545 bool result = false;
546 switch (cond) {
547 case eq:
548 result = (ua == ub);
549 break;
550 case ge:
551 result = (sa >= sb);
552 break;
553 case gt:
554 result = (sa > sb);
555 break;
556 case hi:
557 result = (ua > ub);
558 break;
559 case hs:
560 result = (ua >= ub);
561 break;
562 case lt:
563 result = (sa < sb);
564 break;
565 case le:
566 result = (sa <= sb);
567 break;
568 default:
569 VIXL_UNREACHABLE();
570 break;
571 }
572 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
573 }
574 return dst;
575 }
576
577
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)578 LogicVRegister Simulator::cmp(VectorFormat vform,
579 LogicVRegister dst,
580 const LogicVRegister& src1,
581 int imm,
582 Condition cond) {
583 SimVRegister temp;
584 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
585 return cmp(vform, dst, src1, imm_reg, cond);
586 }
587
588
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)589 LogicVRegister Simulator::cmptst(VectorFormat vform,
590 LogicVRegister dst,
591 const LogicVRegister& src1,
592 const LogicVRegister& src2) {
593 dst.ClearForWrite(vform);
594 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
595 uint64_t ua = src1.Uint(vform, i);
596 uint64_t ub = src2.Uint(vform, i);
597 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
598 }
599 return dst;
600 }
601
602
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)603 LogicVRegister Simulator::add(VectorFormat vform,
604 LogicVRegister dst,
605 const LogicVRegister& src1,
606 const LogicVRegister& src2) {
607 int lane_size = LaneSizeInBitsFromFormat(vform);
608 dst.ClearForWrite(vform);
609
610 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
611 // Test for unsigned saturation.
612 uint64_t ua = src1.UintLeftJustified(vform, i);
613 uint64_t ub = src2.UintLeftJustified(vform, i);
614 uint64_t ur = ua + ub;
615 if (ur < ua) {
616 dst.SetUnsignedSat(i, true);
617 }
618
619 // Test for signed saturation.
620 bool pos_a = (ua >> 63) == 0;
621 bool pos_b = (ub >> 63) == 0;
622 bool pos_r = (ur >> 63) == 0;
623 // If the signs of the operands are the same, but different from the result,
624 // there was an overflow.
625 if ((pos_a == pos_b) && (pos_a != pos_r)) {
626 dst.SetSignedSat(i, pos_a);
627 }
628 dst.SetInt(vform, i, ur >> (64 - lane_size));
629 }
630 return dst;
631 }
632
add_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)633 LogicVRegister Simulator::add_uint(VectorFormat vform,
634 LogicVRegister dst,
635 const LogicVRegister& src1,
636 uint64_t value) {
637 int lane_size = LaneSizeInBitsFromFormat(vform);
638 VIXL_ASSERT(IsUintN(lane_size, value));
639 dst.ClearForWrite(vform);
640 // Left-justify `value`.
641 uint64_t ub = value << (64 - lane_size);
642 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
643 // Test for unsigned saturation.
644 uint64_t ua = src1.UintLeftJustified(vform, i);
645 uint64_t ur = ua + ub;
646 if (ur < ua) {
647 dst.SetUnsignedSat(i, true);
648 }
649
650 // Test for signed saturation.
651 // `value` is always positive, so we have an overflow if the (signed) result
652 // is smaller than the first operand.
653 if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
654 dst.SetSignedSat(i, true);
655 }
656
657 dst.SetInt(vform, i, ur >> (64 - lane_size));
658 }
659 return dst;
660 }
661
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)662 LogicVRegister Simulator::addp(VectorFormat vform,
663 LogicVRegister dst,
664 const LogicVRegister& src1,
665 const LogicVRegister& src2) {
666 SimVRegister temp1, temp2;
667 uzp1(vform, temp1, src1, src2);
668 uzp2(vform, temp2, src1, src2);
669 add(vform, dst, temp1, temp2);
670 if (IsSVEFormat(vform)) {
671 interleave_top_bottom(vform, dst, dst);
672 }
673 return dst;
674 }
675
sdiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)676 LogicVRegister Simulator::sdiv(VectorFormat vform,
677 LogicVRegister dst,
678 const LogicVRegister& src1,
679 const LogicVRegister& src2) {
680 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
681
682 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
683 int64_t val1 = src1.Int(vform, i);
684 int64_t val2 = src2.Int(vform, i);
685 int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
686 int64_t quotient = 0;
687 if ((val1 == min_int) && (val2 == -1)) {
688 quotient = min_int;
689 } else if (val2 != 0) {
690 quotient = val1 / val2;
691 }
692 dst.SetInt(vform, i, quotient);
693 }
694
695 return dst;
696 }
697
udiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)698 LogicVRegister Simulator::udiv(VectorFormat vform,
699 LogicVRegister dst,
700 const LogicVRegister& src1,
701 const LogicVRegister& src2) {
702 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
703
704 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
705 uint64_t val1 = src1.Uint(vform, i);
706 uint64_t val2 = src2.Uint(vform, i);
707 uint64_t quotient = 0;
708 if (val2 != 0) {
709 quotient = val1 / val2;
710 }
711 dst.SetUint(vform, i, quotient);
712 }
713
714 return dst;
715 }
716
717
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)718 LogicVRegister Simulator::mla(VectorFormat vform,
719 LogicVRegister dst,
720 const LogicVRegister& srca,
721 const LogicVRegister& src1,
722 const LogicVRegister& src2) {
723 SimVRegister temp;
724 mul(vform, temp, src1, src2);
725 add(vform, dst, srca, temp);
726 return dst;
727 }
728
729
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)730 LogicVRegister Simulator::mls(VectorFormat vform,
731 LogicVRegister dst,
732 const LogicVRegister& srca,
733 const LogicVRegister& src1,
734 const LogicVRegister& src2) {
735 SimVRegister temp;
736 mul(vform, temp, src1, src2);
737 sub(vform, dst, srca, temp);
738 return dst;
739 }
740
741
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)742 LogicVRegister Simulator::mul(VectorFormat vform,
743 LogicVRegister dst,
744 const LogicVRegister& src1,
745 const LogicVRegister& src2) {
746 dst.ClearForWrite(vform);
747
748 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
749 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
750 }
751 return dst;
752 }
753
754
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)755 LogicVRegister Simulator::mul(VectorFormat vform,
756 LogicVRegister dst,
757 const LogicVRegister& src1,
758 const LogicVRegister& src2,
759 int index) {
760 SimVRegister temp;
761 VectorFormat indexform = VectorFormatFillQ(vform);
762 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
763 }
764
765
smulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)766 LogicVRegister Simulator::smulh(VectorFormat vform,
767 LogicVRegister dst,
768 const LogicVRegister& src1,
769 const LogicVRegister& src2) {
770 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
771 int64_t dst_val = 0xbadbeef;
772 int64_t val1 = src1.Int(vform, i);
773 int64_t val2 = src2.Int(vform, i);
774 switch (LaneSizeInBitsFromFormat(vform)) {
775 case 8:
776 dst_val = internal::MultiplyHigh<8>(val1, val2);
777 break;
778 case 16:
779 dst_val = internal::MultiplyHigh<16>(val1, val2);
780 break;
781 case 32:
782 dst_val = internal::MultiplyHigh<32>(val1, val2);
783 break;
784 case 64:
785 dst_val = internal::MultiplyHigh<64>(val1, val2);
786 break;
787 default:
788 VIXL_UNREACHABLE();
789 break;
790 }
791 dst.SetInt(vform, i, dst_val);
792 }
793 return dst;
794 }
795
796
umulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)797 LogicVRegister Simulator::umulh(VectorFormat vform,
798 LogicVRegister dst,
799 const LogicVRegister& src1,
800 const LogicVRegister& src2) {
801 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
802 uint64_t dst_val = 0xbadbeef;
803 uint64_t val1 = src1.Uint(vform, i);
804 uint64_t val2 = src2.Uint(vform, i);
805 switch (LaneSizeInBitsFromFormat(vform)) {
806 case 8:
807 dst_val = internal::MultiplyHigh<8>(val1, val2);
808 break;
809 case 16:
810 dst_val = internal::MultiplyHigh<16>(val1, val2);
811 break;
812 case 32:
813 dst_val = internal::MultiplyHigh<32>(val1, val2);
814 break;
815 case 64:
816 dst_val = internal::MultiplyHigh<64>(val1, val2);
817 break;
818 default:
819 VIXL_UNREACHABLE();
820 break;
821 }
822 dst.SetUint(vform, i, dst_val);
823 }
824 return dst;
825 }
826
827
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)828 LogicVRegister Simulator::mla(VectorFormat vform,
829 LogicVRegister dst,
830 const LogicVRegister& src1,
831 const LogicVRegister& src2,
832 int index) {
833 SimVRegister temp;
834 VectorFormat indexform = VectorFormatFillQ(vform);
835 return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
836 }
837
838
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)839 LogicVRegister Simulator::mls(VectorFormat vform,
840 LogicVRegister dst,
841 const LogicVRegister& src1,
842 const LogicVRegister& src2,
843 int index) {
844 SimVRegister temp;
845 VectorFormat indexform = VectorFormatFillQ(vform);
846 return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
847 }
848
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)849 LogicVRegister Simulator::sqdmull(VectorFormat vform,
850 LogicVRegister dst,
851 const LogicVRegister& src1,
852 const LogicVRegister& src2,
853 int index) {
854 SimVRegister temp;
855 VectorFormat indexform =
856 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
857 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
858 }
859
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)860 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
861 LogicVRegister dst,
862 const LogicVRegister& src1,
863 const LogicVRegister& src2,
864 int index) {
865 SimVRegister temp;
866 VectorFormat indexform =
867 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
868 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
869 }
870
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)871 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
872 LogicVRegister dst,
873 const LogicVRegister& src1,
874 const LogicVRegister& src2,
875 int index) {
876 SimVRegister temp;
877 VectorFormat indexform =
878 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
879 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
880 }
881
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)882 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
883 LogicVRegister dst,
884 const LogicVRegister& src1,
885 const LogicVRegister& src2,
886 int index) {
887 SimVRegister temp;
888 VectorFormat indexform = VectorFormatFillQ(vform);
889 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
890 }
891
892
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)893 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
894 LogicVRegister dst,
895 const LogicVRegister& src1,
896 const LogicVRegister& src2,
897 int index) {
898 SimVRegister temp;
899 VectorFormat indexform = VectorFormatFillQ(vform);
900 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
901 }
902
903
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)904 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
905 LogicVRegister dst,
906 const LogicVRegister& src1,
907 const LogicVRegister& src2,
908 int index) {
909 SimVRegister temp;
910 VectorFormat indexform = VectorFormatFillQ(vform);
911 return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
912 }
913
914
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)915 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
916 LogicVRegister dst,
917 const LogicVRegister& src1,
918 const LogicVRegister& src2,
919 int index) {
920 SimVRegister temp;
921 VectorFormat indexform = VectorFormatFillQ(vform);
922 return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
923 }
924
PolynomialMult(uint64_t op1,uint64_t op2,int lane_size_in_bits) const925 uint64_t Simulator::PolynomialMult(uint64_t op1,
926 uint64_t op2,
927 int lane_size_in_bits) const {
928 return PolynomialMult128(op1, op2, lane_size_in_bits).second;
929 }
930
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)931 LogicVRegister Simulator::pmul(VectorFormat vform,
932 LogicVRegister dst,
933 const LogicVRegister& src1,
934 const LogicVRegister& src2) {
935 dst.ClearForWrite(vform);
936 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
937 dst.SetUint(vform,
938 i,
939 PolynomialMult(src1.Uint(vform, i),
940 src2.Uint(vform, i),
941 LaneSizeInBitsFromFormat(vform)));
942 }
943 return dst;
944 }
945
946
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)947 LogicVRegister Simulator::pmull(VectorFormat vform,
948 LogicVRegister dst,
949 const LogicVRegister& src1,
950 const LogicVRegister& src2) {
951 dst.ClearForWrite(vform);
952 VectorFormat vform_src = VectorFormatHalfWidth(vform);
953
954 // Process the elements in reverse to avoid problems when the destination
955 // register is the same as a source.
956 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
957 dst.SetUint(vform,
958 i,
959 PolynomialMult128(src1.Uint(vform_src, i),
960 src2.Uint(vform_src, i),
961 LaneSizeInBitsFromFormat(vform_src)));
962 }
963
964 return dst;
965 }
966
967
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)968 LogicVRegister Simulator::pmull2(VectorFormat vform,
969 LogicVRegister dst,
970 const LogicVRegister& src1,
971 const LogicVRegister& src2) {
972 dst.ClearForWrite(vform);
973 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
974
975 int lane_count = LaneCountFromFormat(vform);
976 for (int i = 0; i < lane_count; i++) {
977 dst.SetUint(vform,
978 i,
979 PolynomialMult128(src1.Uint(vform_src, lane_count + i),
980 src2.Uint(vform_src, lane_count + i),
981 LaneSizeInBitsFromFormat(vform_src)));
982 }
983
984 return dst;
985 }
986
987
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)988 LogicVRegister Simulator::sub(VectorFormat vform,
989 LogicVRegister dst,
990 const LogicVRegister& src1,
991 const LogicVRegister& src2) {
992 int lane_size = LaneSizeInBitsFromFormat(vform);
993 dst.ClearForWrite(vform);
994 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
995 // Test for unsigned saturation.
996 uint64_t ua = src1.UintLeftJustified(vform, i);
997 uint64_t ub = src2.UintLeftJustified(vform, i);
998 uint64_t ur = ua - ub;
999 if (ub > ua) {
1000 dst.SetUnsignedSat(i, false);
1001 }
1002
1003 // Test for signed saturation.
1004 bool pos_a = (ua >> 63) == 0;
1005 bool pos_b = (ub >> 63) == 0;
1006 bool pos_r = (ur >> 63) == 0;
1007 // If the signs of the operands are different, and the sign of the first
1008 // operand doesn't match the result, there was an overflow.
1009 if ((pos_a != pos_b) && (pos_a != pos_r)) {
1010 dst.SetSignedSat(i, pos_a);
1011 }
1012
1013 dst.SetInt(vform, i, ur >> (64 - lane_size));
1014 }
1015 return dst;
1016 }
1017
sub_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)1018 LogicVRegister Simulator::sub_uint(VectorFormat vform,
1019 LogicVRegister dst,
1020 const LogicVRegister& src1,
1021 uint64_t value) {
1022 int lane_size = LaneSizeInBitsFromFormat(vform);
1023 VIXL_ASSERT(IsUintN(lane_size, value));
1024 dst.ClearForWrite(vform);
1025 // Left-justify `value`.
1026 uint64_t ub = value << (64 - lane_size);
1027 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1028 // Test for unsigned saturation.
1029 uint64_t ua = src1.UintLeftJustified(vform, i);
1030 uint64_t ur = ua - ub;
1031 if (ub > ua) {
1032 dst.SetUnsignedSat(i, false);
1033 }
1034
1035 // Test for signed saturation.
1036 // `value` is always positive, so we have an overflow if the (signed) result
1037 // is greater than the first operand.
1038 if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1039 dst.SetSignedSat(i, false);
1040 }
1041
1042 dst.SetInt(vform, i, ur >> (64 - lane_size));
1043 }
1044 return dst;
1045 }
1046
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1047 LogicVRegister Simulator::and_(VectorFormat vform,
1048 LogicVRegister dst,
1049 const LogicVRegister& src1,
1050 const LogicVRegister& src2) {
1051 dst.ClearForWrite(vform);
1052 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1053 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1054 }
1055 return dst;
1056 }
1057
1058
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1059 LogicVRegister Simulator::orr(VectorFormat vform,
1060 LogicVRegister dst,
1061 const LogicVRegister& src1,
1062 const LogicVRegister& src2) {
1063 dst.ClearForWrite(vform);
1064 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1065 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1066 }
1067 return dst;
1068 }
1069
1070
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1071 LogicVRegister Simulator::orn(VectorFormat vform,
1072 LogicVRegister dst,
1073 const LogicVRegister& src1,
1074 const LogicVRegister& src2) {
1075 dst.ClearForWrite(vform);
1076 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1077 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1078 }
1079 return dst;
1080 }
1081
1082
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1083 LogicVRegister Simulator::eor(VectorFormat vform,
1084 LogicVRegister dst,
1085 const LogicVRegister& src1,
1086 const LogicVRegister& src2) {
1087 dst.ClearForWrite(vform);
1088 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1089 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1090 }
1091 return dst;
1092 }
1093
1094
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1095 LogicVRegister Simulator::bic(VectorFormat vform,
1096 LogicVRegister dst,
1097 const LogicVRegister& src1,
1098 const LogicVRegister& src2) {
1099 dst.ClearForWrite(vform);
1100 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1101 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1102 }
1103 return dst;
1104 }
1105
1106
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1107 LogicVRegister Simulator::bic(VectorFormat vform,
1108 LogicVRegister dst,
1109 const LogicVRegister& src,
1110 uint64_t imm) {
1111 uint64_t result[16];
1112 int lane_count = LaneCountFromFormat(vform);
1113 for (int i = 0; i < lane_count; ++i) {
1114 result[i] = src.Uint(vform, i) & ~imm;
1115 }
1116 dst.ClearForWrite(vform);
1117 for (int i = 0; i < lane_count; ++i) {
1118 dst.SetUint(vform, i, result[i]);
1119 }
1120 return dst;
1121 }
1122
1123
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1124 LogicVRegister Simulator::bif(VectorFormat vform,
1125 LogicVRegister dst,
1126 const LogicVRegister& src1,
1127 const LogicVRegister& src2) {
1128 dst.ClearForWrite(vform);
1129 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1130 uint64_t operand1 = dst.Uint(vform, i);
1131 uint64_t operand2 = ~src2.Uint(vform, i);
1132 uint64_t operand3 = src1.Uint(vform, i);
1133 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1134 dst.SetUint(vform, i, result);
1135 }
1136 return dst;
1137 }
1138
1139
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1140 LogicVRegister Simulator::bit(VectorFormat vform,
1141 LogicVRegister dst,
1142 const LogicVRegister& src1,
1143 const LogicVRegister& src2) {
1144 dst.ClearForWrite(vform);
1145 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1146 uint64_t operand1 = dst.Uint(vform, i);
1147 uint64_t operand2 = src2.Uint(vform, i);
1148 uint64_t operand3 = src1.Uint(vform, i);
1149 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1150 dst.SetUint(vform, i, result);
1151 }
1152 return dst;
1153 }
1154
1155
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src_mask,const LogicVRegister & src1,const LogicVRegister & src2)1156 LogicVRegister Simulator::bsl(VectorFormat vform,
1157 LogicVRegister dst,
1158 const LogicVRegister& src_mask,
1159 const LogicVRegister& src1,
1160 const LogicVRegister& src2) {
1161 dst.ClearForWrite(vform);
1162 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1163 uint64_t operand1 = src2.Uint(vform, i);
1164 uint64_t operand2 = src_mask.Uint(vform, i);
1165 uint64_t operand3 = src1.Uint(vform, i);
1166 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1167 dst.SetUint(vform, i, result);
1168 }
1169 return dst;
1170 }
1171
1172
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1173 LogicVRegister Simulator::sminmax(VectorFormat vform,
1174 LogicVRegister dst,
1175 const LogicVRegister& src1,
1176 const LogicVRegister& src2,
1177 bool max) {
1178 dst.ClearForWrite(vform);
1179 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1180 int64_t src1_val = src1.Int(vform, i);
1181 int64_t src2_val = src2.Int(vform, i);
1182 int64_t dst_val;
1183 if (max) {
1184 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1185 } else {
1186 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1187 }
1188 dst.SetInt(vform, i, dst_val);
1189 }
1190 return dst;
1191 }
1192
1193
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1194 LogicVRegister Simulator::smax(VectorFormat vform,
1195 LogicVRegister dst,
1196 const LogicVRegister& src1,
1197 const LogicVRegister& src2) {
1198 return sminmax(vform, dst, src1, src2, true);
1199 }
1200
1201
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1202 LogicVRegister Simulator::smin(VectorFormat vform,
1203 LogicVRegister dst,
1204 const LogicVRegister& src1,
1205 const LogicVRegister& src2) {
1206 return sminmax(vform, dst, src1, src2, false);
1207 }
1208
1209
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1210 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1211 LogicVRegister dst,
1212 const LogicVRegister& src1,
1213 const LogicVRegister& src2,
1214 bool max) {
1215 unsigned lanes = LaneCountFromFormat(vform);
1216 int64_t result[kZRegMaxSizeInBytes];
1217 const LogicVRegister* src = &src1;
1218 for (unsigned j = 0; j < 2; j++) {
1219 for (unsigned i = 0; i < lanes; i += 2) {
1220 int64_t first_val = src->Int(vform, i);
1221 int64_t second_val = src->Int(vform, i + 1);
1222 int64_t dst_val;
1223 if (max) {
1224 dst_val = (first_val > second_val) ? first_val : second_val;
1225 } else {
1226 dst_val = (first_val < second_val) ? first_val : second_val;
1227 }
1228 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1229 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1230 }
1231 src = &src2;
1232 }
1233 dst.SetIntArray(vform, result);
1234 if (IsSVEFormat(vform)) {
1235 interleave_top_bottom(vform, dst, dst);
1236 }
1237 return dst;
1238 }
1239
1240
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1241 LogicVRegister Simulator::smaxp(VectorFormat vform,
1242 LogicVRegister dst,
1243 const LogicVRegister& src1,
1244 const LogicVRegister& src2) {
1245 return sminmaxp(vform, dst, src1, src2, true);
1246 }
1247
1248
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1249 LogicVRegister Simulator::sminp(VectorFormat vform,
1250 LogicVRegister dst,
1251 const LogicVRegister& src1,
1252 const LogicVRegister& src2) {
1253 return sminmaxp(vform, dst, src1, src2, false);
1254 }
1255
1256
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1257 LogicVRegister Simulator::addp(VectorFormat vform,
1258 LogicVRegister dst,
1259 const LogicVRegister& src) {
1260 VIXL_ASSERT(vform == kFormatD);
1261
1262 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1263 dst.ClearForWrite(vform);
1264 dst.SetUint(vform, 0, dst_val);
1265 return dst;
1266 }
1267
1268
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1269 LogicVRegister Simulator::addv(VectorFormat vform,
1270 LogicVRegister dst,
1271 const LogicVRegister& src) {
1272 VectorFormat vform_dst =
1273 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1274
1275
1276 int64_t dst_val = 0;
1277 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1278 dst_val += src.Int(vform, i);
1279 }
1280
1281 dst.ClearForWrite(vform_dst);
1282 dst.SetInt(vform_dst, 0, dst_val);
1283 return dst;
1284 }
1285
1286
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1287 LogicVRegister Simulator::saddlv(VectorFormat vform,
1288 LogicVRegister dst,
1289 const LogicVRegister& src) {
1290 VectorFormat vform_dst =
1291 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1292
1293 int64_t dst_val = 0;
1294 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1295 dst_val += src.Int(vform, i);
1296 }
1297
1298 dst.ClearForWrite(vform_dst);
1299 dst.SetInt(vform_dst, 0, dst_val);
1300 return dst;
1301 }
1302
1303
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1304 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1305 LogicVRegister dst,
1306 const LogicVRegister& src) {
1307 VectorFormat vform_dst =
1308 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1309
1310 uint64_t dst_val = 0;
1311 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1312 dst_val += src.Uint(vform, i);
1313 }
1314
1315 dst.ClearForWrite(vform_dst);
1316 dst.SetUint(vform_dst, 0, dst_val);
1317 return dst;
1318 }
1319
1320
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1321 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1322 LogicVRegister dst,
1323 const LogicPRegister& pg,
1324 const LogicVRegister& src,
1325 bool max) {
1326 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1327 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1328 if (!pg.IsActive(vform, i)) continue;
1329
1330 int64_t src_val = src.Int(vform, i);
1331 if (max) {
1332 dst_val = (src_val > dst_val) ? src_val : dst_val;
1333 } else {
1334 dst_val = (src_val < dst_val) ? src_val : dst_val;
1335 }
1336 }
1337 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1338 dst.SetInt(vform, 0, dst_val);
1339 return dst;
1340 }
1341
1342
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1343 LogicVRegister Simulator::smaxv(VectorFormat vform,
1344 LogicVRegister dst,
1345 const LogicVRegister& src) {
1346 sminmaxv(vform, dst, GetPTrue(), src, true);
1347 return dst;
1348 }
1349
1350
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1351 LogicVRegister Simulator::sminv(VectorFormat vform,
1352 LogicVRegister dst,
1353 const LogicVRegister& src) {
1354 sminmaxv(vform, dst, GetPTrue(), src, false);
1355 return dst;
1356 }
1357
1358
smaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1359 LogicVRegister Simulator::smaxv(VectorFormat vform,
1360 LogicVRegister dst,
1361 const LogicPRegister& pg,
1362 const LogicVRegister& src) {
1363 VIXL_ASSERT(IsSVEFormat(vform));
1364 sminmaxv(vform, dst, pg, src, true);
1365 return dst;
1366 }
1367
1368
sminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1369 LogicVRegister Simulator::sminv(VectorFormat vform,
1370 LogicVRegister dst,
1371 const LogicPRegister& pg,
1372 const LogicVRegister& src) {
1373 VIXL_ASSERT(IsSVEFormat(vform));
1374 sminmaxv(vform, dst, pg, src, false);
1375 return dst;
1376 }
1377
1378
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1379 LogicVRegister Simulator::uminmax(VectorFormat vform,
1380 LogicVRegister dst,
1381 const LogicVRegister& src1,
1382 const LogicVRegister& src2,
1383 bool max) {
1384 dst.ClearForWrite(vform);
1385 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1386 uint64_t src1_val = src1.Uint(vform, i);
1387 uint64_t src2_val = src2.Uint(vform, i);
1388 uint64_t dst_val;
1389 if (max) {
1390 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1391 } else {
1392 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1393 }
1394 dst.SetUint(vform, i, dst_val);
1395 }
1396 return dst;
1397 }
1398
1399
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1400 LogicVRegister Simulator::umax(VectorFormat vform,
1401 LogicVRegister dst,
1402 const LogicVRegister& src1,
1403 const LogicVRegister& src2) {
1404 return uminmax(vform, dst, src1, src2, true);
1405 }
1406
1407
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1408 LogicVRegister Simulator::umin(VectorFormat vform,
1409 LogicVRegister dst,
1410 const LogicVRegister& src1,
1411 const LogicVRegister& src2) {
1412 return uminmax(vform, dst, src1, src2, false);
1413 }
1414
1415
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1416 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1417 LogicVRegister dst,
1418 const LogicVRegister& src1,
1419 const LogicVRegister& src2,
1420 bool max) {
1421 unsigned lanes = LaneCountFromFormat(vform);
1422 uint64_t result[kZRegMaxSizeInBytes];
1423 const LogicVRegister* src = &src1;
1424 for (unsigned j = 0; j < 2; j++) {
1425 for (unsigned i = 0; i < lanes; i += 2) {
1426 uint64_t first_val = src->Uint(vform, i);
1427 uint64_t second_val = src->Uint(vform, i + 1);
1428 uint64_t dst_val;
1429 if (max) {
1430 dst_val = (first_val > second_val) ? first_val : second_val;
1431 } else {
1432 dst_val = (first_val < second_val) ? first_val : second_val;
1433 }
1434 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1435 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1436 }
1437 src = &src2;
1438 }
1439 dst.SetUintArray(vform, result);
1440 if (IsSVEFormat(vform)) {
1441 interleave_top_bottom(vform, dst, dst);
1442 }
1443 return dst;
1444 }
1445
1446
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1447 LogicVRegister Simulator::umaxp(VectorFormat vform,
1448 LogicVRegister dst,
1449 const LogicVRegister& src1,
1450 const LogicVRegister& src2) {
1451 return uminmaxp(vform, dst, src1, src2, true);
1452 }
1453
1454
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1455 LogicVRegister Simulator::uminp(VectorFormat vform,
1456 LogicVRegister dst,
1457 const LogicVRegister& src1,
1458 const LogicVRegister& src2) {
1459 return uminmaxp(vform, dst, src1, src2, false);
1460 }
1461
1462
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1463 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1464 LogicVRegister dst,
1465 const LogicPRegister& pg,
1466 const LogicVRegister& src,
1467 bool max) {
1468 uint64_t dst_val = max ? 0 : UINT64_MAX;
1469 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1470 if (!pg.IsActive(vform, i)) continue;
1471
1472 uint64_t src_val = src.Uint(vform, i);
1473 if (max) {
1474 dst_val = (src_val > dst_val) ? src_val : dst_val;
1475 } else {
1476 dst_val = (src_val < dst_val) ? src_val : dst_val;
1477 }
1478 }
1479 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1480 dst.SetUint(vform, 0, dst_val);
1481 return dst;
1482 }
1483
1484
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1485 LogicVRegister Simulator::umaxv(VectorFormat vform,
1486 LogicVRegister dst,
1487 const LogicVRegister& src) {
1488 uminmaxv(vform, dst, GetPTrue(), src, true);
1489 return dst;
1490 }
1491
1492
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1493 LogicVRegister Simulator::uminv(VectorFormat vform,
1494 LogicVRegister dst,
1495 const LogicVRegister& src) {
1496 uminmaxv(vform, dst, GetPTrue(), src, false);
1497 return dst;
1498 }
1499
1500
umaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1501 LogicVRegister Simulator::umaxv(VectorFormat vform,
1502 LogicVRegister dst,
1503 const LogicPRegister& pg,
1504 const LogicVRegister& src) {
1505 VIXL_ASSERT(IsSVEFormat(vform));
1506 uminmaxv(vform, dst, pg, src, true);
1507 return dst;
1508 }
1509
1510
uminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1511 LogicVRegister Simulator::uminv(VectorFormat vform,
1512 LogicVRegister dst,
1513 const LogicPRegister& pg,
1514 const LogicVRegister& src) {
1515 VIXL_ASSERT(IsSVEFormat(vform));
1516 uminmaxv(vform, dst, pg, src, false);
1517 return dst;
1518 }
1519
1520
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1521 LogicVRegister Simulator::shl(VectorFormat vform,
1522 LogicVRegister dst,
1523 const LogicVRegister& src,
1524 int shift) {
1525 VIXL_ASSERT(shift >= 0);
1526 SimVRegister temp;
1527 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1528 return ushl(vform, dst, src, shiftreg);
1529 }
1530
1531
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1532 LogicVRegister Simulator::sshll(VectorFormat vform,
1533 LogicVRegister dst,
1534 const LogicVRegister& src,
1535 int shift) {
1536 VIXL_ASSERT(shift >= 0);
1537 SimVRegister temp1, temp2;
1538 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1539 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1540 return sshl(vform, dst, extendedreg, shiftreg);
1541 }
1542
1543
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1544 LogicVRegister Simulator::sshll2(VectorFormat vform,
1545 LogicVRegister dst,
1546 const LogicVRegister& src,
1547 int shift) {
1548 VIXL_ASSERT(shift >= 0);
1549 SimVRegister temp1, temp2;
1550 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1551 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1552 return sshl(vform, dst, extendedreg, shiftreg);
1553 }
1554
1555
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1556 LogicVRegister Simulator::shll(VectorFormat vform,
1557 LogicVRegister dst,
1558 const LogicVRegister& src) {
1559 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1560 return sshll(vform, dst, src, shift);
1561 }
1562
1563
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1564 LogicVRegister Simulator::shll2(VectorFormat vform,
1565 LogicVRegister dst,
1566 const LogicVRegister& src) {
1567 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1568 return sshll2(vform, dst, src, shift);
1569 }
1570
1571
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1572 LogicVRegister Simulator::ushll(VectorFormat vform,
1573 LogicVRegister dst,
1574 const LogicVRegister& src,
1575 int shift) {
1576 VIXL_ASSERT(shift >= 0);
1577 SimVRegister temp1, temp2;
1578 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1579 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1580 return ushl(vform, dst, extendedreg, shiftreg);
1581 }
1582
1583
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1584 LogicVRegister Simulator::ushll2(VectorFormat vform,
1585 LogicVRegister dst,
1586 const LogicVRegister& src,
1587 int shift) {
1588 VIXL_ASSERT(shift >= 0);
1589 SimVRegister temp1, temp2;
1590 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1591 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1592 return ushl(vform, dst, extendedreg, shiftreg);
1593 }
1594
clast(VectorFormat vform,const LogicPRegister & pg,const LogicVRegister & src,int offset_from_last_active)1595 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1596 const LogicPRegister& pg,
1597 const LogicVRegister& src,
1598 int offset_from_last_active) {
1599 // Untested for any other values.
1600 VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1601
1602 int last_active = GetLastActive(vform, pg);
1603 int lane_count = LaneCountFromFormat(vform);
1604 int index =
1605 ((last_active + offset_from_last_active) + lane_count) % lane_count;
1606 return std::make_pair(last_active >= 0, src.Uint(vform, index));
1607 }
1608
compact(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1609 LogicVRegister Simulator::compact(VectorFormat vform,
1610 LogicVRegister dst,
1611 const LogicPRegister& pg,
1612 const LogicVRegister& src) {
1613 int j = 0;
1614 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1615 if (pg.IsActive(vform, i)) {
1616 dst.SetUint(vform, j++, src.Uint(vform, i));
1617 }
1618 }
1619 for (; j < LaneCountFromFormat(vform); j++) {
1620 dst.SetUint(vform, j, 0);
1621 }
1622 return dst;
1623 }
1624
splice(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1625 LogicVRegister Simulator::splice(VectorFormat vform,
1626 LogicVRegister dst,
1627 const LogicPRegister& pg,
1628 const LogicVRegister& src1,
1629 const LogicVRegister& src2) {
1630 int lane_count = LaneCountFromFormat(vform);
1631 int first_active = GetFirstActive(vform, pg);
1632 int last_active = GetLastActive(vform, pg);
1633 int dst_idx = 0;
1634 uint64_t result[kZRegMaxSizeInBytes];
1635
1636 if (first_active >= 0) {
1637 VIXL_ASSERT(last_active >= first_active);
1638 VIXL_ASSERT(last_active < lane_count);
1639 for (int i = first_active; i <= last_active; i++) {
1640 result[dst_idx++] = src1.Uint(vform, i);
1641 }
1642 }
1643
1644 VIXL_ASSERT(dst_idx <= lane_count);
1645 for (int i = dst_idx; i < lane_count; i++) {
1646 result[i] = src2.Uint(vform, i - dst_idx);
1647 }
1648
1649 dst.SetUintArray(vform, result);
1650
1651 return dst;
1652 }
1653
sel(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1654 LogicVRegister Simulator::sel(VectorFormat vform,
1655 LogicVRegister dst,
1656 const SimPRegister& pg,
1657 const LogicVRegister& src1,
1658 const LogicVRegister& src2) {
1659 int p_reg_bits_per_lane =
1660 LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1661 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1662 uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1663 ? src1.Uint(vform, lane)
1664 : src2.Uint(vform, lane);
1665 dst.SetUint(vform, lane, lane_value);
1666 }
1667 return dst;
1668 }
1669
1670
sel(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src1,const LogicPRegister & src2)1671 LogicPRegister Simulator::sel(LogicPRegister dst,
1672 const LogicPRegister& pg,
1673 const LogicPRegister& src1,
1674 const LogicPRegister& src2) {
1675 for (int i = 0; i < dst.GetChunkCount(); i++) {
1676 LogicPRegister::ChunkType mask = pg.GetChunk(i);
1677 LogicPRegister::ChunkType result =
1678 (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1679 dst.SetChunk(i, result);
1680 }
1681 return dst;
1682 }
1683
1684
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1685 LogicVRegister Simulator::sli(VectorFormat vform,
1686 LogicVRegister dst,
1687 const LogicVRegister& src,
1688 int shift) {
1689 dst.ClearForWrite(vform);
1690 int lane_count = LaneCountFromFormat(vform);
1691 for (int i = 0; i < lane_count; i++) {
1692 uint64_t src_lane = src.Uint(vform, i);
1693 uint64_t dst_lane = dst.Uint(vform, i);
1694 uint64_t shifted = src_lane << shift;
1695 uint64_t mask = MaxUintFromFormat(vform) << shift;
1696 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1697 }
1698 return dst;
1699 }
1700
1701
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1702 LogicVRegister Simulator::sqshl(VectorFormat vform,
1703 LogicVRegister dst,
1704 const LogicVRegister& src,
1705 int shift) {
1706 VIXL_ASSERT(shift >= 0);
1707 SimVRegister temp;
1708 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1709 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1710 }
1711
1712
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1713 LogicVRegister Simulator::uqshl(VectorFormat vform,
1714 LogicVRegister dst,
1715 const LogicVRegister& src,
1716 int shift) {
1717 VIXL_ASSERT(shift >= 0);
1718 SimVRegister temp;
1719 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1720 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1721 }
1722
1723
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1724 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1725 LogicVRegister dst,
1726 const LogicVRegister& src,
1727 int shift) {
1728 VIXL_ASSERT(shift >= 0);
1729 SimVRegister temp;
1730 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1731 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1732 }
1733
1734
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1735 LogicVRegister Simulator::sri(VectorFormat vform,
1736 LogicVRegister dst,
1737 const LogicVRegister& src,
1738 int shift) {
1739 dst.ClearForWrite(vform);
1740 int lane_count = LaneCountFromFormat(vform);
1741 VIXL_ASSERT((shift > 0) &&
1742 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1743 for (int i = 0; i < lane_count; i++) {
1744 uint64_t src_lane = src.Uint(vform, i);
1745 uint64_t dst_lane = dst.Uint(vform, i);
1746 uint64_t shifted;
1747 uint64_t mask;
1748 if (shift == 64) {
1749 shifted = 0;
1750 mask = 0;
1751 } else {
1752 shifted = src_lane >> shift;
1753 mask = MaxUintFromFormat(vform) >> shift;
1754 }
1755 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1756 }
1757 return dst;
1758 }
1759
1760
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1761 LogicVRegister Simulator::ushr(VectorFormat vform,
1762 LogicVRegister dst,
1763 const LogicVRegister& src,
1764 int shift) {
1765 VIXL_ASSERT(shift >= 0);
1766 SimVRegister temp;
1767 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1768 return ushl(vform, dst, src, shiftreg);
1769 }
1770
1771
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1772 LogicVRegister Simulator::sshr(VectorFormat vform,
1773 LogicVRegister dst,
1774 const LogicVRegister& src,
1775 int shift) {
1776 VIXL_ASSERT(shift >= 0);
1777 SimVRegister temp;
1778 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1779 return sshl(vform, dst, src, shiftreg);
1780 }
1781
1782
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1783 LogicVRegister Simulator::ssra(VectorFormat vform,
1784 LogicVRegister dst,
1785 const LogicVRegister& src,
1786 int shift) {
1787 SimVRegister temp;
1788 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1789 return add(vform, dst, dst, shifted_reg);
1790 }
1791
1792
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1793 LogicVRegister Simulator::usra(VectorFormat vform,
1794 LogicVRegister dst,
1795 const LogicVRegister& src,
1796 int shift) {
1797 SimVRegister temp;
1798 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1799 return add(vform, dst, dst, shifted_reg);
1800 }
1801
1802
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1803 LogicVRegister Simulator::srsra(VectorFormat vform,
1804 LogicVRegister dst,
1805 const LogicVRegister& src,
1806 int shift) {
1807 SimVRegister temp;
1808 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1809 return add(vform, dst, dst, shifted_reg);
1810 }
1811
1812
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1813 LogicVRegister Simulator::ursra(VectorFormat vform,
1814 LogicVRegister dst,
1815 const LogicVRegister& src,
1816 int shift) {
1817 SimVRegister temp;
1818 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1819 return add(vform, dst, dst, shifted_reg);
1820 }
1821
1822
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1823 LogicVRegister Simulator::cls(VectorFormat vform,
1824 LogicVRegister dst,
1825 const LogicVRegister& src) {
1826 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1827 int lane_count = LaneCountFromFormat(vform);
1828
1829 // Ensure that we can store one result per lane.
1830 int result[kZRegMaxSizeInBytes];
1831
1832 for (int i = 0; i < lane_count; i++) {
1833 result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
1834 }
1835
1836 dst.ClearForWrite(vform);
1837 for (int i = 0; i < lane_count; ++i) {
1838 dst.SetUint(vform, i, result[i]);
1839 }
1840 return dst;
1841 }
1842
1843
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1844 LogicVRegister Simulator::clz(VectorFormat vform,
1845 LogicVRegister dst,
1846 const LogicVRegister& src) {
1847 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1848 int lane_count = LaneCountFromFormat(vform);
1849
1850 // Ensure that we can store one result per lane.
1851 int result[kZRegMaxSizeInBytes];
1852
1853 for (int i = 0; i < lane_count; i++) {
1854 result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
1855 }
1856
1857 dst.ClearForWrite(vform);
1858 for (int i = 0; i < lane_count; ++i) {
1859 dst.SetUint(vform, i, result[i]);
1860 }
1861 return dst;
1862 }
1863
1864
cnot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1865 LogicVRegister Simulator::cnot(VectorFormat vform,
1866 LogicVRegister dst,
1867 const LogicVRegister& src) {
1868 dst.ClearForWrite(vform);
1869 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1870 uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
1871 dst.SetUint(vform, i, value);
1872 }
1873 return dst;
1874 }
1875
1876
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1877 LogicVRegister Simulator::cnt(VectorFormat vform,
1878 LogicVRegister dst,
1879 const LogicVRegister& src) {
1880 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1881 int lane_count = LaneCountFromFormat(vform);
1882
1883 // Ensure that we can store one result per lane.
1884 int result[kZRegMaxSizeInBytes];
1885
1886 for (int i = 0; i < lane_count; i++) {
1887 result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
1888 }
1889
1890 dst.ClearForWrite(vform);
1891 for (int i = 0; i < lane_count; ++i) {
1892 dst.SetUint(vform, i, result[i]);
1893 }
1894 return dst;
1895 }
1896
CalculateSignedShiftDistance(int64_t shift_val,int esize,bool shift_in_ls_byte)1897 static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1898 int esize,
1899 bool shift_in_ls_byte) {
1900 if (shift_in_ls_byte) {
1901 // Neon uses the least-significant byte of the lane as the shift distance.
1902 shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1903 } else {
1904 // SVE uses a saturated shift distance in the range
1905 // -(esize + 1) ... (esize + 1).
1906 if (shift_val > (esize + 1)) shift_val = esize + 1;
1907 if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1908 }
1909 return shift_val;
1910 }
1911
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1912 LogicVRegister Simulator::sshl(VectorFormat vform,
1913 LogicVRegister dst,
1914 const LogicVRegister& src1,
1915 const LogicVRegister& src2,
1916 bool shift_in_ls_byte) {
1917 dst.ClearForWrite(vform);
1918 int esize = LaneSizeInBitsFromFormat(vform);
1919 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1920 int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1921 esize,
1922 shift_in_ls_byte);
1923
1924 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1925
1926 // Set signed saturation state.
1927 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1928 dst.SetSignedSat(i, lj_src_val >= 0);
1929 }
1930
1931 // Set unsigned saturation state.
1932 if (lj_src_val < 0) {
1933 dst.SetUnsignedSat(i, false);
1934 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1935 (lj_src_val != 0)) {
1936 dst.SetUnsignedSat(i, true);
1937 }
1938
1939 int64_t src_val = src1.Int(vform, i);
1940 bool src_is_negative = src_val < 0;
1941 if (shift_val > 63) {
1942 dst.SetInt(vform, i, 0);
1943 } else if (shift_val < -63) {
1944 dst.SetRounding(i, src_is_negative);
1945 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1946 } else {
1947 // Use unsigned types for shifts, as behaviour is undefined for signed
1948 // lhs.
1949 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1950
1951 if (shift_val < 0) {
1952 // Convert to right shift.
1953 shift_val = -shift_val;
1954
1955 // Set rounding state by testing most-significant bit shifted out.
1956 // Rounding only needed on right shifts.
1957 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1958 dst.SetRounding(i, true);
1959 }
1960
1961 usrc_val >>= shift_val;
1962
1963 if (src_is_negative) {
1964 // Simulate sign-extension.
1965 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1966 }
1967 } else {
1968 usrc_val <<= shift_val;
1969 }
1970 dst.SetUint(vform, i, usrc_val);
1971 }
1972 }
1973 return dst;
1974 }
1975
1976
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1977 LogicVRegister Simulator::ushl(VectorFormat vform,
1978 LogicVRegister dst,
1979 const LogicVRegister& src1,
1980 const LogicVRegister& src2,
1981 bool shift_in_ls_byte) {
1982 dst.ClearForWrite(vform);
1983 int esize = LaneSizeInBitsFromFormat(vform);
1984 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1985 int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1986 esize,
1987 shift_in_ls_byte);
1988
1989 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1990
1991 // Set saturation state.
1992 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1993 dst.SetUnsignedSat(i, true);
1994 }
1995
1996 uint64_t src_val = src1.Uint(vform, i);
1997 if ((shift_val > 63) || (shift_val < -64)) {
1998 dst.SetUint(vform, i, 0);
1999 } else {
2000 if (shift_val < 0) {
2001 // Set rounding state. Rounding only needed on right shifts.
2002 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
2003 dst.SetRounding(i, true);
2004 }
2005
2006 if (shift_val == -64) {
2007 src_val = 0;
2008 } else {
2009 src_val >>= -shift_val;
2010 }
2011 } else {
2012 src_val <<= shift_val;
2013 }
2014 dst.SetUint(vform, i, src_val);
2015 }
2016 }
2017 return dst;
2018 }
2019
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2020 LogicVRegister Simulator::sshr(VectorFormat vform,
2021 LogicVRegister dst,
2022 const LogicVRegister& src1,
2023 const LogicVRegister& src2) {
2024 SimVRegister temp;
2025 // Saturate to sidestep the min-int problem.
2026 neg(vform, temp, src2).SignedSaturate(vform);
2027 sshl(vform, dst, src1, temp, false);
2028 return dst;
2029 }
2030
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2031 LogicVRegister Simulator::ushr(VectorFormat vform,
2032 LogicVRegister dst,
2033 const LogicVRegister& src1,
2034 const LogicVRegister& src2) {
2035 SimVRegister temp;
2036 // Saturate to sidestep the min-int problem.
2037 neg(vform, temp, src2).SignedSaturate(vform);
2038 ushl(vform, dst, src1, temp, false);
2039 return dst;
2040 }
2041
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2042 LogicVRegister Simulator::neg(VectorFormat vform,
2043 LogicVRegister dst,
2044 const LogicVRegister& src) {
2045 dst.ClearForWrite(vform);
2046 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2047 // Test for signed saturation.
2048 int64_t sa = src.Int(vform, i);
2049 if (sa == MinIntFromFormat(vform)) {
2050 dst.SetSignedSat(i, true);
2051 }
2052 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2053 }
2054 return dst;
2055 }
2056
2057
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2058 LogicVRegister Simulator::suqadd(VectorFormat vform,
2059 LogicVRegister dst,
2060 const LogicVRegister& src1,
2061 const LogicVRegister& src2) {
2062 dst.ClearForWrite(vform);
2063 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2064 int64_t sa = src1.IntLeftJustified(vform, i);
2065 uint64_t ub = src2.UintLeftJustified(vform, i);
2066 uint64_t ur = sa + ub;
2067
2068 int64_t sr;
2069 memcpy(&sr, &ur, sizeof(sr));
2070 if (sr < sa) { // Test for signed positive saturation.
2071 dst.SetInt(vform, i, MaxIntFromFormat(vform));
2072 } else {
2073 dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
2074 }
2075 }
2076 return dst;
2077 }
2078
2079
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2080 LogicVRegister Simulator::usqadd(VectorFormat vform,
2081 LogicVRegister dst,
2082 const LogicVRegister& src1,
2083 const LogicVRegister& src2) {
2084 dst.ClearForWrite(vform);
2085 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2086 uint64_t ua = src1.UintLeftJustified(vform, i);
2087 int64_t sb = src2.IntLeftJustified(vform, i);
2088 uint64_t ur = ua + sb;
2089
2090 if ((sb > 0) && (ur <= ua)) {
2091 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2092 } else if ((sb < 0) && (ur >= ua)) {
2093 dst.SetUint(vform, i, 0); // Negative saturation.
2094 } else {
2095 dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
2096 }
2097 }
2098 return dst;
2099 }
2100
2101
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2102 LogicVRegister Simulator::abs(VectorFormat vform,
2103 LogicVRegister dst,
2104 const LogicVRegister& src) {
2105 dst.ClearForWrite(vform);
2106 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2107 // Test for signed saturation.
2108 int64_t sa = src.Int(vform, i);
2109 if (sa == MinIntFromFormat(vform)) {
2110 dst.SetSignedSat(i, true);
2111 }
2112 if (sa < 0) {
2113 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2114 } else {
2115 dst.SetInt(vform, i, sa);
2116 }
2117 }
2118 return dst;
2119 }
2120
2121
andv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2122 LogicVRegister Simulator::andv(VectorFormat vform,
2123 LogicVRegister dst,
2124 const LogicPRegister& pg,
2125 const LogicVRegister& src) {
2126 VIXL_ASSERT(IsSVEFormat(vform));
2127 uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2128 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2129 if (!pg.IsActive(vform, i)) continue;
2130
2131 result &= src.Uint(vform, i);
2132 }
2133 VectorFormat vform_dst =
2134 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2135 dst.ClearForWrite(vform_dst);
2136 dst.SetUint(vform_dst, 0, result);
2137 return dst;
2138 }
2139
2140
eorv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2141 LogicVRegister Simulator::eorv(VectorFormat vform,
2142 LogicVRegister dst,
2143 const LogicPRegister& pg,
2144 const LogicVRegister& src) {
2145 VIXL_ASSERT(IsSVEFormat(vform));
2146 uint64_t result = 0;
2147 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2148 if (!pg.IsActive(vform, i)) continue;
2149
2150 result ^= src.Uint(vform, i);
2151 }
2152 VectorFormat vform_dst =
2153 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2154 dst.ClearForWrite(vform_dst);
2155 dst.SetUint(vform_dst, 0, result);
2156 return dst;
2157 }
2158
2159
orv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2160 LogicVRegister Simulator::orv(VectorFormat vform,
2161 LogicVRegister dst,
2162 const LogicPRegister& pg,
2163 const LogicVRegister& src) {
2164 VIXL_ASSERT(IsSVEFormat(vform));
2165 uint64_t result = 0;
2166 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2167 if (!pg.IsActive(vform, i)) continue;
2168
2169 result |= src.Uint(vform, i);
2170 }
2171 VectorFormat vform_dst =
2172 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2173 dst.ClearForWrite(vform_dst);
2174 dst.SetUint(vform_dst, 0, result);
2175 return dst;
2176 }
2177
2178
saddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2179 LogicVRegister Simulator::saddv(VectorFormat vform,
2180 LogicVRegister dst,
2181 const LogicPRegister& pg,
2182 const LogicVRegister& src) {
2183 VIXL_ASSERT(IsSVEFormat(vform));
2184 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2185 int64_t result = 0;
2186 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2187 if (!pg.IsActive(vform, i)) continue;
2188
2189 // The destination register always has D-lane sizes and the source register
2190 // always has S-lanes or smaller, so signed integer overflow -- undefined
2191 // behaviour -- can't occur.
2192 result += src.Int(vform, i);
2193 }
2194
2195 dst.ClearForWrite(kFormatD);
2196 dst.SetInt(kFormatD, 0, result);
2197 return dst;
2198 }
2199
2200
uaddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2201 LogicVRegister Simulator::uaddv(VectorFormat vform,
2202 LogicVRegister dst,
2203 const LogicPRegister& pg,
2204 const LogicVRegister& src) {
2205 VIXL_ASSERT(IsSVEFormat(vform));
2206 uint64_t result = 0;
2207 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2208 if (!pg.IsActive(vform, i)) continue;
2209
2210 result += src.Uint(vform, i);
2211 }
2212
2213 dst.ClearForWrite(kFormatD);
2214 dst.SetUint(kFormatD, 0, result);
2215 return dst;
2216 }
2217
2218
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dst_is_signed,const LogicVRegister & src,bool src_is_signed)2219 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2220 LogicVRegister dst,
2221 bool dst_is_signed,
2222 const LogicVRegister& src,
2223 bool src_is_signed) {
2224 bool upperhalf = false;
2225 VectorFormat srcform = dstform;
2226 if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
2227 (dstform == kFormat4S)) {
2228 upperhalf = true;
2229 srcform = VectorFormatHalfLanes(srcform);
2230 }
2231 srcform = VectorFormatDoubleWidth(srcform);
2232
2233 LogicVRegister src_copy = src;
2234
2235 int offset;
2236 if (upperhalf) {
2237 offset = LaneCountFromFormat(dstform) / 2;
2238 } else {
2239 offset = 0;
2240 }
2241
2242 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2243 int64_t ssrc = src_copy.Int(srcform, i);
2244 uint64_t usrc = src_copy.Uint(srcform, i);
2245
2246 // Test for signed saturation
2247 if (ssrc > MaxIntFromFormat(dstform)) {
2248 dst.SetSignedSat(offset + i, true);
2249 } else if (ssrc < MinIntFromFormat(dstform)) {
2250 dst.SetSignedSat(offset + i, false);
2251 }
2252
2253 // Test for unsigned saturation
2254 if (src_is_signed) {
2255 if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2256 dst.SetUnsignedSat(offset + i, true);
2257 } else if (ssrc < 0) {
2258 dst.SetUnsignedSat(offset + i, false);
2259 }
2260 } else {
2261 if (usrc > MaxUintFromFormat(dstform)) {
2262 dst.SetUnsignedSat(offset + i, true);
2263 }
2264 }
2265
2266 int64_t result;
2267 if (src_is_signed) {
2268 result = ssrc & MaxUintFromFormat(dstform);
2269 } else {
2270 result = usrc & MaxUintFromFormat(dstform);
2271 }
2272
2273 if (dst_is_signed) {
2274 dst.SetInt(dstform, offset + i, result);
2275 } else {
2276 dst.SetUint(dstform, offset + i, result);
2277 }
2278 }
2279
2280 if (upperhalf) {
2281 // Clear any bits beyond a Q register.
2282 dst.ClearForWrite(kFormat16B);
2283 } else {
2284 dst.ClearForWrite(dstform);
2285 }
2286 return dst;
2287 }
2288
2289
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2290 LogicVRegister Simulator::xtn(VectorFormat vform,
2291 LogicVRegister dst,
2292 const LogicVRegister& src) {
2293 return extractnarrow(vform, dst, true, src, true);
2294 }
2295
2296
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2297 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2298 LogicVRegister dst,
2299 const LogicVRegister& src) {
2300 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2301 }
2302
2303
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2304 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2305 LogicVRegister dst,
2306 const LogicVRegister& src) {
2307 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2308 }
2309
2310
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2311 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2312 LogicVRegister dst,
2313 const LogicVRegister& src) {
2314 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2315 }
2316
2317
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)2318 LogicVRegister Simulator::absdiff(VectorFormat vform,
2319 LogicVRegister dst,
2320 const LogicVRegister& src1,
2321 const LogicVRegister& src2,
2322 bool is_signed) {
2323 dst.ClearForWrite(vform);
2324 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2325 bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2326 : (src1.Uint(vform, i) > src2.Uint(vform, i));
2327 // Always calculate the answer using unsigned arithmetic, to avoid
2328 // implementation-defined signed overflow.
2329 if (src1_gt_src2) {
2330 dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2331 } else {
2332 dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2333 }
2334 }
2335 return dst;
2336 }
2337
2338
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2339 LogicVRegister Simulator::saba(VectorFormat vform,
2340 LogicVRegister dst,
2341 const LogicVRegister& src1,
2342 const LogicVRegister& src2) {
2343 SimVRegister temp;
2344 dst.ClearForWrite(vform);
2345 absdiff(vform, temp, src1, src2, true);
2346 add(vform, dst, dst, temp);
2347 return dst;
2348 }
2349
2350
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2351 LogicVRegister Simulator::uaba(VectorFormat vform,
2352 LogicVRegister dst,
2353 const LogicVRegister& src1,
2354 const LogicVRegister& src2) {
2355 SimVRegister temp;
2356 dst.ClearForWrite(vform);
2357 absdiff(vform, temp, src1, src2, false);
2358 add(vform, dst, dst, temp);
2359 return dst;
2360 }
2361
2362
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2363 LogicVRegister Simulator::not_(VectorFormat vform,
2364 LogicVRegister dst,
2365 const LogicVRegister& src) {
2366 dst.ClearForWrite(vform);
2367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2368 dst.SetUint(vform, i, ~src.Uint(vform, i));
2369 }
2370 return dst;
2371 }
2372
2373
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2374 LogicVRegister Simulator::rbit(VectorFormat vform,
2375 LogicVRegister dst,
2376 const LogicVRegister& src) {
2377 uint64_t result[kZRegMaxSizeInBytes];
2378 int lane_count = LaneCountFromFormat(vform);
2379 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2380 uint64_t reversed_value;
2381 uint64_t value;
2382 for (int i = 0; i < lane_count; i++) {
2383 value = src.Uint(vform, i);
2384 reversed_value = 0;
2385 for (int j = 0; j < lane_size_in_bits; j++) {
2386 reversed_value = (reversed_value << 1) | (value & 1);
2387 value >>= 1;
2388 }
2389 result[i] = reversed_value;
2390 }
2391
2392 dst.ClearForWrite(vform);
2393 for (int i = 0; i < lane_count; ++i) {
2394 dst.SetUint(vform, i, result[i]);
2395 }
2396 return dst;
2397 }
2398
2399
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2400 LogicVRegister Simulator::rev(VectorFormat vform,
2401 LogicVRegister dst,
2402 const LogicVRegister& src) {
2403 VIXL_ASSERT(IsSVEFormat(vform));
2404 int lane_count = LaneCountFromFormat(vform);
2405 for (int i = 0; i < lane_count / 2; i++) {
2406 uint64_t t = src.Uint(vform, i);
2407 dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2408 dst.SetUint(vform, lane_count - i - 1, t);
2409 }
2410 return dst;
2411 }
2412
2413
rev_byte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rev_size)2414 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2415 LogicVRegister dst,
2416 const LogicVRegister& src,
2417 int rev_size) {
2418 uint64_t result[kZRegMaxSizeInBytes] = {};
2419 int lane_count = LaneCountFromFormat(vform);
2420 int lane_size = LaneSizeInBytesFromFormat(vform);
2421 int lanes_per_loop = rev_size / lane_size;
2422 for (int i = 0; i < lane_count; i += lanes_per_loop) {
2423 for (int j = 0; j < lanes_per_loop; j++) {
2424 result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2425 }
2426 }
2427 dst.ClearForWrite(vform);
2428 for (int i = 0; i < lane_count; ++i) {
2429 dst.SetUint(vform, i, result[i]);
2430 }
2431 return dst;
2432 }
2433
2434
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2435 LogicVRegister Simulator::rev16(VectorFormat vform,
2436 LogicVRegister dst,
2437 const LogicVRegister& src) {
2438 return rev_byte(vform, dst, src, 2);
2439 }
2440
2441
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2442 LogicVRegister Simulator::rev32(VectorFormat vform,
2443 LogicVRegister dst,
2444 const LogicVRegister& src) {
2445 return rev_byte(vform, dst, src, 4);
2446 }
2447
2448
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2449 LogicVRegister Simulator::rev64(VectorFormat vform,
2450 LogicVRegister dst,
2451 const LogicVRegister& src) {
2452 return rev_byte(vform, dst, src, 8);
2453 }
2454
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2455 LogicVRegister Simulator::addlp(VectorFormat vform,
2456 LogicVRegister dst,
2457 const LogicVRegister& src,
2458 bool is_signed,
2459 bool do_accumulate) {
2460 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2461 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
2462
2463 uint64_t result[kZRegMaxSizeInBytes];
2464 int lane_count = LaneCountFromFormat(vform);
2465 for (int i = 0; i < lane_count; i++) {
2466 if (is_signed) {
2467 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2468 src.Int(vformsrc, 2 * i + 1));
2469 } else {
2470 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2471 }
2472 }
2473
2474 dst.ClearForWrite(vform);
2475 for (int i = 0; i < lane_count; ++i) {
2476 if (do_accumulate) {
2477 result[i] += dst.Uint(vform, i);
2478 }
2479 dst.SetUint(vform, i, result[i]);
2480 }
2481
2482 return dst;
2483 }
2484
2485
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2486 LogicVRegister Simulator::saddlp(VectorFormat vform,
2487 LogicVRegister dst,
2488 const LogicVRegister& src) {
2489 return addlp(vform, dst, src, true, false);
2490 }
2491
2492
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2493 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2494 LogicVRegister dst,
2495 const LogicVRegister& src) {
2496 return addlp(vform, dst, src, false, false);
2497 }
2498
2499
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2500 LogicVRegister Simulator::sadalp(VectorFormat vform,
2501 LogicVRegister dst,
2502 const LogicVRegister& src) {
2503 return addlp(vform, dst, src, true, true);
2504 }
2505
2506
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2507 LogicVRegister Simulator::uadalp(VectorFormat vform,
2508 LogicVRegister dst,
2509 const LogicVRegister& src) {
2510 return addlp(vform, dst, src, false, true);
2511 }
2512
ror(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rotation)2513 LogicVRegister Simulator::ror(VectorFormat vform,
2514 LogicVRegister dst,
2515 const LogicVRegister& src,
2516 int rotation) {
2517 dst.ClearForWrite(vform);
2518 int width = LaneSizeInBitsFromFormat(vform);
2519 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2520 uint64_t value = src.Uint(vform, i);
2521 dst.SetUint(vform, i, RotateRight(value, rotation, width));
2522 }
2523 return dst;
2524 }
2525
rol(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rotation)2526 LogicVRegister Simulator::rol(VectorFormat vform,
2527 LogicVRegister dst,
2528 const LogicVRegister& src,
2529 int rotation) {
2530 int ror_equivalent = LaneSizeInBitsFromFormat(vform) - rotation;
2531 return ror(vform, dst, src, ror_equivalent);
2532 }
2533
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2534 LogicVRegister Simulator::ext(VectorFormat vform,
2535 LogicVRegister dst,
2536 const LogicVRegister& src1,
2537 const LogicVRegister& src2,
2538 int index) {
2539 uint8_t result[kZRegMaxSizeInBytes] = {};
2540 int lane_count = LaneCountFromFormat(vform);
2541 for (int i = 0; i < lane_count - index; ++i) {
2542 result[i] = static_cast<uint8_t>(src1.Uint(vform, i + index));
2543 }
2544 for (int i = 0; i < index; ++i) {
2545 result[lane_count - index + i] = static_cast<uint8_t>(src2.Uint(vform, i));
2546 }
2547 dst.ClearForWrite(vform);
2548 for (int i = 0; i < lane_count; ++i) {
2549 dst.SetUint(vform, i, result[i]);
2550 }
2551 return dst;
2552 }
2553
rotate_elements_right(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int index)2554 LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
2555 LogicVRegister dst,
2556 const LogicVRegister& src,
2557 int index) {
2558 if (index < 0) index += LaneCountFromFormat(vform);
2559 VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
2560 index *= LaneSizeInBytesFromFormat(vform);
2561 return ext(kFormatVnB, dst, src, src, index);
2562 }
2563
2564
2565 template <typename T>
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2566 LogicVRegister Simulator::fadda(VectorFormat vform,
2567 LogicVRegister acc,
2568 const LogicPRegister& pg,
2569 const LogicVRegister& src) {
2570 T result = acc.Float<T>(0);
2571 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2572 if (!pg.IsActive(vform, i)) continue;
2573
2574 result = FPAdd(result, src.Float<T>(i));
2575 }
2576 VectorFormat vform_dst =
2577 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2578 acc.ClearForWrite(vform_dst);
2579 acc.SetFloat(0, result);
2580 return acc;
2581 }
2582
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2583 LogicVRegister Simulator::fadda(VectorFormat vform,
2584 LogicVRegister acc,
2585 const LogicPRegister& pg,
2586 const LogicVRegister& src) {
2587 switch (LaneSizeInBitsFromFormat(vform)) {
2588 case kHRegSize:
2589 fadda<SimFloat16>(vform, acc, pg, src);
2590 break;
2591 case kSRegSize:
2592 fadda<float>(vform, acc, pg, src);
2593 break;
2594 case kDRegSize:
2595 fadda<double>(vform, acc, pg, src);
2596 break;
2597 default:
2598 VIXL_UNREACHABLE();
2599 }
2600 return acc;
2601 }
2602
2603 template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2604 LogicVRegister Simulator::fcadd(VectorFormat vform,
2605 LogicVRegister dst, // d
2606 const LogicVRegister& src1, // n
2607 const LogicVRegister& src2, // m
2608 int rot) {
2609 int elements = LaneCountFromFormat(vform);
2610
2611 T element1, element3;
2612 rot = (rot == 1) ? 270 : 90;
2613
2614 // Loop example:
2615 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2616 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2617
2618 for (int e = 0; e <= (elements / 2) - 1; e++) {
2619 switch (rot) {
2620 case 90:
2621 element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2622 element3 = src2.Float<T>(e * 2);
2623 break;
2624 case 270:
2625 element1 = src2.Float<T>(e * 2 + 1);
2626 element3 = FPNeg(src2.Float<T>(e * 2));
2627 break;
2628 default:
2629 VIXL_UNREACHABLE();
2630 return dst; // prevents "element(n) may be unintialized" errors
2631 }
2632 dst.ClearForWrite(vform);
2633 dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2634 dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2635 }
2636 return dst;
2637 }
2638
2639
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2640 LogicVRegister Simulator::fcadd(VectorFormat vform,
2641 LogicVRegister dst, // d
2642 const LogicVRegister& src1, // n
2643 const LogicVRegister& src2, // m
2644 int rot) {
2645 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2646 fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2647 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2648 fcadd<float>(vform, dst, src1, src2, rot);
2649 } else {
2650 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2651 fcadd<double>(vform, dst, src1, src2, rot);
2652 }
2653 return dst;
2654 }
2655
2656 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int index,int rot)2657 LogicVRegister Simulator::fcmla(VectorFormat vform,
2658 LogicVRegister dst,
2659 const LogicVRegister& src1,
2660 const LogicVRegister& src2,
2661 const LogicVRegister& acc,
2662 int index,
2663 int rot) {
2664 int elements = LaneCountFromFormat(vform);
2665
2666 T element1, element2, element3, element4;
2667 rot *= 90;
2668
2669 // Loop example:
2670 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2671 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2672
2673 for (int e = 0; e <= (elements / 2) - 1; e++) {
2674 // Index == -1 indicates a vector/vector rather than vector/indexed-element
2675 // operation.
2676 int f = (index < 0) ? e : index;
2677
2678 switch (rot) {
2679 case 0:
2680 element1 = src2.Float<T>(f * 2);
2681 element2 = src1.Float<T>(e * 2);
2682 element3 = src2.Float<T>(f * 2 + 1);
2683 element4 = src1.Float<T>(e * 2);
2684 break;
2685 case 90:
2686 element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2687 element2 = src1.Float<T>(e * 2 + 1);
2688 element3 = src2.Float<T>(f * 2);
2689 element4 = src1.Float<T>(e * 2 + 1);
2690 break;
2691 case 180:
2692 element1 = FPNeg(src2.Float<T>(f * 2));
2693 element2 = src1.Float<T>(e * 2);
2694 element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2695 element4 = src1.Float<T>(e * 2);
2696 break;
2697 case 270:
2698 element1 = src2.Float<T>(f * 2 + 1);
2699 element2 = src1.Float<T>(e * 2 + 1);
2700 element3 = FPNeg(src2.Float<T>(f * 2));
2701 element4 = src1.Float<T>(e * 2 + 1);
2702 break;
2703 default:
2704 VIXL_UNREACHABLE();
2705 return dst; // prevents "element(n) may be unintialized" errors
2706 }
2707 dst.ClearForWrite(vform);
2708 dst.SetFloat<T>(vform,
2709 e * 2,
2710 FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2711 dst.SetFloat<T>(vform,
2712 e * 2 + 1,
2713 FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2714 }
2715 return dst;
2716 }
2717
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int rot)2718 LogicVRegister Simulator::fcmla(VectorFormat vform,
2719 LogicVRegister dst,
2720 const LogicVRegister& src1,
2721 const LogicVRegister& src2,
2722 const LogicVRegister& acc,
2723 int rot) {
2724 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2725 fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2726 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2727 fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2728 } else {
2729 fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2730 }
2731 return dst;
2732 }
2733
2734
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2735 LogicVRegister Simulator::fcmla(VectorFormat vform,
2736 LogicVRegister dst, // d
2737 const LogicVRegister& src1, // n
2738 const LogicVRegister& src2, // m
2739 int index,
2740 int rot) {
2741 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2742 fcmla<SimFloat16>(vform, dst, src1, src2, dst, index, rot);
2743 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2744 fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2745 } else {
2746 fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2747 }
2748 return dst;
2749 }
2750
cadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot,bool saturate)2751 LogicVRegister Simulator::cadd(VectorFormat vform,
2752 LogicVRegister dst,
2753 const LogicVRegister& src1,
2754 const LogicVRegister& src2,
2755 int rot,
2756 bool saturate) {
2757 SimVRegister src1_r, src1_i;
2758 SimVRegister src2_r, src2_i;
2759 SimVRegister zero;
2760 zero.Clear();
2761 uzp1(vform, src1_r, src1, zero);
2762 uzp2(vform, src1_i, src1, zero);
2763 uzp1(vform, src2_r, src2, zero);
2764 uzp2(vform, src2_i, src2, zero);
2765
2766 if (rot == 90) {
2767 if (saturate) {
2768 sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2769 add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2770 } else {
2771 sub(vform, src1_r, src1_r, src2_i);
2772 add(vform, src1_i, src1_i, src2_r);
2773 }
2774 } else {
2775 VIXL_ASSERT(rot == 270);
2776 if (saturate) {
2777 add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2778 sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2779 } else {
2780 add(vform, src1_r, src1_r, src2_i);
2781 sub(vform, src1_i, src1_i, src2_r);
2782 }
2783 }
2784
2785 zip1(vform, dst, src1_r, src1_i);
2786 return dst;
2787 }
2788
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2789 LogicVRegister Simulator::cmla(VectorFormat vform,
2790 LogicVRegister dst,
2791 const LogicVRegister& srca,
2792 const LogicVRegister& src1,
2793 const LogicVRegister& src2,
2794 int rot) {
2795 SimVRegister src1_a;
2796 SimVRegister src2_a, src2_b;
2797 SimVRegister srca_i, srca_r;
2798 SimVRegister zero, temp;
2799 zero.Clear();
2800
2801 if ((rot == 0) || (rot == 180)) {
2802 uzp1(vform, src1_a, src1, zero);
2803 uzp1(vform, src2_a, src2, zero);
2804 uzp2(vform, src2_b, src2, zero);
2805 } else {
2806 uzp2(vform, src1_a, src1, zero);
2807 uzp2(vform, src2_a, src2, zero);
2808 uzp1(vform, src2_b, src2, zero);
2809 }
2810
2811 uzp1(vform, srca_r, srca, zero);
2812 uzp2(vform, srca_i, srca, zero);
2813
2814 bool sub_r = (rot == 90) || (rot == 180);
2815 bool sub_i = (rot == 180) || (rot == 270);
2816
2817 mul(vform, temp, src1_a, src2_a);
2818 if (sub_r) {
2819 sub(vform, srca_r, srca_r, temp);
2820 } else {
2821 add(vform, srca_r, srca_r, temp);
2822 }
2823
2824 mul(vform, temp, src1_a, src2_b);
2825 if (sub_i) {
2826 sub(vform, srca_i, srca_i, temp);
2827 } else {
2828 add(vform, srca_i, srca_i, temp);
2829 }
2830
2831 zip1(vform, dst, srca_r, srca_i);
2832 return dst;
2833 }
2834
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2835 LogicVRegister Simulator::cmla(VectorFormat vform,
2836 LogicVRegister dst,
2837 const LogicVRegister& srca,
2838 const LogicVRegister& src1,
2839 const LogicVRegister& src2,
2840 int index,
2841 int rot) {
2842 SimVRegister temp;
2843 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
2844 return cmla(vform, dst, srca, src1, temp, rot);
2845 }
2846
bgrp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool do_bext)2847 LogicVRegister Simulator::bgrp(VectorFormat vform,
2848 LogicVRegister dst,
2849 const LogicVRegister& src1,
2850 const LogicVRegister& src2,
2851 bool do_bext) {
2852 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2853 uint64_t value = src1.Uint(vform, i);
2854 uint64_t mask = src2.Uint(vform, i);
2855 int high_pos = 0;
2856 int low_pos = 0;
2857 uint64_t result_high = 0;
2858 uint64_t result_low = 0;
2859 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2860 if ((mask & 1) == 0) {
2861 result_high |= (value & 1) << high_pos;
2862 high_pos++;
2863 } else {
2864 result_low |= (value & 1) << low_pos;
2865 low_pos++;
2866 }
2867 mask >>= 1;
2868 value >>= 1;
2869 }
2870
2871 if (!do_bext) {
2872 result_low |= result_high << low_pos;
2873 }
2874
2875 dst.SetUint(vform, i, result_low);
2876 }
2877 return dst;
2878 }
2879
bdep(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2880 LogicVRegister Simulator::bdep(VectorFormat vform,
2881 LogicVRegister dst,
2882 const LogicVRegister& src1,
2883 const LogicVRegister& src2) {
2884 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2885 uint64_t value = src1.Uint(vform, i);
2886 uint64_t mask = src2.Uint(vform, i);
2887 uint64_t result = 0;
2888 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2889 if ((mask & 1) == 1) {
2890 result |= (value & 1) << j;
2891 value >>= 1;
2892 }
2893 mask >>= 1;
2894 }
2895 dst.SetUint(vform, i, result);
2896 }
2897 return dst;
2898 }
2899
histogram(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2,bool do_segmented)2900 LogicVRegister Simulator::histogram(VectorFormat vform,
2901 LogicVRegister dst,
2902 const LogicPRegister& pg,
2903 const LogicVRegister& src1,
2904 const LogicVRegister& src2,
2905 bool do_segmented) {
2906 int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
2907 uint64_t result[kZRegMaxSizeInBytes];
2908
2909 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2910 uint64_t count = 0;
2911 uint64_t value = src1.Uint(vform, i);
2912
2913 int segment = do_segmented ? (i / elements_per_segment) : 0;
2914 int segment_offset = segment * elements_per_segment;
2915 int hist_limit = do_segmented ? elements_per_segment : (i + 1);
2916 for (int j = 0; j < hist_limit; j++) {
2917 if (pg.IsActive(vform, j) &&
2918 (value == src2.Uint(vform, j + segment_offset))) {
2919 count++;
2920 }
2921 }
2922 result[i] = count;
2923 }
2924 dst.SetUintArray(vform, result);
2925 return dst;
2926 }
2927
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2928 LogicVRegister Simulator::dup_element(VectorFormat vform,
2929 LogicVRegister dst,
2930 const LogicVRegister& src,
2931 int src_index) {
2932 if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
2933 // When duplicating an element larger than 64 bits, split the element into
2934 // 64-bit parts, and duplicate the parts across the destination.
2935 uint64_t d[4];
2936 int count = (vform == kFormatVnQ) ? 2 : 4;
2937 for (int i = 0; i < count; i++) {
2938 d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
2939 }
2940 dst.Clear();
2941 for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
2942 dst.SetUint(kFormatVnD, i, d[i % count]);
2943 }
2944 } else {
2945 int lane_count = LaneCountFromFormat(vform);
2946 uint64_t value = src.Uint(vform, src_index);
2947 dst.ClearForWrite(vform);
2948 for (int i = 0; i < lane_count; ++i) {
2949 dst.SetUint(vform, i, value);
2950 }
2951 }
2952 return dst;
2953 }
2954
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2955 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2956 LogicVRegister dst,
2957 const LogicVRegister& src,
2958 int src_index) {
2959 // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2960 // whereas in NEON, the size of segment is equal to the size of register
2961 // itself.
2962 int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2963 VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2964 int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2965
2966 VIXL_ASSERT(src_index >= 0);
2967 VIXL_ASSERT(src_index < lanes_per_segment);
2968
2969 dst.ClearForWrite(vform);
2970 for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2971 uint64_t value = src.Uint(vform, j + src_index);
2972 for (int i = 0; i < lanes_per_segment; i++) {
2973 dst.SetUint(vform, j + i, value);
2974 }
2975 }
2976 return dst;
2977 }
2978
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const std::pair<int,int> & src_and_index)2979 LogicVRegister Simulator::dup_elements_to_segments(
2980 VectorFormat vform,
2981 LogicVRegister dst,
2982 const std::pair<int, int>& src_and_index) {
2983 return dup_elements_to_segments(vform,
2984 dst,
2985 ReadVRegister(src_and_index.first),
2986 src_and_index.second);
2987 }
2988
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2989 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2990 LogicVRegister dst,
2991 uint64_t imm) {
2992 int lane_count = LaneCountFromFormat(vform);
2993 uint64_t value = imm & MaxUintFromFormat(vform);
2994 dst.ClearForWrite(vform);
2995 for (int i = 0; i < lane_count; ++i) {
2996 dst.SetUint(vform, i, value);
2997 }
2998 return dst;
2999 }
3000
3001
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)3002 LogicVRegister Simulator::ins_element(VectorFormat vform,
3003 LogicVRegister dst,
3004 int dst_index,
3005 const LogicVRegister& src,
3006 int src_index) {
3007 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
3008 return dst;
3009 }
3010
3011
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)3012 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
3013 LogicVRegister dst,
3014 int dst_index,
3015 uint64_t imm) {
3016 uint64_t value = imm & MaxUintFromFormat(vform);
3017 dst.SetUint(vform, dst_index, value);
3018 return dst;
3019 }
3020
3021
index(VectorFormat vform,LogicVRegister dst,uint64_t start,uint64_t step)3022 LogicVRegister Simulator::index(VectorFormat vform,
3023 LogicVRegister dst,
3024 uint64_t start,
3025 uint64_t step) {
3026 VIXL_ASSERT(IsSVEFormat(vform));
3027 uint64_t value = start;
3028 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3029 dst.SetUint(vform, i, value);
3030 value += step;
3031 }
3032 return dst;
3033 }
3034
3035
insr(VectorFormat vform,LogicVRegister dst,uint64_t imm)3036 LogicVRegister Simulator::insr(VectorFormat vform,
3037 LogicVRegister dst,
3038 uint64_t imm) {
3039 VIXL_ASSERT(IsSVEFormat(vform));
3040 for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
3041 dst.SetUint(vform, i, dst.Uint(vform, i - 1));
3042 }
3043 dst.SetUint(vform, 0, imm);
3044 return dst;
3045 }
3046
3047
mov(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3048 LogicVRegister Simulator::mov(VectorFormat vform,
3049 LogicVRegister dst,
3050 const LogicVRegister& src) {
3051 dst.ClearForWrite(vform);
3052 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
3053 dst.SetUint(vform, lane, src.Uint(vform, lane));
3054 }
3055 return dst;
3056 }
3057
3058
mov(LogicPRegister dst,const LogicPRegister & src)3059 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
3060 // Avoid a copy if the registers already alias.
3061 if (dst.Aliases(src)) return dst;
3062
3063 for (int i = 0; i < dst.GetChunkCount(); i++) {
3064 dst.SetChunk(i, src.GetChunk(i));
3065 }
3066 return dst;
3067 }
3068
3069
mov_merging(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3070 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3071 LogicVRegister dst,
3072 const SimPRegister& pg,
3073 const LogicVRegister& src) {
3074 return sel(vform, dst, pg, src, dst);
3075 }
3076
mov_zeroing(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3077 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3078 LogicVRegister dst,
3079 const SimPRegister& pg,
3080 const LogicVRegister& src) {
3081 SimVRegister zero;
3082 dup_immediate(vform, zero, 0);
3083 return sel(vform, dst, pg, src, zero);
3084 }
3085
mov_alternating(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int start_at)3086 LogicVRegister Simulator::mov_alternating(VectorFormat vform,
3087 LogicVRegister dst,
3088 const LogicVRegister& src,
3089 int start_at) {
3090 VIXL_ASSERT((start_at == 0) || (start_at == 1));
3091 for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
3092 dst.SetUint(vform, i, src.Uint(vform, i));
3093 }
3094 return dst;
3095 }
3096
mov_merging(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3097 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3098 const LogicPRegister& pg,
3099 const LogicPRegister& src) {
3100 return sel(dst, pg, src, dst);
3101 }
3102
mov_zeroing(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3103 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3104 const LogicPRegister& pg,
3105 const LogicPRegister& src) {
3106 SimPRegister all_false;
3107 return sel(dst, pg, src, pfalse(all_false));
3108 }
3109
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)3110 LogicVRegister Simulator::movi(VectorFormat vform,
3111 LogicVRegister dst,
3112 uint64_t imm) {
3113 int lane_count = LaneCountFromFormat(vform);
3114 dst.ClearForWrite(vform);
3115 for (int i = 0; i < lane_count; ++i) {
3116 dst.SetUint(vform, i, imm);
3117 }
3118 return dst;
3119 }
3120
3121
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)3122 LogicVRegister Simulator::mvni(VectorFormat vform,
3123 LogicVRegister dst,
3124 uint64_t imm) {
3125 int lane_count = LaneCountFromFormat(vform);
3126 dst.ClearForWrite(vform);
3127 for (int i = 0; i < lane_count; ++i) {
3128 dst.SetUint(vform, i, ~imm);
3129 }
3130 return dst;
3131 }
3132
3133
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)3134 LogicVRegister Simulator::orr(VectorFormat vform,
3135 LogicVRegister dst,
3136 const LogicVRegister& src,
3137 uint64_t imm) {
3138 uint64_t result[16];
3139 int lane_count = LaneCountFromFormat(vform);
3140 for (int i = 0; i < lane_count; ++i) {
3141 result[i] = src.Uint(vform, i) | imm;
3142 }
3143 dst.ClearForWrite(vform);
3144 for (int i = 0; i < lane_count; ++i) {
3145 dst.SetUint(vform, i, result[i]);
3146 }
3147 return dst;
3148 }
3149
3150
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3151 LogicVRegister Simulator::uxtl(VectorFormat vform,
3152 LogicVRegister dst,
3153 const LogicVRegister& src,
3154 bool is_2) {
3155 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3156 int lane_count = LaneCountFromFormat(vform);
3157 int src_offset = is_2 ? lane_count : 0;
3158
3159 dst.ClearForWrite(vform);
3160 for (int i = 0; i < lane_count; i++) {
3161 dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
3162 }
3163 return dst;
3164 }
3165
3166
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3167 LogicVRegister Simulator::sxtl(VectorFormat vform,
3168 LogicVRegister dst,
3169 const LogicVRegister& src,
3170 bool is_2) {
3171 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3172 int lane_count = LaneCountFromFormat(vform);
3173 int src_offset = is_2 ? lane_count : 0;
3174
3175 dst.ClearForWrite(vform);
3176 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3177 dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
3178 }
3179 return dst;
3180 }
3181
3182
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3183 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3184 LogicVRegister dst,
3185 const LogicVRegister& src) {
3186 return uxtl(vform, dst, src, /* is_2 = */ true);
3187 }
3188
3189
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3190 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3191 LogicVRegister dst,
3192 const LogicVRegister& src) {
3193 return sxtl(vform, dst, src, /* is_2 = */ true);
3194 }
3195
3196
uxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3197 LogicVRegister Simulator::uxt(VectorFormat vform,
3198 LogicVRegister dst,
3199 const LogicVRegister& src,
3200 unsigned from_size_in_bits) {
3201 int lane_count = LaneCountFromFormat(vform);
3202 uint64_t mask = GetUintMask(from_size_in_bits);
3203
3204 dst.ClearForWrite(vform);
3205 for (int i = 0; i < lane_count; i++) {
3206 dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3207 }
3208 return dst;
3209 }
3210
3211
sxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3212 LogicVRegister Simulator::sxt(VectorFormat vform,
3213 LogicVRegister dst,
3214 const LogicVRegister& src,
3215 unsigned from_size_in_bits) {
3216 int lane_count = LaneCountFromFormat(vform);
3217
3218 dst.ClearForWrite(vform);
3219 for (int i = 0; i < lane_count; i++) {
3220 uint64_t value =
3221 ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3222 dst.SetInt(vform, i, value);
3223 }
3224 return dst;
3225 }
3226
3227
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3228 LogicVRegister Simulator::shrn(VectorFormat vform,
3229 LogicVRegister dst,
3230 const LogicVRegister& src,
3231 int shift) {
3232 SimVRegister temp;
3233 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3234 VectorFormat vform_dst = vform;
3235 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3236 return extractnarrow(vform_dst, dst, false, shifted_src, false);
3237 }
3238
3239
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3240 LogicVRegister Simulator::shrn2(VectorFormat vform,
3241 LogicVRegister dst,
3242 const LogicVRegister& src,
3243 int shift) {
3244 SimVRegister temp;
3245 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3246 VectorFormat vformdst = vform;
3247 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3248 return extractnarrow(vformdst, dst, false, shifted_src, false);
3249 }
3250
3251
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3252 LogicVRegister Simulator::rshrn(VectorFormat vform,
3253 LogicVRegister dst,
3254 const LogicVRegister& src,
3255 int shift) {
3256 SimVRegister temp;
3257 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3258 VectorFormat vformdst = vform;
3259 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3260 return extractnarrow(vformdst, dst, false, shifted_src, false);
3261 }
3262
3263
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3264 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3265 LogicVRegister dst,
3266 const LogicVRegister& src,
3267 int shift) {
3268 SimVRegister temp;
3269 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3270 VectorFormat vformdst = vform;
3271 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3272 return extractnarrow(vformdst, dst, false, shifted_src, false);
3273 }
3274
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)3275 LogicVRegister Simulator::Table(VectorFormat vform,
3276 LogicVRegister dst,
3277 const LogicVRegister& ind,
3278 bool zero_out_of_bounds,
3279 const LogicVRegister* tab1,
3280 const LogicVRegister* tab2,
3281 const LogicVRegister* tab3,
3282 const LogicVRegister* tab4) {
3283 VIXL_ASSERT(tab1 != NULL);
3284 int lane_count = LaneCountFromFormat(vform);
3285 VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
3286 uint64_t table[kZRegMaxSizeInBytes * 2];
3287 uint64_t result[kZRegMaxSizeInBytes];
3288
3289 // For Neon, the table source registers are always 16B, and Neon allows only
3290 // 8B or 16B vform for the destination, so infer the table format from the
3291 // destination.
3292 VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
3293
3294 uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
3295 if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
3296 if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
3297 if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
3298
3299 for (int i = 0; i < lane_count; i++) {
3300 uint64_t index = ind.Uint(vform, i);
3301 result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
3302 if (index < tab_size) result[i] = table[index];
3303 }
3304 dst.SetUintArray(vform, result);
3305 return dst;
3306 }
3307
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3308 LogicVRegister Simulator::tbl(VectorFormat vform,
3309 LogicVRegister dst,
3310 const LogicVRegister& tab,
3311 const LogicVRegister& ind) {
3312 return Table(vform, dst, ind, true, &tab);
3313 }
3314
3315
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3316 LogicVRegister Simulator::tbl(VectorFormat vform,
3317 LogicVRegister dst,
3318 const LogicVRegister& tab,
3319 const LogicVRegister& tab2,
3320 const LogicVRegister& ind) {
3321 return Table(vform, dst, ind, true, &tab, &tab2);
3322 }
3323
3324
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3325 LogicVRegister Simulator::tbl(VectorFormat vform,
3326 LogicVRegister dst,
3327 const LogicVRegister& tab,
3328 const LogicVRegister& tab2,
3329 const LogicVRegister& tab3,
3330 const LogicVRegister& ind) {
3331 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3332 }
3333
3334
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3335 LogicVRegister Simulator::tbl(VectorFormat vform,
3336 LogicVRegister dst,
3337 const LogicVRegister& tab,
3338 const LogicVRegister& tab2,
3339 const LogicVRegister& tab3,
3340 const LogicVRegister& tab4,
3341 const LogicVRegister& ind) {
3342 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3343 }
3344
3345
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3346 LogicVRegister Simulator::tbx(VectorFormat vform,
3347 LogicVRegister dst,
3348 const LogicVRegister& tab,
3349 const LogicVRegister& ind) {
3350 return Table(vform, dst, ind, false, &tab);
3351 }
3352
3353
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3354 LogicVRegister Simulator::tbx(VectorFormat vform,
3355 LogicVRegister dst,
3356 const LogicVRegister& tab,
3357 const LogicVRegister& tab2,
3358 const LogicVRegister& ind) {
3359 return Table(vform, dst, ind, false, &tab, &tab2);
3360 }
3361
3362
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3363 LogicVRegister Simulator::tbx(VectorFormat vform,
3364 LogicVRegister dst,
3365 const LogicVRegister& tab,
3366 const LogicVRegister& tab2,
3367 const LogicVRegister& tab3,
3368 const LogicVRegister& ind) {
3369 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3370 }
3371
3372
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3373 LogicVRegister Simulator::tbx(VectorFormat vform,
3374 LogicVRegister dst,
3375 const LogicVRegister& tab,
3376 const LogicVRegister& tab2,
3377 const LogicVRegister& tab3,
3378 const LogicVRegister& tab4,
3379 const LogicVRegister& ind) {
3380 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3381 }
3382
3383
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3384 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3385 LogicVRegister dst,
3386 const LogicVRegister& src,
3387 int shift) {
3388 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3389 }
3390
3391
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3392 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3393 LogicVRegister dst,
3394 const LogicVRegister& src,
3395 int shift) {
3396 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3397 }
3398
3399
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3400 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3401 LogicVRegister dst,
3402 const LogicVRegister& src,
3403 int shift) {
3404 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3405 }
3406
3407
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3408 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3409 LogicVRegister dst,
3410 const LogicVRegister& src,
3411 int shift) {
3412 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3413 }
3414
3415
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3416 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3417 LogicVRegister dst,
3418 const LogicVRegister& src,
3419 int shift) {
3420 SimVRegister temp;
3421 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3422 VectorFormat vformdst = vform;
3423 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3424 return sqxtn(vformdst, dst, shifted_src);
3425 }
3426
3427
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3428 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3429 LogicVRegister dst,
3430 const LogicVRegister& src,
3431 int shift) {
3432 SimVRegister temp;
3433 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3434 VectorFormat vformdst = vform;
3435 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3436 return sqxtn(vformdst, dst, shifted_src);
3437 }
3438
3439
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3440 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3441 LogicVRegister dst,
3442 const LogicVRegister& src,
3443 int shift) {
3444 SimVRegister temp;
3445 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3446 VectorFormat vformdst = vform;
3447 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3448 return sqxtn(vformdst, dst, shifted_src);
3449 }
3450
3451
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3452 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3453 LogicVRegister dst,
3454 const LogicVRegister& src,
3455 int shift) {
3456 SimVRegister temp;
3457 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3458 VectorFormat vformdst = vform;
3459 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3460 return sqxtn(vformdst, dst, shifted_src);
3461 }
3462
3463
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3464 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3465 LogicVRegister dst,
3466 const LogicVRegister& src,
3467 int shift) {
3468 SimVRegister temp;
3469 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3470 VectorFormat vformdst = vform;
3471 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3472 return sqxtun(vformdst, dst, shifted_src);
3473 }
3474
3475
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3476 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3477 LogicVRegister dst,
3478 const LogicVRegister& src,
3479 int shift) {
3480 SimVRegister temp;
3481 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3482 VectorFormat vformdst = vform;
3483 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3484 return sqxtun(vformdst, dst, shifted_src);
3485 }
3486
3487
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3488 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3489 LogicVRegister dst,
3490 const LogicVRegister& src,
3491 int shift) {
3492 SimVRegister temp;
3493 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3494 VectorFormat vformdst = vform;
3495 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3496 return sqxtun(vformdst, dst, shifted_src);
3497 }
3498
3499
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3500 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3501 LogicVRegister dst,
3502 const LogicVRegister& src,
3503 int shift) {
3504 SimVRegister temp;
3505 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3506 VectorFormat vformdst = vform;
3507 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3508 return sqxtun(vformdst, dst, shifted_src);
3509 }
3510
3511
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3512 LogicVRegister Simulator::uaddl(VectorFormat vform,
3513 LogicVRegister dst,
3514 const LogicVRegister& src1,
3515 const LogicVRegister& src2) {
3516 SimVRegister temp1, temp2;
3517 uxtl(vform, temp1, src1);
3518 uxtl(vform, temp2, src2);
3519 add(vform, dst, temp1, temp2);
3520 return dst;
3521 }
3522
3523
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3524 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3525 LogicVRegister dst,
3526 const LogicVRegister& src1,
3527 const LogicVRegister& src2) {
3528 SimVRegister temp1, temp2;
3529 uxtl2(vform, temp1, src1);
3530 uxtl2(vform, temp2, src2);
3531 add(vform, dst, temp1, temp2);
3532 return dst;
3533 }
3534
3535
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3536 LogicVRegister Simulator::uaddw(VectorFormat vform,
3537 LogicVRegister dst,
3538 const LogicVRegister& src1,
3539 const LogicVRegister& src2) {
3540 SimVRegister temp;
3541 uxtl(vform, temp, src2);
3542 add(vform, dst, src1, temp);
3543 return dst;
3544 }
3545
3546
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3547 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3548 LogicVRegister dst,
3549 const LogicVRegister& src1,
3550 const LogicVRegister& src2) {
3551 SimVRegister temp;
3552 uxtl2(vform, temp, src2);
3553 add(vform, dst, src1, temp);
3554 return dst;
3555 }
3556
3557
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3558 LogicVRegister Simulator::saddl(VectorFormat vform,
3559 LogicVRegister dst,
3560 const LogicVRegister& src1,
3561 const LogicVRegister& src2) {
3562 SimVRegister temp1, temp2;
3563 sxtl(vform, temp1, src1);
3564 sxtl(vform, temp2, src2);
3565 add(vform, dst, temp1, temp2);
3566 return dst;
3567 }
3568
3569
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3570 LogicVRegister Simulator::saddl2(VectorFormat vform,
3571 LogicVRegister dst,
3572 const LogicVRegister& src1,
3573 const LogicVRegister& src2) {
3574 SimVRegister temp1, temp2;
3575 sxtl2(vform, temp1, src1);
3576 sxtl2(vform, temp2, src2);
3577 add(vform, dst, temp1, temp2);
3578 return dst;
3579 }
3580
3581
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3582 LogicVRegister Simulator::saddw(VectorFormat vform,
3583 LogicVRegister dst,
3584 const LogicVRegister& src1,
3585 const LogicVRegister& src2) {
3586 SimVRegister temp;
3587 sxtl(vform, temp, src2);
3588 add(vform, dst, src1, temp);
3589 return dst;
3590 }
3591
3592
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3593 LogicVRegister Simulator::saddw2(VectorFormat vform,
3594 LogicVRegister dst,
3595 const LogicVRegister& src1,
3596 const LogicVRegister& src2) {
3597 SimVRegister temp;
3598 sxtl2(vform, temp, src2);
3599 add(vform, dst, src1, temp);
3600 return dst;
3601 }
3602
3603
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3604 LogicVRegister Simulator::usubl(VectorFormat vform,
3605 LogicVRegister dst,
3606 const LogicVRegister& src1,
3607 const LogicVRegister& src2) {
3608 SimVRegister temp1, temp2;
3609 uxtl(vform, temp1, src1);
3610 uxtl(vform, temp2, src2);
3611 sub(vform, dst, temp1, temp2);
3612 return dst;
3613 }
3614
3615
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3616 LogicVRegister Simulator::usubl2(VectorFormat vform,
3617 LogicVRegister dst,
3618 const LogicVRegister& src1,
3619 const LogicVRegister& src2) {
3620 SimVRegister temp1, temp2;
3621 uxtl2(vform, temp1, src1);
3622 uxtl2(vform, temp2, src2);
3623 sub(vform, dst, temp1, temp2);
3624 return dst;
3625 }
3626
3627
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3628 LogicVRegister Simulator::usubw(VectorFormat vform,
3629 LogicVRegister dst,
3630 const LogicVRegister& src1,
3631 const LogicVRegister& src2) {
3632 SimVRegister temp;
3633 uxtl(vform, temp, src2);
3634 sub(vform, dst, src1, temp);
3635 return dst;
3636 }
3637
3638
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3639 LogicVRegister Simulator::usubw2(VectorFormat vform,
3640 LogicVRegister dst,
3641 const LogicVRegister& src1,
3642 const LogicVRegister& src2) {
3643 SimVRegister temp;
3644 uxtl2(vform, temp, src2);
3645 sub(vform, dst, src1, temp);
3646 return dst;
3647 }
3648
3649
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3650 LogicVRegister Simulator::ssubl(VectorFormat vform,
3651 LogicVRegister dst,
3652 const LogicVRegister& src1,
3653 const LogicVRegister& src2) {
3654 SimVRegister temp1, temp2;
3655 sxtl(vform, temp1, src1);
3656 sxtl(vform, temp2, src2);
3657 sub(vform, dst, temp1, temp2);
3658 return dst;
3659 }
3660
3661
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3662 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3663 LogicVRegister dst,
3664 const LogicVRegister& src1,
3665 const LogicVRegister& src2) {
3666 SimVRegister temp1, temp2;
3667 sxtl2(vform, temp1, src1);
3668 sxtl2(vform, temp2, src2);
3669 sub(vform, dst, temp1, temp2);
3670 return dst;
3671 }
3672
3673
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3674 LogicVRegister Simulator::ssubw(VectorFormat vform,
3675 LogicVRegister dst,
3676 const LogicVRegister& src1,
3677 const LogicVRegister& src2) {
3678 SimVRegister temp;
3679 sxtl(vform, temp, src2);
3680 sub(vform, dst, src1, temp);
3681 return dst;
3682 }
3683
3684
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3685 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3686 LogicVRegister dst,
3687 const LogicVRegister& src1,
3688 const LogicVRegister& src2) {
3689 SimVRegister temp;
3690 sxtl2(vform, temp, src2);
3691 sub(vform, dst, src1, temp);
3692 return dst;
3693 }
3694
3695
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3696 LogicVRegister Simulator::uabal(VectorFormat vform,
3697 LogicVRegister dst,
3698 const LogicVRegister& src1,
3699 const LogicVRegister& src2) {
3700 SimVRegister temp1, temp2;
3701 uxtl(vform, temp1, src1);
3702 uxtl(vform, temp2, src2);
3703 uaba(vform, dst, temp1, temp2);
3704 return dst;
3705 }
3706
3707
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3708 LogicVRegister Simulator::uabal2(VectorFormat vform,
3709 LogicVRegister dst,
3710 const LogicVRegister& src1,
3711 const LogicVRegister& src2) {
3712 SimVRegister temp1, temp2;
3713 uxtl2(vform, temp1, src1);
3714 uxtl2(vform, temp2, src2);
3715 uaba(vform, dst, temp1, temp2);
3716 return dst;
3717 }
3718
3719
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3720 LogicVRegister Simulator::sabal(VectorFormat vform,
3721 LogicVRegister dst,
3722 const LogicVRegister& src1,
3723 const LogicVRegister& src2) {
3724 SimVRegister temp1, temp2;
3725 sxtl(vform, temp1, src1);
3726 sxtl(vform, temp2, src2);
3727 saba(vform, dst, temp1, temp2);
3728 return dst;
3729 }
3730
3731
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3732 LogicVRegister Simulator::sabal2(VectorFormat vform,
3733 LogicVRegister dst,
3734 const LogicVRegister& src1,
3735 const LogicVRegister& src2) {
3736 SimVRegister temp1, temp2;
3737 sxtl2(vform, temp1, src1);
3738 sxtl2(vform, temp2, src2);
3739 saba(vform, dst, temp1, temp2);
3740 return dst;
3741 }
3742
3743
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3744 LogicVRegister Simulator::uabdl(VectorFormat vform,
3745 LogicVRegister dst,
3746 const LogicVRegister& src1,
3747 const LogicVRegister& src2) {
3748 SimVRegister temp1, temp2;
3749 uxtl(vform, temp1, src1);
3750 uxtl(vform, temp2, src2);
3751 absdiff(vform, dst, temp1, temp2, false);
3752 return dst;
3753 }
3754
3755
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3756 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3757 LogicVRegister dst,
3758 const LogicVRegister& src1,
3759 const LogicVRegister& src2) {
3760 SimVRegister temp1, temp2;
3761 uxtl2(vform, temp1, src1);
3762 uxtl2(vform, temp2, src2);
3763 absdiff(vform, dst, temp1, temp2, false);
3764 return dst;
3765 }
3766
3767
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3768 LogicVRegister Simulator::sabdl(VectorFormat vform,
3769 LogicVRegister dst,
3770 const LogicVRegister& src1,
3771 const LogicVRegister& src2) {
3772 SimVRegister temp1, temp2;
3773 sxtl(vform, temp1, src1);
3774 sxtl(vform, temp2, src2);
3775 absdiff(vform, dst, temp1, temp2, true);
3776 return dst;
3777 }
3778
3779
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3780 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3781 LogicVRegister dst,
3782 const LogicVRegister& src1,
3783 const LogicVRegister& src2) {
3784 SimVRegister temp1, temp2;
3785 sxtl2(vform, temp1, src1);
3786 sxtl2(vform, temp2, src2);
3787 absdiff(vform, dst, temp1, temp2, true);
3788 return dst;
3789 }
3790
3791
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3792 LogicVRegister Simulator::umull(VectorFormat vform,
3793 LogicVRegister dst,
3794 const LogicVRegister& src1,
3795 const LogicVRegister& src2,
3796 bool is_2) {
3797 SimVRegister temp1, temp2;
3798 uxtl(vform, temp1, src1, is_2);
3799 uxtl(vform, temp2, src2, is_2);
3800 mul(vform, dst, temp1, temp2);
3801 return dst;
3802 }
3803
3804
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3805 LogicVRegister Simulator::umull2(VectorFormat vform,
3806 LogicVRegister dst,
3807 const LogicVRegister& src1,
3808 const LogicVRegister& src2) {
3809 return umull(vform, dst, src1, src2, /* is_2 = */ true);
3810 }
3811
3812
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3813 LogicVRegister Simulator::smull(VectorFormat vform,
3814 LogicVRegister dst,
3815 const LogicVRegister& src1,
3816 const LogicVRegister& src2,
3817 bool is_2) {
3818 SimVRegister temp1, temp2;
3819 sxtl(vform, temp1, src1, is_2);
3820 sxtl(vform, temp2, src2, is_2);
3821 mul(vform, dst, temp1, temp2);
3822 return dst;
3823 }
3824
3825
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3826 LogicVRegister Simulator::smull2(VectorFormat vform,
3827 LogicVRegister dst,
3828 const LogicVRegister& src1,
3829 const LogicVRegister& src2) {
3830 return smull(vform, dst, src1, src2, /* is_2 = */ true);
3831 }
3832
3833
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3834 LogicVRegister Simulator::umlsl(VectorFormat vform,
3835 LogicVRegister dst,
3836 const LogicVRegister& src1,
3837 const LogicVRegister& src2,
3838 bool is_2) {
3839 SimVRegister temp1, temp2;
3840 uxtl(vform, temp1, src1, is_2);
3841 uxtl(vform, temp2, src2, is_2);
3842 mls(vform, dst, dst, temp1, temp2);
3843 return dst;
3844 }
3845
3846
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3847 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3848 LogicVRegister dst,
3849 const LogicVRegister& src1,
3850 const LogicVRegister& src2) {
3851 return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
3852 }
3853
3854
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3855 LogicVRegister Simulator::smlsl(VectorFormat vform,
3856 LogicVRegister dst,
3857 const LogicVRegister& src1,
3858 const LogicVRegister& src2,
3859 bool is_2) {
3860 SimVRegister temp1, temp2;
3861 sxtl(vform, temp1, src1, is_2);
3862 sxtl(vform, temp2, src2, is_2);
3863 mls(vform, dst, dst, temp1, temp2);
3864 return dst;
3865 }
3866
3867
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3868 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3869 LogicVRegister dst,
3870 const LogicVRegister& src1,
3871 const LogicVRegister& src2) {
3872 return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
3873 }
3874
3875
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3876 LogicVRegister Simulator::umlal(VectorFormat vform,
3877 LogicVRegister dst,
3878 const LogicVRegister& src1,
3879 const LogicVRegister& src2,
3880 bool is_2) {
3881 SimVRegister temp1, temp2;
3882 uxtl(vform, temp1, src1, is_2);
3883 uxtl(vform, temp2, src2, is_2);
3884 mla(vform, dst, dst, temp1, temp2);
3885 return dst;
3886 }
3887
3888
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3889 LogicVRegister Simulator::umlal2(VectorFormat vform,
3890 LogicVRegister dst,
3891 const LogicVRegister& src1,
3892 const LogicVRegister& src2) {
3893 return umlal(vform, dst, src1, src2, /* is_2 = */ true);
3894 }
3895
3896
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3897 LogicVRegister Simulator::smlal(VectorFormat vform,
3898 LogicVRegister dst,
3899 const LogicVRegister& src1,
3900 const LogicVRegister& src2,
3901 bool is_2) {
3902 SimVRegister temp1, temp2;
3903 sxtl(vform, temp1, src1, is_2);
3904 sxtl(vform, temp2, src2, is_2);
3905 mla(vform, dst, dst, temp1, temp2);
3906 return dst;
3907 }
3908
3909
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3910 LogicVRegister Simulator::smlal2(VectorFormat vform,
3911 LogicVRegister dst,
3912 const LogicVRegister& src1,
3913 const LogicVRegister& src2) {
3914 return smlal(vform, dst, src1, src2, /* is_2 = */ true);
3915 }
3916
3917
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3918 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3919 LogicVRegister dst,
3920 const LogicVRegister& src1,
3921 const LogicVRegister& src2,
3922 bool is_2) {
3923 SimVRegister temp;
3924 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3925 return add(vform, dst, dst, product).SignedSaturate(vform);
3926 }
3927
3928
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3929 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3930 LogicVRegister dst,
3931 const LogicVRegister& src1,
3932 const LogicVRegister& src2) {
3933 return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
3934 }
3935
3936
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3937 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3938 LogicVRegister dst,
3939 const LogicVRegister& src1,
3940 const LogicVRegister& src2,
3941 bool is_2) {
3942 SimVRegister temp;
3943 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3944 return sub(vform, dst, dst, product).SignedSaturate(vform);
3945 }
3946
3947
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3948 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3949 LogicVRegister dst,
3950 const LogicVRegister& src1,
3951 const LogicVRegister& src2) {
3952 return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
3953 }
3954
3955
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3956 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3957 LogicVRegister dst,
3958 const LogicVRegister& src1,
3959 const LogicVRegister& src2,
3960 bool is_2) {
3961 SimVRegister temp;
3962 LogicVRegister product = smull(vform, temp, src1, src2, is_2);
3963 return add(vform, dst, product, product).SignedSaturate(vform);
3964 }
3965
3966
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3967 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3968 LogicVRegister dst,
3969 const LogicVRegister& src1,
3970 const LogicVRegister& src2) {
3971 return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
3972 }
3973
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3974 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3975 LogicVRegister dst,
3976 const LogicVRegister& src1,
3977 const LogicVRegister& src2,
3978 bool round) {
3979 int esize = LaneSizeInBitsFromFormat(vform);
3980
3981 SimVRegister temp_lo, temp_hi;
3982
3983 // Compute low and high multiplication results.
3984 mul(vform, temp_lo, src1, src2);
3985 smulh(vform, temp_hi, src1, src2);
3986
3987 // Double by shifting high half, and adding in most-significant bit of low
3988 // half.
3989 shl(vform, temp_hi, temp_hi, 1);
3990 usra(vform, temp_hi, temp_lo, esize - 1);
3991
3992 if (round) {
3993 // Add the second (due to doubling) most-significant bit of the low half
3994 // into the result.
3995 shl(vform, temp_lo, temp_lo, 1);
3996 usra(vform, temp_hi, temp_lo, esize - 1);
3997 }
3998
3999 SimPRegister not_sat;
4000 LogicPRegister ptemp(not_sat);
4001 dst.ClearForWrite(vform);
4002 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4003 // Saturation only occurs when src1 = src2 = minimum representable value.
4004 // Check this as a special case.
4005 ptemp.SetActive(vform, i, true);
4006 if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
4007 (src2.Int(vform, i) == MinIntFromFormat(vform))) {
4008 ptemp.SetActive(vform, i, false);
4009 }
4010 dst.SetInt(vform, i, MaxIntFromFormat(vform));
4011 }
4012
4013 mov_merging(vform, dst, not_sat, temp_hi);
4014 return dst;
4015 }
4016
4017
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_src1_signed,bool is_src2_signed)4018 LogicVRegister Simulator::dot(VectorFormat vform,
4019 LogicVRegister dst,
4020 const LogicVRegister& src1,
4021 const LogicVRegister& src2,
4022 bool is_src1_signed,
4023 bool is_src2_signed) {
4024 VectorFormat quarter_vform =
4025 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4026
4027 dst.ClearForWrite(vform);
4028 for (int e = 0; e < LaneCountFromFormat(vform); e++) {
4029 uint64_t result = 0;
4030 int64_t element1, element2;
4031 for (int i = 0; i < 4; i++) {
4032 int index = 4 * e + i;
4033 if (is_src1_signed) {
4034 element1 = src1.Int(quarter_vform, index);
4035 } else {
4036 element1 = src1.Uint(quarter_vform, index);
4037 }
4038 if (is_src2_signed) {
4039 element2 = src2.Int(quarter_vform, index);
4040 } else {
4041 element2 = src2.Uint(quarter_vform, index);
4042 }
4043 result += element1 * element2;
4044 }
4045 dst.SetUint(vform, e, result + dst.Uint(vform, e));
4046 }
4047 return dst;
4048 }
4049
4050
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4051 LogicVRegister Simulator::sdot(VectorFormat vform,
4052 LogicVRegister dst,
4053 const LogicVRegister& src1,
4054 const LogicVRegister& src2) {
4055 return dot(vform, dst, src1, src2, true, true);
4056 }
4057
4058
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4059 LogicVRegister Simulator::udot(VectorFormat vform,
4060 LogicVRegister dst,
4061 const LogicVRegister& src1,
4062 const LogicVRegister& src2) {
4063 return dot(vform, dst, src1, src2, false, false);
4064 }
4065
usdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4066 LogicVRegister Simulator::usdot(VectorFormat vform,
4067 LogicVRegister dst,
4068 const LogicVRegister& src1,
4069 const LogicVRegister& src2) {
4070 return dot(vform, dst, src1, src2, false, true);
4071 }
4072
cdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & acc,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4073 LogicVRegister Simulator::cdot(VectorFormat vform,
4074 LogicVRegister dst,
4075 const LogicVRegister& acc,
4076 const LogicVRegister& src1,
4077 const LogicVRegister& src2,
4078 int rot) {
4079 VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
4080 VectorFormat quarter_vform =
4081 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4082
4083 int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
4084 int sel_b = 1 - sel_a;
4085 int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
4086
4087 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4088 int64_t result = acc.Int(vform, i);
4089 for (int j = 0; j < 2; j++) {
4090 int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
4091 int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
4092 int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
4093 int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
4094 result += (r1 * r2) + (sub_i * i1 * i2);
4095 }
4096 dst.SetInt(vform, i, result);
4097 }
4098 return dst;
4099 }
4100
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4101 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4102 LogicVRegister dst,
4103 const LogicVRegister& srca,
4104 const LogicVRegister& src1,
4105 const LogicVRegister& src2,
4106 int rot) {
4107 SimVRegister src1_a, src1_b;
4108 SimVRegister src2_a, src2_b;
4109 SimVRegister srca_i, srca_r;
4110 SimVRegister zero, temp;
4111 zero.Clear();
4112
4113 if ((rot == 0) || (rot == 180)) {
4114 uzp1(vform, src1_a, src1, zero);
4115 uzp1(vform, src2_a, src2, zero);
4116 uzp2(vform, src2_b, src2, zero);
4117 } else {
4118 uzp2(vform, src1_a, src1, zero);
4119 uzp2(vform, src2_a, src2, zero);
4120 uzp1(vform, src2_b, src2, zero);
4121 }
4122
4123 uzp1(vform, srca_r, srca, zero);
4124 uzp2(vform, srca_i, srca, zero);
4125
4126 bool sub_r = (rot == 90) || (rot == 180);
4127 bool sub_i = (rot == 180) || (rot == 270);
4128
4129 const bool round = true;
4130 sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
4131 sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
4132 zip1(vform, dst, srca_r, srca_i);
4133 return dst;
4134 }
4135
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)4136 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4137 LogicVRegister dst,
4138 const LogicVRegister& srca,
4139 const LogicVRegister& src1,
4140 const LogicVRegister& src2,
4141 int index,
4142 int rot) {
4143 SimVRegister temp;
4144 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
4145 return sqrdcmlah(vform, dst, srca, src1, temp, rot);
4146 }
4147
sqrdmlash_d(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4148 LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
4149 LogicVRegister dst,
4150 const LogicVRegister& src1,
4151 const LogicVRegister& src2,
4152 bool round,
4153 bool sub_op) {
4154 // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
4155 // To avoid this, we use:
4156 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4157 // which is same as:
4158 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4159
4160 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4161 int esize = kDRegSize;
4162 vixl_uint128_t round_const, accum;
4163 round_const.first = 0;
4164 if (round) {
4165 round_const.second = UINT64_C(1) << (esize - 2);
4166 } else {
4167 round_const.second = 0;
4168 }
4169
4170 dst.ClearForWrite(vform);
4171 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4172 // Shift the whole value left by `esize - 1` bits.
4173 accum.first = dst.Int(vform, i) >> 1;
4174 accum.second = dst.Int(vform, i) << (esize - 1);
4175
4176 vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));
4177
4178 if (sub_op) {
4179 product = Neg128(product);
4180 }
4181 accum = Add128(accum, product);
4182
4183 // Perform rounding.
4184 accum = Add128(accum, round_const);
4185
4186 // Arithmetic shift the whole value right by `esize - 1` bits.
4187 accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
4188 accum.first = UnsignedNegate(accum.first >> (esize - 1));
4189
4190 // Perform saturation.
4191 bool is_pos = (accum.first == 0) ? true : false;
4192 if (is_pos &&
4193 (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
4194 accum.second = MaxIntFromFormat(vform);
4195 } else if (!is_pos && (accum.second <
4196 static_cast<uint64_t>(MinIntFromFormat(vform)))) {
4197 accum.second = MinIntFromFormat(vform);
4198 }
4199
4200 dst.SetInt(vform, i, accum.second);
4201 }
4202
4203 return dst;
4204 }
4205
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4206 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4207 LogicVRegister dst,
4208 const LogicVRegister& src1,
4209 const LogicVRegister& src2,
4210 bool round,
4211 bool sub_op) {
4212 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4213 // To avoid this, we use:
4214 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4215 // which is same as:
4216 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4217
4218 if (vform == kFormatVnD) {
4219 return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
4220 }
4221
4222 int esize = LaneSizeInBitsFromFormat(vform);
4223 int round_const = round ? (1 << (esize - 2)) : 0;
4224 int64_t accum;
4225
4226 dst.ClearForWrite(vform);
4227 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4228 accum = dst.Int(vform, i) << (esize - 1);
4229 if (sub_op) {
4230 accum -= src1.Int(vform, i) * src2.Int(vform, i);
4231 } else {
4232 accum += src1.Int(vform, i) * src2.Int(vform, i);
4233 }
4234 accum += round_const;
4235 accum = accum >> (esize - 1);
4236
4237 if (accum > MaxIntFromFormat(vform)) {
4238 accum = MaxIntFromFormat(vform);
4239 } else if (accum < MinIntFromFormat(vform)) {
4240 accum = MinIntFromFormat(vform);
4241 }
4242 dst.SetInt(vform, i, accum);
4243 }
4244 return dst;
4245 }
4246
4247
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4248 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4249 LogicVRegister dst,
4250 const LogicVRegister& src1,
4251 const LogicVRegister& src2,
4252 bool round) {
4253 return sqrdmlash(vform, dst, src1, src2, round, false);
4254 }
4255
4256
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4257 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4258 LogicVRegister dst,
4259 const LogicVRegister& src1,
4260 const LogicVRegister& src2,
4261 bool round) {
4262 return sqrdmlash(vform, dst, src1, src2, round, true);
4263 }
4264
4265
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4266 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4267 LogicVRegister dst,
4268 const LogicVRegister& src1,
4269 const LogicVRegister& src2) {
4270 return sqrdmulh(vform, dst, src1, src2, false);
4271 }
4272
4273
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4274 LogicVRegister Simulator::addhn(VectorFormat vform,
4275 LogicVRegister dst,
4276 const LogicVRegister& src1,
4277 const LogicVRegister& src2) {
4278 SimVRegister temp;
4279 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4280 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4281 return dst;
4282 }
4283
4284
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4285 LogicVRegister Simulator::addhn2(VectorFormat vform,
4286 LogicVRegister dst,
4287 const LogicVRegister& src1,
4288 const LogicVRegister& src2) {
4289 SimVRegister temp;
4290 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4291 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4292 return dst;
4293 }
4294
4295
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4296 LogicVRegister Simulator::raddhn(VectorFormat vform,
4297 LogicVRegister dst,
4298 const LogicVRegister& src1,
4299 const LogicVRegister& src2) {
4300 SimVRegister temp;
4301 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4302 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4303 return dst;
4304 }
4305
4306
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4307 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4308 LogicVRegister dst,
4309 const LogicVRegister& src1,
4310 const LogicVRegister& src2) {
4311 SimVRegister temp;
4312 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4313 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4314 return dst;
4315 }
4316
4317
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4318 LogicVRegister Simulator::subhn(VectorFormat vform,
4319 LogicVRegister dst,
4320 const LogicVRegister& src1,
4321 const LogicVRegister& src2) {
4322 SimVRegister temp;
4323 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4324 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4325 return dst;
4326 }
4327
4328
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4329 LogicVRegister Simulator::subhn2(VectorFormat vform,
4330 LogicVRegister dst,
4331 const LogicVRegister& src1,
4332 const LogicVRegister& src2) {
4333 SimVRegister temp;
4334 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4335 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4336 return dst;
4337 }
4338
4339
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4340 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4341 LogicVRegister dst,
4342 const LogicVRegister& src1,
4343 const LogicVRegister& src2) {
4344 SimVRegister temp;
4345 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4346 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4347 return dst;
4348 }
4349
4350
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4351 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4352 LogicVRegister dst,
4353 const LogicVRegister& src1,
4354 const LogicVRegister& src2) {
4355 SimVRegister temp;
4356 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4357 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4358 return dst;
4359 }
4360
4361
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4362 LogicVRegister Simulator::trn1(VectorFormat vform,
4363 LogicVRegister dst,
4364 const LogicVRegister& src1,
4365 const LogicVRegister& src2) {
4366 uint64_t result[kZRegMaxSizeInBytes] = {};
4367 int lane_count = LaneCountFromFormat(vform);
4368 int pairs = lane_count / 2;
4369 for (int i = 0; i < pairs; ++i) {
4370 result[2 * i] = src1.Uint(vform, 2 * i);
4371 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4372 }
4373
4374 dst.ClearForWrite(vform);
4375 for (int i = 0; i < lane_count; ++i) {
4376 dst.SetUint(vform, i, result[i]);
4377 }
4378 return dst;
4379 }
4380
4381
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4382 LogicVRegister Simulator::trn2(VectorFormat vform,
4383 LogicVRegister dst,
4384 const LogicVRegister& src1,
4385 const LogicVRegister& src2) {
4386 uint64_t result[kZRegMaxSizeInBytes] = {};
4387 int lane_count = LaneCountFromFormat(vform);
4388 int pairs = lane_count / 2;
4389 for (int i = 0; i < pairs; ++i) {
4390 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4391 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4392 }
4393
4394 dst.ClearForWrite(vform);
4395 for (int i = 0; i < lane_count; ++i) {
4396 dst.SetUint(vform, i, result[i]);
4397 }
4398 return dst;
4399 }
4400
4401
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4402 LogicVRegister Simulator::zip1(VectorFormat vform,
4403 LogicVRegister dst,
4404 const LogicVRegister& src1,
4405 const LogicVRegister& src2) {
4406 uint64_t result[kZRegMaxSizeInBytes] = {};
4407 int lane_count = LaneCountFromFormat(vform);
4408 int pairs = lane_count / 2;
4409 for (int i = 0; i < pairs; ++i) {
4410 result[2 * i] = src1.Uint(vform, i);
4411 result[(2 * i) + 1] = src2.Uint(vform, i);
4412 }
4413
4414 dst.ClearForWrite(vform);
4415 for (int i = 0; i < lane_count; ++i) {
4416 dst.SetUint(vform, i, result[i]);
4417 }
4418 return dst;
4419 }
4420
4421
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4422 LogicVRegister Simulator::zip2(VectorFormat vform,
4423 LogicVRegister dst,
4424 const LogicVRegister& src1,
4425 const LogicVRegister& src2) {
4426 uint64_t result[kZRegMaxSizeInBytes] = {};
4427 int lane_count = LaneCountFromFormat(vform);
4428 int pairs = lane_count / 2;
4429 for (int i = 0; i < pairs; ++i) {
4430 result[2 * i] = src1.Uint(vform, pairs + i);
4431 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4432 }
4433
4434 dst.ClearForWrite(vform);
4435 for (int i = 0; i < lane_count; ++i) {
4436 dst.SetUint(vform, i, result[i]);
4437 }
4438 return dst;
4439 }
4440
4441
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4442 LogicVRegister Simulator::uzp1(VectorFormat vform,
4443 LogicVRegister dst,
4444 const LogicVRegister& src1,
4445 const LogicVRegister& src2) {
4446 uint64_t result[kZRegMaxSizeInBytes * 2];
4447 int lane_count = LaneCountFromFormat(vform);
4448 for (int i = 0; i < lane_count; ++i) {
4449 result[i] = src1.Uint(vform, i);
4450 result[lane_count + i] = src2.Uint(vform, i);
4451 }
4452
4453 dst.ClearForWrite(vform);
4454 for (int i = 0; i < lane_count; ++i) {
4455 dst.SetUint(vform, i, result[2 * i]);
4456 }
4457 return dst;
4458 }
4459
4460
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4461 LogicVRegister Simulator::uzp2(VectorFormat vform,
4462 LogicVRegister dst,
4463 const LogicVRegister& src1,
4464 const LogicVRegister& src2) {
4465 uint64_t result[kZRegMaxSizeInBytes * 2];
4466 int lane_count = LaneCountFromFormat(vform);
4467 for (int i = 0; i < lane_count; ++i) {
4468 result[i] = src1.Uint(vform, i);
4469 result[lane_count + i] = src2.Uint(vform, i);
4470 }
4471
4472 dst.ClearForWrite(vform);
4473 for (int i = 0; i < lane_count; ++i) {
4474 dst.SetUint(vform, i, result[(2 * i) + 1]);
4475 }
4476 return dst;
4477 }
4478
interleave_top_bottom(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4479 LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
4480 LogicVRegister dst,
4481 const LogicVRegister& src) {
4482 // Interleave the top and bottom half of a vector, ie. for a vector:
4483 //
4484 // [ ... | F | D | B | ... | E | C | A ]
4485 //
4486 // where B is the first element in the top half of the vector, produce a
4487 // result vector:
4488 //
4489 // [ ... | ... | F | E | D | C | B | A ]
4490
4491 uint64_t result[kZRegMaxSizeInBytes] = {};
4492 int lane_count = LaneCountFromFormat(vform);
4493 for (int i = 0; i < lane_count; i += 2) {
4494 result[i] = src.Uint(vform, i / 2);
4495 result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
4496 }
4497 dst.SetUintArray(vform, result);
4498 return dst;
4499 }
4500
4501 template <typename T>
FPNeg(T op)4502 T Simulator::FPNeg(T op) {
4503 return -op;
4504 }
4505
4506 template <typename T>
FPAdd(T op1,T op2)4507 T Simulator::FPAdd(T op1, T op2) {
4508 T result = FPProcessNaNs(op1, op2);
4509 if (IsNaN(result)) {
4510 return result;
4511 }
4512
4513 if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4514 // inf + -inf returns the default NaN.
4515 FPProcessException();
4516 return FPDefaultNaN<T>();
4517 } else {
4518 // Other cases should be handled by standard arithmetic.
4519 return op1 + op2;
4520 }
4521 }
4522
4523
4524 template <typename T>
FPSub(T op1,T op2)4525 T Simulator::FPSub(T op1, T op2) {
4526 // NaNs should be handled elsewhere.
4527 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4528
4529 if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4530 // inf - inf returns the default NaN.
4531 FPProcessException();
4532 return FPDefaultNaN<T>();
4533 } else {
4534 // Other cases should be handled by standard arithmetic.
4535 return op1 - op2;
4536 }
4537 }
4538
4539 template <typename T>
FPMulNaNs(T op1,T op2)4540 T Simulator::FPMulNaNs(T op1, T op2) {
4541 T result = FPProcessNaNs(op1, op2);
4542 return IsNaN(result) ? result : FPMul(op1, op2);
4543 }
4544
4545 template <typename T>
FPMul(T op1,T op2)4546 T Simulator::FPMul(T op1, T op2) {
4547 // NaNs should be handled elsewhere.
4548 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4549
4550 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4551 // inf * 0.0 returns the default NaN.
4552 FPProcessException();
4553 return FPDefaultNaN<T>();
4554 } else {
4555 // Other cases should be handled by standard arithmetic.
4556 return op1 * op2;
4557 }
4558 }
4559
4560
4561 template <typename T>
FPMulx(T op1,T op2)4562 T Simulator::FPMulx(T op1, T op2) {
4563 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4564 // inf * 0.0 returns +/-2.0.
4565 T two = 2.0;
4566 return copysign(T(1.0), op1) * copysign(T(1.0), op2) * two;
4567 }
4568 return FPMul(op1, op2);
4569 }
4570
4571
4572 template <typename T>
FPMulAdd(T a,T op1,T op2)4573 T Simulator::FPMulAdd(T a, T op1, T op2) {
4574 T result = FPProcessNaNs3(a, op1, op2);
4575
4576 T sign_a = copysign(T(1.0), a);
4577 T sign_prod = copysign(T(1.0), op1) * copysign(T(1.0), op2);
4578 bool isinf_prod = IsInf(op1) || IsInf(op2);
4579 bool operation_generates_nan =
4580 (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0
4581 (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf
4582 (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
4583
4584 if (IsNaN(result)) {
4585 // Generated NaNs override quiet NaNs propagated from a.
4586 if (operation_generates_nan && IsQuietNaN(a)) {
4587 FPProcessException();
4588 return FPDefaultNaN<T>();
4589 } else {
4590 return result;
4591 }
4592 }
4593
4594 // If the operation would produce a NaN, return the default NaN.
4595 if (operation_generates_nan) {
4596 FPProcessException();
4597 return FPDefaultNaN<T>();
4598 }
4599
4600 // Work around broken fma implementations for exact zero results: The sign of
4601 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4602 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4603 return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? T(-0.0) : T(0.0);
4604 }
4605
4606 result = FusedMultiplyAdd(op1, op2, a);
4607 VIXL_ASSERT(!IsNaN(result));
4608
4609 // Work around broken fma implementations for rounded zero results: If a is
4610 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4611 if ((a == 0.0) && (result == 0.0)) {
4612 return copysign(T(0.0), sign_prod);
4613 }
4614
4615 return result;
4616 }
4617
4618
4619 template <typename T>
FPDiv(T op1,T op2)4620 T Simulator::FPDiv(T op1, T op2) {
4621 // NaNs should be handled elsewhere.
4622 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4623
4624 if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4625 // inf / inf and 0.0 / 0.0 return the default NaN.
4626 FPProcessException();
4627 return FPDefaultNaN<T>();
4628 } else {
4629 if (op2 == 0.0) {
4630 FPProcessException();
4631 if (!IsNaN(op1)) {
4632 double op1_sign = copysign(1.0, op1);
4633 double op2_sign = copysign(1.0, op2);
4634 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4635 }
4636 }
4637
4638 // Other cases should be handled by standard arithmetic.
4639 return op1 / op2;
4640 }
4641 }
4642
4643
4644 template <typename T>
FPSqrt(T op)4645 T Simulator::FPSqrt(T op) {
4646 if (IsNaN(op)) {
4647 return FPProcessNaN(op);
4648 } else if (op < T(0.0)) {
4649 FPProcessException();
4650 return FPDefaultNaN<T>();
4651 } else {
4652 return sqrt(op);
4653 }
4654 }
4655
4656
4657 template <typename T>
FPMax(T a,T b)4658 T Simulator::FPMax(T a, T b) {
4659 T result = FPProcessNaNs(a, b);
4660 if (IsNaN(result)) return result;
4661
4662 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4663 // a and b are zero, and the sign differs: return +0.0.
4664 return 0.0;
4665 } else {
4666 return (a > b) ? a : b;
4667 }
4668 }
4669
4670
4671 template <typename T>
FPMaxNM(T a,T b)4672 T Simulator::FPMaxNM(T a, T b) {
4673 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4674 a = T(kFP64NegativeInfinity);
4675 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4676 b = T(kFP64NegativeInfinity);
4677 }
4678
4679 T result = FPProcessNaNs(a, b);
4680 return IsNaN(result) ? result : FPMax(a, b);
4681 }
4682
4683
4684 template <typename T>
FPMin(T a,T b)4685 T Simulator::FPMin(T a, T b) {
4686 T result = FPProcessNaNs(a, b);
4687 if (IsNaN(result)) return result;
4688
4689 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4690 // a and b are zero, and the sign differs: return -0.0.
4691 return -0.0;
4692 } else {
4693 return (a < b) ? a : b;
4694 }
4695 }
4696
4697
4698 template <typename T>
FPMinNM(T a,T b)4699 T Simulator::FPMinNM(T a, T b) {
4700 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4701 a = T(kFP64PositiveInfinity);
4702 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4703 b = T(kFP64PositiveInfinity);
4704 }
4705
4706 T result = FPProcessNaNs(a, b);
4707 return IsNaN(result) ? result : FPMin(a, b);
4708 }
4709
4710
4711 template <typename T>
FPRecipStepFused(T op1,T op2)4712 T Simulator::FPRecipStepFused(T op1, T op2) {
4713 const T two = 2.0;
4714 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4715 return two;
4716 } else if (IsInf(op1) || IsInf(op2)) {
4717 // Return +inf if signs match, otherwise -inf.
4718 return ((op1 >= 0.0) == (op2 >= 0.0)) ? T(kFP64PositiveInfinity)
4719 : T(kFP64NegativeInfinity);
4720 } else {
4721 return FusedMultiplyAdd(op1, op2, two);
4722 }
4723 }
4724
// Returns true if `value` is a normal floating-point number, i.e. it is
// classified as neither zero, subnormal, infinite nor NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
4729
4730 template <>
IsNormal(SimFloat16 value)4731 bool IsNormal(SimFloat16 value) {
4732 uint16_t rawbits = Float16ToRawbits(value);
4733 uint16_t exp_mask = 0x7c00;
4734 // Check that the exponent is neither all zeroes or all ones.
4735 return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4736 }
4737
4738
4739 template <typename T>
FPRSqrtStepFused(T op1,T op2)4740 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4741 const T one_point_five = 1.5;
4742 const T two = 2.0;
4743
4744 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4745 return one_point_five;
4746 } else if (IsInf(op1) || IsInf(op2)) {
4747 // Return +inf if signs match, otherwise -inf.
4748 return ((op1 >= 0.0) == (op2 >= 0.0)) ? T(kFP64PositiveInfinity)
4749 : T(kFP64NegativeInfinity);
4750 } else {
4751 // The multiply-add-halve operation must be fully fused, so avoid interim
4752 // rounding by checking which operand can be losslessly divided by two
4753 // before doing the multiply-add.
4754 if (IsNormal(op1 / two)) {
4755 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4756 } else if (IsNormal(op2 / two)) {
4757 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4758 } else {
4759 // Neither operand is normal after halving: the result is dominated by
4760 // the addition term, so just return that.
4761 return one_point_five;
4762 }
4763 }
4764 }
4765
FPToFixedJS(double value)4766 int32_t Simulator::FPToFixedJS(double value) {
4767 // The Z-flag is set when the conversion from double precision floating-point
4768 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4769 // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4770 // Z-flag is unset.
4771 int Z = 1;
4772 int32_t result;
4773
4774 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4775 (value == kFP64NegativeInfinity)) {
4776 // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4777 // unset the Z-flag.
4778 result = 0;
4779 if ((value != 0.0) || std::signbit(value)) {
4780 Z = 0;
4781 }
4782 } else if (std::isnan(value)) {
4783 // NaN values unset the Z-flag and set the result to 0.
4784 FPProcessNaN(value);
4785 result = 0;
4786 Z = 0;
4787 } else {
4788 // All other values are converted to an integer representation, rounded
4789 // toward zero.
4790 double int_result = std::floor(value);
4791 double error = value - int_result;
4792
4793 if ((error != 0.0) && (int_result < 0.0)) {
4794 int_result++;
4795 }
4796
4797 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4798 // write a one-liner with std::round, but the behaviour on ties is incorrect
4799 // for our purposes.
4800 double mod_const = static_cast<double>(UINT64_C(1) << 32);
4801 double mod_error =
4802 (int_result / mod_const) - std::floor(int_result / mod_const);
4803 double constrained;
4804 if (mod_error == 0.5) {
4805 constrained = INT32_MIN;
4806 } else {
4807 constrained = int_result - mod_const * round(int_result / mod_const);
4808 }
4809
4810 VIXL_ASSERT(std::floor(constrained) == constrained);
4811 VIXL_ASSERT(constrained >= INT32_MIN);
4812 VIXL_ASSERT(constrained <= INT32_MAX);
4813
4814 // Take the bottom 32 bits of the result as a 32-bit integer.
4815 result = static_cast<int32_t>(constrained);
4816
4817 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4818 (error != 0.0)) {
4819 // If the integer result is out of range or the conversion isn't exact,
4820 // take exception and unset the Z-flag.
4821 FPProcessException();
4822 Z = 0;
4823 }
4824 }
4825
4826 ReadNzcv().SetN(0);
4827 ReadNzcv().SetZ(Z);
4828 ReadNzcv().SetC(0);
4829 ReadNzcv().SetV(0);
4830
4831 return result;
4832 }
4833
FPRoundIntCommon(double value,FPRounding round_mode)4834 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4835 VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4836 (value != kFP64NegativeInfinity));
4837 VIXL_ASSERT(!IsNaN(value));
4838
4839 double int_result = std::floor(value);
4840 double error = value - int_result;
4841 switch (round_mode) {
4842 case FPTieAway: {
4843 // Take care of correctly handling the range ]-0.5, -0.0], which must
4844 // yield -0.0.
4845 if ((-0.5 < value) && (value < 0.0)) {
4846 int_result = -0.0;
4847
4848 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4849 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4850 // result is positive, round up.
4851 int_result++;
4852 }
4853 break;
4854 }
4855 case FPTieEven: {
4856 // Take care of correctly handling the range [-0.5, -0.0], which must
4857 // yield -0.0.
4858 if ((-0.5 <= value) && (value < 0.0)) {
4859 int_result = -0.0;
4860
4861 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4862 // result is odd, round up.
4863 } else if ((error > 0.5) ||
4864 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4865 int_result++;
4866 }
4867 break;
4868 }
4869 case FPZero: {
4870 // If value>0 then we take floor(value)
4871 // otherwise, ceil(value).
4872 if (value < 0) {
4873 int_result = ceil(value);
4874 }
4875 break;
4876 }
4877 case FPNegativeInfinity: {
4878 // We always use floor(value).
4879 break;
4880 }
4881 case FPPositiveInfinity: {
4882 // Take care of correctly handling the range ]-1.0, -0.0], which must
4883 // yield -0.0.
4884 if ((-1.0 < value) && (value < 0.0)) {
4885 int_result = -0.0;
4886
4887 // If the error is non-zero, round up.
4888 } else if (error > 0.0) {
4889 int_result++;
4890 }
4891 break;
4892 }
4893 default:
4894 VIXL_UNIMPLEMENTED();
4895 }
4896 return int_result;
4897 }
4898
FPRoundInt(double value,FPRounding round_mode)4899 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4900 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4901 (value == kFP64NegativeInfinity)) {
4902 return value;
4903 } else if (IsNaN(value)) {
4904 return FPProcessNaN(value);
4905 }
4906 return FPRoundIntCommon(value, round_mode);
4907 }
4908
FPRoundInt(double value,FPRounding round_mode,FrintMode frint_mode)4909 double Simulator::FPRoundInt(double value,
4910 FPRounding round_mode,
4911 FrintMode frint_mode) {
4912 if (frint_mode == kFrintToInteger) {
4913 return FPRoundInt(value, round_mode);
4914 }
4915
4916 VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4917
4918 if (value == 0.0) {
4919 return value;
4920 }
4921
4922 if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4923 IsNaN(value)) {
4924 if (frint_mode == kFrintToInt32) {
4925 return INT32_MIN;
4926 } else {
4927 return INT64_MIN;
4928 }
4929 }
4930
4931 double result = FPRoundIntCommon(value, round_mode);
4932
4933 // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4934 // representable as a double, and is rounded to (INT64_MAX + 1) when
4935 // converted. To avoid this, we compare `result >= int64_max_plus_one`
4936 // instead; this is safe because `result` is known to be integral, and
4937 // `int64_max_plus_one` is exactly representable as a double.
4938 constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4939 VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4940 int64_max_plus_one)) == int64_max_plus_one);
4941
4942 if (frint_mode == kFrintToInt32) {
4943 if ((result > INT32_MAX) || (result < INT32_MIN)) {
4944 return INT32_MIN;
4945 }
4946 } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4947 return INT64_MIN;
4948 }
4949
4950 return result;
4951 }
4952
FPToInt16(double value,FPRounding rmode)4953 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4954 value = FPRoundInt(value, rmode);
4955 if (value >= kHMaxInt) {
4956 return kHMaxInt;
4957 } else if (value < kHMinInt) {
4958 return kHMinInt;
4959 }
4960 return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4961 }
4962
4963
FPToInt32(double value,FPRounding rmode)4964 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4965 value = FPRoundInt(value, rmode);
4966 if (value >= kWMaxInt) {
4967 return kWMaxInt;
4968 } else if (value < kWMinInt) {
4969 return kWMinInt;
4970 }
4971 return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4972 }
4973
4974
FPToInt64(double value,FPRounding rmode)4975 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4976 value = FPRoundInt(value, rmode);
4977 // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4978 // as a result of kMaxInt not being representable as a double.
4979 if (value >= 9223372036854775808.) {
4980 return kXMaxInt;
4981 } else if (value < kXMinInt) {
4982 return kXMinInt;
4983 }
4984 return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4985 }
4986
4987
FPToUInt16(double value,FPRounding rmode)4988 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4989 value = FPRoundInt(value, rmode);
4990 if (value >= kHMaxUInt) {
4991 return kHMaxUInt;
4992 } else if (value < 0.0) {
4993 return 0;
4994 }
4995 return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4996 }
4997
4998
FPToUInt32(double value,FPRounding rmode)4999 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
5000 value = FPRoundInt(value, rmode);
5001 if (value >= kWMaxUInt) {
5002 return kWMaxUInt;
5003 } else if (value < 0.0) {
5004 return 0;
5005 }
5006 return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
5007 }
5008
5009
FPToUInt64(double value,FPRounding rmode)5010 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
5011 value = FPRoundInt(value, rmode);
5012 // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
5013 // as a result of kMaxUInt not being representable as a double.
5014 if (value >= 18446744073709551616.) {
5015 return kXMaxUInt;
5016 } else if (value < 0.0) {
5017 return 0;
5018 }
5019 return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
5020 }
5021
5022
// For every (FN, OP, PROCNAN) entry of NEON_FP3SAME_LIST this defines two
// functions:
//  - a templated Simulator::FN<T> that applies OP lane by lane to src1 and
//    src2 and stores the results in dst. When PROCNAN is true the operands go
//    through FPProcessNaNs() first and OP is only applied if that did not
//    produce a NaN;
//  - a non-templated Simulator::FN that selects T (SimFloat16, float or
//    double) from the lane size of vform.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                      \
  template <typename T>                                                \
  LogicVRegister Simulator::FN(VectorFormat vform,                     \
                               LogicVRegister dst,                     \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    dst.ClearForWrite(vform);                                          \
    for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {    \
      T op1 = src1.Float<T>(lane);                                     \
      T op2 = src2.Float<T>(lane);                                     \
      T result = PROCNAN ? FPProcessNaNs(op1, op2) : OP(op1, op2);     \
      if (PROCNAN && !IsNaN(result)) {                                 \
        result = OP(op1, op2);                                         \
      }                                                                \
      dst.SetFloat(vform, lane, result);                               \
    }                                                                  \
    return dst;                                                        \
  }                                                                    \
                                                                       \
  LogicVRegister Simulator::FN(VectorFormat vform,                     \
                               LogicVRegister dst,                     \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {                \
      FN<SimFloat16>(vform, dst, src1, src2);                          \
      return dst;                                                      \
    }                                                                  \
    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {                \
      FN<float>(vform, dst, src1, src2);                               \
      return dst;                                                      \
    }                                                                  \
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);         \
    FN<double>(vform, dst, src1, src2);                                \
    return dst;                                                        \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
5063
5064
5065 LogicVRegister Simulator::fnmul(VectorFormat vform,
5066 LogicVRegister dst,
5067 const LogicVRegister& src1,
5068 const LogicVRegister& src2) {
5069 SimVRegister temp;
5070 LogicVRegister product = fmul(vform, temp, src1, src2);
5071 return fneg(vform, dst, product);
5072 }
5073
5074
5075 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5076 LogicVRegister Simulator::frecps(VectorFormat vform,
5077 LogicVRegister dst,
5078 const LogicVRegister& src1,
5079 const LogicVRegister& src2) {
5080 dst.ClearForWrite(vform);
5081 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5082 T op1 = -src1.Float<T>(i);
5083 T op2 = src2.Float<T>(i);
5084 T result = FPProcessNaNs(op1, op2);
5085 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5086 }
5087 return dst;
5088 }
5089
5090
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5091 LogicVRegister Simulator::frecps(VectorFormat vform,
5092 LogicVRegister dst,
5093 const LogicVRegister& src1,
5094 const LogicVRegister& src2) {
5095 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5096 frecps<SimFloat16>(vform, dst, src1, src2);
5097 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5098 frecps<float>(vform, dst, src1, src2);
5099 } else {
5100 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5101 frecps<double>(vform, dst, src1, src2);
5102 }
5103 return dst;
5104 }
5105
5106
5107 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5108 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5109 LogicVRegister dst,
5110 const LogicVRegister& src1,
5111 const LogicVRegister& src2) {
5112 dst.ClearForWrite(vform);
5113 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5114 T op1 = -src1.Float<T>(i);
5115 T op2 = src2.Float<T>(i);
5116 T result = FPProcessNaNs(op1, op2);
5117 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5118 }
5119 return dst;
5120 }
5121
5122
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5123 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5124 LogicVRegister dst,
5125 const LogicVRegister& src1,
5126 const LogicVRegister& src2) {
5127 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5128 frsqrts<SimFloat16>(vform, dst, src1, src2);
5129 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5130 frsqrts<float>(vform, dst, src1, src2);
5131 } else {
5132 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5133 frsqrts<double>(vform, dst, src1, src2);
5134 }
5135 return dst;
5136 }
5137
5138
5139 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5140 LogicVRegister Simulator::fcmp(VectorFormat vform,
5141 LogicVRegister dst,
5142 const LogicVRegister& src1,
5143 const LogicVRegister& src2,
5144 Condition cond) {
5145 dst.ClearForWrite(vform);
5146 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5147 bool result = false;
5148 T op1 = src1.Float<T>(i);
5149 T op2 = src2.Float<T>(i);
5150 bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5151
5152 switch (cond) {
5153 case eq:
5154 result = (op1 == op2);
5155 break;
5156 case ge:
5157 result = (op1 >= op2);
5158 break;
5159 case gt:
5160 result = (op1 > op2);
5161 break;
5162 case le:
5163 result = (op1 <= op2);
5164 break;
5165 case lt:
5166 result = (op1 < op2);
5167 break;
5168 case ne:
5169 result = (op1 != op2);
5170 break;
5171 case uo:
5172 result = unordered;
5173 break;
5174 default:
5175 // Other conditions are defined in terms of those above.
5176 VIXL_UNREACHABLE();
5177 break;
5178 }
5179
5180 if (result && unordered) {
5181 // Only `uo` and `ne` can be true for unordered comparisons.
5182 VIXL_ASSERT((cond == uo) || (cond == ne));
5183 }
5184
5185 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5186 }
5187 return dst;
5188 }
5189
5190
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5191 LogicVRegister Simulator::fcmp(VectorFormat vform,
5192 LogicVRegister dst,
5193 const LogicVRegister& src1,
5194 const LogicVRegister& src2,
5195 Condition cond) {
5196 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5197 fcmp<SimFloat16>(vform, dst, src1, src2, cond);
5198 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5199 fcmp<float>(vform, dst, src1, src2, cond);
5200 } else {
5201 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5202 fcmp<double>(vform, dst, src1, src2, cond);
5203 }
5204 return dst;
5205 }
5206
5207
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)5208 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
5209 LogicVRegister dst,
5210 const LogicVRegister& src,
5211 Condition cond) {
5212 SimVRegister temp;
5213 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5214 LogicVRegister zero_reg =
5215 dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
5216 fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
5217 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5218 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5219 fcmp<float>(vform, dst, src, zero_reg, cond);
5220 } else {
5221 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5222 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5223 fcmp<double>(vform, dst, src, zero_reg, cond);
5224 }
5225 return dst;
5226 }
5227
5228
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5229 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5230 LogicVRegister dst,
5231 const LogicVRegister& src1,
5232 const LogicVRegister& src2,
5233 Condition cond) {
5234 SimVRegister temp1, temp2;
5235 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5236 LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5237 LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5238 fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5239 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5240 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5241 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5242 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5243 } else {
5244 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5245 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5246 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5247 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5248 }
5249 return dst;
5250 }
5251
5252
5253 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5254 LogicVRegister Simulator::fmla(VectorFormat vform,
5255 LogicVRegister dst,
5256 const LogicVRegister& srca,
5257 const LogicVRegister& src1,
5258 const LogicVRegister& src2) {
5259 dst.ClearForWrite(vform);
5260 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5261 T op1 = src1.Float<T>(i);
5262 T op2 = src2.Float<T>(i);
5263 T acc = srca.Float<T>(i);
5264 T result = FPMulAdd(acc, op1, op2);
5265 dst.SetFloat(vform, i, result);
5266 }
5267 return dst;
5268 }
5269
5270
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5271 LogicVRegister Simulator::fmla(VectorFormat vform,
5272 LogicVRegister dst,
5273 const LogicVRegister& srca,
5274 const LogicVRegister& src1,
5275 const LogicVRegister& src2) {
5276 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5277 fmla<SimFloat16>(vform, dst, srca, src1, src2);
5278 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5279 fmla<float>(vform, dst, srca, src1, src2);
5280 } else {
5281 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5282 fmla<double>(vform, dst, srca, src1, src2);
5283 }
5284 return dst;
5285 }
5286
5287
5288 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5289 LogicVRegister Simulator::fmls(VectorFormat vform,
5290 LogicVRegister dst,
5291 const LogicVRegister& srca,
5292 const LogicVRegister& src1,
5293 const LogicVRegister& src2) {
5294 dst.ClearForWrite(vform);
5295 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5296 T op1 = -src1.Float<T>(i);
5297 T op2 = src2.Float<T>(i);
5298 T acc = srca.Float<T>(i);
5299 T result = FPMulAdd(acc, op1, op2);
5300 dst.SetFloat(i, result);
5301 }
5302 return dst;
5303 }
5304
5305
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5306 LogicVRegister Simulator::fmls(VectorFormat vform,
5307 LogicVRegister dst,
5308 const LogicVRegister& srca,
5309 const LogicVRegister& src1,
5310 const LogicVRegister& src2) {
5311 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5312 fmls<SimFloat16>(vform, dst, srca, src1, src2);
5313 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5314 fmls<float>(vform, dst, srca, src1, src2);
5315 } else {
5316 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5317 fmls<double>(vform, dst, srca, src1, src2);
5318 }
5319 return dst;
5320 }
5321
5322
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5323 LogicVRegister Simulator::fmlal(VectorFormat vform,
5324 LogicVRegister dst,
5325 const LogicVRegister& src1,
5326 const LogicVRegister& src2) {
5327 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5328 dst.ClearForWrite(vform);
5329 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5330 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5331 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5332 float acc = dst.Float<float>(i);
5333 float result = FPMulAdd(acc, op1, op2);
5334 dst.SetFloat(i, result);
5335 }
5336 return dst;
5337 }
5338
5339
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5340 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5341 LogicVRegister dst,
5342 const LogicVRegister& src1,
5343 const LogicVRegister& src2) {
5344 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5345 dst.ClearForWrite(vform);
5346 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5347 int src = i + LaneCountFromFormat(vform);
5348 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5349 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5350 float acc = dst.Float<float>(i);
5351 float result = FPMulAdd(acc, op1, op2);
5352 dst.SetFloat(i, result);
5353 }
5354 return dst;
5355 }
5356
5357
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5358 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5359 LogicVRegister dst,
5360 const LogicVRegister& src1,
5361 const LogicVRegister& src2) {
5362 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5363 dst.ClearForWrite(vform);
5364 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5365 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5366 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5367 float acc = dst.Float<float>(i);
5368 float result = FPMulAdd(acc, op1, op2);
5369 dst.SetFloat(i, result);
5370 }
5371 return dst;
5372 }
5373
5374
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5375 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5376 LogicVRegister dst,
5377 const LogicVRegister& src1,
5378 const LogicVRegister& src2) {
5379 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5380 dst.ClearForWrite(vform);
5381 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5382 int src = i + LaneCountFromFormat(vform);
5383 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5384 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5385 float acc = dst.Float<float>(i);
5386 float result = FPMulAdd(acc, op1, op2);
5387 dst.SetFloat(i, result);
5388 }
5389 return dst;
5390 }
5391
5392
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5393 LogicVRegister Simulator::fmlal(VectorFormat vform,
5394 LogicVRegister dst,
5395 const LogicVRegister& src1,
5396 const LogicVRegister& src2,
5397 int index) {
5398 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5399 dst.ClearForWrite(vform);
5400 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5401 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5402 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5403 float acc = dst.Float<float>(i);
5404 float result = FPMulAdd(acc, op1, op2);
5405 dst.SetFloat(i, result);
5406 }
5407 return dst;
5408 }
5409
5410
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5411 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5412 LogicVRegister dst,
5413 const LogicVRegister& src1,
5414 const LogicVRegister& src2,
5415 int index) {
5416 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5417 dst.ClearForWrite(vform);
5418 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5419 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5420 int src = i + LaneCountFromFormat(vform);
5421 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5422 float acc = dst.Float<float>(i);
5423 float result = FPMulAdd(acc, op1, op2);
5424 dst.SetFloat(i, result);
5425 }
5426 return dst;
5427 }
5428
5429
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5430 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5431 LogicVRegister dst,
5432 const LogicVRegister& src1,
5433 const LogicVRegister& src2,
5434 int index) {
5435 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5436 dst.ClearForWrite(vform);
5437 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5438 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5439 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5440 float acc = dst.Float<float>(i);
5441 float result = FPMulAdd(acc, op1, op2);
5442 dst.SetFloat(i, result);
5443 }
5444 return dst;
5445 }
5446
5447
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5448 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5449 LogicVRegister dst,
5450 const LogicVRegister& src1,
5451 const LogicVRegister& src2,
5452 int index) {
5453 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5454 dst.ClearForWrite(vform);
5455 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5456 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5457 int src = i + LaneCountFromFormat(vform);
5458 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5459 float acc = dst.Float<float>(i);
5460 float result = FPMulAdd(acc, op1, op2);
5461 dst.SetFloat(i, result);
5462 }
5463 return dst;
5464 }
5465
5466
5467 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5468 LogicVRegister Simulator::fneg(VectorFormat vform,
5469 LogicVRegister dst,
5470 const LogicVRegister& src) {
5471 dst.ClearForWrite(vform);
5472 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5473 T op = src.Float<T>(i);
5474 op = -op;
5475 dst.SetFloat(i, op);
5476 }
5477 return dst;
5478 }
5479
5480
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5481 LogicVRegister Simulator::fneg(VectorFormat vform,
5482 LogicVRegister dst,
5483 const LogicVRegister& src) {
5484 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5485 fneg<SimFloat16>(vform, dst, src);
5486 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5487 fneg<float>(vform, dst, src);
5488 } else {
5489 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5490 fneg<double>(vform, dst, src);
5491 }
5492 return dst;
5493 }
5494
5495
5496 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5497 LogicVRegister Simulator::fabs_(VectorFormat vform,
5498 LogicVRegister dst,
5499 const LogicVRegister& src) {
5500 dst.ClearForWrite(vform);
5501 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5502 T op = src.Float<T>(i);
5503 if (copysign(1.0, op) < 0.0) {
5504 op = -op;
5505 }
5506 dst.SetFloat(i, op);
5507 }
5508 return dst;
5509 }
5510
5511
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5512 LogicVRegister Simulator::fabs_(VectorFormat vform,
5513 LogicVRegister dst,
5514 const LogicVRegister& src) {
5515 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5516 fabs_<SimFloat16>(vform, dst, src);
5517 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5518 fabs_<float>(vform, dst, src);
5519 } else {
5520 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5521 fabs_<double>(vform, dst, src);
5522 }
5523 return dst;
5524 }
5525
5526
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5527 LogicVRegister Simulator::fabd(VectorFormat vform,
5528 LogicVRegister dst,
5529 const LogicVRegister& src1,
5530 const LogicVRegister& src2) {
5531 SimVRegister temp;
5532 fsub(vform, temp, src1, src2);
5533 fabs_(vform, dst, temp);
5534 return dst;
5535 }
5536
5537
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5538 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5539 LogicVRegister dst,
5540 const LogicVRegister& src) {
5541 dst.ClearForWrite(vform);
5542 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5543 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5544 SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5545 dst.SetFloat(i, result);
5546 }
5547 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5548 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5549 float result = FPSqrt(src.Float<float>(i));
5550 dst.SetFloat(i, result);
5551 }
5552 } else {
5553 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5554 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5555 double result = FPSqrt(src.Float<double>(i));
5556 dst.SetFloat(i, result);
5557 }
5558 }
5559 return dst;
5560 }
5561
5562
// Expands to the two overloads of every pairwise floating-point operation
// listed in NEON_FPPAIRWISE_LIST.
//
// FNP(vform, dst, src1, src2): splits src1 and src2 with uzp1/uzp2 into two
// scratch registers and applies the element-wise operation FN to them. For
// SVE formats the result is additionally passed through
// interleave_top_bottom().
//
// FNP(vform, dst, src): scalar form. Applies OP to lanes 0 and 1 of src and
// stores the result in lane 0 of dst. ClearForWrite() is called last, after
// both source lanes have been read and the result written.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
  LogicVRegister Simulator::FNP(VectorFormat vform, \
                                LogicVRegister dst, \
                                const LogicVRegister& src1, \
                                const LogicVRegister& src2) { \
    SimVRegister lhs_lanes, rhs_lanes; \
    uzp1(vform, lhs_lanes, src1, src2); \
    uzp2(vform, rhs_lanes, src1, src2); \
    FN(vform, dst, lhs_lanes, rhs_lanes); \
    if (IsSVEFormat(vform)) { \
      interleave_top_bottom(vform, dst, dst); \
    } \
    return dst; \
  } \
  \
  LogicVRegister Simulator::FNP(VectorFormat vform, \
                                LogicVRegister dst, \
                                const LogicVRegister& src) { \
    if (vform == kFormatH) { \
      SimFloat16 lhs( \
          RawbitsToFloat16(static_cast<uint16_t>(src.Uint(vform, 0)))); \
      SimFloat16 rhs( \
          RawbitsToFloat16(static_cast<uint16_t>(src.Uint(vform, 1)))); \
      SimFloat16 result(OP(lhs, rhs)); \
      dst.SetUint(vform, 0, Float16ToRawbits(result)); \
    } else if (vform == kFormatS) { \
      float lhs = src.Float<float>(0); \
      float rhs = src.Float<float>(1); \
      float result = OP(lhs, rhs); \
      dst.SetFloat(0, result); \
    } else { \
      VIXL_ASSERT(vform == kFormatD); \
      double lhs = src.Float<double>(0); \
      double rhs = src.Float<double>(1); \
      double result = OP(lhs, rhs); \
      dst.SetFloat(0, result); \
    } \
    dst.ClearForWrite(vform); \
    return dst; \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5600
5601 template <typename T>
5602 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5603 LogicVRegister dst,
5604 const LogicVRegister& src,
5605 typename TFPPairOp<T>::type fn,
5606 uint64_t inactive_value) {
5607 int lane_count = LaneCountFromFormat(vform);
5608 T result[kZRegMaxSizeInBytes / sizeof(T)];
5609 // Copy the source vector into a working array. Initialise the unused elements
5610 // at the end of the array to the same value that a false predicate would set.
5611 for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5612 result[i] = (i < lane_count)
5613 ? src.Float<T>(i)
5614 : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5615 }
5616
5617 // Pairwise reduce the elements to a single value, using the pair op function
5618 // argument.
5619 for (int step = 1; step < lane_count; step *= 2) {
5620 for (int i = 0; i < lane_count; i += step * 2) {
5621 result[i] = (this->*fn)(result[i], result[i + step]);
5622 }
5623 }
5624 dst.ClearForWrite(ScalarFormatFromFormat(vform));
5625 dst.SetFloat<T>(0, result[0]);
5626 return dst;
5627 }
5628
FPPairedAcrossHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,typename TFPPairOp<SimFloat16>::type fn16,typename TFPPairOp<float>::type fn32,typename TFPPairOp<double>::type fn64,uint64_t inactive_value)5629 LogicVRegister Simulator::FPPairedAcrossHelper(
5630 VectorFormat vform,
5631 LogicVRegister dst,
5632 const LogicVRegister& src,
5633 typename TFPPairOp<SimFloat16>::type fn16,
5634 typename TFPPairOp<float>::type fn32,
5635 typename TFPPairOp<double>::type fn64,
5636 uint64_t inactive_value) {
5637 switch (LaneSizeInBitsFromFormat(vform)) {
5638 case kHRegSize:
5639 return FPPairedAcrossHelper<SimFloat16>(vform,
5640 dst,
5641 src,
5642 fn16,
5643 inactive_value);
5644 case kSRegSize:
5645 return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5646 default:
5647 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5648 return FPPairedAcrossHelper<double>(vform,
5649 dst,
5650 src,
5651 fn64,
5652 inactive_value);
5653 }
5654 }
5655
faddv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5656 LogicVRegister Simulator::faddv(VectorFormat vform,
5657 LogicVRegister dst,
5658 const LogicVRegister& src) {
5659 return FPPairedAcrossHelper(vform,
5660 dst,
5661 src,
5662 &Simulator::FPAdd<SimFloat16>,
5663 &Simulator::FPAdd<float>,
5664 &Simulator::FPAdd<double>,
5665 0);
5666 }
5667
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5668 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5669 LogicVRegister dst,
5670 const LogicVRegister& src) {
5671 int lane_size = LaneSizeInBitsFromFormat(vform);
5672 uint64_t inactive_value =
5673 FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5674 return FPPairedAcrossHelper(vform,
5675 dst,
5676 src,
5677 &Simulator::FPMax<SimFloat16>,
5678 &Simulator::FPMax<float>,
5679 &Simulator::FPMax<double>,
5680 inactive_value);
5681 }
5682
5683
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5684 LogicVRegister Simulator::fminv(VectorFormat vform,
5685 LogicVRegister dst,
5686 const LogicVRegister& src) {
5687 int lane_size = LaneSizeInBitsFromFormat(vform);
5688 uint64_t inactive_value =
5689 FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5690 return FPPairedAcrossHelper(vform,
5691 dst,
5692 src,
5693 &Simulator::FPMin<SimFloat16>,
5694 &Simulator::FPMin<float>,
5695 &Simulator::FPMin<double>,
5696 inactive_value);
5697 }
5698
5699
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5700 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5701 LogicVRegister dst,
5702 const LogicVRegister& src) {
5703 int lane_size = LaneSizeInBitsFromFormat(vform);
5704 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5705 return FPPairedAcrossHelper(vform,
5706 dst,
5707 src,
5708 &Simulator::FPMaxNM<SimFloat16>,
5709 &Simulator::FPMaxNM<float>,
5710 &Simulator::FPMaxNM<double>,
5711 inactive_value);
5712 }
5713
5714
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5715 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5716 LogicVRegister dst,
5717 const LogicVRegister& src) {
5718 int lane_size = LaneSizeInBitsFromFormat(vform);
5719 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5720 return FPPairedAcrossHelper(vform,
5721 dst,
5722 src,
5723 &Simulator::FPMinNM<SimFloat16>,
5724 &Simulator::FPMinNM<float>,
5725 &Simulator::FPMinNM<double>,
5726 inactive_value);
5727 }
5728
5729
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5730 LogicVRegister Simulator::fmul(VectorFormat vform,
5731 LogicVRegister dst,
5732 const LogicVRegister& src1,
5733 const LogicVRegister& src2,
5734 int index) {
5735 dst.ClearForWrite(vform);
5736 SimVRegister temp;
5737 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5738 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5739 fmul<SimFloat16>(vform, dst, src1, index_reg);
5740 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5741 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5742 fmul<float>(vform, dst, src1, index_reg);
5743 } else {
5744 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5745 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5746 fmul<double>(vform, dst, src1, index_reg);
5747 }
5748 return dst;
5749 }
5750
5751
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5752 LogicVRegister Simulator::fmla(VectorFormat vform,
5753 LogicVRegister dst,
5754 const LogicVRegister& src1,
5755 const LogicVRegister& src2,
5756 int index) {
5757 dst.ClearForWrite(vform);
5758 SimVRegister temp;
5759 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5760 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5761 fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5762 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5763 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5764 fmla<float>(vform, dst, dst, src1, index_reg);
5765 } else {
5766 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5767 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5768 fmla<double>(vform, dst, dst, src1, index_reg);
5769 }
5770 return dst;
5771 }
5772
5773
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5774 LogicVRegister Simulator::fmls(VectorFormat vform,
5775 LogicVRegister dst,
5776 const LogicVRegister& src1,
5777 const LogicVRegister& src2,
5778 int index) {
5779 dst.ClearForWrite(vform);
5780 SimVRegister temp;
5781 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5782 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5783 fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5784 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5785 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5786 fmls<float>(vform, dst, dst, src1, index_reg);
5787 } else {
5788 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5789 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5790 fmls<double>(vform, dst, dst, src1, index_reg);
5791 }
5792 return dst;
5793 }
5794
5795
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5796 LogicVRegister Simulator::fmulx(VectorFormat vform,
5797 LogicVRegister dst,
5798 const LogicVRegister& src1,
5799 const LogicVRegister& src2,
5800 int index) {
5801 dst.ClearForWrite(vform);
5802 SimVRegister temp;
5803 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5804 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5805 fmulx<SimFloat16>(vform, dst, src1, index_reg);
5806 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5807 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5808 fmulx<float>(vform, dst, src1, index_reg);
5809 } else {
5810 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5811 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5812 fmulx<double>(vform, dst, src1, index_reg);
5813 }
5814 return dst;
5815 }
5816
5817
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception,FrintMode frint_mode)5818 LogicVRegister Simulator::frint(VectorFormat vform,
5819 LogicVRegister dst,
5820 const LogicVRegister& src,
5821 FPRounding rounding_mode,
5822 bool inexact_exception,
5823 FrintMode frint_mode) {
5824 dst.ClearForWrite(vform);
5825 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5826 VIXL_ASSERT(frint_mode == kFrintToInteger);
5827 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5828 SimFloat16 input = src.Float<SimFloat16>(i);
5829 SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5830 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5831 FPProcessException();
5832 }
5833 dst.SetFloat<SimFloat16>(i, rounded);
5834 }
5835 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5836 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5837 float input = src.Float<float>(i);
5838 float rounded =
5839 static_cast<float>(FPRoundInt(input, rounding_mode, frint_mode));
5840
5841 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5842 FPProcessException();
5843 }
5844 dst.SetFloat<float>(i, rounded);
5845 }
5846 } else {
5847 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5848 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5849 double input = src.Float<double>(i);
5850 double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5851 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5852 FPProcessException();
5853 }
5854 dst.SetFloat<double>(i, rounded);
5855 }
5856 }
5857 return dst;
5858 }
5859
fcvt(VectorFormat dst_vform,VectorFormat src_vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)5860 LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
5861 VectorFormat src_vform,
5862 LogicVRegister dst,
5863 const LogicPRegister& pg,
5864 const LogicVRegister& src) {
5865 unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
5866 unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
5867 VectorFormat vform = SVEFormatFromLaneSizeInBits(
5868 std::max(dst_data_size_in_bits, src_data_size_in_bits));
5869
5870 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5871 if (!pg.IsActive(vform, i)) continue;
5872
5873 uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5874 0,
5875 src.Uint(vform, i));
5876 double dst_value =
5877 RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5878
5879 uint64_t dst_raw_bits =
5880 FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5881
5882 dst.SetUint(vform, i, dst_raw_bits);
5883 }
5884
5885 return dst;
5886 }
5887
fcvts(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5888 LogicVRegister Simulator::fcvts(VectorFormat vform,
5889 unsigned dst_data_size_in_bits,
5890 unsigned src_data_size_in_bits,
5891 LogicVRegister dst,
5892 const LogicPRegister& pg,
5893 const LogicVRegister& src,
5894 FPRounding round,
5895 int fbits) {
5896 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5897 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5898
5899 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5900 if (!pg.IsActive(vform, i)) continue;
5901
5902 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5903 0,
5904 src.Uint(vform, i));
5905 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5906 std::pow(2.0, fbits);
5907
5908 switch (dst_data_size_in_bits) {
5909 case kHRegSize:
5910 dst.SetInt(vform, i, FPToInt16(result, round));
5911 break;
5912 case kSRegSize:
5913 dst.SetInt(vform, i, FPToInt32(result, round));
5914 break;
5915 case kDRegSize:
5916 dst.SetInt(vform, i, FPToInt64(result, round));
5917 break;
5918 default:
5919 VIXL_UNIMPLEMENTED();
5920 break;
5921 }
5922 }
5923
5924 return dst;
5925 }
5926
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5927 LogicVRegister Simulator::fcvts(VectorFormat vform,
5928 LogicVRegister dst,
5929 const LogicVRegister& src,
5930 FPRounding round,
5931 int fbits) {
5932 dst.ClearForWrite(vform);
5933 return fcvts(vform,
5934 LaneSizeInBitsFromFormat(vform),
5935 LaneSizeInBitsFromFormat(vform),
5936 dst,
5937 GetPTrue(),
5938 src,
5939 round,
5940 fbits);
5941 }
5942
fcvtu(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5943 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5944 unsigned dst_data_size_in_bits,
5945 unsigned src_data_size_in_bits,
5946 LogicVRegister dst,
5947 const LogicPRegister& pg,
5948 const LogicVRegister& src,
5949 FPRounding round,
5950 int fbits) {
5951 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5952 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5953
5954 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5955 if (!pg.IsActive(vform, i)) continue;
5956
5957 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5958 0,
5959 src.Uint(vform, i));
5960 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5961 std::pow(2.0, fbits);
5962
5963 switch (dst_data_size_in_bits) {
5964 case kHRegSize:
5965 dst.SetUint(vform, i, FPToUInt16(result, round));
5966 break;
5967 case kSRegSize:
5968 dst.SetUint(vform, i, FPToUInt32(result, round));
5969 break;
5970 case kDRegSize:
5971 dst.SetUint(vform, i, FPToUInt64(result, round));
5972 break;
5973 default:
5974 VIXL_UNIMPLEMENTED();
5975 break;
5976 }
5977 }
5978
5979 return dst;
5980 }
5981
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5982 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5983 LogicVRegister dst,
5984 const LogicVRegister& src,
5985 FPRounding round,
5986 int fbits) {
5987 dst.ClearForWrite(vform);
5988 return fcvtu(vform,
5989 LaneSizeInBitsFromFormat(vform),
5990 LaneSizeInBitsFromFormat(vform),
5991 dst,
5992 GetPTrue(),
5993 src,
5994 round,
5995 fbits);
5996 }
5997
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5998 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5999 LogicVRegister dst,
6000 const LogicVRegister& src) {
6001 dst.ClearForWrite(vform);
6002 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6003 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
6004 // TODO: Full support for SimFloat16 in SimRegister(s).
6005 dst.SetFloat(i,
6006 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
6007 ReadDN()));
6008 }
6009 } else {
6010 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6011 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
6012 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
6013 }
6014 }
6015 return dst;
6016 }
6017
6018
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6019 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
6020 LogicVRegister dst,
6021 const LogicVRegister& src) {
6022 dst.ClearForWrite(vform);
6023 int lane_count = LaneCountFromFormat(vform);
6024 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6025 for (int i = 0; i < lane_count; i++) {
6026 // TODO: Full support for SimFloat16 in SimRegister(s).
6027 dst.SetFloat(i,
6028 FPToFloat(RawbitsToFloat16(
6029 src.Float<uint16_t>(i + lane_count)),
6030 ReadDN()));
6031 }
6032 } else {
6033 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6034 for (int i = 0; i < lane_count; i++) {
6035 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
6036 }
6037 }
6038 return dst;
6039 }
6040
6041
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6042 LogicVRegister Simulator::fcvtn(VectorFormat vform,
6043 LogicVRegister dst,
6044 const LogicVRegister& src) {
6045 SimVRegister tmp;
6046 LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6047 dst.ClearForWrite(vform);
6048 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6049 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6050 dst.SetFloat(i,
6051 Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
6052 FPTieEven,
6053 ReadDN())));
6054 }
6055 } else {
6056 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6057 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6058 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
6059 }
6060 }
6061 return dst;
6062 }
6063
6064
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6065 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
6066 LogicVRegister dst,
6067 const LogicVRegister& src) {
6068 dst.ClearForWrite(vform);
6069 int lane_count = LaneCountFromFormat(vform) / 2;
6070 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6071 for (int i = lane_count - 1; i >= 0; i--) {
6072 dst.SetFloat(i + lane_count,
6073 Float16ToRawbits(
6074 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
6075 }
6076 } else {
6077 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6078 for (int i = lane_count - 1; i >= 0; i--) {
6079 dst.SetFloat(i + lane_count,
6080 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
6081 }
6082 }
6083 return dst;
6084 }
6085
6086
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6087 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
6088 LogicVRegister dst,
6089 const LogicVRegister& src) {
6090 SimVRegister tmp;
6091 LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6092 int input_lane_count = LaneCountFromFormat(vform);
6093 if (IsSVEFormat(vform)) {
6094 mov(kFormatVnB, tmp, src);
6095 input_lane_count /= 2;
6096 }
6097
6098 dst.ClearForWrite(vform);
6099 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6100
6101 for (int i = 0; i < input_lane_count; i++) {
6102 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
6103 }
6104 return dst;
6105 }
6106
6107
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6108 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
6109 LogicVRegister dst,
6110 const LogicVRegister& src) {
6111 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6112 dst.ClearForWrite(vform);
6113 int lane_count = LaneCountFromFormat(vform) / 2;
6114 for (int i = lane_count - 1; i >= 0; i--) {
6115 dst.SetFloat(i + lane_count,
6116 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
6117 }
6118 return dst;
6119 }
6120
6121
6122 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)6123 double Simulator::recip_sqrt_estimate(double a) {
6124 int quot0, quot1, s;
6125 double r;
6126 if (a < 0.5) {
6127 quot0 = static_cast<int>(a * 512.0);
6128 r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6129 } else {
6130 quot1 = static_cast<int>(a * 256.0);
6131 r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6132 }
6133 s = static_cast<int>(256.0 * r + 0.5);
6134 return static_cast<double>(s) / 256.0;
6135 }
6136
6137
Bits(uint64_t val,int start_bit,int end_bit)6138 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6139 return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6140 }
6141
6142
6143 template <typename T>
FPRecipSqrtEstimate(T op)6144 T Simulator::FPRecipSqrtEstimate(T op) {
6145 if (IsNaN(op)) {
6146 return FPProcessNaN(op);
6147 } else if (op == 0.0) {
6148 if (copysign(1.0, op) < 0.0) {
6149 return T(kFP64NegativeInfinity);
6150 } else {
6151 return T(kFP64PositiveInfinity);
6152 }
6153 } else if (copysign(1.0, op) < 0.0) {
6154 FPProcessException();
6155 return FPDefaultNaN<T>();
6156 } else if (IsInf(op)) {
6157 return 0.0;
6158 } else {
6159 uint64_t fraction;
6160 int exp, result_exp;
6161
6162 if constexpr (IsFloat16<T>()) {
6163 exp = Float16Exp(op);
6164 fraction = Float16Mantissa(op);
6165 fraction <<= 42;
6166 } else if constexpr (IsFloat32<T>()) {
6167 exp = FloatExp(op);
6168 fraction = FloatMantissa(op);
6169 fraction <<= 29;
6170 } else {
6171 VIXL_ASSERT(IsFloat64<T>());
6172 exp = DoubleExp(op);
6173 fraction = DoubleMantissa(op);
6174 }
6175
6176 if (exp == 0) {
6177 while (Bits(fraction, 51, 51) == 0) {
6178 fraction = Bits(fraction, 50, 0) << 1;
6179 exp -= 1;
6180 }
6181 fraction = Bits(fraction, 50, 0) << 1;
6182 }
6183
6184 double scaled;
6185 if (Bits(exp, 0, 0) == 0) {
6186 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6187 } else {
6188 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
6189 }
6190
6191 if constexpr (IsFloat16<T>()) {
6192 result_exp = (44 - exp) / 2;
6193 } else if constexpr (IsFloat32<T>()) {
6194 result_exp = (380 - exp) / 2;
6195 } else {
6196 VIXL_ASSERT(IsFloat64<T>());
6197 result_exp = (3068 - exp) / 2;
6198 }
6199
6200 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
6201
6202 if constexpr (IsFloat16<T>()) {
6203 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6204 uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
6205 return Float16Pack(0, exp_bits, est_bits);
6206 } else if constexpr (IsFloat32<T>()) {
6207 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6208 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
6209 return FloatPack(0, exp_bits, est_bits);
6210 } else {
6211 VIXL_ASSERT(IsFloat64<T>());
6212 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
6213 }
6214 }
6215 }
6216
6217
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6218 LogicVRegister Simulator::frsqrte(VectorFormat vform,
6219 LogicVRegister dst,
6220 const LogicVRegister& src) {
6221 dst.ClearForWrite(vform);
6222 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6223 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6224 SimFloat16 input = src.Float<SimFloat16>(i);
6225 dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
6226 }
6227 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6228 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6229 float input = src.Float<float>(i);
6230 dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
6231 }
6232 } else {
6233 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6234 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6235 double input = src.Float<double>(i);
6236 dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
6237 }
6238 }
6239 return dst;
6240 }
6241
6242 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)6243 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6244 uint32_t sign;
6245
6246 if constexpr (IsFloat16<T>()) {
6247 sign = Float16Sign(op);
6248 } else if constexpr (IsFloat32<T>()) {
6249 sign = FloatSign(op);
6250 } else {
6251 VIXL_ASSERT(IsFloat64<T>());
6252 sign = DoubleSign(op);
6253 }
6254
6255 if (IsNaN(op)) {
6256 return FPProcessNaN(op);
6257 } else if (IsInf(op)) {
6258 return (sign == 1) ? T(-0.0) : T(0.0);
6259 } else if (op == 0.0) {
6260 FPProcessException(); // FPExc_DivideByZero exception.
6261 return (sign == 1) ? T(kFP64NegativeInfinity) : T(kFP64PositiveInfinity);
6262 } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6263 (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6264 (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6265 bool overflow_to_inf = false;
6266 switch (rounding) {
6267 case FPTieEven:
6268 overflow_to_inf = true;
6269 break;
6270 case FPPositiveInfinity:
6271 overflow_to_inf = (sign == 0);
6272 break;
6273 case FPNegativeInfinity:
6274 overflow_to_inf = (sign == 1);
6275 break;
6276 case FPZero:
6277 overflow_to_inf = false;
6278 break;
6279 default:
6280 break;
6281 }
6282 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
6283 if (overflow_to_inf) {
6284 return (sign == 1) ? T(kFP64NegativeInfinity) : T(kFP64PositiveInfinity);
6285 } else {
6286 // Return FPMaxNormal(sign).
6287 if constexpr (IsFloat16<T>()) {
6288 return Float16Pack(sign, 0x1f, 0x3ff);
6289 } else if constexpr (IsFloat32<T>()) {
6290 return FloatPack(sign, 0xfe, 0x07fffff);
6291 } else {
6292 VIXL_ASSERT(IsFloat64<T>());
6293 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6294 }
6295 }
6296 } else {
6297 uint64_t fraction;
6298 int exp, result_exp;
6299
6300 if constexpr (IsFloat16<T>()) {
6301 sign = Float16Sign(op);
6302 exp = Float16Exp(op);
6303 fraction = Float16Mantissa(op);
6304 fraction <<= 42;
6305 } else if constexpr (IsFloat32<T>()) {
6306 sign = FloatSign(op);
6307 exp = FloatExp(op);
6308 fraction = FloatMantissa(op);
6309 fraction <<= 29;
6310 } else {
6311 VIXL_ASSERT(IsFloat64<T>());
6312 sign = DoubleSign(op);
6313 exp = DoubleExp(op);
6314 fraction = DoubleMantissa(op);
6315 }
6316
6317 if (exp == 0) {
6318 if (Bits(fraction, 51, 51) == 0) {
6319 exp -= 1;
6320 fraction = Bits(fraction, 49, 0) << 2;
6321 } else {
6322 fraction = Bits(fraction, 50, 0) << 1;
6323 }
6324 }
6325
6326 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6327
6328 if constexpr (IsFloat16<T>()) {
6329 result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
6330 } else if constexpr (IsFloat32<T>()) {
6331 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
6332 } else {
6333 VIXL_ASSERT(IsFloat64<T>());
6334 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
6335 }
6336
6337 double estimate = recip_estimate(scaled);
6338
6339 fraction = DoubleMantissa(estimate);
6340 if (result_exp == 0) {
6341 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6342 } else if (result_exp == -1) {
6343 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6344 result_exp = 0;
6345 }
6346 if constexpr (IsFloat16<T>()) {
6347 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6348 uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6349 return Float16Pack(sign, exp_bits, frac_bits);
6350 } else if constexpr (IsFloat32<T>()) {
6351 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6352 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6353 return FloatPack(sign, exp_bits, frac_bits);
6354 } else {
6355 VIXL_ASSERT(IsFloat64<T>());
6356 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6357 }
6358 }
6359 }
6360
6361
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)6362 LogicVRegister Simulator::frecpe(VectorFormat vform,
6363 LogicVRegister dst,
6364 const LogicVRegister& src,
6365 FPRounding round) {
6366 dst.ClearForWrite(vform);
6367 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6368 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6369 SimFloat16 input = src.Float<SimFloat16>(i);
6370 dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6371 }
6372 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6373 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6374 float input = src.Float<float>(i);
6375 dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6376 }
6377 } else {
6378 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6379 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6380 double input = src.Float<double>(i);
6381 dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6382 }
6383 }
6384 return dst;
6385 }
6386
6387
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6388 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6389 LogicVRegister dst,
6390 const LogicVRegister& src) {
6391 dst.ClearForWrite(vform);
6392 uint64_t operand;
6393 uint32_t result;
6394 double dp_operand, dp_result;
6395 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6396 operand = src.Uint(vform, i);
6397 if (operand <= 0x3FFFFFFF) {
6398 result = 0xFFFFFFFF;
6399 } else {
6400 dp_operand = operand * std::pow(2.0, -32);
6401 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6402 result = static_cast<uint32_t>(dp_result);
6403 }
6404 dst.SetUint(vform, i, result);
6405 }
6406 return dst;
6407 }
6408
6409
6410 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)6411 double Simulator::recip_estimate(double a) {
6412 int q, s;
6413 double r;
6414 q = static_cast<int>(a * 512.0);
6415 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6416 s = static_cast<int>(256.0 * r + 0.5);
6417 return static_cast<double>(s) / 256.0;
6418 }
6419
6420
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6421 LogicVRegister Simulator::urecpe(VectorFormat vform,
6422 LogicVRegister dst,
6423 const LogicVRegister& src) {
6424 dst.ClearForWrite(vform);
6425 uint64_t operand;
6426 uint32_t result;
6427 double dp_operand, dp_result;
6428 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6429 operand = src.Uint(vform, i);
6430 if (operand <= 0x7FFFFFFF) {
6431 result = 0xFFFFFFFF;
6432 } else {
6433 dp_operand = operand * std::pow(2.0, -32);
6434 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6435 result = static_cast<uint32_t>(dp_result);
6436 }
6437 dst.SetUint(vform, i, result);
6438 }
6439 return dst;
6440 }
6441
pfalse(LogicPRegister dst)6442 LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6443 dst.Clear();
6444 return dst;
6445 }
6446
pfirst(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6447 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6448 const LogicPRegister& pg,
6449 const LogicPRegister& src) {
6450 int first_pg = GetFirstActive(kFormatVnB, pg);
6451 VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6452 mov(dst, src);
6453 if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6454 return dst;
6455 }
6456
ptrue(VectorFormat vform,LogicPRegister dst,int pattern)6457 LogicPRegister Simulator::ptrue(VectorFormat vform,
6458 LogicPRegister dst,
6459 int pattern) {
6460 int count = GetPredicateConstraintLaneCount(vform, pattern);
6461 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6462 dst.SetActive(vform, i, i < count);
6463 }
6464 return dst;
6465 }
6466
pnext(VectorFormat vform,LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6467 LogicPRegister Simulator::pnext(VectorFormat vform,
6468 LogicPRegister dst,
6469 const LogicPRegister& pg,
6470 const LogicPRegister& src) {
6471 int next = GetLastActive(vform, src) + 1;
6472 while (next < LaneCountFromFormat(vform)) {
6473 if (pg.IsActive(vform, next)) break;
6474 next++;
6475 }
6476
6477 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6478 dst.SetActive(vform, i, (i == next));
6479 }
6480 return dst;
6481 }
6482
6483 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6484 LogicVRegister Simulator::frecpx(VectorFormat vform,
6485 LogicVRegister dst,
6486 const LogicVRegister& src) {
6487 dst.ClearForWrite(vform);
6488 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6489 T op = src.Float<T>(i);
6490 T result;
6491 if (IsNaN(op)) {
6492 result = FPProcessNaN(op);
6493 } else {
6494 int exp;
6495 uint32_t sign;
6496 if constexpr (IsFloat16<T>()) {
6497 sign = Float16Sign(op);
6498 exp = Float16Exp(op);
6499 exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6500 result = Float16Pack(sign, exp, 0);
6501 } else if constexpr (IsFloat32<T>()) {
6502 sign = FloatSign(op);
6503 exp = FloatExp(op);
6504 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6505 result = FloatPack(sign, exp, 0);
6506 } else {
6507 VIXL_ASSERT(IsFloat64<T>());
6508 sign = DoubleSign(op);
6509 exp = DoubleExp(op);
6510 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6511 result = DoublePack(sign, exp, 0);
6512 }
6513 }
6514 dst.SetFloat(i, result);
6515 }
6516 return dst;
6517 }
6518
6519
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6520 LogicVRegister Simulator::frecpx(VectorFormat vform,
6521 LogicVRegister dst,
6522 const LogicVRegister& src) {
6523 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6524 frecpx<SimFloat16>(vform, dst, src);
6525 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6526 frecpx<float>(vform, dst, src);
6527 } else {
6528 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6529 frecpx<double>(vform, dst, src);
6530 }
6531 return dst;
6532 }
6533
flogb(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6534 LogicVRegister Simulator::flogb(VectorFormat vform,
6535 LogicVRegister dst,
6536 const LogicVRegister& src) {
6537 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6538 double op = 0.0;
6539 switch (vform) {
6540 case kFormatVnH:
6541 op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
6542 break;
6543 case kFormatVnS:
6544 op = src.Float<float>(i);
6545 break;
6546 case kFormatVnD:
6547 op = src.Float<double>(i);
6548 break;
6549 default:
6550 VIXL_UNREACHABLE();
6551 }
6552
6553 switch (std::fpclassify(op)) {
6554 case FP_INFINITE:
6555 dst.SetInt(vform, i, MaxIntFromFormat(vform));
6556 break;
6557 case FP_NAN:
6558 case FP_ZERO:
6559 dst.SetInt(vform, i, MinIntFromFormat(vform));
6560 break;
6561 case FP_SUBNORMAL: {
6562 // DoubleMantissa returns the mantissa of its input, leaving 12 zero
6563 // bits where the sign and exponent would be. We subtract 12 to
6564 // find the number of leading zero bits in the mantissa itself.
6565 int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
6566 // Log2 of a subnormal is the lowest exponent a normal number can
6567 // represent, together with the zeros in the mantissa.
6568 dst.SetInt(vform, i, -1023 - mant_zero_count);
6569 break;
6570 }
6571 case FP_NORMAL:
6572 // Log2 of a normal number is the exponent minus the bias.
6573 dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
6574 break;
6575 }
6576 }
6577 return dst;
6578 }
6579
ftsmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6580 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6581 LogicVRegister dst,
6582 const LogicVRegister& src1,
6583 const LogicVRegister& src2) {
6584 SimVRegister maybe_neg_src1;
6585
6586 // The bottom bit of src2 controls the sign of the result. Use it to
6587 // conditionally invert the sign of one `fmul` operand.
6588 shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6589 eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6590
6591 // Multiply src1 by the modified neg_src1, which is potentially its negation.
6592 // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6593 // rather than neg_src1, must be the first source argument.
6594 fmul(vform, dst, src1, maybe_neg_src1);
6595
6596 return dst;
6597 }
6598
ftssel(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6599 LogicVRegister Simulator::ftssel(VectorFormat vform,
6600 LogicVRegister dst,
6601 const LogicVRegister& src1,
6602 const LogicVRegister& src2) {
6603 unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6604 uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6605 uint64_t one;
6606
6607 if (lane_bits == kHRegSize) {
6608 one = Float16ToRawbits(Float16(1.0));
6609 } else if (lane_bits == kSRegSize) {
6610 one = FloatToRawbits(1.0);
6611 } else {
6612 VIXL_ASSERT(lane_bits == kDRegSize);
6613 one = DoubleToRawbits(1.0);
6614 }
6615
6616 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6617 // Use integer accessors for this operation, as this is a data manipulation
6618 // task requiring no calculation.
6619 uint64_t op = src1.Uint(vform, i);
6620
6621 // Only the bottom two bits of the src2 register are significant, indicating
6622 // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6623 // determines the sign of the value written to dst.
6624 uint64_t q = src2.Uint(vform, i);
6625 if ((q & 1) == 1) op = one;
6626 if ((q & 2) == 2) op ^= sign_bit;
6627
6628 dst.SetUint(vform, i, op);
6629 }
6630
6631 return dst;
6632 }
6633
6634 template <typename T>
FTMaddHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,uint64_t coeff_pos,uint64_t coeff_neg)6635 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6636 LogicVRegister dst,
6637 const LogicVRegister& src1,
6638 const LogicVRegister& src2,
6639 uint64_t coeff_pos,
6640 uint64_t coeff_neg) {
6641 SimVRegister zero;
6642 dup_immediate(kFormatVnB, zero, 0);
6643
6644 SimVRegister cf;
6645 SimVRegister cfn;
6646 dup_immediate(vform, cf, coeff_pos);
6647 dup_immediate(vform, cfn, coeff_neg);
6648
6649 // The specification requires testing the top bit of the raw value, rather
6650 // than the sign of the floating point number, so use an integer comparison
6651 // here.
6652 SimPRegister is_neg;
6653 SVEIntCompareVectorsHelper(lt,
6654 vform,
6655 is_neg,
6656 GetPTrue(),
6657 src2,
6658 zero,
6659 false,
6660 LeaveFlags);
6661 mov_merging(vform, cf, is_neg, cfn);
6662
6663 SimVRegister temp;
6664 fabs_<T>(vform, temp, src2);
6665 fmla<T>(vform, cf, cf, src1, temp);
6666 mov(vform, dst, cf);
6667 return dst;
6668 }
6669
6670
ftmad(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,unsigned index)6671 LogicVRegister Simulator::ftmad(VectorFormat vform,
6672 LogicVRegister dst,
6673 const LogicVRegister& src1,
6674 const LogicVRegister& src2,
6675 unsigned index) {
6676 static const uint64_t ftmad_coeff16[] = {0x3c00,
6677 0xb155,
6678 0x2030,
6679 0x0000,
6680 0x0000,
6681 0x0000,
6682 0x0000,
6683 0x0000,
6684 0x3c00,
6685 0xb800,
6686 0x293a,
6687 0x0000,
6688 0x0000,
6689 0x0000,
6690 0x0000,
6691 0x0000};
6692
6693 static const uint64_t ftmad_coeff32[] = {0x3f800000,
6694 0xbe2aaaab,
6695 0x3c088886,
6696 0xb95008b9,
6697 0x36369d6d,
6698 0x00000000,
6699 0x00000000,
6700 0x00000000,
6701 0x3f800000,
6702 0xbf000000,
6703 0x3d2aaaa6,
6704 0xbab60705,
6705 0x37cd37cc,
6706 0x00000000,
6707 0x00000000,
6708 0x00000000};
6709
6710 static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6711 0xbfc5555555555543,
6712 0x3f8111111110f30c,
6713 0xbf2a01a019b92fc6,
6714 0x3ec71de351f3d22b,
6715 0xbe5ae5e2b60f7b91,
6716 0x3de5d8408868552f,
6717 0x0000000000000000,
6718 0x3ff0000000000000,
6719 0xbfe0000000000000,
6720 0x3fa5555555555536,
6721 0xbf56c16c16c13a0b,
6722 0x3efa01a019b1e8d8,
6723 0xbe927e4f7282f468,
6724 0x3e21ee96d2641b13,
6725 0xbda8f76380fbb401};
6726 VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6727 VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6728 VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6729
6730 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6731 FTMaddHelper<SimFloat16>(vform,
6732 dst,
6733 src1,
6734 src2,
6735 ftmad_coeff16[index],
6736 ftmad_coeff16[index + 8]);
6737 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6738 FTMaddHelper<float>(vform,
6739 dst,
6740 src1,
6741 src2,
6742 ftmad_coeff32[index],
6743 ftmad_coeff32[index + 8]);
6744 } else {
6745 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6746 FTMaddHelper<double>(vform,
6747 dst,
6748 src1,
6749 src2,
6750 ftmad_coeff64[index],
6751 ftmad_coeff64[index + 8]);
6752 }
6753 return dst;
6754 }
6755
fexpa(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6756 LogicVRegister Simulator::fexpa(VectorFormat vform,
6757 LogicVRegister dst,
6758 const LogicVRegister& src) {
6759 static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6760 0x005d, 0x0075, 0x008e, 0x00a8,
6761 0x00c2, 0x00dc, 0x00f8, 0x0114,
6762 0x0130, 0x014d, 0x016b, 0x0189,
6763 0x01a8, 0x01c8, 0x01e8, 0x0209,
6764 0x022b, 0x024e, 0x0271, 0x0295,
6765 0x02ba, 0x02e0, 0x0306, 0x032e,
6766 0x0356, 0x037f, 0x03a9, 0x03d4};
6767
6768 static const uint64_t fexpa_coeff32[] =
6769 {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6770 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6771 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6772 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6773 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6774 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6775 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6776 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6777 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6778 0x7d3e0c};
6779
6780 static const uint64_t fexpa_coeff64[] =
6781 {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6782 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6783 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6784 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6785 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6786 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6787 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6788 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6789 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6790 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6791 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6792 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6793 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6794 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6795 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6796 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6797
6798 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6799 int index_highbit = 5;
6800 int op_highbit, op_shift;
6801 const uint64_t* fexpa_coeff;
6802
6803 if (lane_size == kHRegSize) {
6804 index_highbit = 4;
6805 VIXL_ASSERT(ArrayLength(fexpa_coeff16) ==
6806 (uint64_t{1} << (index_highbit + 1)));
6807 fexpa_coeff = fexpa_coeff16;
6808 op_highbit = 9;
6809 op_shift = 10;
6810 } else if (lane_size == kSRegSize) {
6811 VIXL_ASSERT(ArrayLength(fexpa_coeff32) ==
6812 (uint64_t{1} << (index_highbit + 1)));
6813 fexpa_coeff = fexpa_coeff32;
6814 op_highbit = 13;
6815 op_shift = 23;
6816 } else {
6817 VIXL_ASSERT(lane_size == kDRegSize);
6818 VIXL_ASSERT(ArrayLength(fexpa_coeff64) ==
6819 (uint64_t{1} << (index_highbit + 1)));
6820 fexpa_coeff = fexpa_coeff64;
6821 op_highbit = 16;
6822 op_shift = 52;
6823 }
6824
6825 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6826 uint64_t op = src.Uint(vform, i);
6827 uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6828 result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6829 dst.SetUint(vform, i, result);
6830 }
6831 return dst;
6832 }
6833
6834 template <typename T>
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6835 LogicVRegister Simulator::fscale(VectorFormat vform,
6836 LogicVRegister dst,
6837 const LogicVRegister& src1,
6838 const LogicVRegister& src2) {
6839 T two = T(2.0);
6840 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6841 T src1_val = src1.Float<T>(i);
6842 if (!IsNaN(src1_val)) {
6843 int64_t scale = src2.Int(vform, i);
6844 // TODO: this is a low-performance implementation, but it's simple and
6845 // less likely to be buggy. Consider replacing it with something faster.
6846
6847 // Scales outside of these bounds become infinity or zero, so there's no
6848 // point iterating further.
6849 scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6850
6851 // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6852 // decrement scale until it's zero.
6853 while (scale-- > 0) {
6854 src1_val = FPMul(src1_val, two);
6855 }
6856
6857 // If scale is negative, divide by two and increment scale until it's
6858 // zero. Initially, scale is (src2 - 1), so we pre-increment.
6859 while (++scale < 0) {
6860 src1_val = FPDiv(src1_val, two);
6861 }
6862 }
6863 dst.SetFloat<T>(i, src1_val);
6864 }
6865 return dst;
6866 }
6867
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6868 LogicVRegister Simulator::fscale(VectorFormat vform,
6869 LogicVRegister dst,
6870 const LogicVRegister& src1,
6871 const LogicVRegister& src2) {
6872 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6873 fscale<SimFloat16>(vform, dst, src1, src2);
6874 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6875 fscale<float>(vform, dst, src1, src2);
6876 } else {
6877 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6878 fscale<double>(vform, dst, src1, src2);
6879 }
6880 return dst;
6881 }
6882
scvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6883 LogicVRegister Simulator::scvtf(VectorFormat vform,
6884 unsigned dst_data_size_in_bits,
6885 unsigned src_data_size_in_bits,
6886 LogicVRegister dst,
6887 const LogicPRegister& pg,
6888 const LogicVRegister& src,
6889 FPRounding round,
6890 int fbits) {
6891 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6892 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6893 dst.ClearForWrite(vform);
6894
6895 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6896 if (!pg.IsActive(vform, i)) continue;
6897
6898 int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6899 0,
6900 src.Uint(vform, i));
6901
6902 switch (dst_data_size_in_bits) {
6903 case kHRegSize: {
6904 SimFloat16 result = FixedToFloat16(value, fbits, round);
6905 dst.SetUint(vform, i, Float16ToRawbits(result));
6906 break;
6907 }
6908 case kSRegSize: {
6909 float result = FixedToFloat(value, fbits, round);
6910 dst.SetUint(vform, i, FloatToRawbits(result));
6911 break;
6912 }
6913 case kDRegSize: {
6914 double result = FixedToDouble(value, fbits, round);
6915 dst.SetUint(vform, i, DoubleToRawbits(result));
6916 break;
6917 }
6918 default:
6919 VIXL_UNIMPLEMENTED();
6920 break;
6921 }
6922 }
6923
6924 return dst;
6925 }
6926
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6927 LogicVRegister Simulator::scvtf(VectorFormat vform,
6928 LogicVRegister dst,
6929 const LogicVRegister& src,
6930 int fbits,
6931 FPRounding round) {
6932 return scvtf(vform,
6933 LaneSizeInBitsFromFormat(vform),
6934 LaneSizeInBitsFromFormat(vform),
6935 dst,
6936 GetPTrue(),
6937 src,
6938 round,
6939 fbits);
6940 }
6941
ucvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6942 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6943 unsigned dst_data_size_in_bits,
6944 unsigned src_data_size_in_bits,
6945 LogicVRegister dst,
6946 const LogicPRegister& pg,
6947 const LogicVRegister& src,
6948 FPRounding round,
6949 int fbits) {
6950 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6951 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6952 dst.ClearForWrite(vform);
6953
6954 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6955 if (!pg.IsActive(vform, i)) continue;
6956
6957 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6958 0,
6959 src.Uint(vform, i));
6960
6961 switch (dst_data_size_in_bits) {
6962 case kHRegSize: {
6963 SimFloat16 result = UFixedToFloat16(value, fbits, round);
6964 dst.SetUint(vform, i, Float16ToRawbits(result));
6965 break;
6966 }
6967 case kSRegSize: {
6968 float result = UFixedToFloat(value, fbits, round);
6969 dst.SetUint(vform, i, FloatToRawbits(result));
6970 break;
6971 }
6972 case kDRegSize: {
6973 double result = UFixedToDouble(value, fbits, round);
6974 dst.SetUint(vform, i, DoubleToRawbits(result));
6975 break;
6976 }
6977 default:
6978 VIXL_UNIMPLEMENTED();
6979 break;
6980 }
6981 }
6982
6983 return dst;
6984 }
6985
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6986 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6987 LogicVRegister dst,
6988 const LogicVRegister& src,
6989 int fbits,
6990 FPRounding round) {
6991 return ucvtf(vform,
6992 LaneSizeInBitsFromFormat(vform),
6993 LaneSizeInBitsFromFormat(vform),
6994 dst,
6995 GetPTrue(),
6996 src,
6997 round,
6998 fbits);
6999 }
7000
unpk(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,UnpackType unpack_type,ExtendType extend_type)7001 LogicVRegister Simulator::unpk(VectorFormat vform,
7002 LogicVRegister dst,
7003 const LogicVRegister& src,
7004 UnpackType unpack_type,
7005 ExtendType extend_type) {
7006 VectorFormat vform_half = VectorFormatHalfWidth(vform);
7007 const int lane_count = LaneCountFromFormat(vform);
7008 const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
7009
7010 switch (extend_type) {
7011 case kSignedExtend: {
7012 int64_t result[kZRegMaxSizeInBytes];
7013 for (int i = 0; i < lane_count; ++i) {
7014 result[i] = src.Int(vform_half, i + src_start_lane);
7015 }
7016 for (int i = 0; i < lane_count; ++i) {
7017 dst.SetInt(vform, i, result[i]);
7018 }
7019 break;
7020 }
7021 case kUnsignedExtend: {
7022 uint64_t result[kZRegMaxSizeInBytes];
7023 for (int i = 0; i < lane_count; ++i) {
7024 result[i] = src.Uint(vform_half, i + src_start_lane);
7025 }
7026 for (int i = 0; i < lane_count; ++i) {
7027 dst.SetUint(vform, i, result[i]);
7028 }
7029 break;
7030 }
7031 default:
7032 VIXL_UNREACHABLE();
7033 }
7034 return dst;
7035 }
7036
SVEIntCompareVectorsHelper(Condition cond,VectorFormat vform,LogicPRegister dst,const LogicPRegister & mask,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements,FlagsUpdate flags)7037 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
7038 VectorFormat vform,
7039 LogicPRegister dst,
7040 const LogicPRegister& mask,
7041 const LogicVRegister& src1,
7042 const LogicVRegister& src2,
7043 bool is_wide_elements,
7044 FlagsUpdate flags) {
7045 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7046 bool result = false;
7047 if (mask.IsActive(vform, lane)) {
7048 int64_t op1 = 0xbadbeef;
7049 int64_t op2 = 0xbadbeef;
7050 int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
7051 switch (cond) {
7052 case eq:
7053 case ge:
7054 case gt:
7055 case lt:
7056 case le:
7057 case ne:
7058 op1 = src1.Int(vform, lane);
7059 op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
7060 : src2.Int(vform, lane);
7061 break;
7062 case hi:
7063 case hs:
7064 case ls:
7065 case lo:
7066 op1 = src1.Uint(vform, lane);
7067 op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
7068 : src2.Uint(vform, lane);
7069 break;
7070 default:
7071 VIXL_UNREACHABLE();
7072 }
7073
7074 switch (cond) {
7075 case eq:
7076 result = (op1 == op2);
7077 break;
7078 case ne:
7079 result = (op1 != op2);
7080 break;
7081 case ge:
7082 result = (op1 >= op2);
7083 break;
7084 case gt:
7085 result = (op1 > op2);
7086 break;
7087 case le:
7088 result = (op1 <= op2);
7089 break;
7090 case lt:
7091 result = (op1 < op2);
7092 break;
7093 case hs:
7094 result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
7095 break;
7096 case hi:
7097 result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
7098 break;
7099 case ls:
7100 result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
7101 break;
7102 case lo:
7103 result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
7104 break;
7105 default:
7106 VIXL_UNREACHABLE();
7107 }
7108 }
7109 dst.SetActive(vform, lane, result);
7110 }
7111
7112 if (flags == SetFlags) PredTest(vform, mask, dst);
7113
7114 return dst;
7115 }
7116
SVEBitwiseShiftHelper(Shift shift_op,VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements)7117 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
7118 VectorFormat vform,
7119 LogicVRegister dst,
7120 const LogicVRegister& src1,
7121 const LogicVRegister& src2,
7122 bool is_wide_elements) {
7123 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
7124 VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
7125
7126 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7127 int shift_src_lane = lane;
7128 if (is_wide_elements) {
7129 // If the shift amount comes from wide elements, select the D-sized lane
7130 // which occupies the corresponding lanes of the value to be shifted.
7131 shift_src_lane = (lane * lane_size) / kDRegSize;
7132 }
7133 uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
7134
7135 // Saturate shift_amount to the size of the lane that will be shifted.
7136 if (shift_amount > lane_size) shift_amount = lane_size;
7137
7138 uint64_t value = src1.Uint(vform, lane);
7139 int64_t result = ShiftOperand(lane_size,
7140 value,
7141 shift_op,
7142 static_cast<unsigned>(shift_amount));
7143 dst.SetUint(vform, lane, result);
7144 }
7145
7146 return dst;
7147 }
7148
asrd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int shift)7149 LogicVRegister Simulator::asrd(VectorFormat vform,
7150 LogicVRegister dst,
7151 const LogicVRegister& src1,
7152 int shift) {
7153 VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
7154 LaneSizeInBitsFromFormat(vform)));
7155
7156 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7157 int64_t value = src1.Int(vform, i);
7158 if (shift <= 63) {
7159 if (value < 0) {
7160 // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
7161 // cast to int64_t, and cannot cause signed overflow in the result.
7162 value = value + GetUintMask(shift);
7163 }
7164 value = ShiftOperand(kDRegSize, value, ASR, shift);
7165 } else {
7166 value = 0;
7167 }
7168 dst.SetInt(vform, i, value);
7169 }
7170 return dst;
7171 }
7172
SVEBitwiseLogicalUnpredicatedHelper(LogicalOp logical_op,VectorFormat vform,LogicVRegister zd,const LogicVRegister & zn,const LogicVRegister & zm)7173 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
7174 LogicalOp logical_op,
7175 VectorFormat vform,
7176 LogicVRegister zd,
7177 const LogicVRegister& zn,
7178 const LogicVRegister& zm) {
7179 VIXL_ASSERT(IsSVEFormat(vform));
7180 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7181 uint64_t op1 = zn.Uint(vform, i);
7182 uint64_t op2 = zm.Uint(vform, i);
7183 uint64_t result = 0;
7184 switch (logical_op) {
7185 case AND:
7186 result = op1 & op2;
7187 break;
7188 case BIC:
7189 result = op1 & ~op2;
7190 break;
7191 case EOR:
7192 result = op1 ^ op2;
7193 break;
7194 case ORR:
7195 result = op1 | op2;
7196 break;
7197 default:
7198 VIXL_UNIMPLEMENTED();
7199 }
7200 zd.SetUint(vform, i, result);
7201 }
7202
7203 return zd;
7204 }
7205
SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,LogicPRegister pd,const LogicPRegister & pn,const LogicPRegister & pm)7206 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
7207 LogicPRegister pd,
7208 const LogicPRegister& pn,
7209 const LogicPRegister& pm) {
7210 for (int i = 0; i < pn.GetChunkCount(); i++) {
7211 LogicPRegister::ChunkType op1 = pn.GetChunk(i);
7212 LogicPRegister::ChunkType op2 = pm.GetChunk(i);
7213 LogicPRegister::ChunkType result = 0;
7214 switch (op) {
7215 case ANDS_p_p_pp_z:
7216 case AND_p_p_pp_z:
7217 result = op1 & op2;
7218 break;
7219 case BICS_p_p_pp_z:
7220 case BIC_p_p_pp_z:
7221 result = op1 & ~op2;
7222 break;
7223 case EORS_p_p_pp_z:
7224 case EOR_p_p_pp_z:
7225 result = op1 ^ op2;
7226 break;
7227 case NANDS_p_p_pp_z:
7228 case NAND_p_p_pp_z:
7229 result = ~(op1 & op2);
7230 break;
7231 case NORS_p_p_pp_z:
7232 case NOR_p_p_pp_z:
7233 result = ~(op1 | op2);
7234 break;
7235 case ORNS_p_p_pp_z:
7236 case ORN_p_p_pp_z:
7237 result = op1 | ~op2;
7238 break;
7239 case ORRS_p_p_pp_z:
7240 case ORR_p_p_pp_z:
7241 result = op1 | op2;
7242 break;
7243 default:
7244 VIXL_UNIMPLEMENTED();
7245 }
7246 pd.SetChunk(i, result);
7247 }
7248 return pd;
7249 }
7250
SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,VectorFormat vform,LogicVRegister zd,uint64_t imm)7251 LogicVRegister Simulator::SVEBitwiseImmHelper(
7252 SVEBitwiseLogicalWithImm_UnpredicatedOp op,
7253 VectorFormat vform,
7254 LogicVRegister zd,
7255 uint64_t imm) {
7256 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7257 uint64_t op1 = zd.Uint(vform, i);
7258 uint64_t result = 0;
7259 switch (op) {
7260 case AND_z_zi:
7261 result = op1 & imm;
7262 break;
7263 case EOR_z_zi:
7264 result = op1 ^ imm;
7265 break;
7266 case ORR_z_zi:
7267 result = op1 | imm;
7268 break;
7269 default:
7270 VIXL_UNIMPLEMENTED();
7271 }
7272 zd.SetUint(vform, i, result);
7273 }
7274
7275 return zd;
7276 }
7277
SVEStructuredStoreHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr)7278 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7279 const LogicPRegister& pg,
7280 unsigned zt_code,
7281 const LogicSVEAddressVector& addr) {
7282 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7283
7284 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7285 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7286 int msize_in_bytes = addr.GetMsizeInBytes();
7287 int reg_count = addr.GetRegCount();
7288
7289 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7290 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7291
7292 unsigned zt_codes[4] = {zt_code,
7293 (zt_code + 1) % kNumberOfZRegisters,
7294 (zt_code + 2) % kNumberOfZRegisters,
7295 (zt_code + 3) % kNumberOfZRegisters};
7296
7297 LogicVRegister zt[4] = {
7298 ReadVRegister(zt_codes[0]),
7299 ReadVRegister(zt_codes[1]),
7300 ReadVRegister(zt_codes[2]),
7301 ReadVRegister(zt_codes[3]),
7302 };
7303
7304 // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7305 // are ignored, so read the source register using the VectorFormat that
7306 // corresponds with the storage format, and multiply the index accordingly.
7307 VectorFormat unpack_vform =
7308 SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7309 int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7310
7311 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7312 if (!pg.IsActive(vform, i)) continue;
7313
7314 for (int r = 0; r < reg_count; r++) {
7315 uint64_t element_address = addr.GetElementAddress(i, r);
7316 if (!StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address)) {
7317 return;
7318 }
7319 }
7320 }
7321
7322 if (ShouldTraceWrites()) {
7323 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7324 if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7325 // Use an FP format where it's likely that we're accessing FP data.
7326 format = GetPrintRegisterFormatTryFP(format);
7327 }
7328 // Stores don't represent a change to the source register's value, so only
7329 // print the relevant part of the value.
7330 format = GetPrintRegPartial(format);
7331
7332 PrintZStructAccess(zt_code,
7333 reg_count,
7334 pg,
7335 format,
7336 msize_in_bytes,
7337 "->",
7338 addr);
7339 }
7340 }
7341
SVEStructuredLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,bool is_signed)7342 bool Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7343 const LogicPRegister& pg,
7344 unsigned zt_code,
7345 const LogicSVEAddressVector& addr,
7346 bool is_signed) {
7347 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7348 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7349 int msize_in_bytes = addr.GetMsizeInBytes();
7350 int reg_count = addr.GetRegCount();
7351
7352 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7353 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7354 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7355
7356 unsigned zt_codes[4] = {zt_code,
7357 (zt_code + 1) % kNumberOfZRegisters,
7358 (zt_code + 2) % kNumberOfZRegisters,
7359 (zt_code + 3) % kNumberOfZRegisters};
7360 LogicVRegister zt[4] = {
7361 ReadVRegister(zt_codes[0]),
7362 ReadVRegister(zt_codes[1]),
7363 ReadVRegister(zt_codes[2]),
7364 ReadVRegister(zt_codes[3]),
7365 };
7366
7367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7368 for (int r = 0; r < reg_count; r++) {
7369 uint64_t element_address = addr.GetElementAddress(i, r);
7370
7371 if (!pg.IsActive(vform, i)) {
7372 zt[r].SetUint(vform, i, 0);
7373 continue;
7374 }
7375
7376 if (is_signed) {
7377 if (!LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
7378 return false;
7379 }
7380 } else {
7381 if (!LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
7382 return false;
7383 }
7384 }
7385 }
7386 }
7387
7388 if (ShouldTraceVRegs()) {
7389 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7390 if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7391 // Use an FP format where it's likely that we're accessing FP data.
7392 format = GetPrintRegisterFormatTryFP(format);
7393 }
7394 PrintZStructAccess(zt_code,
7395 reg_count,
7396 pg,
7397 format,
7398 msize_in_bytes,
7399 "<-",
7400 addr);
7401 }
7402 return true;
7403 }
7404
brka(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7405 LogicPRegister Simulator::brka(LogicPRegister pd,
7406 const LogicPRegister& pg,
7407 const LogicPRegister& pn) {
7408 bool break_ = false;
7409 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7410 if (pg.IsActive(kFormatVnB, i)) {
7411 pd.SetActive(kFormatVnB, i, !break_);
7412 break_ |= pn.IsActive(kFormatVnB, i);
7413 }
7414 }
7415
7416 return pd;
7417 }
7418
brkb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7419 LogicPRegister Simulator::brkb(LogicPRegister pd,
7420 const LogicPRegister& pg,
7421 const LogicPRegister& pn) {
7422 bool break_ = false;
7423 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7424 if (pg.IsActive(kFormatVnB, i)) {
7425 break_ |= pn.IsActive(kFormatVnB, i);
7426 pd.SetActive(kFormatVnB, i, !break_);
7427 }
7428 }
7429
7430 return pd;
7431 }
7432
brkn(LogicPRegister pdm,const LogicPRegister & pg,const LogicPRegister & pn)7433 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7434 const LogicPRegister& pg,
7435 const LogicPRegister& pn) {
7436 if (!IsLastActive(kFormatVnB, pg, pn)) {
7437 pfalse(pdm);
7438 }
7439 return pdm;
7440 }
7441
brkpa(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7442 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7443 const LogicPRegister& pg,
7444 const LogicPRegister& pn,
7445 const LogicPRegister& pm) {
7446 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7447
7448 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7449 bool active = false;
7450 if (pg.IsActive(kFormatVnB, i)) {
7451 active = last_active;
7452 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7453 }
7454 pd.SetActive(kFormatVnB, i, active);
7455 }
7456
7457 return pd;
7458 }
7459
brkpb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7460 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7461 const LogicPRegister& pg,
7462 const LogicPRegister& pn,
7463 const LogicPRegister& pm) {
7464 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7465
7466 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7467 bool active = false;
7468 if (pg.IsActive(kFormatVnB, i)) {
7469 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7470 active = last_active;
7471 }
7472 pd.SetActive(kFormatVnB, i, active);
7473 }
7474
7475 return pd;
7476 }
7477
SVEFaultTolerantLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,SVEFaultTolerantLoadType type,bool is_signed)7478 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7479 const LogicPRegister& pg,
7480 unsigned zt_code,
7481 const LogicSVEAddressVector& addr,
7482 SVEFaultTolerantLoadType type,
7483 bool is_signed) {
7484 int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7485 int msize_in_bits = addr.GetMsizeInBits();
7486 int msize_in_bytes = addr.GetMsizeInBytes();
7487
7488 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7489 VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7490 VIXL_ASSERT(addr.GetRegCount() == 1);
7491
7492 LogicVRegister zt = ReadVRegister(zt_code);
7493 LogicPRegister ffr = ReadFFR();
7494
7495 // Non-faulting loads are allowed to fail arbitrarily. To stress user
7496 // code, fail a random element in roughly one in eight full-vector loads.
7497 uint32_t rnd = static_cast<uint32_t>(rand_gen_());
7498 int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7499
7500 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7501 uint64_t value = 0;
7502
7503 if (pg.IsActive(vform, i)) {
7504 uint64_t element_address = addr.GetElementAddress(i, 0);
7505
7506 if (type == kSVEFirstFaultLoad) {
7507 // First-faulting loads always load the first active element, regardless
7508 // of FFR. The result will be discarded if its FFR lane is inactive, but
7509 // it could still generate a fault.
7510 VIXL_DEFINE_OR_RETURN(mem_result,
7511 MemReadUint(msize_in_bytes, element_address));
7512 value = mem_result;
7513 // All subsequent elements have non-fault semantics.
7514 type = kSVENonFaultLoad;
7515
7516 } else if (ffr.IsActive(vform, i)) {
7517 // Simulation of fault-tolerant loads relies on system calls, and is
7518 // likely to be relatively slow, so we only actually perform the load if
7519 // its FFR lane is active.
7520
7521 bool can_read = (i < fake_fault_at_lane) &&
7522 CanReadMemory(element_address, msize_in_bytes);
7523 if (can_read) {
7524 VIXL_DEFINE_OR_RETURN(mem_result,
7525 MemReadUint(msize_in_bytes, element_address));
7526 value = mem_result;
7527 } else {
7528 // Propagate the fault to the end of FFR.
7529 for (int j = i; j < LaneCountFromFormat(vform); j++) {
7530 ffr.SetActive(vform, j, false);
7531 }
7532 }
7533 }
7534 }
7535
7536 // The architecture permits a few possible results for inactive FFR lanes
7537 // (including those caused by a fault in this instruction). We choose to
7538 // leave the register value unchanged (like merging predication) because
7539 // no other input to this instruction can have the same behaviour.
7540 //
7541 // Note that this behaviour takes precedence over pg's zeroing predication.
7542
7543 if (ffr.IsActive(vform, i)) {
7544 int msb = msize_in_bits - 1;
7545 if (is_signed) {
7546 zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7547 } else {
7548 zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7549 }
7550 }
7551 }
7552
7553 if (ShouldTraceVRegs()) {
7554 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7555 if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7556 // Use an FP format where it's likely that we're accessing FP data.
7557 format = GetPrintRegisterFormatTryFP(format);
7558 }
7559 // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7560 // expects a single mask, so combine the two predicates.
7561 SimPRegister mask;
7562 SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7563 PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7564 }
7565 }
7566
SVEGatherLoadScalarPlusVectorHelper(const Instruction * instr,VectorFormat vform,SVEOffsetModifier mod)7567 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7568 VectorFormat vform,
7569 SVEOffsetModifier mod) {
7570 bool is_signed = instr->ExtractBit(14) == 0;
7571 bool is_ff = instr->ExtractBit(13) == 1;
7572 // Note that these instructions don't use the Dtype encoding.
7573 int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7574 int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7575 uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
7576 LogicSVEAddressVector addr(base,
7577 &ReadVRegister(instr->GetRm()),
7578 vform,
7579 mod,
7580 scale);
7581 addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7582 if (is_ff) {
7583 SVEFaultTolerantLoadHelper(vform,
7584 ReadPRegister(instr->GetPgLow8()),
7585 instr->GetRt(),
7586 addr,
7587 kSVEFirstFaultLoad,
7588 is_signed);
7589 } else {
7590 SVEStructuredLoadHelper(vform,
7591 ReadPRegister(instr->GetPgLow8()),
7592 instr->GetRt(),
7593 addr,
7594 is_signed);
7595 }
7596 }
7597
GetFirstActive(VectorFormat vform,const LogicPRegister & pg) const7598 int Simulator::GetFirstActive(VectorFormat vform,
7599 const LogicPRegister& pg) const {
7600 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7601 if (pg.IsActive(vform, i)) return i;
7602 }
7603 return -1;
7604 }
7605
GetLastActive(VectorFormat vform,const LogicPRegister & pg) const7606 int Simulator::GetLastActive(VectorFormat vform,
7607 const LogicPRegister& pg) const {
7608 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7609 if (pg.IsActive(vform, i)) return i;
7610 }
7611 return -1;
7612 }
7613
CountActiveLanes(VectorFormat vform,const LogicPRegister & pg) const7614 int Simulator::CountActiveLanes(VectorFormat vform,
7615 const LogicPRegister& pg) const {
7616 int count = 0;
7617 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7618 count += pg.IsActive(vform, i) ? 1 : 0;
7619 }
7620 return count;
7621 }
7622
CountActiveAndTrueLanes(VectorFormat vform,const LogicPRegister & pg,const LogicPRegister & pn) const7623 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7624 const LogicPRegister& pg,
7625 const LogicPRegister& pn) const {
7626 int count = 0;
7627 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7628 count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7629 }
7630 return count;
7631 }
7632
GetPredicateConstraintLaneCount(VectorFormat vform,int pattern) const7633 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7634 int pattern) const {
7635 VIXL_ASSERT(IsSVEFormat(vform));
7636 int all = LaneCountFromFormat(vform);
7637 VIXL_ASSERT(all > 0);
7638
7639 switch (pattern) {
7640 case SVE_VL1:
7641 case SVE_VL2:
7642 case SVE_VL3:
7643 case SVE_VL4:
7644 case SVE_VL5:
7645 case SVE_VL6:
7646 case SVE_VL7:
7647 case SVE_VL8:
7648 // VL1-VL8 are encoded directly.
7649 VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7650 VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7651 return (pattern <= all) ? pattern : 0;
7652 case SVE_VL16:
7653 case SVE_VL32:
7654 case SVE_VL64:
7655 case SVE_VL128:
7656 case SVE_VL256: {
7657 // VL16-VL256 are encoded as log2(N) + c.
7658 int min = 16 << (pattern - SVE_VL16);
7659 return (min <= all) ? min : 0;
7660 }
7661 // Special cases.
7662 case SVE_POW2:
7663 return 1 << HighestSetBitPosition(all);
7664 case SVE_MUL4:
7665 return all - (all % 4);
7666 case SVE_MUL3:
7667 return all - (all % 3);
7668 case SVE_ALL:
7669 return all;
7670 }
7671 // Unnamed cases architecturally return 0.
7672 return 0;
7673 }
7674
match(VectorFormat vform,LogicPRegister dst,const LogicVRegister & haystack,const LogicVRegister & needles,bool negate_match)7675 LogicPRegister Simulator::match(VectorFormat vform,
7676 LogicPRegister dst,
7677 const LogicVRegister& haystack,
7678 const LogicVRegister& needles,
7679 bool negate_match) {
7680 SimVRegister ztemp;
7681 SimPRegister ptemp;
7682
7683 pfalse(dst);
7684 int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
7685 for (int i = 0; i < lanes_per_segment; i++) {
7686 dup_elements_to_segments(vform, ztemp, needles, i);
7687 SVEIntCompareVectorsHelper(eq,
7688 vform,
7689 ptemp,
7690 GetPTrue(),
7691 haystack,
7692 ztemp,
7693 false,
7694 LeaveFlags);
7695 SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
7696 }
7697 if (negate_match) {
7698 ptrue(vform, ptemp, SVE_ALL);
7699 SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
7700 }
7701 return dst;
7702 }
7703
GetStructAddress(int lane) const7704 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7705 if (IsContiguous()) {
7706 return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7707 }
7708
7709 VIXL_ASSERT(IsScatterGather());
7710 VIXL_ASSERT(vector_ != NULL);
7711
7712 // For scatter-gather accesses, we need to extract the offset from vector_,
7713 // and apply modifiers.
7714
7715 uint64_t offset = 0;
7716 switch (vector_form_) {
7717 case kFormatVnS:
7718 offset = vector_->GetLane<uint32_t>(lane);
7719 break;
7720 case kFormatVnD:
7721 offset = vector_->GetLane<uint64_t>(lane);
7722 break;
7723 default:
7724 VIXL_UNIMPLEMENTED();
7725 break;
7726 }
7727
7728 switch (vector_mod_) {
7729 case SVE_MUL_VL:
7730 VIXL_UNIMPLEMENTED();
7731 break;
7732 case SVE_LSL:
7733 // We apply the shift below. There's nothing to do here.
7734 break;
7735 case NO_SVE_OFFSET_MODIFIER:
7736 VIXL_ASSERT(vector_shift_ == 0);
7737 break;
7738 case SVE_UXTW:
7739 offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7740 break;
7741 case SVE_SXTW:
7742 offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7743 break;
7744 }
7745
7746 return base_ + (offset << vector_shift_);
7747 }
7748
pack_odd_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7749 LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
7750 LogicVRegister dst,
7751 const LogicVRegister& src) {
7752 SimVRegister zero;
7753 zero.Clear();
7754 return uzp2(vform, dst, src, zero);
7755 }
7756
pack_even_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7757 LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
7758 LogicVRegister dst,
7759 const LogicVRegister& src) {
7760 SimVRegister zero;
7761 zero.Clear();
7762 return uzp1(vform, dst, src, zero);
7763 }
7764
adcl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool top)7765 LogicVRegister Simulator::adcl(VectorFormat vform,
7766 LogicVRegister dst,
7767 const LogicVRegister& src1,
7768 const LogicVRegister& src2,
7769 bool top) {
7770 unsigned reg_size = LaneSizeInBitsFromFormat(vform);
7771 VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
7772
7773 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
7774 uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
7775 uint64_t right = dst.Uint(vform, i);
7776 unsigned carry_in = src2.Uint(vform, i + 1) & 1;
7777 std::pair<uint64_t, uint8_t> val_and_flags =
7778 AddWithCarry(reg_size, left, right, carry_in);
7779
7780 // Set even lanes to the result of the addition.
7781 dst.SetUint(vform, i, val_and_flags.first);
7782
7783 // Set odd lanes to the carry flag from the addition.
7784 uint64_t carry_out = (val_and_flags.second >> 1) & 1;
7785 dst.SetUint(vform, i + 1, carry_out);
7786 }
7787 return dst;
7788 }
7789
7790 // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7791 // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7792 //
7793 // Matrices of the form:
7794 //
7795 // src1 = ( a b c d e f g h ) src2 = ( A B )
7796 // ( i j k l m n o p ) ( C D )
7797 // ( E F )
7798 // ( G H )
7799 // ( I J )
7800 // ( K L )
7801 // ( M N )
7802 // ( O P )
7803 //
7804 // Are stored in the input vector registers as:
7805 //
7806 // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
7807 // src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7808 // src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7809 //
matmul(VectorFormat vform_dst,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool src1_signed,bool src2_signed)7810 LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7811 LogicVRegister srcdst,
7812 const LogicVRegister& src1,
7813 const LogicVRegister& src2,
7814 bool src1_signed,
7815 bool src2_signed) {
7816 // Two destination forms are supported: Q register containing four S-sized
7817 // elements (4S) and Z register containing n S-sized elements (VnS).
7818 VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7819 VectorFormat vform_src = kFormatVnB;
7820 int b_per_segment = kQRegSize / kBRegSize;
7821 int s_per_segment = kQRegSize / kSRegSize;
7822 int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7823 int segment_count = LaneCountFromFormat(vform_dst) / 4;
7824 for (int seg = 0; seg < segment_count; seg++) {
7825 for (int i = 0; i < 2; i++) {
7826 for (int j = 0; j < 2; j++) {
7827 int dstidx = (2 * i) + j + (seg * s_per_segment);
7828 int64_t sum = srcdst.Int(vform_dst, dstidx);
7829 for (int k = 0; k < 8; k++) {
7830 int idx1 = (8 * i) + k + (seg * b_per_segment);
7831 int idx2 = (8 * j) + k + (seg * b_per_segment);
7832 int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7833 : src1.Uint(vform_src, idx1);
7834 int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7835 : src2.Uint(vform_src, idx2);
7836 sum += e1 * e2;
7837 }
7838 result[dstidx] = sum;
7839 }
7840 }
7841 }
7842 srcdst.SetIntArray(vform_dst, result);
7843 return srcdst;
7844 }
7845
7846 // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7847 // result to the matrix in srcdst, and write back to srcdst.
7848 //
7849 // Matrices of the form:
7850 //
7851 // src1 = ( a b ) src2 = ( A B )
7852 // ( c d ) ( C D )
7853 //
7854 // Are stored in the input vector registers as:
7855 //
7856 // 3 2 1 0
7857 // src1 = [ d | c | b | a ]
7858 // src2 = [ D | B | C | A ]
7859 //
7860 template <typename T>
fmatmul(VectorFormat vform,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)7861 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7862 LogicVRegister srcdst,
7863 const LogicVRegister& src1,
7864 const LogicVRegister& src2) {
7865 T result[kZRegMaxSizeInBytes / sizeof(T)];
7866 int T_per_segment = 4;
7867 int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7868 for (int seg = 0; seg < segment_count; seg++) {
7869 int segoff = seg * T_per_segment;
7870 for (int i = 0; i < 2; i++) {
7871 for (int j = 0; j < 2; j++) {
7872 T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7873 src2.Float<T>(2 * j + 0 + segoff));
7874 T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7875 src2.Float<T>(2 * j + 1 + segoff));
7876 T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7877 result[2 * i + j + segoff] = FPAdd(sum, prod1);
7878 }
7879 }
7880 }
7881 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7882 // Elements outside a multiple of 4T are set to zero. This happens only
7883 // for double precision operations, when the VL is a multiple of 128 bits,
7884 // but not a multiple of 256 bits.
7885 T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7886 srcdst.SetFloat<T>(vform, i, value);
7887 }
7888 return srcdst;
7889 }
7890
fmatmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)7891 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7892 LogicVRegister dst,
7893 const LogicVRegister& src1,
7894 const LogicVRegister& src2) {
7895 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
7896 fmatmul<float>(vform, dst, src1, src2);
7897 } else {
7898 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
7899 fmatmul<double>(vform, dst, src1, src2);
7900 }
7901 return dst;
7902 }
7903
7904 template <>
CryptoOp(uint64_t x,uint64_t y,uint64_t z)7905 uint64_t CryptoOp<"choose"_h>(uint64_t x, uint64_t y, uint64_t z) {
7906 return ((y ^ z) & x) ^ z;
7907 }
7908
7909 template <>
CryptoOp(uint64_t x,uint64_t y,uint64_t z)7910 uint64_t CryptoOp<"majority"_h>(uint64_t x, uint64_t y, uint64_t z) {
7911 return (x & y) | ((x | y) & z);
7912 }
7913
7914 template <>
CryptoOp(uint64_t x,uint64_t y,uint64_t z)7915 uint64_t CryptoOp<"parity"_h>(uint64_t x, uint64_t y, uint64_t z) {
7916 return x ^ y ^ z;
7917 }
7918
7919 template <typename T, unsigned A, unsigned B, unsigned C>
SHASigma(uint64_t x)7920 static uint64_t SHASigma(uint64_t x) {
7921 return static_cast<T>(RotateRight(x, A, sizeof(T) * kBitsPerByte) ^
7922 RotateRight(x, B, sizeof(T) * kBitsPerByte) ^
7923 RotateRight(x, C, sizeof(T) * kBitsPerByte));
7924 }
7925
sha2h(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool part1)7926 LogicVRegister Simulator::sha2h(LogicVRegister srcdst,
7927 const LogicVRegister& src1,
7928 const LogicVRegister& src2,
7929 bool part1) {
7930 uint64_t x[4] = {};
7931 uint64_t y[4] = {};
7932 if (part1) {
7933 // Switch input order based on which part is being handled.
7934 srcdst.UintArray(kFormat4S, x);
7935 src1.UintArray(kFormat4S, y);
7936 } else {
7937 src1.UintArray(kFormat4S, x);
7938 srcdst.UintArray(kFormat4S, y);
7939 }
7940
7941 for (unsigned i = 0; i < ArrayLength(x); i++) {
7942 uint64_t chs = CryptoOp<"choose"_h>(y[0], y[1], y[2]);
7943 uint64_t maj = CryptoOp<"majority"_h>(x[0], x[1], x[2]);
7944
7945 uint64_t w = src2.Uint(kFormat4S, i);
7946 uint64_t t = y[3] + SHASigma<uint32_t, 6, 11, 25>(y[0]) + chs + w;
7947
7948 x[3] += t;
7949 y[3] = t + SHASigma<uint32_t, 2, 13, 22>(x[0]) + maj;
7950
7951 // y:x = ROL(y:x, 32)
7952 SHARotateEltsLeftOne(x);
7953 SHARotateEltsLeftOne(y);
7954 std::swap(x[0], y[0]);
7955 }
7956
7957 srcdst.SetUintArray(kFormat4S, part1 ? x : y);
7958 return srcdst;
7959 }
7960
7961 template <typename T, unsigned A, unsigned B, unsigned C>
SHASURotate(uint64_t x)7962 static uint64_t SHASURotate(uint64_t x) {
7963 return RotateRight(x, A, sizeof(T) * kBitsPerByte) ^
7964 RotateRight(x, B, sizeof(T) * kBitsPerByte) ^
7965 ((x & ~static_cast<T>(0)) >> C);
7966 }
7967
sha2su0(LogicVRegister srcdst,const LogicVRegister & src1)7968 LogicVRegister Simulator::sha2su0(LogicVRegister srcdst,
7969 const LogicVRegister& src1) {
7970 uint64_t w[4] = {};
7971 uint64_t result[4];
7972 srcdst.UintArray(kFormat4S, w);
7973 uint64_t x = src1.Uint(kFormat4S, 0);
7974
7975 result[0] = SHASURotate<uint32_t, 7, 18, 3>(w[1]) + w[0];
7976 result[1] = SHASURotate<uint32_t, 7, 18, 3>(w[2]) + w[1];
7977 result[2] = SHASURotate<uint32_t, 7, 18, 3>(w[3]) + w[2];
7978 result[3] = SHASURotate<uint32_t, 7, 18, 3>(x) + w[3];
7979
7980 srcdst.SetUintArray(kFormat4S, result);
7981 return srcdst;
7982 }
7983
sha2su1(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)7984 LogicVRegister Simulator::sha2su1(LogicVRegister srcdst,
7985 const LogicVRegister& src1,
7986 const LogicVRegister& src2) {
7987 uint64_t w[4] = {};
7988 uint64_t x[4] = {};
7989 uint64_t y[4] = {};
7990 uint64_t result[4];
7991 srcdst.UintArray(kFormat4S, w);
7992 src1.UintArray(kFormat4S, x);
7993 src2.UintArray(kFormat4S, y);
7994
7995 result[0] = SHASURotate<uint32_t, 17, 19, 10>(y[2]) + w[0] + x[1];
7996 result[1] = SHASURotate<uint32_t, 17, 19, 10>(y[3]) + w[1] + x[2];
7997 result[2] = SHASURotate<uint32_t, 17, 19, 10>(result[0]) + w[2] + x[3];
7998 result[3] = SHASURotate<uint32_t, 17, 19, 10>(result[1]) + w[3] + y[0];
7999
8000 srcdst.SetUintArray(kFormat4S, result);
8001 return srcdst;
8002 }
8003
sha512h(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8004 LogicVRegister Simulator::sha512h(LogicVRegister srcdst,
8005 const LogicVRegister& src1,
8006 const LogicVRegister& src2) {
8007 uint64_t w[2] = {};
8008 uint64_t x[2] = {};
8009 uint64_t y[2] = {};
8010 uint64_t result[2] = {};
8011 srcdst.UintArray(kFormat2D, w);
8012 src1.UintArray(kFormat2D, x);
8013 src2.UintArray(kFormat2D, y);
8014
8015 result[1] = (y[1] & x[0]) ^ (~y[1] & x[1]);
8016 result[1] += SHASigma<uint64_t, 14, 18, 41>(y[1]) + w[1];
8017
8018 uint64_t tmp = result[1] + y[0];
8019
8020 result[0] = (tmp & y[1]) ^ (~tmp & x[0]);
8021 result[0] += SHASigma<uint64_t, 14, 18, 41>(tmp) + w[0];
8022
8023 srcdst.SetUintArray(kFormat2D, result);
8024 return srcdst;
8025 }
8026
sha512h2(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8027 LogicVRegister Simulator::sha512h2(LogicVRegister srcdst,
8028 const LogicVRegister& src1,
8029 const LogicVRegister& src2) {
8030 uint64_t w[2] = {};
8031 uint64_t x[2] = {};
8032 uint64_t y[2] = {};
8033 uint64_t result[2] = {};
8034 srcdst.UintArray(kFormat2D, w);
8035 src1.UintArray(kFormat2D, x);
8036 src2.UintArray(kFormat2D, y);
8037
8038 result[1] = (x[0] & y[1]) ^ (x[0] & y[0]) ^ (y[1] & y[0]);
8039 result[1] += SHASigma<uint64_t, 28, 34, 39>(y[0]) + w[1];
8040
8041 result[0] = (result[1] & y[0]) ^ (result[1] & y[1]) ^ (y[1] & y[0]);
8042 result[0] += SHASigma<uint64_t, 28, 34, 39>(result[1]) + w[0];
8043
8044 srcdst.SetUintArray(kFormat2D, result);
8045 return srcdst;
8046 }
8047
sha512su0(LogicVRegister srcdst,const LogicVRegister & src1)8048 LogicVRegister Simulator::sha512su0(LogicVRegister srcdst,
8049 const LogicVRegister& src1) {
8050 uint64_t w[2] = {};
8051 uint64_t x[2] = {};
8052 uint64_t result[2] = {};
8053 srcdst.UintArray(kFormat2D, w);
8054 src1.UintArray(kFormat2D, x);
8055
8056 result[0] = SHASURotate<uint64_t, 1, 8, 7>(w[1]) + w[0];
8057 result[1] = SHASURotate<uint64_t, 1, 8, 7>(x[0]) + w[1];
8058
8059 srcdst.SetUintArray(kFormat2D, result);
8060 return srcdst;
8061 }
8062
sha512su1(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8063 LogicVRegister Simulator::sha512su1(LogicVRegister srcdst,
8064 const LogicVRegister& src1,
8065 const LogicVRegister& src2) {
8066 uint64_t w[2] = {};
8067 uint64_t x[2] = {};
8068 uint64_t y[2] = {};
8069 uint64_t result[2] = {};
8070 srcdst.UintArray(kFormat2D, w);
8071 src1.UintArray(kFormat2D, x);
8072 src2.UintArray(kFormat2D, y);
8073
8074 result[1] = w[1] + SHASURotate<uint64_t, 19, 61, 6>(x[1]) + y[1];
8075 result[0] = w[0] + SHASURotate<uint64_t, 19, 61, 6>(x[0]) + y[0];
8076
8077 srcdst.SetUintArray(kFormat2D, result);
8078 return srcdst;
8079 }
8080
GalMul(int table,uint64_t x)8081 static uint8_t GalMul(int table, uint64_t x) {
8082 // Galois multiplication lookup tables.
8083 static const uint8_t ffmul02[256] = {
8084 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16,
8085 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
8086 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46,
8087 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
8088 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76,
8089 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e,
8090 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6,
8091 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
8092 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6,
8093 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee,
8094 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1b, 0x19, 0x1f, 0x1d,
8095 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
8096 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d,
8097 0x23, 0x21, 0x27, 0x25, 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55,
8098 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, 0x7b, 0x79, 0x7f, 0x7d,
8099 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
8100 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d,
8101 0x83, 0x81, 0x87, 0x85, 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5,
8102 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, 0xdb, 0xd9, 0xdf, 0xdd,
8103 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
8104 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed,
8105 0xe3, 0xe1, 0xe7, 0xe5,
8106 };
8107
8108 static const uint8_t ffmul03[256] = {
8109 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d,
8110 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39,
8111 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65,
8112 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
8113 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d,
8114 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9,
8115 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5,
8116 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
8117 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd,
8118 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99,
8119 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9b, 0x98, 0x9d, 0x9e,
8120 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
8121 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6,
8122 0xbf, 0xbc, 0xb9, 0xba, 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2,
8123 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, 0xcb, 0xc8, 0xcd, 0xce,
8124 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
8125 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46,
8126 0x4f, 0x4c, 0x49, 0x4a, 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62,
8127 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, 0x3b, 0x38, 0x3d, 0x3e,
8128 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
8129 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16,
8130 0x1f, 0x1c, 0x19, 0x1a,
8131 };
8132
8133 static const uint8_t ffmul09[256] = {
8134 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53,
8135 0x6c, 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf,
8136 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3b, 0x32, 0x29, 0x20,
8137 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
8138 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8,
8139 0xc7, 0xce, 0xd5, 0xdc, 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49,
8140 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, 0xe6, 0xef, 0xf4, 0xfd,
8141 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
8142 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e,
8143 0x21, 0x28, 0x33, 0x3a, 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2,
8144 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, 0xec, 0xe5, 0xfe, 0xf7,
8145 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
8146 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f,
8147 0x10, 0x19, 0x02, 0x0b, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8,
8148 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, 0x47, 0x4e, 0x55, 0x5c,
8149 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
8150 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9,
8151 0xf6, 0xff, 0xe4, 0xed, 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35,
8152 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, 0xa1, 0xa8, 0xb3, 0xba,
8153 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
8154 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62,
8155 0x5d, 0x54, 0x4f, 0x46,
8156 };
8157
8158 static const uint8_t ffmul0b[256] = {
8159 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45,
8160 0x74, 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81,
8161 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7b, 0x70, 0x6d, 0x66,
8162 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
8163 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e,
8164 0xbf, 0xb4, 0xa9, 0xa2, 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7,
8165 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, 0x46, 0x4d, 0x50, 0x5b,
8166 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
8167 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8,
8168 0xf9, 0xf2, 0xef, 0xe4, 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c,
8169 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, 0xf7, 0xfc, 0xe1, 0xea,
8170 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
8171 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02,
8172 0x33, 0x38, 0x25, 0x2e, 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd,
8173 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, 0x3c, 0x37, 0x2a, 0x21,
8174 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
8175 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44,
8176 0x75, 0x7e, 0x63, 0x68, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80,
8177 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0x7a, 0x71, 0x6c, 0x67,
8178 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
8179 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f,
8180 0xbe, 0xb5, 0xa8, 0xa3,
8181 };
8182
8183 static const uint8_t ffmul0d[256] = {
8184 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f,
8185 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3,
8186 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, 0xbb, 0xb6, 0xa1, 0xac,
8187 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
8188 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14,
8189 0x37, 0x3a, 0x2d, 0x20, 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e,
8190 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0xbd, 0xb0, 0xa7, 0xaa,
8191 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
8192 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9,
8193 0x8a, 0x87, 0x90, 0x9d, 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25,
8194 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, 0xda, 0xd7, 0xc0, 0xcd,
8195 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
8196 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75,
8197 0x56, 0x5b, 0x4c, 0x41, 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42,
8198 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, 0xb1, 0xbc, 0xab, 0xa6,
8199 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
8200 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8,
8201 0xeb, 0xe6, 0xf1, 0xfc, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44,
8202 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0x0c, 0x01, 0x16, 0x1b,
8203 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
8204 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3,
8205 0x80, 0x8d, 0x9a, 0x97,
8206 };
8207
8208 static const uint8_t ffmul0e[256] = {
8209 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62,
8210 0x48, 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca,
8211 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdb, 0xd5, 0xc7, 0xc9,
8212 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
8213 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59,
8214 0x73, 0x7d, 0x6f, 0x61, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87,
8215 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0x4d, 0x43, 0x51, 0x5f,
8216 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
8217 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14,
8218 0x3e, 0x30, 0x22, 0x2c, 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc,
8219 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, 0x41, 0x4f, 0x5d, 0x53,
8220 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
8221 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3,
8222 0xe9, 0xe7, 0xf5, 0xfb, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0,
8223 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, 0x7a, 0x74, 0x66, 0x68,
8224 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
8225 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e,
8226 0xa4, 0xaa, 0xb8, 0xb6, 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26,
8227 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, 0x37, 0x39, 0x2b, 0x25,
8228 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
8229 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5,
8230 0x9f, 0x91, 0x83, 0x8d,
8231 };
8232
8233 x &= 255;
8234 switch (table) {
8235 case 0x2:
8236 return ffmul02[x];
8237 case 0x3:
8238 return ffmul03[x];
8239 case 0x9:
8240 return ffmul09[x];
8241 case 0xb:
8242 return ffmul0b[x];
8243 case 0xd:
8244 return ffmul0d[x];
8245 case 0xe:
8246 return ffmul0e[x];
8247 case 0:
8248 // Case 0 indicates no table lookup, used for some forward mix stages.
8249 return static_cast<uint8_t>(x);
8250 default:
8251 VIXL_UNREACHABLE();
8252 return static_cast<uint8_t>(x);
8253 }
8254 }
8255
8256
AESMixInner(uint64_t * x,int stage,bool inverse)8257 static uint8_t AESMixInner(uint64_t* x, int stage, bool inverse) {
8258 VIXL_ASSERT(IsUint2(stage));
8259
8260 int imc_gm[7] = {0xb, 0xd, 0x9, 0xe};
8261 int mc_gm[7] = {0x3, 0x0, 0x0, 0x2};
8262
8263 int* gm = inverse ? imc_gm : mc_gm;
8264 int index = 3 - stage;
8265
8266 uint8_t result = 0;
8267 for (int i = 0; i < 4; i++) {
8268 result ^= GalMul(gm[(index + i) % 4], x[i]);
8269 }
8270 return result;
8271 }
8272
8273
aesmix(LogicVRegister dst,const LogicVRegister & src,bool inverse)8274 LogicVRegister Simulator::aesmix(LogicVRegister dst,
8275 const LogicVRegister& src,
8276 bool inverse) {
8277 uint64_t in[16] = {};
8278 src.UintArray(kFormat16B, in);
8279 dst.ClearForWrite(kFormat16B);
8280
8281 for (int c = 0; c < 16; c++) {
8282 int cmod4 = c % 4;
8283 int d = c - cmod4;
8284 VIXL_ASSERT((d == 0) || (d == 4) || (d == 8) || (d == 12));
8285 dst.SetUint(kFormat16B, c, AESMixInner(&in[d], cmod4, inverse));
8286 }
8287
8288 return dst;
8289 }
8290
aes(LogicVRegister dst,const LogicVRegister & src,bool decrypt)8291 LogicVRegister Simulator::aes(LogicVRegister dst,
8292 const LogicVRegister& src,
8293 bool decrypt) {
8294 dst.ClearForWrite(kFormat16B);
8295
8296 // (Inverse) shift rows.
8297 uint8_t shift[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
8298 uint8_t shift_inv[] = {0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3};
8299 for (int i = 0; i < LaneCountFromFormat(kFormat16B); i++) {
8300 uint8_t index = decrypt ? shift_inv[i] : shift[i];
8301 dst.SetUint(kFormat16B, i, src.Uint(kFormat16B, index));
8302 }
8303
8304 // (Inverse) substitute bytes.
8305 static const uint8_t gf2[256] = {
8306 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b,
8307 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
8308 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26,
8309 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
8310 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
8311 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
8312 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed,
8313 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
8314 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f,
8315 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
8316 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec,
8317 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
8318 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14,
8319 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
8320 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d,
8321 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
8322 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f,
8323 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
8324 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11,
8325 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
8326 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f,
8327 0xb0, 0x54, 0xbb, 0x16,
8328 };
8329 static const uint8_t gf2_inv[256] = {
8330 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e,
8331 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
8332 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32,
8333 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
8334 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49,
8335 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
8336 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50,
8337 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
8338 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05,
8339 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
8340 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41,
8341 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
8342 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8,
8343 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
8344 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b,
8345 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
8346 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59,
8347 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
8348 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d,
8349 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
8350 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63,
8351 0x55, 0x21, 0x0c, 0x7d,
8352 };
8353
8354 for (int i = 0; i < LaneCountFromFormat(kFormat16B); i++) {
8355 const uint8_t* table = decrypt ? gf2_inv : gf2;
8356 dst.SetUint(kFormat16B, i, table[dst.Uint(kFormat16B, i)]);
8357 }
8358 return dst;
8359 }
8360
sm3partw1(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8361 LogicVRegister Simulator::sm3partw1(LogicVRegister srcdst,
8362 const LogicVRegister& src1,
8363 const LogicVRegister& src2) {
8364 using namespace std::placeholders;
8365 auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8366
8367 SimVRegister temp;
8368
8369 ext(kFormat16B, temp, src2, temp, 4);
8370 rol(kFormat4S, temp, temp, 15);
8371 eor(kFormat4S, temp, temp, src1);
8372 LogicVRegister r = eor(kFormat4S, temp, temp, srcdst);
8373
8374 uint64_t result[4] = {};
8375 r.UintArray(kFormat4S, result);
8376 for (int i = 0; i < 4; i++) {
8377 if (i == 3) {
8378 // result[3] already contains srcdst[3] ^ src1[3] from the operations
8379 // above.
8380 result[i] ^= ROL(result[0], 15);
8381 }
8382 result[i] ^= ROL(result[i], 15) ^ ROL(result[i], 23);
8383 }
8384 srcdst.SetUintArray(kFormat4S, result);
8385 return srcdst;
8386 }
8387
sm3partw2(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8388 LogicVRegister Simulator::sm3partw2(LogicVRegister srcdst,
8389 const LogicVRegister& src1,
8390 const LogicVRegister& src2) {
8391 using namespace std::placeholders;
8392 auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8393
8394 SimVRegister temp;
8395 VectorFormat vf = kFormat4S;
8396
8397 rol(vf, temp, src2, 7);
8398 LogicVRegister r = eor(vf, temp, temp, src1);
8399 eor(vf, srcdst, temp, srcdst);
8400
8401 uint64_t tmp2 = ROL(r.Uint(vf, 0), 15);
8402 tmp2 ^= ROL(tmp2, 15) ^ ROL(tmp2, 23);
8403 srcdst.SetUint(vf, 3, srcdst.Uint(vf, 3) ^ tmp2);
8404 return srcdst;
8405 }
8406
sm3ss1(LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & src3)8407 LogicVRegister Simulator::sm3ss1(LogicVRegister dst,
8408 const LogicVRegister& src1,
8409 const LogicVRegister& src2,
8410 const LogicVRegister& src3) {
8411 using namespace std::placeholders;
8412 auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8413
8414 VectorFormat vf = kFormat4S;
8415 uint64_t result = ROL(src1.Uint(vf, 3), 12);
8416 result += src2.Uint(vf, 3) + src3.Uint(vf, 3);
8417 dst.Clear();
8418 dst.SetUint(vf, 3, ROL(result, 7));
8419 return dst;
8420 }
8421
sm3tt1(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,int index,bool is_a)8422 LogicVRegister Simulator::sm3tt1(LogicVRegister srcdst,
8423 const LogicVRegister& src1,
8424 const LogicVRegister& src2,
8425 int index,
8426 bool is_a) {
8427 VectorFormat vf = kFormat4S;
8428 using namespace std::placeholders;
8429 auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8430 auto sd = std::bind(&LogicVRegister::Uint, srcdst, vf, _1);
8431
8432 VIXL_ASSERT(IsUint2(index));
8433
8434 uint64_t wjprime = src2.Uint(vf, index);
8435 uint64_t ss2 = src1.Uint(vf, 3) ^ ROL(sd(3), 12);
8436
8437 uint64_t tt1;
8438 if (is_a) {
8439 tt1 = CryptoOp<"parity"_h>(sd(1), sd(2), sd(3));
8440 } else {
8441 tt1 = CryptoOp<"majority"_h>(sd(1), sd(2), sd(3));
8442 }
8443 tt1 += sd(0) + ss2 + wjprime;
8444
8445 ext(kFormat16B, srcdst, srcdst, srcdst, 4);
8446 srcdst.SetUint(vf, 1, ROL(sd(1), 9));
8447 srcdst.SetUint(vf, 3, tt1);
8448 return srcdst;
8449 }
8450
sm3tt2(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,int index,bool is_a)8451 LogicVRegister Simulator::sm3tt2(LogicVRegister srcdst,
8452 const LogicVRegister& src1,
8453 const LogicVRegister& src2,
8454 int index,
8455 bool is_a) {
8456 VectorFormat vf = kFormat4S;
8457 using namespace std::placeholders;
8458 auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8459 auto sd = std::bind(&LogicVRegister::Uint, srcdst, vf, _1);
8460
8461 VIXL_ASSERT(IsUint2(index));
8462
8463 uint64_t wj = src2.Uint(vf, index);
8464
8465 uint64_t tt2;
8466 if (is_a) {
8467 tt2 = CryptoOp<"parity"_h>(sd(1), sd(2), sd(3));
8468 } else {
8469 tt2 = CryptoOp<"choose"_h>(sd(3), sd(2), sd(1));
8470 }
8471 tt2 += sd(0) + src1.Uint(vf, 3) + wj;
8472
8473 ext(kFormat16B, srcdst, srcdst, srcdst, 4);
8474 srcdst.SetUint(vf, 1, ROL(sd(1), 19));
8475 tt2 ^= ROL(tt2, 9) ^ ROL(tt2, 17);
8476 srcdst.SetUint(vf, 3, tt2);
8477 return srcdst;
8478 }
8479
// Substitute each of the four low bytes of `x` through the SM4 S-box and
// return the four substituted bytes packed in the same byte positions. Bits
// above bit 31 of `x` are ignored, and the upper 32 bits of the result are
// zero.
static uint64_t SM4SBox(uint64_t x) {
  // The table is looked up with (255 - byte): it holds the S-box values in
  // reverse order.
  static const uint8_t kReversedSBox[256] = {
      0x48, 0x39, 0xcb, 0xd7, 0x3e, 0x5f, 0xee, 0x79, 0x20, 0x4d, 0xdc, 0x3a,
      0xec, 0x7d, 0xf0, 0x18, 0x84, 0xc6, 0x6e, 0xc5, 0x09, 0xf1, 0xb9, 0x65,
      0x7e, 0x77, 0x96, 0x0c, 0x4a, 0x97, 0x69, 0x89, 0xb0, 0xb4, 0xe5, 0xb8,
      0x12, 0xd0, 0x74, 0x2d, 0xbd, 0x7b, 0xcd, 0xa5, 0x88, 0x31, 0xc1, 0x0a,
      0xd8, 0x5a, 0x10, 0x1f, 0x41, 0x5c, 0xd9, 0x11, 0x7f, 0xbc, 0xdd, 0xbb,
      0x92, 0xaf, 0x1b, 0x8d, 0x51, 0x5b, 0x6c, 0x6d, 0x72, 0x6a, 0xff, 0x03,
      0x2f, 0x8e, 0xfd, 0xde, 0x45, 0x37, 0xdb, 0xd5, 0x6f, 0x4e, 0x53, 0x0d,
      0xab, 0x23, 0x29, 0xc0, 0x60, 0xca, 0x66, 0x82, 0x2e, 0xe2, 0xf6, 0x1d,
      0xe3, 0xb1, 0x8c, 0xf5, 0x30, 0x32, 0x93, 0xad, 0x55, 0x1a, 0x34, 0x9b,
      0xa4, 0x5d, 0xae, 0xe0, 0xa1, 0x15, 0x61, 0xf9, 0xce, 0xf2, 0xf7, 0xa3,
      0xb5, 0x38, 0xc7, 0x40, 0xd2, 0x8a, 0xbf, 0xea, 0x9e, 0xc8, 0xc4, 0xa0,
      0xe7, 0x02, 0x36, 0x4c, 0x52, 0x27, 0xd3, 0x9f, 0x57, 0x46, 0x00, 0xd4,
      0x87, 0x78, 0x21, 0x01, 0x3b, 0x7c, 0x22, 0x25, 0xa2, 0xd1, 0x58, 0x63,
      0x5e, 0x0e, 0x24, 0x1e, 0x35, 0x9d, 0x56, 0x70, 0x4b, 0x0f, 0xeb, 0xf8,
      0x8b, 0xda, 0x64, 0x71, 0xb2, 0x81, 0x6b, 0x68, 0xa8, 0x4f, 0x85, 0xe6,
      0x19, 0x3c, 0x59, 0x83, 0xba, 0x17, 0x73, 0xf3, 0xfc, 0xa7, 0x07, 0x47,
      0xa6, 0x3f, 0x8f, 0x75, 0xfa, 0x94, 0xdf, 0x80, 0x95, 0xe8, 0x08, 0xc9,
      0xa9, 0x1c, 0xb3, 0xe4, 0x62, 0xac, 0xcf, 0xed, 0x43, 0x0b, 0x54, 0x33,
      0x7a, 0x98, 0xef, 0x91, 0xf4, 0x50, 0x42, 0x9c, 0x99, 0x06, 0x86, 0x49,
      0x26, 0x13, 0x44, 0xaa, 0xc3, 0x04, 0xbe, 0x2a, 0x76, 0x9a, 0x67, 0x2b,
      0x05, 0x2c, 0xfb, 0x28, 0xc2, 0x14, 0xb6, 0x16, 0xb7, 0x3d, 0xe1, 0xcc,
      0xfe, 0xe9, 0x90, 0xd6,
  };

  uint64_t substituted = 0;
  // Walk from the most significant of the four bytes to the least, so that
  // each substituted byte ends up in the position its input came from.
  for (int byte = 3; byte >= 0; byte--) {
    const uint64_t input = (x >> (byte * 8)) & 0xff;
    // For an eight-bit value, flipping all bits is the same as 255 - value.
    const uint8_t table_index = static_cast<uint8_t>(input ^ 0xff);
    substituted = (substituted << 8) | kReversedSBox[table_index];
  }
  return substituted;
}
8512
sm4(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_key)8513 LogicVRegister Simulator::sm4(LogicVRegister srcdst,
8514 const LogicVRegister& src1,
8515 const LogicVRegister& src2,
8516 bool is_key) {
8517 using namespace std::placeholders;
8518 auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8519
8520 VectorFormat vf = kFormat4S;
8521 uint64_t result[4] = {};
8522 if (is_key) {
8523 src1.UintArray(vf, result);
8524 } else {
8525 srcdst.UintArray(vf, result);
8526 }
8527
8528 for (int i = 0; i < 4; i++) {
8529 uint64_t k = is_key ? src2.Uint(vf, i) : src1.Uint(vf, i);
8530 uint64_t intval = result[3] ^ result[2] ^ result[1] ^ k;
8531 intval = SM4SBox(intval);
8532
8533 if (is_key) {
8534 intval ^= ROL(intval, 13) ^ ROL(intval, 23);
8535 } else {
8536 intval ^=
8537 ROL(intval, 2) ^ ROL(intval, 10) ^ ROL(intval, 18) ^ ROL(intval, 24);
8538 }
8539
8540 intval ^= result[0];
8541
8542 result[0] = result[1];
8543 result[1] = result[2];
8544 result[2] = result[3];
8545 result[3] = intval;
8546 }
8547 srcdst.SetUintArray(vf, result);
8548 return srcdst;
8549 }
8550
8551 } // namespace aarch64
8552 } // namespace vixl
8553
8554 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
8555