1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29 #include <cmath>
30
31 #include "simulator-aarch64.h"
32
33 namespace vixl {
34 namespace aarch64 {
35
36 template <>
FPDefaultNaN()37 double Simulator::FPDefaultNaN<double>() {
38 return kFP64DefaultNaN;
39 }
40
41
42 template <>
FPDefaultNaN()43 float Simulator::FPDefaultNaN<float>() {
44 return kFP32DefaultNaN;
45 }
46
47 // See FPRound for a description of this function.
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)48 static inline double FPRoundToDouble(int64_t sign,
49 int64_t exponent,
50 uint64_t mantissa,
51 FPRounding round_mode) {
52 int64_t bits =
53 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
54 exponent,
55 mantissa,
56 round_mode);
57 return RawbitsToDouble(bits);
58 }
59
60
61 // See FPRound for a description of this function.
FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)62 static inline float FPRoundToFloat(int64_t sign,
63 int64_t exponent,
64 uint64_t mantissa,
65 FPRounding round_mode) {
66 int32_t bits =
67 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
68 exponent,
69 mantissa,
70 round_mode);
71 return RawbitsToFloat(bits);
72 }
73
74
75 // See FPRound for a description of this function.
FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)76 static inline float16 FPRoundToFloat16(int64_t sign,
77 int64_t exponent,
78 uint64_t mantissa,
79 FPRounding round_mode) {
80 return FPRound<float16,
81 kFloat16ExponentBits,
82 kFloat16MantissaBits>(sign, exponent, mantissa, round_mode);
83 }
84
85
FixedToDouble(int64_t src,int fbits,FPRounding round)86 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
87 if (src >= 0) {
88 return UFixedToDouble(src, fbits, round);
89 } else if (src == INT64_MIN) {
90 return -UFixedToDouble(src, fbits, round);
91 } else {
92 return -UFixedToDouble(-src, fbits, round);
93 }
94 }
95
96
UFixedToDouble(uint64_t src,int fbits,FPRounding round)97 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
98 // An input of 0 is a special case because the result is effectively
99 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
100 if (src == 0) {
101 return 0.0;
102 }
103
104 // Calculate the exponent. The highest significant bit will have the value
105 // 2^exponent.
106 const int highest_significant_bit = 63 - CountLeadingZeros(src);
107 const int64_t exponent = highest_significant_bit - fbits;
108
109 return FPRoundToDouble(0, exponent, src, round);
110 }
111
112
FixedToFloat(int64_t src,int fbits,FPRounding round)113 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
114 if (src >= 0) {
115 return UFixedToFloat(src, fbits, round);
116 } else if (src == INT64_MIN) {
117 return -UFixedToFloat(src, fbits, round);
118 } else {
119 return -UFixedToFloat(-src, fbits, round);
120 }
121 }
122
123
UFixedToFloat(uint64_t src,int fbits,FPRounding round)124 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
125 // An input of 0 is a special case because the result is effectively
126 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
127 if (src == 0) {
128 return 0.0f;
129 }
130
131 // Calculate the exponent. The highest significant bit will have the value
132 // 2^exponent.
133 const int highest_significant_bit = 63 - CountLeadingZeros(src);
134 const int32_t exponent = highest_significant_bit - fbits;
135
136 return FPRoundToFloat(0, exponent, src, round);
137 }
138
139
FPToDouble(float value)140 double Simulator::FPToDouble(float value) {
141 switch (std::fpclassify(value)) {
142 case FP_NAN: {
143 if (IsSignallingNaN(value)) {
144 FPProcessException();
145 }
146 if (ReadDN()) return kFP64DefaultNaN;
147
148 // Convert NaNs as the processor would:
149 // - The sign is propagated.
150 // - The payload (mantissa) is transferred entirely, except that the top
151 // bit is forced to '1', making the result a quiet NaN. The unused
152 // (low-order) payload bits are set to 0.
153 uint32_t raw = FloatToRawbits(value);
154
155 uint64_t sign = raw >> 31;
156 uint64_t exponent = (1 << 11) - 1;
157 uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
158 payload <<= (52 - 23); // The unused low-order bits should be 0.
159 payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
160
161 return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
162 }
163
164 case FP_ZERO:
165 case FP_NORMAL:
166 case FP_SUBNORMAL:
167 case FP_INFINITE: {
168 // All other inputs are preserved in a standard cast, because every value
169 // representable using an IEEE-754 float is also representable using an
170 // IEEE-754 double.
171 return static_cast<double>(value);
172 }
173 }
174
175 VIXL_UNREACHABLE();
176 return static_cast<double>(value);
177 }
178
179
FPToFloat(float16 value)180 float Simulator::FPToFloat(float16 value) {
181 uint32_t sign = value >> 15;
182 uint32_t exponent =
183 ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
184 kFloat16MantissaBits,
185 value);
186 uint32_t mantissa =
187 ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value);
188
189 switch (Float16Classify(value)) {
190 case FP_ZERO:
191 return (sign == 0) ? 0.0f : -0.0f;
192
193 case FP_INFINITE:
194 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
195
196 case FP_SUBNORMAL: {
197 // Calculate shift required to put mantissa into the most-significant bits
198 // of the destination mantissa.
199 int shift = CountLeadingZeros(mantissa << (32 - 10));
200
201 // Shift mantissa and discard implicit '1'.
202 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
203 mantissa &= (1 << kFloatMantissaBits) - 1;
204
205 // Adjust the exponent for the shift applied, and rebias.
206 exponent = exponent - shift + (-15 + 127);
207 break;
208 }
209
210 case FP_NAN:
211 if (IsSignallingNaN(value)) {
212 FPProcessException();
213 }
214 if (ReadDN()) return kFP32DefaultNaN;
215
216 // Convert NaNs as the processor would:
217 // - The sign is propagated.
218 // - The payload (mantissa) is transferred entirely, except that the top
219 // bit is forced to '1', making the result a quiet NaN. The unused
220 // (low-order) payload bits are set to 0.
221 exponent = (1 << kFloatExponentBits) - 1;
222
223 // Increase bits in mantissa, making low-order bits 0.
224 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
225 mantissa |= 1 << 22; // Force a quiet NaN.
226 break;
227
228 case FP_NORMAL:
229 // Increase bits in mantissa, making low-order bits 0.
230 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
231
232 // Change exponent bias.
233 exponent += (-15 + 127);
234 break;
235
236 default:
237 VIXL_UNREACHABLE();
238 }
239 return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
240 mantissa);
241 }
242
243
FPToFloat16(float value,FPRounding round_mode)244 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
245 // Only the FPTieEven rounding mode is implemented.
246 VIXL_ASSERT(round_mode == FPTieEven);
247 USE(round_mode);
248
249 uint32_t raw = FloatToRawbits(value);
250 int32_t sign = raw >> 31;
251 int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
252 uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
253
254 switch (std::fpclassify(value)) {
255 case FP_NAN: {
256 if (IsSignallingNaN(value)) {
257 FPProcessException();
258 }
259 if (ReadDN()) return kFP16DefaultNaN;
260
261 // Convert NaNs as the processor would:
262 // - The sign is propagated.
263 // - The payload (mantissa) is transferred as much as possible, except
264 // that the top bit is forced to '1', making the result a quiet NaN.
265 float16 result =
266 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
267 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
268 result |= (1 << 9); // Force a quiet NaN;
269 return result;
270 }
271
272 case FP_ZERO:
273 return (sign == 0) ? 0 : 0x8000;
274
275 case FP_INFINITE:
276 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
277
278 case FP_NORMAL:
279 case FP_SUBNORMAL: {
280 // Convert float-to-half as the processor would, assuming that FPCR.FZ
281 // (flush-to-zero) is not set.
282
283 // Add the implicit '1' bit to the mantissa.
284 mantissa += (1 << 23);
285 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
286 }
287 }
288
289 VIXL_UNREACHABLE();
290 return 0;
291 }
292
293
FPToFloat16(double value,FPRounding round_mode)294 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
295 // Only the FPTieEven rounding mode is implemented.
296 VIXL_ASSERT(round_mode == FPTieEven);
297 USE(round_mode);
298
299 uint64_t raw = DoubleToRawbits(value);
300 int32_t sign = raw >> 63;
301 int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
302 uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
303
304 switch (std::fpclassify(value)) {
305 case FP_NAN: {
306 if (IsSignallingNaN(value)) {
307 FPProcessException();
308 }
309 if (ReadDN()) return kFP16DefaultNaN;
310
311 // Convert NaNs as the processor would:
312 // - The sign is propagated.
313 // - The payload (mantissa) is transferred as much as possible, except
314 // that the top bit is forced to '1', making the result a quiet NaN.
315 float16 result =
316 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
317 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
318 result |= (1 << 9); // Force a quiet NaN;
319 return result;
320 }
321
322 case FP_ZERO:
323 return (sign == 0) ? 0 : 0x8000;
324
325 case FP_INFINITE:
326 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
327
328 case FP_NORMAL:
329 case FP_SUBNORMAL: {
330 // Convert double-to-half as the processor would, assuming that FPCR.FZ
331 // (flush-to-zero) is not set.
332
333 // Add the implicit '1' bit to the mantissa.
334 mantissa += (UINT64_C(1) << 52);
335 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
336 }
337 }
338
339 VIXL_UNREACHABLE();
340 return 0;
341 }
342
343
FPToFloat(double value,FPRounding round_mode)344 float Simulator::FPToFloat(double value, FPRounding round_mode) {
345 // Only the FPTieEven rounding mode is implemented.
346 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
347 USE(round_mode);
348
349 switch (std::fpclassify(value)) {
350 case FP_NAN: {
351 if (IsSignallingNaN(value)) {
352 FPProcessException();
353 }
354 if (ReadDN()) return kFP32DefaultNaN;
355
356 // Convert NaNs as the processor would:
357 // - The sign is propagated.
358 // - The payload (mantissa) is transferred as much as possible, except
359 // that the top bit is forced to '1', making the result a quiet NaN.
360 uint64_t raw = DoubleToRawbits(value);
361
362 uint32_t sign = raw >> 63;
363 uint32_t exponent = (1 << 8) - 1;
364 uint32_t payload =
365 static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
366 payload |= (1 << 22); // Force a quiet NaN.
367
368 return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
369 }
370
371 case FP_ZERO:
372 case FP_INFINITE: {
373 // In a C++ cast, any value representable in the target type will be
374 // unchanged. This is always the case for +/-0.0 and infinities.
375 return static_cast<float>(value);
376 }
377
378 case FP_NORMAL:
379 case FP_SUBNORMAL: {
380 // Convert double-to-float as the processor would, assuming that FPCR.FZ
381 // (flush-to-zero) is not set.
382 uint64_t raw = DoubleToRawbits(value);
383 // Extract the IEEE-754 double components.
384 uint32_t sign = raw >> 63;
385 // Extract the exponent and remove the IEEE-754 encoding bias.
386 int32_t exponent =
387 static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
388 // Extract the mantissa and add the implicit '1' bit.
389 uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
390 if (std::fpclassify(value) == FP_NORMAL) {
391 mantissa |= (UINT64_C(1) << 52);
392 }
393 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
394 }
395 }
396
397 VIXL_UNREACHABLE();
398 return value;
399 }
400
401
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)402 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
403 dst.ClearForWrite(vform);
404 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
405 dst.ReadUintFromMem(vform, i, addr);
406 addr += LaneSizeInBytesFromFormat(vform);
407 }
408 }
409
410
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)411 void Simulator::ld1(VectorFormat vform,
412 LogicVRegister dst,
413 int index,
414 uint64_t addr) {
415 dst.ReadUintFromMem(vform, index, addr);
416 }
417
418
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)419 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
420 dst.ClearForWrite(vform);
421 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
422 dst.ReadUintFromMem(vform, i, addr);
423 }
424 }
425
426
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)427 void Simulator::ld2(VectorFormat vform,
428 LogicVRegister dst1,
429 LogicVRegister dst2,
430 uint64_t addr1) {
431 dst1.ClearForWrite(vform);
432 dst2.ClearForWrite(vform);
433 int esize = LaneSizeInBytesFromFormat(vform);
434 uint64_t addr2 = addr1 + esize;
435 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
436 dst1.ReadUintFromMem(vform, i, addr1);
437 dst2.ReadUintFromMem(vform, i, addr2);
438 addr1 += 2 * esize;
439 addr2 += 2 * esize;
440 }
441 }
442
443
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)444 void Simulator::ld2(VectorFormat vform,
445 LogicVRegister dst1,
446 LogicVRegister dst2,
447 int index,
448 uint64_t addr1) {
449 dst1.ClearForWrite(vform);
450 dst2.ClearForWrite(vform);
451 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
452 dst1.ReadUintFromMem(vform, index, addr1);
453 dst2.ReadUintFromMem(vform, index, addr2);
454 }
455
456
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)457 void Simulator::ld2r(VectorFormat vform,
458 LogicVRegister dst1,
459 LogicVRegister dst2,
460 uint64_t addr) {
461 dst1.ClearForWrite(vform);
462 dst2.ClearForWrite(vform);
463 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
464 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
465 dst1.ReadUintFromMem(vform, i, addr);
466 dst2.ReadUintFromMem(vform, i, addr2);
467 }
468 }
469
470
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)471 void Simulator::ld3(VectorFormat vform,
472 LogicVRegister dst1,
473 LogicVRegister dst2,
474 LogicVRegister dst3,
475 uint64_t addr1) {
476 dst1.ClearForWrite(vform);
477 dst2.ClearForWrite(vform);
478 dst3.ClearForWrite(vform);
479 int esize = LaneSizeInBytesFromFormat(vform);
480 uint64_t addr2 = addr1 + esize;
481 uint64_t addr3 = addr2 + esize;
482 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
483 dst1.ReadUintFromMem(vform, i, addr1);
484 dst2.ReadUintFromMem(vform, i, addr2);
485 dst3.ReadUintFromMem(vform, i, addr3);
486 addr1 += 3 * esize;
487 addr2 += 3 * esize;
488 addr3 += 3 * esize;
489 }
490 }
491
492
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)493 void Simulator::ld3(VectorFormat vform,
494 LogicVRegister dst1,
495 LogicVRegister dst2,
496 LogicVRegister dst3,
497 int index,
498 uint64_t addr1) {
499 dst1.ClearForWrite(vform);
500 dst2.ClearForWrite(vform);
501 dst3.ClearForWrite(vform);
502 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
503 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
504 dst1.ReadUintFromMem(vform, index, addr1);
505 dst2.ReadUintFromMem(vform, index, addr2);
506 dst3.ReadUintFromMem(vform, index, addr3);
507 }
508
509
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)510 void Simulator::ld3r(VectorFormat vform,
511 LogicVRegister dst1,
512 LogicVRegister dst2,
513 LogicVRegister dst3,
514 uint64_t addr) {
515 dst1.ClearForWrite(vform);
516 dst2.ClearForWrite(vform);
517 dst3.ClearForWrite(vform);
518 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
519 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
520 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
521 dst1.ReadUintFromMem(vform, i, addr);
522 dst2.ReadUintFromMem(vform, i, addr2);
523 dst3.ReadUintFromMem(vform, i, addr3);
524 }
525 }
526
527
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)528 void Simulator::ld4(VectorFormat vform,
529 LogicVRegister dst1,
530 LogicVRegister dst2,
531 LogicVRegister dst3,
532 LogicVRegister dst4,
533 uint64_t addr1) {
534 dst1.ClearForWrite(vform);
535 dst2.ClearForWrite(vform);
536 dst3.ClearForWrite(vform);
537 dst4.ClearForWrite(vform);
538 int esize = LaneSizeInBytesFromFormat(vform);
539 uint64_t addr2 = addr1 + esize;
540 uint64_t addr3 = addr2 + esize;
541 uint64_t addr4 = addr3 + esize;
542 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
543 dst1.ReadUintFromMem(vform, i, addr1);
544 dst2.ReadUintFromMem(vform, i, addr2);
545 dst3.ReadUintFromMem(vform, i, addr3);
546 dst4.ReadUintFromMem(vform, i, addr4);
547 addr1 += 4 * esize;
548 addr2 += 4 * esize;
549 addr3 += 4 * esize;
550 addr4 += 4 * esize;
551 }
552 }
553
554
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)555 void Simulator::ld4(VectorFormat vform,
556 LogicVRegister dst1,
557 LogicVRegister dst2,
558 LogicVRegister dst3,
559 LogicVRegister dst4,
560 int index,
561 uint64_t addr1) {
562 dst1.ClearForWrite(vform);
563 dst2.ClearForWrite(vform);
564 dst3.ClearForWrite(vform);
565 dst4.ClearForWrite(vform);
566 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
567 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
568 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
569 dst1.ReadUintFromMem(vform, index, addr1);
570 dst2.ReadUintFromMem(vform, index, addr2);
571 dst3.ReadUintFromMem(vform, index, addr3);
572 dst4.ReadUintFromMem(vform, index, addr4);
573 }
574
575
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)576 void Simulator::ld4r(VectorFormat vform,
577 LogicVRegister dst1,
578 LogicVRegister dst2,
579 LogicVRegister dst3,
580 LogicVRegister dst4,
581 uint64_t addr) {
582 dst1.ClearForWrite(vform);
583 dst2.ClearForWrite(vform);
584 dst3.ClearForWrite(vform);
585 dst4.ClearForWrite(vform);
586 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
587 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
588 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
589 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
590 dst1.ReadUintFromMem(vform, i, addr);
591 dst2.ReadUintFromMem(vform, i, addr2);
592 dst3.ReadUintFromMem(vform, i, addr3);
593 dst4.ReadUintFromMem(vform, i, addr4);
594 }
595 }
596
597
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)598 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
599 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
600 src.WriteUintToMem(vform, i, addr);
601 addr += LaneSizeInBytesFromFormat(vform);
602 }
603 }
604
605
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)606 void Simulator::st1(VectorFormat vform,
607 LogicVRegister src,
608 int index,
609 uint64_t addr) {
610 src.WriteUintToMem(vform, index, addr);
611 }
612
613
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)614 void Simulator::st2(VectorFormat vform,
615 LogicVRegister dst,
616 LogicVRegister dst2,
617 uint64_t addr) {
618 int esize = LaneSizeInBytesFromFormat(vform);
619 uint64_t addr2 = addr + esize;
620 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
621 dst.WriteUintToMem(vform, i, addr);
622 dst2.WriteUintToMem(vform, i, addr2);
623 addr += 2 * esize;
624 addr2 += 2 * esize;
625 }
626 }
627
628
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)629 void Simulator::st2(VectorFormat vform,
630 LogicVRegister dst,
631 LogicVRegister dst2,
632 int index,
633 uint64_t addr) {
634 int esize = LaneSizeInBytesFromFormat(vform);
635 dst.WriteUintToMem(vform, index, addr);
636 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
637 }
638
639
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)640 void Simulator::st3(VectorFormat vform,
641 LogicVRegister dst,
642 LogicVRegister dst2,
643 LogicVRegister dst3,
644 uint64_t addr) {
645 int esize = LaneSizeInBytesFromFormat(vform);
646 uint64_t addr2 = addr + esize;
647 uint64_t addr3 = addr2 + esize;
648 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
649 dst.WriteUintToMem(vform, i, addr);
650 dst2.WriteUintToMem(vform, i, addr2);
651 dst3.WriteUintToMem(vform, i, addr3);
652 addr += 3 * esize;
653 addr2 += 3 * esize;
654 addr3 += 3 * esize;
655 }
656 }
657
658
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)659 void Simulator::st3(VectorFormat vform,
660 LogicVRegister dst,
661 LogicVRegister dst2,
662 LogicVRegister dst3,
663 int index,
664 uint64_t addr) {
665 int esize = LaneSizeInBytesFromFormat(vform);
666 dst.WriteUintToMem(vform, index, addr);
667 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
668 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
669 }
670
671
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)672 void Simulator::st4(VectorFormat vform,
673 LogicVRegister dst,
674 LogicVRegister dst2,
675 LogicVRegister dst3,
676 LogicVRegister dst4,
677 uint64_t addr) {
678 int esize = LaneSizeInBytesFromFormat(vform);
679 uint64_t addr2 = addr + esize;
680 uint64_t addr3 = addr2 + esize;
681 uint64_t addr4 = addr3 + esize;
682 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
683 dst.WriteUintToMem(vform, i, addr);
684 dst2.WriteUintToMem(vform, i, addr2);
685 dst3.WriteUintToMem(vform, i, addr3);
686 dst4.WriteUintToMem(vform, i, addr4);
687 addr += 4 * esize;
688 addr2 += 4 * esize;
689 addr3 += 4 * esize;
690 addr4 += 4 * esize;
691 }
692 }
693
694
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)695 void Simulator::st4(VectorFormat vform,
696 LogicVRegister dst,
697 LogicVRegister dst2,
698 LogicVRegister dst3,
699 LogicVRegister dst4,
700 int index,
701 uint64_t addr) {
702 int esize = LaneSizeInBytesFromFormat(vform);
703 dst.WriteUintToMem(vform, index, addr);
704 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
705 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
706 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
707 }
708
709
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)710 LogicVRegister Simulator::cmp(VectorFormat vform,
711 LogicVRegister dst,
712 const LogicVRegister& src1,
713 const LogicVRegister& src2,
714 Condition cond) {
715 dst.ClearForWrite(vform);
716 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
717 int64_t sa = src1.Int(vform, i);
718 int64_t sb = src2.Int(vform, i);
719 uint64_t ua = src1.Uint(vform, i);
720 uint64_t ub = src2.Uint(vform, i);
721 bool result = false;
722 switch (cond) {
723 case eq:
724 result = (ua == ub);
725 break;
726 case ge:
727 result = (sa >= sb);
728 break;
729 case gt:
730 result = (sa > sb);
731 break;
732 case hi:
733 result = (ua > ub);
734 break;
735 case hs:
736 result = (ua >= ub);
737 break;
738 case lt:
739 result = (sa < sb);
740 break;
741 case le:
742 result = (sa <= sb);
743 break;
744 default:
745 VIXL_UNREACHABLE();
746 break;
747 }
748 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
749 }
750 return dst;
751 }
752
753
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)754 LogicVRegister Simulator::cmp(VectorFormat vform,
755 LogicVRegister dst,
756 const LogicVRegister& src1,
757 int imm,
758 Condition cond) {
759 SimVRegister temp;
760 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
761 return cmp(vform, dst, src1, imm_reg, cond);
762 }
763
764
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)765 LogicVRegister Simulator::cmptst(VectorFormat vform,
766 LogicVRegister dst,
767 const LogicVRegister& src1,
768 const LogicVRegister& src2) {
769 dst.ClearForWrite(vform);
770 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
771 uint64_t ua = src1.Uint(vform, i);
772 uint64_t ub = src2.Uint(vform, i);
773 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
774 }
775 return dst;
776 }
777
778
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)779 LogicVRegister Simulator::add(VectorFormat vform,
780 LogicVRegister dst,
781 const LogicVRegister& src1,
782 const LogicVRegister& src2) {
783 int lane_size = LaneSizeInBitsFromFormat(vform);
784 dst.ClearForWrite(vform);
785 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
786 // Test for unsigned saturation.
787 uint64_t ua = src1.UintLeftJustified(vform, i);
788 uint64_t ub = src2.UintLeftJustified(vform, i);
789 uint64_t ur = ua + ub;
790 if (ur < ua) {
791 dst.SetUnsignedSat(i, true);
792 }
793
794 // Test for signed saturation.
795 bool pos_a = (ua >> 63) == 0;
796 bool pos_b = (ub >> 63) == 0;
797 bool pos_r = (ur >> 63) == 0;
798 // If the signs of the operands are the same, but different from the result,
799 // there was an overflow.
800 if ((pos_a == pos_b) && (pos_a != pos_r)) {
801 dst.SetSignedSat(i, pos_a);
802 }
803
804 dst.SetInt(vform, i, ur >> (64 - lane_size));
805 }
806 return dst;
807 }
808
809
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)810 LogicVRegister Simulator::addp(VectorFormat vform,
811 LogicVRegister dst,
812 const LogicVRegister& src1,
813 const LogicVRegister& src2) {
814 SimVRegister temp1, temp2;
815 uzp1(vform, temp1, src1, src2);
816 uzp2(vform, temp2, src1, src2);
817 add(vform, dst, temp1, temp2);
818 return dst;
819 }
820
821
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)822 LogicVRegister Simulator::mla(VectorFormat vform,
823 LogicVRegister dst,
824 const LogicVRegister& src1,
825 const LogicVRegister& src2) {
826 SimVRegister temp;
827 mul(vform, temp, src1, src2);
828 add(vform, dst, dst, temp);
829 return dst;
830 }
831
832
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)833 LogicVRegister Simulator::mls(VectorFormat vform,
834 LogicVRegister dst,
835 const LogicVRegister& src1,
836 const LogicVRegister& src2) {
837 SimVRegister temp;
838 mul(vform, temp, src1, src2);
839 sub(vform, dst, dst, temp);
840 return dst;
841 }
842
843
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)844 LogicVRegister Simulator::mul(VectorFormat vform,
845 LogicVRegister dst,
846 const LogicVRegister& src1,
847 const LogicVRegister& src2) {
848 dst.ClearForWrite(vform);
849 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
850 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
851 }
852 return dst;
853 }
854
855
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)856 LogicVRegister Simulator::mul(VectorFormat vform,
857 LogicVRegister dst,
858 const LogicVRegister& src1,
859 const LogicVRegister& src2,
860 int index) {
861 SimVRegister temp;
862 VectorFormat indexform = VectorFormatFillQ(vform);
863 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
864 }
865
866
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)867 LogicVRegister Simulator::mla(VectorFormat vform,
868 LogicVRegister dst,
869 const LogicVRegister& src1,
870 const LogicVRegister& src2,
871 int index) {
872 SimVRegister temp;
873 VectorFormat indexform = VectorFormatFillQ(vform);
874 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
875 }
876
877
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)878 LogicVRegister Simulator::mls(VectorFormat vform,
879 LogicVRegister dst,
880 const LogicVRegister& src1,
881 const LogicVRegister& src2,
882 int index) {
883 SimVRegister temp;
884 VectorFormat indexform = VectorFormatFillQ(vform);
885 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
886 }
887
888
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)889 LogicVRegister Simulator::smull(VectorFormat vform,
890 LogicVRegister dst,
891 const LogicVRegister& src1,
892 const LogicVRegister& src2,
893 int index) {
894 SimVRegister temp;
895 VectorFormat indexform =
896 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
897 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
898 }
899
900
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)901 LogicVRegister Simulator::smull2(VectorFormat vform,
902 LogicVRegister dst,
903 const LogicVRegister& src1,
904 const LogicVRegister& src2,
905 int index) {
906 SimVRegister temp;
907 VectorFormat indexform =
908 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
909 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
910 }
911
912
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)913 LogicVRegister Simulator::umull(VectorFormat vform,
914 LogicVRegister dst,
915 const LogicVRegister& src1,
916 const LogicVRegister& src2,
917 int index) {
918 SimVRegister temp;
919 VectorFormat indexform =
920 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
921 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
922 }
923
924
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)925 LogicVRegister Simulator::umull2(VectorFormat vform,
926 LogicVRegister dst,
927 const LogicVRegister& src1,
928 const LogicVRegister& src2,
929 int index) {
930 SimVRegister temp;
931 VectorFormat indexform =
932 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
933 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
934 }
935
936
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)937 LogicVRegister Simulator::smlal(VectorFormat vform,
938 LogicVRegister dst,
939 const LogicVRegister& src1,
940 const LogicVRegister& src2,
941 int index) {
942 SimVRegister temp;
943 VectorFormat indexform =
944 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
945 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
946 }
947
948
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)949 LogicVRegister Simulator::smlal2(VectorFormat vform,
950 LogicVRegister dst,
951 const LogicVRegister& src1,
952 const LogicVRegister& src2,
953 int index) {
954 SimVRegister temp;
955 VectorFormat indexform =
956 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
957 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
958 }
959
960
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)961 LogicVRegister Simulator::umlal(VectorFormat vform,
962 LogicVRegister dst,
963 const LogicVRegister& src1,
964 const LogicVRegister& src2,
965 int index) {
966 SimVRegister temp;
967 VectorFormat indexform =
968 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
969 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
970 }
971
972
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)973 LogicVRegister Simulator::umlal2(VectorFormat vform,
974 LogicVRegister dst,
975 const LogicVRegister& src1,
976 const LogicVRegister& src2,
977 int index) {
978 SimVRegister temp;
979 VectorFormat indexform =
980 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
981 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
982 }
983
984
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)985 LogicVRegister Simulator::smlsl(VectorFormat vform,
986 LogicVRegister dst,
987 const LogicVRegister& src1,
988 const LogicVRegister& src2,
989 int index) {
990 SimVRegister temp;
991 VectorFormat indexform =
992 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
993 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
994 }
995
996
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)997 LogicVRegister Simulator::smlsl2(VectorFormat vform,
998 LogicVRegister dst,
999 const LogicVRegister& src1,
1000 const LogicVRegister& src2,
1001 int index) {
1002 SimVRegister temp;
1003 VectorFormat indexform =
1004 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1005 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1006 }
1007
1008
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1009 LogicVRegister Simulator::umlsl(VectorFormat vform,
1010 LogicVRegister dst,
1011 const LogicVRegister& src1,
1012 const LogicVRegister& src2,
1013 int index) {
1014 SimVRegister temp;
1015 VectorFormat indexform =
1016 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1017 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1018 }
1019
1020
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1021 LogicVRegister Simulator::umlsl2(VectorFormat vform,
1022 LogicVRegister dst,
1023 const LogicVRegister& src1,
1024 const LogicVRegister& src2,
1025 int index) {
1026 SimVRegister temp;
1027 VectorFormat indexform =
1028 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1029 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1030 }
1031
1032
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1033 LogicVRegister Simulator::sqdmull(VectorFormat vform,
1034 LogicVRegister dst,
1035 const LogicVRegister& src1,
1036 const LogicVRegister& src2,
1037 int index) {
1038 SimVRegister temp;
1039 VectorFormat indexform =
1040 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1041 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
1042 }
1043
1044
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1045 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
1046 LogicVRegister dst,
1047 const LogicVRegister& src1,
1048 const LogicVRegister& src2,
1049 int index) {
1050 SimVRegister temp;
1051 VectorFormat indexform =
1052 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1053 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1054 }
1055
1056
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1057 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
1058 LogicVRegister dst,
1059 const LogicVRegister& src1,
1060 const LogicVRegister& src2,
1061 int index) {
1062 SimVRegister temp;
1063 VectorFormat indexform =
1064 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1065 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
1066 }
1067
1068
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1069 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
1070 LogicVRegister dst,
1071 const LogicVRegister& src1,
1072 const LogicVRegister& src2,
1073 int index) {
1074 SimVRegister temp;
1075 VectorFormat indexform =
1076 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1077 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1078 }
1079
1080
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1081 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1082 LogicVRegister dst,
1083 const LogicVRegister& src1,
1084 const LogicVRegister& src2,
1085 int index) {
1086 SimVRegister temp;
1087 VectorFormat indexform =
1088 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1089 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1090 }
1091
1092
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1093 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1094 LogicVRegister dst,
1095 const LogicVRegister& src1,
1096 const LogicVRegister& src2,
1097 int index) {
1098 SimVRegister temp;
1099 VectorFormat indexform =
1100 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1101 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1102 }
1103
1104
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1105 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1106 LogicVRegister dst,
1107 const LogicVRegister& src1,
1108 const LogicVRegister& src2,
1109 int index) {
1110 SimVRegister temp;
1111 VectorFormat indexform = VectorFormatFillQ(vform);
1112 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1113 }
1114
1115
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1116 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1117 LogicVRegister dst,
1118 const LogicVRegister& src1,
1119 const LogicVRegister& src2,
1120 int index) {
1121 SimVRegister temp;
1122 VectorFormat indexform = VectorFormatFillQ(vform);
1123 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1124 }
1125
1126
PolynomialMult(uint8_t op1,uint8_t op2) const1127 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
1128 uint16_t result = 0;
1129 uint16_t extended_op2 = op2;
1130 for (int i = 0; i < 8; ++i) {
1131 if ((op1 >> i) & 1) {
1132 result = result ^ (extended_op2 << i);
1133 }
1134 }
1135 return result;
1136 }
1137
1138
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1139 LogicVRegister Simulator::pmul(VectorFormat vform,
1140 LogicVRegister dst,
1141 const LogicVRegister& src1,
1142 const LogicVRegister& src2) {
1143 dst.ClearForWrite(vform);
1144 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1145 dst.SetUint(vform,
1146 i,
1147 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1148 }
1149 return dst;
1150 }
1151
1152
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1153 LogicVRegister Simulator::pmull(VectorFormat vform,
1154 LogicVRegister dst,
1155 const LogicVRegister& src1,
1156 const LogicVRegister& src2) {
1157 VectorFormat vform_src = VectorFormatHalfWidth(vform);
1158 dst.ClearForWrite(vform);
1159 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1160 dst.SetUint(vform,
1161 i,
1162 PolynomialMult(src1.Uint(vform_src, i),
1163 src2.Uint(vform_src, i)));
1164 }
1165 return dst;
1166 }
1167
1168
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1169 LogicVRegister Simulator::pmull2(VectorFormat vform,
1170 LogicVRegister dst,
1171 const LogicVRegister& src1,
1172 const LogicVRegister& src2) {
1173 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1174 dst.ClearForWrite(vform);
1175 int lane_count = LaneCountFromFormat(vform);
1176 for (int i = 0; i < lane_count; i++) {
1177 dst.SetUint(vform,
1178 i,
1179 PolynomialMult(src1.Uint(vform_src, lane_count + i),
1180 src2.Uint(vform_src, lane_count + i)));
1181 }
1182 return dst;
1183 }
1184
1185
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1186 LogicVRegister Simulator::sub(VectorFormat vform,
1187 LogicVRegister dst,
1188 const LogicVRegister& src1,
1189 const LogicVRegister& src2) {
1190 int lane_size = LaneSizeInBitsFromFormat(vform);
1191 dst.ClearForWrite(vform);
1192 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1193 // Test for unsigned saturation.
1194 uint64_t ua = src1.UintLeftJustified(vform, i);
1195 uint64_t ub = src2.UintLeftJustified(vform, i);
1196 uint64_t ur = ua - ub;
1197 if (ub > ua) {
1198 dst.SetUnsignedSat(i, false);
1199 }
1200
1201 // Test for signed saturation.
1202 bool pos_a = (ua >> 63) == 0;
1203 bool pos_b = (ub >> 63) == 0;
1204 bool pos_r = (ur >> 63) == 0;
1205 // If the signs of the operands are different, and the sign of the first
1206 // operand doesn't match the result, there was an overflow.
1207 if ((pos_a != pos_b) && (pos_a != pos_r)) {
1208 dst.SetSignedSat(i, pos_a);
1209 }
1210
1211 dst.SetInt(vform, i, ur >> (64 - lane_size));
1212 }
1213 return dst;
1214 }
1215
1216
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1217 LogicVRegister Simulator::and_(VectorFormat vform,
1218 LogicVRegister dst,
1219 const LogicVRegister& src1,
1220 const LogicVRegister& src2) {
1221 dst.ClearForWrite(vform);
1222 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1223 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1224 }
1225 return dst;
1226 }
1227
1228
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1229 LogicVRegister Simulator::orr(VectorFormat vform,
1230 LogicVRegister dst,
1231 const LogicVRegister& src1,
1232 const LogicVRegister& src2) {
1233 dst.ClearForWrite(vform);
1234 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1235 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1236 }
1237 return dst;
1238 }
1239
1240
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1241 LogicVRegister Simulator::orn(VectorFormat vform,
1242 LogicVRegister dst,
1243 const LogicVRegister& src1,
1244 const LogicVRegister& src2) {
1245 dst.ClearForWrite(vform);
1246 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1247 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1248 }
1249 return dst;
1250 }
1251
1252
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1253 LogicVRegister Simulator::eor(VectorFormat vform,
1254 LogicVRegister dst,
1255 const LogicVRegister& src1,
1256 const LogicVRegister& src2) {
1257 dst.ClearForWrite(vform);
1258 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1259 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1260 }
1261 return dst;
1262 }
1263
1264
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1265 LogicVRegister Simulator::bic(VectorFormat vform,
1266 LogicVRegister dst,
1267 const LogicVRegister& src1,
1268 const LogicVRegister& src2) {
1269 dst.ClearForWrite(vform);
1270 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1271 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1272 }
1273 return dst;
1274 }
1275
1276
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1277 LogicVRegister Simulator::bic(VectorFormat vform,
1278 LogicVRegister dst,
1279 const LogicVRegister& src,
1280 uint64_t imm) {
1281 uint64_t result[16];
1282 int laneCount = LaneCountFromFormat(vform);
1283 for (int i = 0; i < laneCount; ++i) {
1284 result[i] = src.Uint(vform, i) & ~imm;
1285 }
1286 dst.ClearForWrite(vform);
1287 for (int i = 0; i < laneCount; ++i) {
1288 dst.SetUint(vform, i, result[i]);
1289 }
1290 return dst;
1291 }
1292
1293
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1294 LogicVRegister Simulator::bif(VectorFormat vform,
1295 LogicVRegister dst,
1296 const LogicVRegister& src1,
1297 const LogicVRegister& src2) {
1298 dst.ClearForWrite(vform);
1299 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1300 uint64_t operand1 = dst.Uint(vform, i);
1301 uint64_t operand2 = ~src2.Uint(vform, i);
1302 uint64_t operand3 = src1.Uint(vform, i);
1303 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1304 dst.SetUint(vform, i, result);
1305 }
1306 return dst;
1307 }
1308
1309
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1310 LogicVRegister Simulator::bit(VectorFormat vform,
1311 LogicVRegister dst,
1312 const LogicVRegister& src1,
1313 const LogicVRegister& src2) {
1314 dst.ClearForWrite(vform);
1315 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1316 uint64_t operand1 = dst.Uint(vform, i);
1317 uint64_t operand2 = src2.Uint(vform, i);
1318 uint64_t operand3 = src1.Uint(vform, i);
1319 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1320 dst.SetUint(vform, i, result);
1321 }
1322 return dst;
1323 }
1324
1325
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1326 LogicVRegister Simulator::bsl(VectorFormat vform,
1327 LogicVRegister dst,
1328 const LogicVRegister& src1,
1329 const LogicVRegister& src2) {
1330 dst.ClearForWrite(vform);
1331 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1332 uint64_t operand1 = src2.Uint(vform, i);
1333 uint64_t operand2 = dst.Uint(vform, i);
1334 uint64_t operand3 = src1.Uint(vform, i);
1335 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1336 dst.SetUint(vform, i, result);
1337 }
1338 return dst;
1339 }
1340
1341
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1342 LogicVRegister Simulator::sminmax(VectorFormat vform,
1343 LogicVRegister dst,
1344 const LogicVRegister& src1,
1345 const LogicVRegister& src2,
1346 bool max) {
1347 dst.ClearForWrite(vform);
1348 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1349 int64_t src1_val = src1.Int(vform, i);
1350 int64_t src2_val = src2.Int(vform, i);
1351 int64_t dst_val;
1352 if (max) {
1353 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1354 } else {
1355 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1356 }
1357 dst.SetInt(vform, i, dst_val);
1358 }
1359 return dst;
1360 }
1361
1362
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1363 LogicVRegister Simulator::smax(VectorFormat vform,
1364 LogicVRegister dst,
1365 const LogicVRegister& src1,
1366 const LogicVRegister& src2) {
1367 return sminmax(vform, dst, src1, src2, true);
1368 }
1369
1370
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1371 LogicVRegister Simulator::smin(VectorFormat vform,
1372 LogicVRegister dst,
1373 const LogicVRegister& src1,
1374 const LogicVRegister& src2) {
1375 return sminmax(vform, dst, src1, src2, false);
1376 }
1377
1378
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1379 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1380 LogicVRegister dst,
1381 const LogicVRegister& src1,
1382 const LogicVRegister& src2,
1383 bool max) {
1384 int lanes = LaneCountFromFormat(vform);
1385 int64_t result[kMaxLanesPerVector];
1386 const LogicVRegister* src = &src1;
1387 for (int j = 0; j < 2; j++) {
1388 for (int i = 0; i < lanes; i += 2) {
1389 int64_t first_val = src->Int(vform, i);
1390 int64_t second_val = src->Int(vform, i + 1);
1391 int64_t dst_val;
1392 if (max) {
1393 dst_val = (first_val > second_val) ? first_val : second_val;
1394 } else {
1395 dst_val = (first_val < second_val) ? first_val : second_val;
1396 }
1397 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1398 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1399 }
1400 src = &src2;
1401 }
1402 dst.SetIntArray(vform, result);
1403 return dst;
1404 }
1405
1406
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1407 LogicVRegister Simulator::smaxp(VectorFormat vform,
1408 LogicVRegister dst,
1409 const LogicVRegister& src1,
1410 const LogicVRegister& src2) {
1411 return sminmaxp(vform, dst, src1, src2, true);
1412 }
1413
1414
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1415 LogicVRegister Simulator::sminp(VectorFormat vform,
1416 LogicVRegister dst,
1417 const LogicVRegister& src1,
1418 const LogicVRegister& src2) {
1419 return sminmaxp(vform, dst, src1, src2, false);
1420 }
1421
1422
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1423 LogicVRegister Simulator::addp(VectorFormat vform,
1424 LogicVRegister dst,
1425 const LogicVRegister& src) {
1426 VIXL_ASSERT(vform == kFormatD);
1427
1428 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1429 dst.ClearForWrite(vform);
1430 dst.SetUint(vform, 0, dst_val);
1431 return dst;
1432 }
1433
1434
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1435 LogicVRegister Simulator::addv(VectorFormat vform,
1436 LogicVRegister dst,
1437 const LogicVRegister& src) {
1438 VectorFormat vform_dst =
1439 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1440
1441
1442 int64_t dst_val = 0;
1443 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1444 dst_val += src.Int(vform, i);
1445 }
1446
1447 dst.ClearForWrite(vform_dst);
1448 dst.SetInt(vform_dst, 0, dst_val);
1449 return dst;
1450 }
1451
1452
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1453 LogicVRegister Simulator::saddlv(VectorFormat vform,
1454 LogicVRegister dst,
1455 const LogicVRegister& src) {
1456 VectorFormat vform_dst =
1457 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1458
1459 int64_t dst_val = 0;
1460 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1461 dst_val += src.Int(vform, i);
1462 }
1463
1464 dst.ClearForWrite(vform_dst);
1465 dst.SetInt(vform_dst, 0, dst_val);
1466 return dst;
1467 }
1468
1469
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1470 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1471 LogicVRegister dst,
1472 const LogicVRegister& src) {
1473 VectorFormat vform_dst =
1474 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1475
1476 uint64_t dst_val = 0;
1477 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1478 dst_val += src.Uint(vform, i);
1479 }
1480
1481 dst.ClearForWrite(vform_dst);
1482 dst.SetUint(vform_dst, 0, dst_val);
1483 return dst;
1484 }
1485
1486
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1487 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1488 LogicVRegister dst,
1489 const LogicVRegister& src,
1490 bool max) {
1491 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1492 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1493 int64_t src_val = src.Int(vform, i);
1494 if (max) {
1495 dst_val = (src_val > dst_val) ? src_val : dst_val;
1496 } else {
1497 dst_val = (src_val < dst_val) ? src_val : dst_val;
1498 }
1499 }
1500 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1501 dst.SetInt(vform, 0, dst_val);
1502 return dst;
1503 }
1504
1505
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1506 LogicVRegister Simulator::smaxv(VectorFormat vform,
1507 LogicVRegister dst,
1508 const LogicVRegister& src) {
1509 sminmaxv(vform, dst, src, true);
1510 return dst;
1511 }
1512
1513
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1514 LogicVRegister Simulator::sminv(VectorFormat vform,
1515 LogicVRegister dst,
1516 const LogicVRegister& src) {
1517 sminmaxv(vform, dst, src, false);
1518 return dst;
1519 }
1520
1521
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1522 LogicVRegister Simulator::uminmax(VectorFormat vform,
1523 LogicVRegister dst,
1524 const LogicVRegister& src1,
1525 const LogicVRegister& src2,
1526 bool max) {
1527 dst.ClearForWrite(vform);
1528 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1529 uint64_t src1_val = src1.Uint(vform, i);
1530 uint64_t src2_val = src2.Uint(vform, i);
1531 uint64_t dst_val;
1532 if (max) {
1533 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1534 } else {
1535 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1536 }
1537 dst.SetUint(vform, i, dst_val);
1538 }
1539 return dst;
1540 }
1541
1542
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1543 LogicVRegister Simulator::umax(VectorFormat vform,
1544 LogicVRegister dst,
1545 const LogicVRegister& src1,
1546 const LogicVRegister& src2) {
1547 return uminmax(vform, dst, src1, src2, true);
1548 }
1549
1550
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1551 LogicVRegister Simulator::umin(VectorFormat vform,
1552 LogicVRegister dst,
1553 const LogicVRegister& src1,
1554 const LogicVRegister& src2) {
1555 return uminmax(vform, dst, src1, src2, false);
1556 }
1557
1558
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1559 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1560 LogicVRegister dst,
1561 const LogicVRegister& src1,
1562 const LogicVRegister& src2,
1563 bool max) {
1564 int lanes = LaneCountFromFormat(vform);
1565 uint64_t result[kMaxLanesPerVector];
1566 const LogicVRegister* src = &src1;
1567 for (int j = 0; j < 2; j++) {
1568 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1569 uint64_t first_val = src->Uint(vform, i);
1570 uint64_t second_val = src->Uint(vform, i + 1);
1571 uint64_t dst_val;
1572 if (max) {
1573 dst_val = (first_val > second_val) ? first_val : second_val;
1574 } else {
1575 dst_val = (first_val < second_val) ? first_val : second_val;
1576 }
1577 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1578 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1579 }
1580 src = &src2;
1581 }
1582 dst.SetUintArray(vform, result);
1583 return dst;
1584 }
1585
1586
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1587 LogicVRegister Simulator::umaxp(VectorFormat vform,
1588 LogicVRegister dst,
1589 const LogicVRegister& src1,
1590 const LogicVRegister& src2) {
1591 return uminmaxp(vform, dst, src1, src2, true);
1592 }
1593
1594
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1595 LogicVRegister Simulator::uminp(VectorFormat vform,
1596 LogicVRegister dst,
1597 const LogicVRegister& src1,
1598 const LogicVRegister& src2) {
1599 return uminmaxp(vform, dst, src1, src2, false);
1600 }
1601
1602
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1603 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1604 LogicVRegister dst,
1605 const LogicVRegister& src,
1606 bool max) {
1607 uint64_t dst_val = max ? 0 : UINT64_MAX;
1608 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1609 uint64_t src_val = src.Uint(vform, i);
1610 if (max) {
1611 dst_val = (src_val > dst_val) ? src_val : dst_val;
1612 } else {
1613 dst_val = (src_val < dst_val) ? src_val : dst_val;
1614 }
1615 }
1616 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1617 dst.SetUint(vform, 0, dst_val);
1618 return dst;
1619 }
1620
1621
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1622 LogicVRegister Simulator::umaxv(VectorFormat vform,
1623 LogicVRegister dst,
1624 const LogicVRegister& src) {
1625 uminmaxv(vform, dst, src, true);
1626 return dst;
1627 }
1628
1629
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1630 LogicVRegister Simulator::uminv(VectorFormat vform,
1631 LogicVRegister dst,
1632 const LogicVRegister& src) {
1633 uminmaxv(vform, dst, src, false);
1634 return dst;
1635 }
1636
1637
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1638 LogicVRegister Simulator::shl(VectorFormat vform,
1639 LogicVRegister dst,
1640 const LogicVRegister& src,
1641 int shift) {
1642 VIXL_ASSERT(shift >= 0);
1643 SimVRegister temp;
1644 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1645 return ushl(vform, dst, src, shiftreg);
1646 }
1647
1648
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1649 LogicVRegister Simulator::sshll(VectorFormat vform,
1650 LogicVRegister dst,
1651 const LogicVRegister& src,
1652 int shift) {
1653 VIXL_ASSERT(shift >= 0);
1654 SimVRegister temp1, temp2;
1655 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1656 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1657 return sshl(vform, dst, extendedreg, shiftreg);
1658 }
1659
1660
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1661 LogicVRegister Simulator::sshll2(VectorFormat vform,
1662 LogicVRegister dst,
1663 const LogicVRegister& src,
1664 int shift) {
1665 VIXL_ASSERT(shift >= 0);
1666 SimVRegister temp1, temp2;
1667 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1668 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1669 return sshl(vform, dst, extendedreg, shiftreg);
1670 }
1671
1672
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1673 LogicVRegister Simulator::shll(VectorFormat vform,
1674 LogicVRegister dst,
1675 const LogicVRegister& src) {
1676 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1677 return sshll(vform, dst, src, shift);
1678 }
1679
1680
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1681 LogicVRegister Simulator::shll2(VectorFormat vform,
1682 LogicVRegister dst,
1683 const LogicVRegister& src) {
1684 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1685 return sshll2(vform, dst, src, shift);
1686 }
1687
1688
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1689 LogicVRegister Simulator::ushll(VectorFormat vform,
1690 LogicVRegister dst,
1691 const LogicVRegister& src,
1692 int shift) {
1693 VIXL_ASSERT(shift >= 0);
1694 SimVRegister temp1, temp2;
1695 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1696 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1697 return ushl(vform, dst, extendedreg, shiftreg);
1698 }
1699
1700
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1701 LogicVRegister Simulator::ushll2(VectorFormat vform,
1702 LogicVRegister dst,
1703 const LogicVRegister& src,
1704 int shift) {
1705 VIXL_ASSERT(shift >= 0);
1706 SimVRegister temp1, temp2;
1707 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1708 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1709 return ushl(vform, dst, extendedreg, shiftreg);
1710 }
1711
1712
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1713 LogicVRegister Simulator::sli(VectorFormat vform,
1714 LogicVRegister dst,
1715 const LogicVRegister& src,
1716 int shift) {
1717 dst.ClearForWrite(vform);
1718 int laneCount = LaneCountFromFormat(vform);
1719 for (int i = 0; i < laneCount; i++) {
1720 uint64_t src_lane = src.Uint(vform, i);
1721 uint64_t dst_lane = dst.Uint(vform, i);
1722 uint64_t shifted = src_lane << shift;
1723 uint64_t mask = MaxUintFromFormat(vform) << shift;
1724 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1725 }
1726 return dst;
1727 }
1728
1729
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1730 LogicVRegister Simulator::sqshl(VectorFormat vform,
1731 LogicVRegister dst,
1732 const LogicVRegister& src,
1733 int shift) {
1734 VIXL_ASSERT(shift >= 0);
1735 SimVRegister temp;
1736 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1737 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1738 }
1739
1740
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1741 LogicVRegister Simulator::uqshl(VectorFormat vform,
1742 LogicVRegister dst,
1743 const LogicVRegister& src,
1744 int shift) {
1745 VIXL_ASSERT(shift >= 0);
1746 SimVRegister temp;
1747 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1748 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1749 }
1750
1751
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1752 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1753 LogicVRegister dst,
1754 const LogicVRegister& src,
1755 int shift) {
1756 VIXL_ASSERT(shift >= 0);
1757 SimVRegister temp;
1758 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1759 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1760 }
1761
1762
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1763 LogicVRegister Simulator::sri(VectorFormat vform,
1764 LogicVRegister dst,
1765 const LogicVRegister& src,
1766 int shift) {
1767 dst.ClearForWrite(vform);
1768 int laneCount = LaneCountFromFormat(vform);
1769 VIXL_ASSERT((shift > 0) &&
1770 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1771 for (int i = 0; i < laneCount; i++) {
1772 uint64_t src_lane = src.Uint(vform, i);
1773 uint64_t dst_lane = dst.Uint(vform, i);
1774 uint64_t shifted;
1775 uint64_t mask;
1776 if (shift == 64) {
1777 shifted = 0;
1778 mask = 0;
1779 } else {
1780 shifted = src_lane >> shift;
1781 mask = MaxUintFromFormat(vform) >> shift;
1782 }
1783 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1784 }
1785 return dst;
1786 }
1787
1788
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1789 LogicVRegister Simulator::ushr(VectorFormat vform,
1790 LogicVRegister dst,
1791 const LogicVRegister& src,
1792 int shift) {
1793 VIXL_ASSERT(shift >= 0);
1794 SimVRegister temp;
1795 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1796 return ushl(vform, dst, src, shiftreg);
1797 }
1798
1799
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1800 LogicVRegister Simulator::sshr(VectorFormat vform,
1801 LogicVRegister dst,
1802 const LogicVRegister& src,
1803 int shift) {
1804 VIXL_ASSERT(shift >= 0);
1805 SimVRegister temp;
1806 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1807 return sshl(vform, dst, src, shiftreg);
1808 }
1809
1810
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1811 LogicVRegister Simulator::ssra(VectorFormat vform,
1812 LogicVRegister dst,
1813 const LogicVRegister& src,
1814 int shift) {
1815 SimVRegister temp;
1816 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1817 return add(vform, dst, dst, shifted_reg);
1818 }
1819
1820
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1821 LogicVRegister Simulator::usra(VectorFormat vform,
1822 LogicVRegister dst,
1823 const LogicVRegister& src,
1824 int shift) {
1825 SimVRegister temp;
1826 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1827 return add(vform, dst, dst, shifted_reg);
1828 }
1829
1830
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1831 LogicVRegister Simulator::srsra(VectorFormat vform,
1832 LogicVRegister dst,
1833 const LogicVRegister& src,
1834 int shift) {
1835 SimVRegister temp;
1836 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1837 return add(vform, dst, dst, shifted_reg);
1838 }
1839
1840
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1841 LogicVRegister Simulator::ursra(VectorFormat vform,
1842 LogicVRegister dst,
1843 const LogicVRegister& src,
1844 int shift) {
1845 SimVRegister temp;
1846 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1847 return add(vform, dst, dst, shifted_reg);
1848 }
1849
1850
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1851 LogicVRegister Simulator::cls(VectorFormat vform,
1852 LogicVRegister dst,
1853 const LogicVRegister& src) {
1854 uint64_t result[16];
1855 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1856 int laneCount = LaneCountFromFormat(vform);
1857 for (int i = 0; i < laneCount; i++) {
1858 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1859 }
1860
1861 dst.ClearForWrite(vform);
1862 for (int i = 0; i < laneCount; ++i) {
1863 dst.SetUint(vform, i, result[i]);
1864 }
1865 return dst;
1866 }
1867
1868
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1869 LogicVRegister Simulator::clz(VectorFormat vform,
1870 LogicVRegister dst,
1871 const LogicVRegister& src) {
1872 uint64_t result[16];
1873 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1874 int laneCount = LaneCountFromFormat(vform);
1875 for (int i = 0; i < laneCount; i++) {
1876 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1877 }
1878
1879 dst.ClearForWrite(vform);
1880 for (int i = 0; i < laneCount; ++i) {
1881 dst.SetUint(vform, i, result[i]);
1882 }
1883 return dst;
1884 }
1885
1886
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1887 LogicVRegister Simulator::cnt(VectorFormat vform,
1888 LogicVRegister dst,
1889 const LogicVRegister& src) {
1890 uint64_t result[16];
1891 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1892 int laneCount = LaneCountFromFormat(vform);
1893 for (int i = 0; i < laneCount; i++) {
1894 uint64_t value = src.Uint(vform, i);
1895 result[i] = 0;
1896 for (int j = 0; j < laneSizeInBits; j++) {
1897 result[i] += (value & 1);
1898 value >>= 1;
1899 }
1900 }
1901
1902 dst.ClearForWrite(vform);
1903 for (int i = 0; i < laneCount; ++i) {
1904 dst.SetUint(vform, i, result[i]);
1905 }
1906 return dst;
1907 }
1908
1909
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1910 LogicVRegister Simulator::sshl(VectorFormat vform,
1911 LogicVRegister dst,
1912 const LogicVRegister& src1,
1913 const LogicVRegister& src2) {
1914 dst.ClearForWrite(vform);
1915 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1916 int8_t shift_val = src2.Int(vform, i);
1917 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1918
1919 // Set signed saturation state.
1920 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1921 dst.SetSignedSat(i, lj_src_val >= 0);
1922 }
1923
1924 // Set unsigned saturation state.
1925 if (lj_src_val < 0) {
1926 dst.SetUnsignedSat(i, false);
1927 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1928 (lj_src_val != 0)) {
1929 dst.SetUnsignedSat(i, true);
1930 }
1931
1932 int64_t src_val = src1.Int(vform, i);
1933 bool src_is_negative = src_val < 0;
1934 if (shift_val > 63) {
1935 dst.SetInt(vform, i, 0);
1936 } else if (shift_val < -63) {
1937 dst.SetRounding(i, src_is_negative);
1938 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1939 } else {
1940 // Use unsigned types for shifts, as behaviour is undefined for signed
1941 // lhs.
1942 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1943
1944 if (shift_val < 0) {
1945 // Convert to right shift.
1946 shift_val = -shift_val;
1947
1948 // Set rounding state by testing most-significant bit shifted out.
1949 // Rounding only needed on right shifts.
1950 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1951 dst.SetRounding(i, true);
1952 }
1953
1954 usrc_val >>= shift_val;
1955
1956 if (src_is_negative) {
1957 // Simulate sign-extension.
1958 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1959 }
1960 } else {
1961 usrc_val <<= shift_val;
1962 }
1963 dst.SetUint(vform, i, usrc_val);
1964 }
1965 }
1966 return dst;
1967 }
1968
1969
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1970 LogicVRegister Simulator::ushl(VectorFormat vform,
1971 LogicVRegister dst,
1972 const LogicVRegister& src1,
1973 const LogicVRegister& src2) {
1974 dst.ClearForWrite(vform);
1975 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1976 int8_t shift_val = src2.Int(vform, i);
1977 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1978
1979 // Set saturation state.
1980 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1981 dst.SetUnsignedSat(i, true);
1982 }
1983
1984 uint64_t src_val = src1.Uint(vform, i);
1985 if ((shift_val > 63) || (shift_val < -64)) {
1986 dst.SetUint(vform, i, 0);
1987 } else {
1988 if (shift_val < 0) {
1989 // Set rounding state. Rounding only needed on right shifts.
1990 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1991 dst.SetRounding(i, true);
1992 }
1993
1994 if (shift_val == -64) {
1995 src_val = 0;
1996 } else {
1997 src_val >>= -shift_val;
1998 }
1999 } else {
2000 src_val <<= shift_val;
2001 }
2002 dst.SetUint(vform, i, src_val);
2003 }
2004 }
2005 return dst;
2006 }
2007
2008
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2009 LogicVRegister Simulator::neg(VectorFormat vform,
2010 LogicVRegister dst,
2011 const LogicVRegister& src) {
2012 dst.ClearForWrite(vform);
2013 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2014 // Test for signed saturation.
2015 int64_t sa = src.Int(vform, i);
2016 if (sa == MinIntFromFormat(vform)) {
2017 dst.SetSignedSat(i, true);
2018 }
2019 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2020 }
2021 return dst;
2022 }
2023
2024
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2025 LogicVRegister Simulator::suqadd(VectorFormat vform,
2026 LogicVRegister dst,
2027 const LogicVRegister& src) {
2028 dst.ClearForWrite(vform);
2029 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2030 int64_t sa = dst.IntLeftJustified(vform, i);
2031 uint64_t ub = src.UintLeftJustified(vform, i);
2032 uint64_t ur = sa + ub;
2033
2034 int64_t sr;
2035 memcpy(&sr, &ur, sizeof(sr));
2036 if (sr < sa) { // Test for signed positive saturation.
2037 dst.SetInt(vform, i, MaxIntFromFormat(vform));
2038 } else {
2039 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
2040 }
2041 }
2042 return dst;
2043 }
2044
2045
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2046 LogicVRegister Simulator::usqadd(VectorFormat vform,
2047 LogicVRegister dst,
2048 const LogicVRegister& src) {
2049 dst.ClearForWrite(vform);
2050 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2051 uint64_t ua = dst.UintLeftJustified(vform, i);
2052 int64_t sb = src.IntLeftJustified(vform, i);
2053 uint64_t ur = ua + sb;
2054
2055 if ((sb > 0) && (ur <= ua)) {
2056 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2057 } else if ((sb < 0) && (ur >= ua)) {
2058 dst.SetUint(vform, i, 0); // Negative saturation.
2059 } else {
2060 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2061 }
2062 }
2063 return dst;
2064 }
2065
2066
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2067 LogicVRegister Simulator::abs(VectorFormat vform,
2068 LogicVRegister dst,
2069 const LogicVRegister& src) {
2070 dst.ClearForWrite(vform);
2071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2072 // Test for signed saturation.
2073 int64_t sa = src.Int(vform, i);
2074 if (sa == MinIntFromFormat(vform)) {
2075 dst.SetSignedSat(i, true);
2076 }
2077 if (sa < 0) {
2078 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2079 } else {
2080 dst.SetInt(vform, i, sa);
2081 }
2082 }
2083 return dst;
2084 }
2085
2086
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dstIsSigned,const LogicVRegister & src,bool srcIsSigned)2087 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2088 LogicVRegister dst,
2089 bool dstIsSigned,
2090 const LogicVRegister& src,
2091 bool srcIsSigned) {
2092 bool upperhalf = false;
2093 VectorFormat srcform = kFormatUndefined;
2094 int64_t ssrc[8];
2095 uint64_t usrc[8];
2096
2097 switch (dstform) {
2098 case kFormat8B:
2099 upperhalf = false;
2100 srcform = kFormat8H;
2101 break;
2102 case kFormat16B:
2103 upperhalf = true;
2104 srcform = kFormat8H;
2105 break;
2106 case kFormat4H:
2107 upperhalf = false;
2108 srcform = kFormat4S;
2109 break;
2110 case kFormat8H:
2111 upperhalf = true;
2112 srcform = kFormat4S;
2113 break;
2114 case kFormat2S:
2115 upperhalf = false;
2116 srcform = kFormat2D;
2117 break;
2118 case kFormat4S:
2119 upperhalf = true;
2120 srcform = kFormat2D;
2121 break;
2122 case kFormatB:
2123 upperhalf = false;
2124 srcform = kFormatH;
2125 break;
2126 case kFormatH:
2127 upperhalf = false;
2128 srcform = kFormatS;
2129 break;
2130 case kFormatS:
2131 upperhalf = false;
2132 srcform = kFormatD;
2133 break;
2134 default:
2135 VIXL_UNIMPLEMENTED();
2136 }
2137
2138 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2139 ssrc[i] = src.Int(srcform, i);
2140 usrc[i] = src.Uint(srcform, i);
2141 }
2142
2143 int offset;
2144 if (upperhalf) {
2145 offset = LaneCountFromFormat(dstform) / 2;
2146 } else {
2147 offset = 0;
2148 dst.ClearForWrite(dstform);
2149 }
2150
2151 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2152 // Test for signed saturation
2153 if (ssrc[i] > MaxIntFromFormat(dstform)) {
2154 dst.SetSignedSat(offset + i, true);
2155 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2156 dst.SetSignedSat(offset + i, false);
2157 }
2158
2159 // Test for unsigned saturation
2160 if (srcIsSigned) {
2161 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2162 dst.SetUnsignedSat(offset + i, true);
2163 } else if (ssrc[i] < 0) {
2164 dst.SetUnsignedSat(offset + i, false);
2165 }
2166 } else {
2167 if (usrc[i] > MaxUintFromFormat(dstform)) {
2168 dst.SetUnsignedSat(offset + i, true);
2169 }
2170 }
2171
2172 int64_t result;
2173 if (srcIsSigned) {
2174 result = ssrc[i] & MaxUintFromFormat(dstform);
2175 } else {
2176 result = usrc[i] & MaxUintFromFormat(dstform);
2177 }
2178
2179 if (dstIsSigned) {
2180 dst.SetInt(dstform, offset + i, result);
2181 } else {
2182 dst.SetUint(dstform, offset + i, result);
2183 }
2184 }
2185 return dst;
2186 }
2187
2188
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2189 LogicVRegister Simulator::xtn(VectorFormat vform,
2190 LogicVRegister dst,
2191 const LogicVRegister& src) {
2192 return extractnarrow(vform, dst, true, src, true);
2193 }
2194
2195
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2196 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2197 LogicVRegister dst,
2198 const LogicVRegister& src) {
2199 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2200 }
2201
2202
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2203 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2204 LogicVRegister dst,
2205 const LogicVRegister& src) {
2206 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2207 }
2208
2209
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2210 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2211 LogicVRegister dst,
2212 const LogicVRegister& src) {
2213 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2214 }
2215
2216
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)2217 LogicVRegister Simulator::absdiff(VectorFormat vform,
2218 LogicVRegister dst,
2219 const LogicVRegister& src1,
2220 const LogicVRegister& src2,
2221 bool issigned) {
2222 dst.ClearForWrite(vform);
2223 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2224 if (issigned) {
2225 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2226 sr = sr > 0 ? sr : -sr;
2227 dst.SetInt(vform, i, sr);
2228 } else {
2229 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2230 sr = sr > 0 ? sr : -sr;
2231 dst.SetUint(vform, i, sr);
2232 }
2233 }
2234 return dst;
2235 }
2236
2237
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2238 LogicVRegister Simulator::saba(VectorFormat vform,
2239 LogicVRegister dst,
2240 const LogicVRegister& src1,
2241 const LogicVRegister& src2) {
2242 SimVRegister temp;
2243 dst.ClearForWrite(vform);
2244 absdiff(vform, temp, src1, src2, true);
2245 add(vform, dst, dst, temp);
2246 return dst;
2247 }
2248
2249
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2250 LogicVRegister Simulator::uaba(VectorFormat vform,
2251 LogicVRegister dst,
2252 const LogicVRegister& src1,
2253 const LogicVRegister& src2) {
2254 SimVRegister temp;
2255 dst.ClearForWrite(vform);
2256 absdiff(vform, temp, src1, src2, false);
2257 add(vform, dst, dst, temp);
2258 return dst;
2259 }
2260
2261
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2262 LogicVRegister Simulator::not_(VectorFormat vform,
2263 LogicVRegister dst,
2264 const LogicVRegister& src) {
2265 dst.ClearForWrite(vform);
2266 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2267 dst.SetUint(vform, i, ~src.Uint(vform, i));
2268 }
2269 return dst;
2270 }
2271
2272
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2273 LogicVRegister Simulator::rbit(VectorFormat vform,
2274 LogicVRegister dst,
2275 const LogicVRegister& src) {
2276 uint64_t result[16];
2277 int laneCount = LaneCountFromFormat(vform);
2278 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2279 uint64_t reversed_value;
2280 uint64_t value;
2281 for (int i = 0; i < laneCount; i++) {
2282 value = src.Uint(vform, i);
2283 reversed_value = 0;
2284 for (int j = 0; j < laneSizeInBits; j++) {
2285 reversed_value = (reversed_value << 1) | (value & 1);
2286 value >>= 1;
2287 }
2288 result[i] = reversed_value;
2289 }
2290
2291 dst.ClearForWrite(vform);
2292 for (int i = 0; i < laneCount; ++i) {
2293 dst.SetUint(vform, i, result[i]);
2294 }
2295 return dst;
2296 }
2297
2298
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)2299 LogicVRegister Simulator::rev(VectorFormat vform,
2300 LogicVRegister dst,
2301 const LogicVRegister& src,
2302 int revSize) {
2303 uint64_t result[16];
2304 int laneCount = LaneCountFromFormat(vform);
2305 int laneSize = LaneSizeInBytesFromFormat(vform);
2306 int lanesPerLoop = revSize / laneSize;
2307 for (int i = 0; i < laneCount; i += lanesPerLoop) {
2308 for (int j = 0; j < lanesPerLoop; j++) {
2309 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2310 }
2311 }
2312 dst.ClearForWrite(vform);
2313 for (int i = 0; i < laneCount; ++i) {
2314 dst.SetUint(vform, i, result[i]);
2315 }
2316 return dst;
2317 }
2318
2319
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2320 LogicVRegister Simulator::rev16(VectorFormat vform,
2321 LogicVRegister dst,
2322 const LogicVRegister& src) {
2323 return rev(vform, dst, src, 2);
2324 }
2325
2326
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2327 LogicVRegister Simulator::rev32(VectorFormat vform,
2328 LogicVRegister dst,
2329 const LogicVRegister& src) {
2330 return rev(vform, dst, src, 4);
2331 }
2332
2333
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2334 LogicVRegister Simulator::rev64(VectorFormat vform,
2335 LogicVRegister dst,
2336 const LogicVRegister& src) {
2337 return rev(vform, dst, src, 8);
2338 }
2339
2340
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2341 LogicVRegister Simulator::addlp(VectorFormat vform,
2342 LogicVRegister dst,
2343 const LogicVRegister& src,
2344 bool is_signed,
2345 bool do_accumulate) {
2346 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2347 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32);
2348 VIXL_ASSERT(LaneCountFromFormat(vform) <= 8);
2349
2350 uint64_t result[8];
2351 int lane_count = LaneCountFromFormat(vform);
2352 for (int i = 0; i < lane_count; i++) {
2353 if (is_signed) {
2354 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2355 src.Int(vformsrc, 2 * i + 1));
2356 } else {
2357 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2358 }
2359 }
2360
2361 dst.ClearForWrite(vform);
2362 for (int i = 0; i < lane_count; ++i) {
2363 if (do_accumulate) {
2364 result[i] += dst.Uint(vform, i);
2365 }
2366 dst.SetUint(vform, i, result[i]);
2367 }
2368
2369 return dst;
2370 }
2371
2372
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2373 LogicVRegister Simulator::saddlp(VectorFormat vform,
2374 LogicVRegister dst,
2375 const LogicVRegister& src) {
2376 return addlp(vform, dst, src, true, false);
2377 }
2378
2379
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2380 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2381 LogicVRegister dst,
2382 const LogicVRegister& src) {
2383 return addlp(vform, dst, src, false, false);
2384 }
2385
2386
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2387 LogicVRegister Simulator::sadalp(VectorFormat vform,
2388 LogicVRegister dst,
2389 const LogicVRegister& src) {
2390 return addlp(vform, dst, src, true, true);
2391 }
2392
2393
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2394 LogicVRegister Simulator::uadalp(VectorFormat vform,
2395 LogicVRegister dst,
2396 const LogicVRegister& src) {
2397 return addlp(vform, dst, src, false, true);
2398 }
2399
2400
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2401 LogicVRegister Simulator::ext(VectorFormat vform,
2402 LogicVRegister dst,
2403 const LogicVRegister& src1,
2404 const LogicVRegister& src2,
2405 int index) {
2406 uint8_t result[16];
2407 int laneCount = LaneCountFromFormat(vform);
2408 for (int i = 0; i < laneCount - index; ++i) {
2409 result[i] = src1.Uint(vform, i + index);
2410 }
2411 for (int i = 0; i < index; ++i) {
2412 result[laneCount - index + i] = src2.Uint(vform, i);
2413 }
2414 dst.ClearForWrite(vform);
2415 for (int i = 0; i < laneCount; ++i) {
2416 dst.SetUint(vform, i, result[i]);
2417 }
2418 return dst;
2419 }
2420
2421
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2422 LogicVRegister Simulator::dup_element(VectorFormat vform,
2423 LogicVRegister dst,
2424 const LogicVRegister& src,
2425 int src_index) {
2426 int laneCount = LaneCountFromFormat(vform);
2427 uint64_t value = src.Uint(vform, src_index);
2428 dst.ClearForWrite(vform);
2429 for (int i = 0; i < laneCount; ++i) {
2430 dst.SetUint(vform, i, value);
2431 }
2432 return dst;
2433 }
2434
2435
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2436 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2437 LogicVRegister dst,
2438 uint64_t imm) {
2439 int laneCount = LaneCountFromFormat(vform);
2440 uint64_t value = imm & MaxUintFromFormat(vform);
2441 dst.ClearForWrite(vform);
2442 for (int i = 0; i < laneCount; ++i) {
2443 dst.SetUint(vform, i, value);
2444 }
2445 return dst;
2446 }
2447
2448
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2449 LogicVRegister Simulator::ins_element(VectorFormat vform,
2450 LogicVRegister dst,
2451 int dst_index,
2452 const LogicVRegister& src,
2453 int src_index) {
2454 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2455 return dst;
2456 }
2457
2458
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2459 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2460 LogicVRegister dst,
2461 int dst_index,
2462 uint64_t imm) {
2463 uint64_t value = imm & MaxUintFromFormat(vform);
2464 dst.SetUint(vform, dst_index, value);
2465 return dst;
2466 }
2467
2468
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2469 LogicVRegister Simulator::movi(VectorFormat vform,
2470 LogicVRegister dst,
2471 uint64_t imm) {
2472 int laneCount = LaneCountFromFormat(vform);
2473 dst.ClearForWrite(vform);
2474 for (int i = 0; i < laneCount; ++i) {
2475 dst.SetUint(vform, i, imm);
2476 }
2477 return dst;
2478 }
2479
2480
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2481 LogicVRegister Simulator::mvni(VectorFormat vform,
2482 LogicVRegister dst,
2483 uint64_t imm) {
2484 int laneCount = LaneCountFromFormat(vform);
2485 dst.ClearForWrite(vform);
2486 for (int i = 0; i < laneCount; ++i) {
2487 dst.SetUint(vform, i, ~imm);
2488 }
2489 return dst;
2490 }
2491
2492
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2493 LogicVRegister Simulator::orr(VectorFormat vform,
2494 LogicVRegister dst,
2495 const LogicVRegister& src,
2496 uint64_t imm) {
2497 uint64_t result[16];
2498 int laneCount = LaneCountFromFormat(vform);
2499 for (int i = 0; i < laneCount; ++i) {
2500 result[i] = src.Uint(vform, i) | imm;
2501 }
2502 dst.ClearForWrite(vform);
2503 for (int i = 0; i < laneCount; ++i) {
2504 dst.SetUint(vform, i, result[i]);
2505 }
2506 return dst;
2507 }
2508
2509
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2510 LogicVRegister Simulator::uxtl(VectorFormat vform,
2511 LogicVRegister dst,
2512 const LogicVRegister& src) {
2513 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2514
2515 dst.ClearForWrite(vform);
2516 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2517 dst.SetUint(vform, i, src.Uint(vform_half, i));
2518 }
2519 return dst;
2520 }
2521
2522
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2523 LogicVRegister Simulator::sxtl(VectorFormat vform,
2524 LogicVRegister dst,
2525 const LogicVRegister& src) {
2526 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2527
2528 dst.ClearForWrite(vform);
2529 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2530 dst.SetInt(vform, i, src.Int(vform_half, i));
2531 }
2532 return dst;
2533 }
2534
2535
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2536 LogicVRegister Simulator::uxtl2(VectorFormat vform,
2537 LogicVRegister dst,
2538 const LogicVRegister& src) {
2539 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2540 int lane_count = LaneCountFromFormat(vform);
2541
2542 dst.ClearForWrite(vform);
2543 for (int i = 0; i < lane_count; i++) {
2544 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2545 }
2546 return dst;
2547 }
2548
2549
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2550 LogicVRegister Simulator::sxtl2(VectorFormat vform,
2551 LogicVRegister dst,
2552 const LogicVRegister& src) {
2553 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2554 int lane_count = LaneCountFromFormat(vform);
2555
2556 dst.ClearForWrite(vform);
2557 for (int i = 0; i < lane_count; i++) {
2558 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2559 }
2560 return dst;
2561 }
2562
2563
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2564 LogicVRegister Simulator::shrn(VectorFormat vform,
2565 LogicVRegister dst,
2566 const LogicVRegister& src,
2567 int shift) {
2568 SimVRegister temp;
2569 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2570 VectorFormat vform_dst = vform;
2571 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2572 return extractnarrow(vform_dst, dst, false, shifted_src, false);
2573 }
2574
2575
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2576 LogicVRegister Simulator::shrn2(VectorFormat vform,
2577 LogicVRegister dst,
2578 const LogicVRegister& src,
2579 int shift) {
2580 SimVRegister temp;
2581 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2582 VectorFormat vformdst = vform;
2583 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2584 return extractnarrow(vformdst, dst, false, shifted_src, false);
2585 }
2586
2587
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2588 LogicVRegister Simulator::rshrn(VectorFormat vform,
2589 LogicVRegister dst,
2590 const LogicVRegister& src,
2591 int shift) {
2592 SimVRegister temp;
2593 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2594 VectorFormat vformdst = vform;
2595 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2596 return extractnarrow(vformdst, dst, false, shifted_src, false);
2597 }
2598
2599
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2600 LogicVRegister Simulator::rshrn2(VectorFormat vform,
2601 LogicVRegister dst,
2602 const LogicVRegister& src,
2603 int shift) {
2604 SimVRegister temp;
2605 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2606 VectorFormat vformdst = vform;
2607 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2608 return extractnarrow(vformdst, dst, false, shifted_src, false);
2609 }
2610
2611
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)2612 LogicVRegister Simulator::Table(VectorFormat vform,
2613 LogicVRegister dst,
2614 const LogicVRegister& ind,
2615 bool zero_out_of_bounds,
2616 const LogicVRegister* tab1,
2617 const LogicVRegister* tab2,
2618 const LogicVRegister* tab3,
2619 const LogicVRegister* tab4) {
2620 VIXL_ASSERT(tab1 != NULL);
2621 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2622 uint64_t result[kMaxLanesPerVector];
2623 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2624 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2625 }
2626 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2627 uint64_t j = ind.Uint(vform, i);
2628 int tab_idx = static_cast<int>(j >> 4);
2629 int j_idx = static_cast<int>(j & 15);
2630 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
2631 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2632 }
2633 }
2634 dst.SetUintArray(vform, result);
2635 return dst;
2636 }
2637
2638
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2639 LogicVRegister Simulator::tbl(VectorFormat vform,
2640 LogicVRegister dst,
2641 const LogicVRegister& tab,
2642 const LogicVRegister& ind) {
2643 return Table(vform, dst, ind, true, &tab);
2644 }
2645
2646
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2647 LogicVRegister Simulator::tbl(VectorFormat vform,
2648 LogicVRegister dst,
2649 const LogicVRegister& tab,
2650 const LogicVRegister& tab2,
2651 const LogicVRegister& ind) {
2652 return Table(vform, dst, ind, true, &tab, &tab2);
2653 }
2654
2655
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2656 LogicVRegister Simulator::tbl(VectorFormat vform,
2657 LogicVRegister dst,
2658 const LogicVRegister& tab,
2659 const LogicVRegister& tab2,
2660 const LogicVRegister& tab3,
2661 const LogicVRegister& ind) {
2662 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2663 }
2664
2665
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2666 LogicVRegister Simulator::tbl(VectorFormat vform,
2667 LogicVRegister dst,
2668 const LogicVRegister& tab,
2669 const LogicVRegister& tab2,
2670 const LogicVRegister& tab3,
2671 const LogicVRegister& tab4,
2672 const LogicVRegister& ind) {
2673 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2674 }
2675
2676
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2677 LogicVRegister Simulator::tbx(VectorFormat vform,
2678 LogicVRegister dst,
2679 const LogicVRegister& tab,
2680 const LogicVRegister& ind) {
2681 return Table(vform, dst, ind, false, &tab);
2682 }
2683
2684
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2685 LogicVRegister Simulator::tbx(VectorFormat vform,
2686 LogicVRegister dst,
2687 const LogicVRegister& tab,
2688 const LogicVRegister& tab2,
2689 const LogicVRegister& ind) {
2690 return Table(vform, dst, ind, false, &tab, &tab2);
2691 }
2692
2693
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2694 LogicVRegister Simulator::tbx(VectorFormat vform,
2695 LogicVRegister dst,
2696 const LogicVRegister& tab,
2697 const LogicVRegister& tab2,
2698 const LogicVRegister& tab3,
2699 const LogicVRegister& ind) {
2700 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2701 }
2702
2703
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2704 LogicVRegister Simulator::tbx(VectorFormat vform,
2705 LogicVRegister dst,
2706 const LogicVRegister& tab,
2707 const LogicVRegister& tab2,
2708 const LogicVRegister& tab3,
2709 const LogicVRegister& tab4,
2710 const LogicVRegister& ind) {
2711 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2712 }
2713
2714
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2715 LogicVRegister Simulator::uqshrn(VectorFormat vform,
2716 LogicVRegister dst,
2717 const LogicVRegister& src,
2718 int shift) {
2719 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2720 }
2721
2722
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2723 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2724 LogicVRegister dst,
2725 const LogicVRegister& src,
2726 int shift) {
2727 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2728 }
2729
2730
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2731 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2732 LogicVRegister dst,
2733 const LogicVRegister& src,
2734 int shift) {
2735 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2736 }
2737
2738
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2739 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2740 LogicVRegister dst,
2741 const LogicVRegister& src,
2742 int shift) {
2743 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2744 }
2745
2746
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2747 LogicVRegister Simulator::sqshrn(VectorFormat vform,
2748 LogicVRegister dst,
2749 const LogicVRegister& src,
2750 int shift) {
2751 SimVRegister temp;
2752 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2753 VectorFormat vformdst = vform;
2754 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2755 return sqxtn(vformdst, dst, shifted_src);
2756 }
2757
2758
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2759 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2760 LogicVRegister dst,
2761 const LogicVRegister& src,
2762 int shift) {
2763 SimVRegister temp;
2764 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2765 VectorFormat vformdst = vform;
2766 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2767 return sqxtn(vformdst, dst, shifted_src);
2768 }
2769
2770
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2771 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2772 LogicVRegister dst,
2773 const LogicVRegister& src,
2774 int shift) {
2775 SimVRegister temp;
2776 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2777 VectorFormat vformdst = vform;
2778 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2779 return sqxtn(vformdst, dst, shifted_src);
2780 }
2781
2782
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2783 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2784 LogicVRegister dst,
2785 const LogicVRegister& src,
2786 int shift) {
2787 SimVRegister temp;
2788 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2789 VectorFormat vformdst = vform;
2790 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2791 return sqxtn(vformdst, dst, shifted_src);
2792 }
2793
2794
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2795 LogicVRegister Simulator::sqshrun(VectorFormat vform,
2796 LogicVRegister dst,
2797 const LogicVRegister& src,
2798 int shift) {
2799 SimVRegister temp;
2800 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2801 VectorFormat vformdst = vform;
2802 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2803 return sqxtun(vformdst, dst, shifted_src);
2804 }
2805
2806
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2807 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2808 LogicVRegister dst,
2809 const LogicVRegister& src,
2810 int shift) {
2811 SimVRegister temp;
2812 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2813 VectorFormat vformdst = vform;
2814 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2815 return sqxtun(vformdst, dst, shifted_src);
2816 }
2817
2818
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2819 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2820 LogicVRegister dst,
2821 const LogicVRegister& src,
2822 int shift) {
2823 SimVRegister temp;
2824 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2825 VectorFormat vformdst = vform;
2826 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2827 return sqxtun(vformdst, dst, shifted_src);
2828 }
2829
2830
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2831 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2832 LogicVRegister dst,
2833 const LogicVRegister& src,
2834 int shift) {
2835 SimVRegister temp;
2836 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2837 VectorFormat vformdst = vform;
2838 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2839 return sqxtun(vformdst, dst, shifted_src);
2840 }
2841
2842
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2843 LogicVRegister Simulator::uaddl(VectorFormat vform,
2844 LogicVRegister dst,
2845 const LogicVRegister& src1,
2846 const LogicVRegister& src2) {
2847 SimVRegister temp1, temp2;
2848 uxtl(vform, temp1, src1);
2849 uxtl(vform, temp2, src2);
2850 add(vform, dst, temp1, temp2);
2851 return dst;
2852 }
2853
2854
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2855 LogicVRegister Simulator::uaddl2(VectorFormat vform,
2856 LogicVRegister dst,
2857 const LogicVRegister& src1,
2858 const LogicVRegister& src2) {
2859 SimVRegister temp1, temp2;
2860 uxtl2(vform, temp1, src1);
2861 uxtl2(vform, temp2, src2);
2862 add(vform, dst, temp1, temp2);
2863 return dst;
2864 }
2865
2866
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2867 LogicVRegister Simulator::uaddw(VectorFormat vform,
2868 LogicVRegister dst,
2869 const LogicVRegister& src1,
2870 const LogicVRegister& src2) {
2871 SimVRegister temp;
2872 uxtl(vform, temp, src2);
2873 add(vform, dst, src1, temp);
2874 return dst;
2875 }
2876
2877
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2878 LogicVRegister Simulator::uaddw2(VectorFormat vform,
2879 LogicVRegister dst,
2880 const LogicVRegister& src1,
2881 const LogicVRegister& src2) {
2882 SimVRegister temp;
2883 uxtl2(vform, temp, src2);
2884 add(vform, dst, src1, temp);
2885 return dst;
2886 }
2887
2888
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2889 LogicVRegister Simulator::saddl(VectorFormat vform,
2890 LogicVRegister dst,
2891 const LogicVRegister& src1,
2892 const LogicVRegister& src2) {
2893 SimVRegister temp1, temp2;
2894 sxtl(vform, temp1, src1);
2895 sxtl(vform, temp2, src2);
2896 add(vform, dst, temp1, temp2);
2897 return dst;
2898 }
2899
2900
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2901 LogicVRegister Simulator::saddl2(VectorFormat vform,
2902 LogicVRegister dst,
2903 const LogicVRegister& src1,
2904 const LogicVRegister& src2) {
2905 SimVRegister temp1, temp2;
2906 sxtl2(vform, temp1, src1);
2907 sxtl2(vform, temp2, src2);
2908 add(vform, dst, temp1, temp2);
2909 return dst;
2910 }
2911
2912
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2913 LogicVRegister Simulator::saddw(VectorFormat vform,
2914 LogicVRegister dst,
2915 const LogicVRegister& src1,
2916 const LogicVRegister& src2) {
2917 SimVRegister temp;
2918 sxtl(vform, temp, src2);
2919 add(vform, dst, src1, temp);
2920 return dst;
2921 }
2922
2923
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2924 LogicVRegister Simulator::saddw2(VectorFormat vform,
2925 LogicVRegister dst,
2926 const LogicVRegister& src1,
2927 const LogicVRegister& src2) {
2928 SimVRegister temp;
2929 sxtl2(vform, temp, src2);
2930 add(vform, dst, src1, temp);
2931 return dst;
2932 }
2933
2934
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2935 LogicVRegister Simulator::usubl(VectorFormat vform,
2936 LogicVRegister dst,
2937 const LogicVRegister& src1,
2938 const LogicVRegister& src2) {
2939 SimVRegister temp1, temp2;
2940 uxtl(vform, temp1, src1);
2941 uxtl(vform, temp2, src2);
2942 sub(vform, dst, temp1, temp2);
2943 return dst;
2944 }
2945
2946
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2947 LogicVRegister Simulator::usubl2(VectorFormat vform,
2948 LogicVRegister dst,
2949 const LogicVRegister& src1,
2950 const LogicVRegister& src2) {
2951 SimVRegister temp1, temp2;
2952 uxtl2(vform, temp1, src1);
2953 uxtl2(vform, temp2, src2);
2954 sub(vform, dst, temp1, temp2);
2955 return dst;
2956 }
2957
2958
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2959 LogicVRegister Simulator::usubw(VectorFormat vform,
2960 LogicVRegister dst,
2961 const LogicVRegister& src1,
2962 const LogicVRegister& src2) {
2963 SimVRegister temp;
2964 uxtl(vform, temp, src2);
2965 sub(vform, dst, src1, temp);
2966 return dst;
2967 }
2968
2969
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2970 LogicVRegister Simulator::usubw2(VectorFormat vform,
2971 LogicVRegister dst,
2972 const LogicVRegister& src1,
2973 const LogicVRegister& src2) {
2974 SimVRegister temp;
2975 uxtl2(vform, temp, src2);
2976 sub(vform, dst, src1, temp);
2977 return dst;
2978 }
2979
2980
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2981 LogicVRegister Simulator::ssubl(VectorFormat vform,
2982 LogicVRegister dst,
2983 const LogicVRegister& src1,
2984 const LogicVRegister& src2) {
2985 SimVRegister temp1, temp2;
2986 sxtl(vform, temp1, src1);
2987 sxtl(vform, temp2, src2);
2988 sub(vform, dst, temp1, temp2);
2989 return dst;
2990 }
2991
2992
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2993 LogicVRegister Simulator::ssubl2(VectorFormat vform,
2994 LogicVRegister dst,
2995 const LogicVRegister& src1,
2996 const LogicVRegister& src2) {
2997 SimVRegister temp1, temp2;
2998 sxtl2(vform, temp1, src1);
2999 sxtl2(vform, temp2, src2);
3000 sub(vform, dst, temp1, temp2);
3001 return dst;
3002 }
3003
3004
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3005 LogicVRegister Simulator::ssubw(VectorFormat vform,
3006 LogicVRegister dst,
3007 const LogicVRegister& src1,
3008 const LogicVRegister& src2) {
3009 SimVRegister temp;
3010 sxtl(vform, temp, src2);
3011 sub(vform, dst, src1, temp);
3012 return dst;
3013 }
3014
3015
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3016 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3017 LogicVRegister dst,
3018 const LogicVRegister& src1,
3019 const LogicVRegister& src2) {
3020 SimVRegister temp;
3021 sxtl2(vform, temp, src2);
3022 sub(vform, dst, src1, temp);
3023 return dst;
3024 }
3025
3026
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3027 LogicVRegister Simulator::uabal(VectorFormat vform,
3028 LogicVRegister dst,
3029 const LogicVRegister& src1,
3030 const LogicVRegister& src2) {
3031 SimVRegister temp1, temp2;
3032 uxtl(vform, temp1, src1);
3033 uxtl(vform, temp2, src2);
3034 uaba(vform, dst, temp1, temp2);
3035 return dst;
3036 }
3037
3038
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3039 LogicVRegister Simulator::uabal2(VectorFormat vform,
3040 LogicVRegister dst,
3041 const LogicVRegister& src1,
3042 const LogicVRegister& src2) {
3043 SimVRegister temp1, temp2;
3044 uxtl2(vform, temp1, src1);
3045 uxtl2(vform, temp2, src2);
3046 uaba(vform, dst, temp1, temp2);
3047 return dst;
3048 }
3049
3050
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3051 LogicVRegister Simulator::sabal(VectorFormat vform,
3052 LogicVRegister dst,
3053 const LogicVRegister& src1,
3054 const LogicVRegister& src2) {
3055 SimVRegister temp1, temp2;
3056 sxtl(vform, temp1, src1);
3057 sxtl(vform, temp2, src2);
3058 saba(vform, dst, temp1, temp2);
3059 return dst;
3060 }
3061
3062
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3063 LogicVRegister Simulator::sabal2(VectorFormat vform,
3064 LogicVRegister dst,
3065 const LogicVRegister& src1,
3066 const LogicVRegister& src2) {
3067 SimVRegister temp1, temp2;
3068 sxtl2(vform, temp1, src1);
3069 sxtl2(vform, temp2, src2);
3070 saba(vform, dst, temp1, temp2);
3071 return dst;
3072 }
3073
3074
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3075 LogicVRegister Simulator::uabdl(VectorFormat vform,
3076 LogicVRegister dst,
3077 const LogicVRegister& src1,
3078 const LogicVRegister& src2) {
3079 SimVRegister temp1, temp2;
3080 uxtl(vform, temp1, src1);
3081 uxtl(vform, temp2, src2);
3082 absdiff(vform, dst, temp1, temp2, false);
3083 return dst;
3084 }
3085
3086
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3087 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3088 LogicVRegister dst,
3089 const LogicVRegister& src1,
3090 const LogicVRegister& src2) {
3091 SimVRegister temp1, temp2;
3092 uxtl2(vform, temp1, src1);
3093 uxtl2(vform, temp2, src2);
3094 absdiff(vform, dst, temp1, temp2, false);
3095 return dst;
3096 }
3097
3098
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3099 LogicVRegister Simulator::sabdl(VectorFormat vform,
3100 LogicVRegister dst,
3101 const LogicVRegister& src1,
3102 const LogicVRegister& src2) {
3103 SimVRegister temp1, temp2;
3104 sxtl(vform, temp1, src1);
3105 sxtl(vform, temp2, src2);
3106 absdiff(vform, dst, temp1, temp2, true);
3107 return dst;
3108 }
3109
3110
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3111 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3112 LogicVRegister dst,
3113 const LogicVRegister& src1,
3114 const LogicVRegister& src2) {
3115 SimVRegister temp1, temp2;
3116 sxtl2(vform, temp1, src1);
3117 sxtl2(vform, temp2, src2);
3118 absdiff(vform, dst, temp1, temp2, true);
3119 return dst;
3120 }
3121
3122
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3123 LogicVRegister Simulator::umull(VectorFormat vform,
3124 LogicVRegister dst,
3125 const LogicVRegister& src1,
3126 const LogicVRegister& src2) {
3127 SimVRegister temp1, temp2;
3128 uxtl(vform, temp1, src1);
3129 uxtl(vform, temp2, src2);
3130 mul(vform, dst, temp1, temp2);
3131 return dst;
3132 }
3133
3134
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3135 LogicVRegister Simulator::umull2(VectorFormat vform,
3136 LogicVRegister dst,
3137 const LogicVRegister& src1,
3138 const LogicVRegister& src2) {
3139 SimVRegister temp1, temp2;
3140 uxtl2(vform, temp1, src1);
3141 uxtl2(vform, temp2, src2);
3142 mul(vform, dst, temp1, temp2);
3143 return dst;
3144 }
3145
3146
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3147 LogicVRegister Simulator::smull(VectorFormat vform,
3148 LogicVRegister dst,
3149 const LogicVRegister& src1,
3150 const LogicVRegister& src2) {
3151 SimVRegister temp1, temp2;
3152 sxtl(vform, temp1, src1);
3153 sxtl(vform, temp2, src2);
3154 mul(vform, dst, temp1, temp2);
3155 return dst;
3156 }
3157
3158
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3159 LogicVRegister Simulator::smull2(VectorFormat vform,
3160 LogicVRegister dst,
3161 const LogicVRegister& src1,
3162 const LogicVRegister& src2) {
3163 SimVRegister temp1, temp2;
3164 sxtl2(vform, temp1, src1);
3165 sxtl2(vform, temp2, src2);
3166 mul(vform, dst, temp1, temp2);
3167 return dst;
3168 }
3169
3170
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3171 LogicVRegister Simulator::umlsl(VectorFormat vform,
3172 LogicVRegister dst,
3173 const LogicVRegister& src1,
3174 const LogicVRegister& src2) {
3175 SimVRegister temp1, temp2;
3176 uxtl(vform, temp1, src1);
3177 uxtl(vform, temp2, src2);
3178 mls(vform, dst, temp1, temp2);
3179 return dst;
3180 }
3181
3182
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3183 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3184 LogicVRegister dst,
3185 const LogicVRegister& src1,
3186 const LogicVRegister& src2) {
3187 SimVRegister temp1, temp2;
3188 uxtl2(vform, temp1, src1);
3189 uxtl2(vform, temp2, src2);
3190 mls(vform, dst, temp1, temp2);
3191 return dst;
3192 }
3193
3194
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3195 LogicVRegister Simulator::smlsl(VectorFormat vform,
3196 LogicVRegister dst,
3197 const LogicVRegister& src1,
3198 const LogicVRegister& src2) {
3199 SimVRegister temp1, temp2;
3200 sxtl(vform, temp1, src1);
3201 sxtl(vform, temp2, src2);
3202 mls(vform, dst, temp1, temp2);
3203 return dst;
3204 }
3205
3206
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3207 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3208 LogicVRegister dst,
3209 const LogicVRegister& src1,
3210 const LogicVRegister& src2) {
3211 SimVRegister temp1, temp2;
3212 sxtl2(vform, temp1, src1);
3213 sxtl2(vform, temp2, src2);
3214 mls(vform, dst, temp1, temp2);
3215 return dst;
3216 }
3217
3218
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3219 LogicVRegister Simulator::umlal(VectorFormat vform,
3220 LogicVRegister dst,
3221 const LogicVRegister& src1,
3222 const LogicVRegister& src2) {
3223 SimVRegister temp1, temp2;
3224 uxtl(vform, temp1, src1);
3225 uxtl(vform, temp2, src2);
3226 mla(vform, dst, temp1, temp2);
3227 return dst;
3228 }
3229
3230
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3231 LogicVRegister Simulator::umlal2(VectorFormat vform,
3232 LogicVRegister dst,
3233 const LogicVRegister& src1,
3234 const LogicVRegister& src2) {
3235 SimVRegister temp1, temp2;
3236 uxtl2(vform, temp1, src1);
3237 uxtl2(vform, temp2, src2);
3238 mla(vform, dst, temp1, temp2);
3239 return dst;
3240 }
3241
3242
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3243 LogicVRegister Simulator::smlal(VectorFormat vform,
3244 LogicVRegister dst,
3245 const LogicVRegister& src1,
3246 const LogicVRegister& src2) {
3247 SimVRegister temp1, temp2;
3248 sxtl(vform, temp1, src1);
3249 sxtl(vform, temp2, src2);
3250 mla(vform, dst, temp1, temp2);
3251 return dst;
3252 }
3253
3254
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3255 LogicVRegister Simulator::smlal2(VectorFormat vform,
3256 LogicVRegister dst,
3257 const LogicVRegister& src1,
3258 const LogicVRegister& src2) {
3259 SimVRegister temp1, temp2;
3260 sxtl2(vform, temp1, src1);
3261 sxtl2(vform, temp2, src2);
3262 mla(vform, dst, temp1, temp2);
3263 return dst;
3264 }
3265
3266
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3267 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3268 LogicVRegister dst,
3269 const LogicVRegister& src1,
3270 const LogicVRegister& src2) {
3271 SimVRegister temp;
3272 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3273 return add(vform, dst, dst, product).SignedSaturate(vform);
3274 }
3275
3276
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3277 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3278 LogicVRegister dst,
3279 const LogicVRegister& src1,
3280 const LogicVRegister& src2) {
3281 SimVRegister temp;
3282 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3283 return add(vform, dst, dst, product).SignedSaturate(vform);
3284 }
3285
3286
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3287 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3288 LogicVRegister dst,
3289 const LogicVRegister& src1,
3290 const LogicVRegister& src2) {
3291 SimVRegister temp;
3292 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3293 return sub(vform, dst, dst, product).SignedSaturate(vform);
3294 }
3295
3296
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3297 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3298 LogicVRegister dst,
3299 const LogicVRegister& src1,
3300 const LogicVRegister& src2) {
3301 SimVRegister temp;
3302 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3303 return sub(vform, dst, dst, product).SignedSaturate(vform);
3304 }
3305
3306
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3307 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3308 LogicVRegister dst,
3309 const LogicVRegister& src1,
3310 const LogicVRegister& src2) {
3311 SimVRegister temp;
3312 LogicVRegister product = smull(vform, temp, src1, src2);
3313 return add(vform, dst, product, product).SignedSaturate(vform);
3314 }
3315
3316
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3317 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3318 LogicVRegister dst,
3319 const LogicVRegister& src1,
3320 const LogicVRegister& src2) {
3321 SimVRegister temp;
3322 LogicVRegister product = smull2(vform, temp, src1, src2);
3323 return add(vform, dst, product, product).SignedSaturate(vform);
3324 }
3325
3326
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3327 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3328 LogicVRegister dst,
3329 const LogicVRegister& src1,
3330 const LogicVRegister& src2,
3331 bool round) {
3332 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3333 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3334 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3335
3336 int esize = LaneSizeInBitsFromFormat(vform);
3337 int round_const = round ? (1 << (esize - 2)) : 0;
3338 int64_t product;
3339
3340 dst.ClearForWrite(vform);
3341 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3342 product = src1.Int(vform, i) * src2.Int(vform, i);
3343 product += round_const;
3344 product = product >> (esize - 1);
3345
3346 if (product > MaxIntFromFormat(vform)) {
3347 product = MaxIntFromFormat(vform);
3348 } else if (product < MinIntFromFormat(vform)) {
3349 product = MinIntFromFormat(vform);
3350 }
3351 dst.SetInt(vform, i, product);
3352 }
3353 return dst;
3354 }
3355
3356
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3357 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3358 LogicVRegister dst,
3359 const LogicVRegister& src1,
3360 const LogicVRegister& src2) {
3361 return sqrdmulh(vform, dst, src1, src2, false);
3362 }
3363
3364
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3365 LogicVRegister Simulator::addhn(VectorFormat vform,
3366 LogicVRegister dst,
3367 const LogicVRegister& src1,
3368 const LogicVRegister& src2) {
3369 SimVRegister temp;
3370 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3371 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3372 return dst;
3373 }
3374
3375
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3376 LogicVRegister Simulator::addhn2(VectorFormat vform,
3377 LogicVRegister dst,
3378 const LogicVRegister& src1,
3379 const LogicVRegister& src2) {
3380 SimVRegister temp;
3381 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3382 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3383 return dst;
3384 }
3385
3386
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3387 LogicVRegister Simulator::raddhn(VectorFormat vform,
3388 LogicVRegister dst,
3389 const LogicVRegister& src1,
3390 const LogicVRegister& src2) {
3391 SimVRegister temp;
3392 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3393 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3394 return dst;
3395 }
3396
3397
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3398 LogicVRegister Simulator::raddhn2(VectorFormat vform,
3399 LogicVRegister dst,
3400 const LogicVRegister& src1,
3401 const LogicVRegister& src2) {
3402 SimVRegister temp;
3403 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3404 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3405 return dst;
3406 }
3407
3408
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3409 LogicVRegister Simulator::subhn(VectorFormat vform,
3410 LogicVRegister dst,
3411 const LogicVRegister& src1,
3412 const LogicVRegister& src2) {
3413 SimVRegister temp;
3414 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3415 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3416 return dst;
3417 }
3418
3419
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3420 LogicVRegister Simulator::subhn2(VectorFormat vform,
3421 LogicVRegister dst,
3422 const LogicVRegister& src1,
3423 const LogicVRegister& src2) {
3424 SimVRegister temp;
3425 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3426 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3427 return dst;
3428 }
3429
3430
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3431 LogicVRegister Simulator::rsubhn(VectorFormat vform,
3432 LogicVRegister dst,
3433 const LogicVRegister& src1,
3434 const LogicVRegister& src2) {
3435 SimVRegister temp;
3436 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3437 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3438 return dst;
3439 }
3440
3441
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3442 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3443 LogicVRegister dst,
3444 const LogicVRegister& src1,
3445 const LogicVRegister& src2) {
3446 SimVRegister temp;
3447 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3448 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3449 return dst;
3450 }
3451
3452
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3453 LogicVRegister Simulator::trn1(VectorFormat vform,
3454 LogicVRegister dst,
3455 const LogicVRegister& src1,
3456 const LogicVRegister& src2) {
3457 uint64_t result[16];
3458 int laneCount = LaneCountFromFormat(vform);
3459 int pairs = laneCount / 2;
3460 for (int i = 0; i < pairs; ++i) {
3461 result[2 * i] = src1.Uint(vform, 2 * i);
3462 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3463 }
3464
3465 dst.ClearForWrite(vform);
3466 for (int i = 0; i < laneCount; ++i) {
3467 dst.SetUint(vform, i, result[i]);
3468 }
3469 return dst;
3470 }
3471
3472
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3473 LogicVRegister Simulator::trn2(VectorFormat vform,
3474 LogicVRegister dst,
3475 const LogicVRegister& src1,
3476 const LogicVRegister& src2) {
3477 uint64_t result[16];
3478 int laneCount = LaneCountFromFormat(vform);
3479 int pairs = laneCount / 2;
3480 for (int i = 0; i < pairs; ++i) {
3481 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3482 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3483 }
3484
3485 dst.ClearForWrite(vform);
3486 for (int i = 0; i < laneCount; ++i) {
3487 dst.SetUint(vform, i, result[i]);
3488 }
3489 return dst;
3490 }
3491
3492
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3493 LogicVRegister Simulator::zip1(VectorFormat vform,
3494 LogicVRegister dst,
3495 const LogicVRegister& src1,
3496 const LogicVRegister& src2) {
3497 uint64_t result[16];
3498 int laneCount = LaneCountFromFormat(vform);
3499 int pairs = laneCount / 2;
3500 for (int i = 0; i < pairs; ++i) {
3501 result[2 * i] = src1.Uint(vform, i);
3502 result[(2 * i) + 1] = src2.Uint(vform, i);
3503 }
3504
3505 dst.ClearForWrite(vform);
3506 for (int i = 0; i < laneCount; ++i) {
3507 dst.SetUint(vform, i, result[i]);
3508 }
3509 return dst;
3510 }
3511
3512
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3513 LogicVRegister Simulator::zip2(VectorFormat vform,
3514 LogicVRegister dst,
3515 const LogicVRegister& src1,
3516 const LogicVRegister& src2) {
3517 uint64_t result[16];
3518 int laneCount = LaneCountFromFormat(vform);
3519 int pairs = laneCount / 2;
3520 for (int i = 0; i < pairs; ++i) {
3521 result[2 * i] = src1.Uint(vform, pairs + i);
3522 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3523 }
3524
3525 dst.ClearForWrite(vform);
3526 for (int i = 0; i < laneCount; ++i) {
3527 dst.SetUint(vform, i, result[i]);
3528 }
3529 return dst;
3530 }
3531
3532
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3533 LogicVRegister Simulator::uzp1(VectorFormat vform,
3534 LogicVRegister dst,
3535 const LogicVRegister& src1,
3536 const LogicVRegister& src2) {
3537 uint64_t result[32];
3538 int laneCount = LaneCountFromFormat(vform);
3539 for (int i = 0; i < laneCount; ++i) {
3540 result[i] = src1.Uint(vform, i);
3541 result[laneCount + i] = src2.Uint(vform, i);
3542 }
3543
3544 dst.ClearForWrite(vform);
3545 for (int i = 0; i < laneCount; ++i) {
3546 dst.SetUint(vform, i, result[2 * i]);
3547 }
3548 return dst;
3549 }
3550
3551
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3552 LogicVRegister Simulator::uzp2(VectorFormat vform,
3553 LogicVRegister dst,
3554 const LogicVRegister& src1,
3555 const LogicVRegister& src2) {
3556 uint64_t result[32];
3557 int laneCount = LaneCountFromFormat(vform);
3558 for (int i = 0; i < laneCount; ++i) {
3559 result[i] = src1.Uint(vform, i);
3560 result[laneCount + i] = src2.Uint(vform, i);
3561 }
3562
3563 dst.ClearForWrite(vform);
3564 for (int i = 0; i < laneCount; ++i) {
3565 dst.SetUint(vform, i, result[(2 * i) + 1]);
3566 }
3567 return dst;
3568 }
3569
3570
3571 template <typename T>
FPAdd(T op1,T op2)3572 T Simulator::FPAdd(T op1, T op2) {
3573 T result = FPProcessNaNs(op1, op2);
3574 if (std::isnan(result)) return result;
3575
3576 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
3577 // inf + -inf returns the default NaN.
3578 FPProcessException();
3579 return FPDefaultNaN<T>();
3580 } else {
3581 // Other cases should be handled by standard arithmetic.
3582 return op1 + op2;
3583 }
3584 }
3585
3586
3587 template <typename T>
FPSub(T op1,T op2)3588 T Simulator::FPSub(T op1, T op2) {
3589 // NaNs should be handled elsewhere.
3590 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3591
3592 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3593 // inf - inf returns the default NaN.
3594 FPProcessException();
3595 return FPDefaultNaN<T>();
3596 } else {
3597 // Other cases should be handled by standard arithmetic.
3598 return op1 - op2;
3599 }
3600 }
3601
3602
3603 template <typename T>
FPMul(T op1,T op2)3604 T Simulator::FPMul(T op1, T op2) {
3605 // NaNs should be handled elsewhere.
3606 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3607
3608 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3609 // inf * 0.0 returns the default NaN.
3610 FPProcessException();
3611 return FPDefaultNaN<T>();
3612 } else {
3613 // Other cases should be handled by standard arithmetic.
3614 return op1 * op2;
3615 }
3616 }
3617
3618
3619 template <typename T>
FPMulx(T op1,T op2)3620 T Simulator::FPMulx(T op1, T op2) {
3621 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3622 // inf * 0.0 returns +/-2.0.
3623 T two = 2.0;
3624 return copysign(1.0, op1) * copysign(1.0, op2) * two;
3625 }
3626 return FPMul(op1, op2);
3627 }
3628
3629
3630 template <typename T>
FPMulAdd(T a,T op1,T op2)3631 T Simulator::FPMulAdd(T a, T op1, T op2) {
3632 T result = FPProcessNaNs3(a, op1, op2);
3633
3634 T sign_a = copysign(1.0, a);
3635 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
3636 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
3637 bool operation_generates_nan =
3638 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
3639 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
3640 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
3641
3642 if (std::isnan(result)) {
3643 // Generated NaNs override quiet NaNs propagated from a.
3644 if (operation_generates_nan && IsQuietNaN(a)) {
3645 FPProcessException();
3646 return FPDefaultNaN<T>();
3647 } else {
3648 return result;
3649 }
3650 }
3651
3652 // If the operation would produce a NaN, return the default NaN.
3653 if (operation_generates_nan) {
3654 FPProcessException();
3655 return FPDefaultNaN<T>();
3656 }
3657
3658 // Work around broken fma implementations for exact zero results: The sign of
3659 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3660 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3661 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3662 }
3663
3664 result = FusedMultiplyAdd(op1, op2, a);
3665 VIXL_ASSERT(!std::isnan(result));
3666
3667 // Work around broken fma implementations for rounded zero results: If a is
3668 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3669 if ((a == 0.0) && (result == 0.0)) {
3670 return copysign(0.0, sign_prod);
3671 }
3672
3673 return result;
3674 }
3675
3676
3677 template <typename T>
FPDiv(T op1,T op2)3678 T Simulator::FPDiv(T op1, T op2) {
3679 // NaNs should be handled elsewhere.
3680 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3681
3682 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3683 // inf / inf and 0.0 / 0.0 return the default NaN.
3684 FPProcessException();
3685 return FPDefaultNaN<T>();
3686 } else {
3687 if (op2 == 0.0) {
3688 FPProcessException();
3689 if (!std::isnan(op1)) {
3690 double op1_sign = copysign(1.0, op1);
3691 double op2_sign = copysign(1.0, op2);
3692 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3693 }
3694 }
3695
3696 // Other cases should be handled by standard arithmetic.
3697 return op1 / op2;
3698 }
3699 }
3700
3701
3702 template <typename T>
FPSqrt(T op)3703 T Simulator::FPSqrt(T op) {
3704 if (std::isnan(op)) {
3705 return FPProcessNaN(op);
3706 } else if (op < 0.0) {
3707 FPProcessException();
3708 return FPDefaultNaN<T>();
3709 } else {
3710 return sqrt(op);
3711 }
3712 }
3713
3714
3715 template <typename T>
FPMax(T a,T b)3716 T Simulator::FPMax(T a, T b) {
3717 T result = FPProcessNaNs(a, b);
3718 if (std::isnan(result)) return result;
3719
3720 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3721 // a and b are zero, and the sign differs: return +0.0.
3722 return 0.0;
3723 } else {
3724 return (a > b) ? a : b;
3725 }
3726 }
3727
3728
3729 template <typename T>
FPMaxNM(T a,T b)3730 T Simulator::FPMaxNM(T a, T b) {
3731 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3732 a = kFP64NegativeInfinity;
3733 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3734 b = kFP64NegativeInfinity;
3735 }
3736
3737 T result = FPProcessNaNs(a, b);
3738 return std::isnan(result) ? result : FPMax(a, b);
3739 }
3740
3741
3742 template <typename T>
FPMin(T a,T b)3743 T Simulator::FPMin(T a, T b) {
3744 T result = FPProcessNaNs(a, b);
3745 if (std::isnan(result)) return result;
3746
3747 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3748 // a and b are zero, and the sign differs: return -0.0.
3749 return -0.0;
3750 } else {
3751 return (a < b) ? a : b;
3752 }
3753 }
3754
3755
3756 template <typename T>
FPMinNM(T a,T b)3757 T Simulator::FPMinNM(T a, T b) {
3758 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3759 a = kFP64PositiveInfinity;
3760 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3761 b = kFP64PositiveInfinity;
3762 }
3763
3764 T result = FPProcessNaNs(a, b);
3765 return std::isnan(result) ? result : FPMin(a, b);
3766 }
3767
3768
3769 template <typename T>
FPRecipStepFused(T op1,T op2)3770 T Simulator::FPRecipStepFused(T op1, T op2) {
3771 const T two = 2.0;
3772 if ((std::isinf(op1) && (op2 == 0.0)) ||
3773 ((op1 == 0.0) && (std::isinf(op2)))) {
3774 return two;
3775 } else if (std::isinf(op1) || std::isinf(op2)) {
3776 // Return +inf if signs match, otherwise -inf.
3777 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3778 : kFP64NegativeInfinity;
3779 } else {
3780 return FusedMultiplyAdd(op1, op2, two);
3781 }
3782 }
3783
3784
3785 template <typename T>
FPRSqrtStepFused(T op1,T op2)3786 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3787 const T one_point_five = 1.5;
3788 const T two = 2.0;
3789
3790 if ((std::isinf(op1) && (op2 == 0.0)) ||
3791 ((op1 == 0.0) && (std::isinf(op2)))) {
3792 return one_point_five;
3793 } else if (std::isinf(op1) || std::isinf(op2)) {
3794 // Return +inf if signs match, otherwise -inf.
3795 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3796 : kFP64NegativeInfinity;
3797 } else {
3798 // The multiply-add-halve operation must be fully fused, so avoid interim
3799 // rounding by checking which operand can be losslessly divided by two
3800 // before doing the multiply-add.
3801 if (std::isnormal(op1 / two)) {
3802 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3803 } else if (std::isnormal(op2 / two)) {
3804 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3805 } else {
3806 // Neither operand is normal after halving: the result is dominated by
3807 // the addition term, so just return that.
3808 return one_point_five;
3809 }
3810 }
3811 }
3812
3813
FPRoundInt(double value,FPRounding round_mode)3814 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3815 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3816 (value == kFP64NegativeInfinity)) {
3817 return value;
3818 } else if (std::isnan(value)) {
3819 return FPProcessNaN(value);
3820 }
3821
3822 double int_result = std::floor(value);
3823 double error = value - int_result;
3824 switch (round_mode) {
3825 case FPTieAway: {
3826 // Take care of correctly handling the range ]-0.5, -0.0], which must
3827 // yield -0.0.
3828 if ((-0.5 < value) && (value < 0.0)) {
3829 int_result = -0.0;
3830
3831 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3832 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3833 // result is positive, round up.
3834 int_result++;
3835 }
3836 break;
3837 }
3838 case FPTieEven: {
3839 // Take care of correctly handling the range [-0.5, -0.0], which must
3840 // yield -0.0.
3841 if ((-0.5 <= value) && (value < 0.0)) {
3842 int_result = -0.0;
3843
3844 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3845 // result is odd, round up.
3846 } else if ((error > 0.5) ||
3847 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3848 int_result++;
3849 }
3850 break;
3851 }
3852 case FPZero: {
3853 // If value>0 then we take floor(value)
3854 // otherwise, ceil(value).
3855 if (value < 0) {
3856 int_result = ceil(value);
3857 }
3858 break;
3859 }
3860 case FPNegativeInfinity: {
3861 // We always use floor(value).
3862 break;
3863 }
3864 case FPPositiveInfinity: {
3865 // Take care of correctly handling the range ]-1.0, -0.0], which must
3866 // yield -0.0.
3867 if ((-1.0 < value) && (value < 0.0)) {
3868 int_result = -0.0;
3869
3870 // If the error is non-zero, round up.
3871 } else if (error > 0.0) {
3872 int_result++;
3873 }
3874 break;
3875 }
3876 default:
3877 VIXL_UNIMPLEMENTED();
3878 }
3879 return int_result;
3880 }
3881
3882
FPToInt32(double value,FPRounding rmode)3883 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3884 value = FPRoundInt(value, rmode);
3885 if (value >= kWMaxInt) {
3886 return kWMaxInt;
3887 } else if (value < kWMinInt) {
3888 return kWMinInt;
3889 }
3890 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3891 }
3892
3893
FPToInt64(double value,FPRounding rmode)3894 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3895 value = FPRoundInt(value, rmode);
3896 if (value >= kXMaxInt) {
3897 return kXMaxInt;
3898 } else if (value < kXMinInt) {
3899 return kXMinInt;
3900 }
3901 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3902 }
3903
3904
FPToUInt32(double value,FPRounding rmode)3905 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3906 value = FPRoundInt(value, rmode);
3907 if (value >= kWMaxUInt) {
3908 return kWMaxUInt;
3909 } else if (value < 0.0) {
3910 return 0;
3911 }
3912 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3913 }
3914
3915
FPToUInt64(double value,FPRounding rmode)3916 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3917 value = FPRoundInt(value, rmode);
3918 if (value >= kXMaxUInt) {
3919 return kXMaxUInt;
3920 } else if (value < 0.0) {
3921 return 0;
3922 }
3923 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3924 }
3925
3926
// For every (FN, OP, PROCNAN) entry of NEON_FP3SAME_LIST this generates:
//  - FN<T>: applies OP to each pair of lanes of src1 and src2. When PROCNAN is
//    true the lanes first go through FPProcessNaNs, and OP only runs if that
//    did not produce a NaN.
//  - FN: picks FN<float> or FN<double> from the lane size of vform.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                     \
  template <typename T>                                               \
  LogicVRegister Simulator::FN(VectorFormat vform,                    \
                               LogicVRegister dst,                    \
                               const LogicVRegister& src1,            \
                               const LogicVRegister& src2) {          \
    dst.ClearForWrite(vform);                                         \
    for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {   \
      T lhs = src1.Float<T>(lane);                                    \
      T rhs = src2.Float<T>(lane);                                    \
      T value;                                                        \
      if (PROCNAN) {                                                  \
        value = FPProcessNaNs(lhs, rhs);                              \
        if (!std::isnan(value)) {                                     \
          value = OP(lhs, rhs);                                       \
        }                                                             \
      } else {                                                        \
        value = OP(lhs, rhs);                                         \
      }                                                               \
      dst.SetFloat(lane, value);                                      \
    }                                                                 \
    return dst;                                                       \
  }                                                                   \
                                                                      \
  LogicVRegister Simulator::FN(VectorFormat vform,                    \
                               LogicVRegister dst,                    \
                               const LogicVRegister& src1,            \
                               const LogicVRegister& src2) {          \
    if (LaneSizeInBitsFromFormat(vform) != kSRegSize) {               \
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);      \
      FN<double>(vform, dst, src1, src2);                             \
    } else {                                                          \
      FN<float>(vform, dst, src1, src2);                              \
    }                                                                 \
    return dst;                                                       \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3965
3966
3967 LogicVRegister Simulator::fnmul(VectorFormat vform,
3968 LogicVRegister dst,
3969 const LogicVRegister& src1,
3970 const LogicVRegister& src2) {
3971 SimVRegister temp;
3972 LogicVRegister product = fmul(vform, temp, src1, src2);
3973 return fneg(vform, dst, product);
3974 }
3975
3976
3977 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3978 LogicVRegister Simulator::frecps(VectorFormat vform,
3979 LogicVRegister dst,
3980 const LogicVRegister& src1,
3981 const LogicVRegister& src2) {
3982 dst.ClearForWrite(vform);
3983 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3984 T op1 = -src1.Float<T>(i);
3985 T op2 = src2.Float<T>(i);
3986 T result = FPProcessNaNs(op1, op2);
3987 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3988 }
3989 return dst;
3990 }
3991
3992
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3993 LogicVRegister Simulator::frecps(VectorFormat vform,
3994 LogicVRegister dst,
3995 const LogicVRegister& src1,
3996 const LogicVRegister& src2) {
3997 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3998 frecps<float>(vform, dst, src1, src2);
3999 } else {
4000 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4001 frecps<double>(vform, dst, src1, src2);
4002 }
4003 return dst;
4004 }
4005
4006
4007 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4008 LogicVRegister Simulator::frsqrts(VectorFormat vform,
4009 LogicVRegister dst,
4010 const LogicVRegister& src1,
4011 const LogicVRegister& src2) {
4012 dst.ClearForWrite(vform);
4013 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4014 T op1 = -src1.Float<T>(i);
4015 T op2 = src2.Float<T>(i);
4016 T result = FPProcessNaNs(op1, op2);
4017 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
4018 }
4019 return dst;
4020 }
4021
4022
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4023 LogicVRegister Simulator::frsqrts(VectorFormat vform,
4024 LogicVRegister dst,
4025 const LogicVRegister& src1,
4026 const LogicVRegister& src2) {
4027 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4028 frsqrts<float>(vform, dst, src1, src2);
4029 } else {
4030 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4031 frsqrts<double>(vform, dst, src1, src2);
4032 }
4033 return dst;
4034 }
4035
4036
4037 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4038 LogicVRegister Simulator::fcmp(VectorFormat vform,
4039 LogicVRegister dst,
4040 const LogicVRegister& src1,
4041 const LogicVRegister& src2,
4042 Condition cond) {
4043 dst.ClearForWrite(vform);
4044 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4045 bool result = false;
4046 T op1 = src1.Float<T>(i);
4047 T op2 = src2.Float<T>(i);
4048 T nan_result = FPProcessNaNs(op1, op2);
4049 if (!std::isnan(nan_result)) {
4050 switch (cond) {
4051 case eq:
4052 result = (op1 == op2);
4053 break;
4054 case ge:
4055 result = (op1 >= op2);
4056 break;
4057 case gt:
4058 result = (op1 > op2);
4059 break;
4060 case le:
4061 result = (op1 <= op2);
4062 break;
4063 case lt:
4064 result = (op1 < op2);
4065 break;
4066 default:
4067 VIXL_UNREACHABLE();
4068 break;
4069 }
4070 }
4071 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
4072 }
4073 return dst;
4074 }
4075
4076
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4077 LogicVRegister Simulator::fcmp(VectorFormat vform,
4078 LogicVRegister dst,
4079 const LogicVRegister& src1,
4080 const LogicVRegister& src2,
4081 Condition cond) {
4082 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4083 fcmp<float>(vform, dst, src1, src2, cond);
4084 } else {
4085 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4086 fcmp<double>(vform, dst, src1, src2, cond);
4087 }
4088 return dst;
4089 }
4090
4091
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)4092 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4093 LogicVRegister dst,
4094 const LogicVRegister& src,
4095 Condition cond) {
4096 SimVRegister temp;
4097 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4098 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
4099 fcmp<float>(vform, dst, src, zero_reg, cond);
4100 } else {
4101 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4102 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
4103 fcmp<double>(vform, dst, src, zero_reg, cond);
4104 }
4105 return dst;
4106 }
4107
4108
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4109 LogicVRegister Simulator::fabscmp(VectorFormat vform,
4110 LogicVRegister dst,
4111 const LogicVRegister& src1,
4112 const LogicVRegister& src2,
4113 Condition cond) {
4114 SimVRegister temp1, temp2;
4115 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4116 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4117 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4118 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4119 } else {
4120 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4121 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4122 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4123 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4124 }
4125 return dst;
4126 }
4127
4128
4129 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4130 LogicVRegister Simulator::fmla(VectorFormat vform,
4131 LogicVRegister dst,
4132 const LogicVRegister& src1,
4133 const LogicVRegister& src2) {
4134 dst.ClearForWrite(vform);
4135 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4136 T op1 = src1.Float<T>(i);
4137 T op2 = src2.Float<T>(i);
4138 T acc = dst.Float<T>(i);
4139 T result = FPMulAdd(acc, op1, op2);
4140 dst.SetFloat(i, result);
4141 }
4142 return dst;
4143 }
4144
4145
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4146 LogicVRegister Simulator::fmla(VectorFormat vform,
4147 LogicVRegister dst,
4148 const LogicVRegister& src1,
4149 const LogicVRegister& src2) {
4150 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4151 fmla<float>(vform, dst, src1, src2);
4152 } else {
4153 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4154 fmla<double>(vform, dst, src1, src2);
4155 }
4156 return dst;
4157 }
4158
4159
4160 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4161 LogicVRegister Simulator::fmls(VectorFormat vform,
4162 LogicVRegister dst,
4163 const LogicVRegister& src1,
4164 const LogicVRegister& src2) {
4165 dst.ClearForWrite(vform);
4166 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4167 T op1 = -src1.Float<T>(i);
4168 T op2 = src2.Float<T>(i);
4169 T acc = dst.Float<T>(i);
4170 T result = FPMulAdd(acc, op1, op2);
4171 dst.SetFloat(i, result);
4172 }
4173 return dst;
4174 }
4175
4176
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4177 LogicVRegister Simulator::fmls(VectorFormat vform,
4178 LogicVRegister dst,
4179 const LogicVRegister& src1,
4180 const LogicVRegister& src2) {
4181 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4182 fmls<float>(vform, dst, src1, src2);
4183 } else {
4184 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4185 fmls<double>(vform, dst, src1, src2);
4186 }
4187 return dst;
4188 }
4189
4190
4191 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4192 LogicVRegister Simulator::fneg(VectorFormat vform,
4193 LogicVRegister dst,
4194 const LogicVRegister& src) {
4195 dst.ClearForWrite(vform);
4196 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4197 T op = src.Float<T>(i);
4198 op = -op;
4199 dst.SetFloat(i, op);
4200 }
4201 return dst;
4202 }
4203
4204
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4205 LogicVRegister Simulator::fneg(VectorFormat vform,
4206 LogicVRegister dst,
4207 const LogicVRegister& src) {
4208 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4209 fneg<float>(vform, dst, src);
4210 } else {
4211 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4212 fneg<double>(vform, dst, src);
4213 }
4214 return dst;
4215 }
4216
4217
4218 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4219 LogicVRegister Simulator::fabs_(VectorFormat vform,
4220 LogicVRegister dst,
4221 const LogicVRegister& src) {
4222 dst.ClearForWrite(vform);
4223 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4224 T op = src.Float<T>(i);
4225 if (copysign(1.0, op) < 0.0) {
4226 op = -op;
4227 }
4228 dst.SetFloat(i, op);
4229 }
4230 return dst;
4231 }
4232
4233
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4234 LogicVRegister Simulator::fabs_(VectorFormat vform,
4235 LogicVRegister dst,
4236 const LogicVRegister& src) {
4237 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4238 fabs_<float>(vform, dst, src);
4239 } else {
4240 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4241 fabs_<double>(vform, dst, src);
4242 }
4243 return dst;
4244 }
4245
4246
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4247 LogicVRegister Simulator::fabd(VectorFormat vform,
4248 LogicVRegister dst,
4249 const LogicVRegister& src1,
4250 const LogicVRegister& src2) {
4251 SimVRegister temp;
4252 fsub(vform, temp, src1, src2);
4253 fabs_(vform, dst, temp);
4254 return dst;
4255 }
4256
4257
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4258 LogicVRegister Simulator::fsqrt(VectorFormat vform,
4259 LogicVRegister dst,
4260 const LogicVRegister& src) {
4261 dst.ClearForWrite(vform);
4262 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4263 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4264 float result = FPSqrt(src.Float<float>(i));
4265 dst.SetFloat(i, result);
4266 }
4267 } else {
4268 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4269 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4270 double result = FPSqrt(src.Float<double>(i));
4271 dst.SetFloat(i, result);
4272 }
4273 }
4274 return dst;
4275 }
4276
4277
// For every (FNP, FN, OP) entry of NEON_FPPAIRWISE_LIST this generates:
//  - a vector form of FNP, which unzips src1/src2 with uzp1 and uzp2 into two
//    scratch registers and combines those with the lane-wise operation FN;
//  - a scalar form of FNP, which applies OP to lanes 0 and 1 of src and
//    stores the result in lane 0 of dst. Here dst.ClearForWrite runs after
//    the source lanes have been read and the result written.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                             \
  LogicVRegister Simulator::FNP(VectorFormat vform,                     \
                                LogicVRegister dst,                     \
                                const LogicVRegister& src1,             \
                                const LogicVRegister& src2) {           \
    SimVRegister unzipped1, unzipped2;                                  \
    uzp1(vform, unzipped1, src1, src2);                                 \
    uzp2(vform, unzipped2, src1, src2);                                 \
    FN(vform, dst, unzipped1, unzipped2);                               \
    return dst;                                                         \
  }                                                                     \
                                                                        \
  LogicVRegister Simulator::FNP(VectorFormat vform,                     \
                                LogicVRegister dst,                     \
                                const LogicVRegister& src) {            \
    if (vform != kFormatS) {                                            \
      VIXL_ASSERT(vform == kFormatD);                                   \
      double reduced = OP(src.Float<double>(0), src.Float<double>(1));  \
      dst.SetFloat(0, reduced);                                         \
    } else {                                                            \
      float reduced = OP(src.Float<float>(0), src.Float<float>(1));     \
      dst.SetFloat(0, reduced);                                         \
    }                                                                   \
    dst.ClearForWrite(vform);                                           \
    return dst;                                                         \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
4306
4307
4308 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4309 LogicVRegister dst,
4310 const LogicVRegister& src,
4311 FPMinMaxOp Op) {
4312 VIXL_ASSERT(vform == kFormat4S);
4313 USE(vform);
4314 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
4315 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
4316 float result = (this->*Op)(result1, result2);
4317 dst.ClearForWrite(kFormatS);
4318 dst.SetFloat<float>(0, result);
4319 return dst;
4320 }
4321
4322
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4323 LogicVRegister Simulator::fmaxv(VectorFormat vform,
4324 LogicVRegister dst,
4325 const LogicVRegister& src) {
4326 return fminmaxv(vform, dst, src, &Simulator::FPMax);
4327 }
4328
4329
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4330 LogicVRegister Simulator::fminv(VectorFormat vform,
4331 LogicVRegister dst,
4332 const LogicVRegister& src) {
4333 return fminmaxv(vform, dst, src, &Simulator::FPMin);
4334 }
4335
4336
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4337 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4338 LogicVRegister dst,
4339 const LogicVRegister& src) {
4340 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
4341 }
4342
4343
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4344 LogicVRegister Simulator::fminnmv(VectorFormat vform,
4345 LogicVRegister dst,
4346 const LogicVRegister& src) {
4347 return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
4348 }
4349
4350
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4351 LogicVRegister Simulator::fmul(VectorFormat vform,
4352 LogicVRegister dst,
4353 const LogicVRegister& src1,
4354 const LogicVRegister& src2,
4355 int index) {
4356 dst.ClearForWrite(vform);
4357 SimVRegister temp;
4358 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4359 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4360 fmul<float>(vform, dst, src1, index_reg);
4361
4362 } else {
4363 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4364 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4365 fmul<double>(vform, dst, src1, index_reg);
4366 }
4367 return dst;
4368 }
4369
4370
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4371 LogicVRegister Simulator::fmla(VectorFormat vform,
4372 LogicVRegister dst,
4373 const LogicVRegister& src1,
4374 const LogicVRegister& src2,
4375 int index) {
4376 dst.ClearForWrite(vform);
4377 SimVRegister temp;
4378 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4379 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4380 fmla<float>(vform, dst, src1, index_reg);
4381
4382 } else {
4383 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4384 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4385 fmla<double>(vform, dst, src1, index_reg);
4386 }
4387 return dst;
4388 }
4389
4390
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4391 LogicVRegister Simulator::fmls(VectorFormat vform,
4392 LogicVRegister dst,
4393 const LogicVRegister& src1,
4394 const LogicVRegister& src2,
4395 int index) {
4396 dst.ClearForWrite(vform);
4397 SimVRegister temp;
4398 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4399 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4400 fmls<float>(vform, dst, src1, index_reg);
4401
4402 } else {
4403 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4404 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4405 fmls<double>(vform, dst, src1, index_reg);
4406 }
4407 return dst;
4408 }
4409
4410
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4411 LogicVRegister Simulator::fmulx(VectorFormat vform,
4412 LogicVRegister dst,
4413 const LogicVRegister& src1,
4414 const LogicVRegister& src2,
4415 int index) {
4416 dst.ClearForWrite(vform);
4417 SimVRegister temp;
4418 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4419 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4420 fmulx<float>(vform, dst, src1, index_reg);
4421
4422 } else {
4423 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4424 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4425 fmulx<double>(vform, dst, src1, index_reg);
4426 }
4427 return dst;
4428 }
4429
4430
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)4431 LogicVRegister Simulator::frint(VectorFormat vform,
4432 LogicVRegister dst,
4433 const LogicVRegister& src,
4434 FPRounding rounding_mode,
4435 bool inexact_exception) {
4436 dst.ClearForWrite(vform);
4437 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4438 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4439 float input = src.Float<float>(i);
4440 float rounded = FPRoundInt(input, rounding_mode);
4441 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4442 FPProcessException();
4443 }
4444 dst.SetFloat<float>(i, rounded);
4445 }
4446 } else {
4447 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4448 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4449 double input = src.Float<double>(i);
4450 double rounded = FPRoundInt(input, rounding_mode);
4451 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4452 FPProcessException();
4453 }
4454 dst.SetFloat<double>(i, rounded);
4455 }
4456 }
4457 return dst;
4458 }
4459
4460
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4461 LogicVRegister Simulator::fcvts(VectorFormat vform,
4462 LogicVRegister dst,
4463 const LogicVRegister& src,
4464 FPRounding rounding_mode,
4465 int fbits) {
4466 dst.ClearForWrite(vform);
4467 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4468 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4469 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4470 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4471 }
4472 } else {
4473 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4474 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4475 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4476 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4477 }
4478 }
4479 return dst;
4480 }
4481
4482
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4483 LogicVRegister Simulator::fcvtu(VectorFormat vform,
4484 LogicVRegister dst,
4485 const LogicVRegister& src,
4486 FPRounding rounding_mode,
4487 int fbits) {
4488 dst.ClearForWrite(vform);
4489 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4490 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4491 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4492 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4493 }
4494 } else {
4495 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4496 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4497 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4498 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4499 }
4500 }
4501 return dst;
4502 }
4503
4504
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4505 LogicVRegister Simulator::fcvtl(VectorFormat vform,
4506 LogicVRegister dst,
4507 const LogicVRegister& src) {
4508 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4509 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4510 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
4511 }
4512 } else {
4513 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4514 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4515 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
4516 }
4517 }
4518 return dst;
4519 }
4520
4521
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4522 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4523 LogicVRegister dst,
4524 const LogicVRegister& src) {
4525 int lane_count = LaneCountFromFormat(vform);
4526 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4527 for (int i = 0; i < lane_count; i++) {
4528 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
4529 }
4530 } else {
4531 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4532 for (int i = 0; i < lane_count; i++) {
4533 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
4534 }
4535 }
4536 return dst;
4537 }
4538
4539
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4540 LogicVRegister Simulator::fcvtn(VectorFormat vform,
4541 LogicVRegister dst,
4542 const LogicVRegister& src) {
4543 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4544 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4545 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
4546 }
4547 } else {
4548 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4549 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4550 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
4551 }
4552 }
4553 return dst;
4554 }
4555
4556
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4557 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4558 LogicVRegister dst,
4559 const LogicVRegister& src) {
4560 int lane_count = LaneCountFromFormat(vform) / 2;
4561 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4562 for (int i = lane_count - 1; i >= 0; i--) {
4563 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
4564 }
4565 } else {
4566 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4567 for (int i = lane_count - 1; i >= 0; i--) {
4568 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
4569 }
4570 }
4571 return dst;
4572 }
4573
4574
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4575 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4576 LogicVRegister dst,
4577 const LogicVRegister& src) {
4578 dst.ClearForWrite(vform);
4579 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4580 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4581 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
4582 }
4583 return dst;
4584 }
4585
4586
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4587 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4588 LogicVRegister dst,
4589 const LogicVRegister& src) {
4590 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4591 int lane_count = LaneCountFromFormat(vform) / 2;
4592 for (int i = lane_count - 1; i >= 0; i--) {
4593 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4594 }
4595 return dst;
4596 }
4597
4598
4599 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)4600 double Simulator::recip_sqrt_estimate(double a) {
4601 int q0, q1, s;
4602 double r;
4603 if (a < 0.5) {
4604 q0 = static_cast<int>(a * 512.0);
4605 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4606 } else {
4607 q1 = static_cast<int>(a * 256.0);
4608 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4609 }
4610 s = static_cast<int>(256.0 * r + 0.5);
4611 return static_cast<double>(s) / 256.0;
4612 }
4613
4614
Bits(uint64_t val,int start_bit,int end_bit)4615 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4616 return ExtractUnsignedBitfield64(start_bit, end_bit, val);
4617 }
4618
4619
4620 template <typename T>
FPRecipSqrtEstimate(T op)4621 T Simulator::FPRecipSqrtEstimate(T op) {
4622 if (std::isnan(op)) {
4623 return FPProcessNaN(op);
4624 } else if (op == 0.0) {
4625 if (copysign(1.0, op) < 0.0) {
4626 return kFP64NegativeInfinity;
4627 } else {
4628 return kFP64PositiveInfinity;
4629 }
4630 } else if (copysign(1.0, op) < 0.0) {
4631 FPProcessException();
4632 return FPDefaultNaN<T>();
4633 } else if (std::isinf(op)) {
4634 return 0.0;
4635 } else {
4636 uint64_t fraction;
4637 int exp, result_exp;
4638
4639 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4640 exp = FloatExp(op);
4641 fraction = FloatMantissa(op);
4642 fraction <<= 29;
4643 } else {
4644 exp = DoubleExp(op);
4645 fraction = DoubleMantissa(op);
4646 }
4647
4648 if (exp == 0) {
4649 while (Bits(fraction, 51, 51) == 0) {
4650 fraction = Bits(fraction, 50, 0) << 1;
4651 exp -= 1;
4652 }
4653 fraction = Bits(fraction, 50, 0) << 1;
4654 }
4655
4656 double scaled;
4657 if (Bits(exp, 0, 0) == 0) {
4658 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4659 } else {
4660 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
4661 }
4662
4663 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4664 result_exp = (380 - exp) / 2;
4665 } else {
4666 result_exp = (3068 - exp) / 2;
4667 }
4668
4669 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
4670
4671 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4672 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4673 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
4674 return FloatPack(0, exp_bits, est_bits);
4675 } else {
4676 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
4677 }
4678 }
4679 }
4680
4681
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4682 LogicVRegister Simulator::frsqrte(VectorFormat vform,
4683 LogicVRegister dst,
4684 const LogicVRegister& src) {
4685 dst.ClearForWrite(vform);
4686 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4687 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4688 float input = src.Float<float>(i);
4689 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4690 }
4691 } else {
4692 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4693 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4694 double input = src.Float<double>(i);
4695 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4696 }
4697 }
4698 return dst;
4699 }
4700
4701 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)4702 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4703 uint32_t sign;
4704
4705 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4706 sign = FloatSign(op);
4707 } else {
4708 sign = DoubleSign(op);
4709 }
4710
4711 if (std::isnan(op)) {
4712 return FPProcessNaN(op);
4713 } else if (std::isinf(op)) {
4714 return (sign == 1) ? -0.0 : 0.0;
4715 } else if (op == 0.0) {
4716 FPProcessException(); // FPExc_DivideByZero exception.
4717 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4718 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
4719 (std::fabs(op) < std::pow(2.0, -128.0))) ||
4720 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
4721 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4722 bool overflow_to_inf = false;
4723 switch (rounding) {
4724 case FPTieEven:
4725 overflow_to_inf = true;
4726 break;
4727 case FPPositiveInfinity:
4728 overflow_to_inf = (sign == 0);
4729 break;
4730 case FPNegativeInfinity:
4731 overflow_to_inf = (sign == 1);
4732 break;
4733 case FPZero:
4734 overflow_to_inf = false;
4735 break;
4736 default:
4737 break;
4738 }
4739 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
4740 if (overflow_to_inf) {
4741 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4742 } else {
4743 // Return FPMaxNormal(sign).
4744 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4745 return FloatPack(sign, 0xfe, 0x07fffff);
4746 } else {
4747 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
4748 }
4749 }
4750 } else {
4751 uint64_t fraction;
4752 int exp, result_exp;
4753 uint32_t sign;
4754
4755 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4756 sign = FloatSign(op);
4757 exp = FloatExp(op);
4758 fraction = FloatMantissa(op);
4759 fraction <<= 29;
4760 } else {
4761 sign = DoubleSign(op);
4762 exp = DoubleExp(op);
4763 fraction = DoubleMantissa(op);
4764 }
4765
4766 if (exp == 0) {
4767 if (Bits(fraction, 51, 51) == 0) {
4768 exp -= 1;
4769 fraction = Bits(fraction, 49, 0) << 2;
4770 } else {
4771 fraction = Bits(fraction, 50, 0) << 1;
4772 }
4773 }
4774
4775 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4776
4777 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4778 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
4779 } else {
4780 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
4781 }
4782
4783 double estimate = recip_estimate(scaled);
4784
4785 fraction = DoubleMantissa(estimate);
4786 if (result_exp == 0) {
4787 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4788 } else if (result_exp == -1) {
4789 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4790 result_exp = 0;
4791 }
4792 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4793 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4794 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4795 return FloatPack(sign, exp_bits, frac_bits);
4796 } else {
4797 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4798 }
4799 }
4800 }
4801
4802
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4803 LogicVRegister Simulator::frecpe(VectorFormat vform,
4804 LogicVRegister dst,
4805 const LogicVRegister& src,
4806 FPRounding round) {
4807 dst.ClearForWrite(vform);
4808 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4809 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4810 float input = src.Float<float>(i);
4811 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4812 }
4813 } else {
4814 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4815 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4816 double input = src.Float<double>(i);
4817 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4818 }
4819 }
4820 return dst;
4821 }
4822
4823
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4824 LogicVRegister Simulator::ursqrte(VectorFormat vform,
4825 LogicVRegister dst,
4826 const LogicVRegister& src) {
4827 dst.ClearForWrite(vform);
4828 uint64_t operand;
4829 uint32_t result;
4830 double dp_operand, dp_result;
4831 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4832 operand = src.Uint(vform, i);
4833 if (operand <= 0x3FFFFFFF) {
4834 result = 0xFFFFFFFF;
4835 } else {
4836 dp_operand = operand * std::pow(2.0, -32);
4837 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4838 result = static_cast<uint32_t>(dp_result);
4839 }
4840 dst.SetUint(vform, i, result);
4841 }
4842 return dst;
4843 }
4844
4845
4846 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)4847 double Simulator::recip_estimate(double a) {
4848 int q, s;
4849 double r;
4850 q = static_cast<int>(a * 512.0);
4851 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4852 s = static_cast<int>(256.0 * r + 0.5);
4853 return static_cast<double>(s) / 256.0;
4854 }
4855
4856
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4857 LogicVRegister Simulator::urecpe(VectorFormat vform,
4858 LogicVRegister dst,
4859 const LogicVRegister& src) {
4860 dst.ClearForWrite(vform);
4861 uint64_t operand;
4862 uint32_t result;
4863 double dp_operand, dp_result;
4864 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4865 operand = src.Uint(vform, i);
4866 if (operand <= 0x7FFFFFFF) {
4867 result = 0xFFFFFFFF;
4868 } else {
4869 dp_operand = operand * std::pow(2.0, -32);
4870 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4871 result = static_cast<uint32_t>(dp_result);
4872 }
4873 dst.SetUint(vform, i, result);
4874 }
4875 return dst;
4876 }
4877
4878 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4879 LogicVRegister Simulator::frecpx(VectorFormat vform,
4880 LogicVRegister dst,
4881 const LogicVRegister& src) {
4882 dst.ClearForWrite(vform);
4883 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4884 T op = src.Float<T>(i);
4885 T result;
4886 if (std::isnan(op)) {
4887 result = FPProcessNaN(op);
4888 } else {
4889 int exp;
4890 uint32_t sign;
4891 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4892 sign = FloatSign(op);
4893 exp = FloatExp(op);
4894 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4895 result = FloatPack(sign, exp, 0);
4896 } else {
4897 sign = DoubleSign(op);
4898 exp = DoubleExp(op);
4899 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4900 result = DoublePack(sign, exp, 0);
4901 }
4902 }
4903 dst.SetFloat(i, result);
4904 }
4905 return dst;
4906 }
4907
4908
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4909 LogicVRegister Simulator::frecpx(VectorFormat vform,
4910 LogicVRegister dst,
4911 const LogicVRegister& src) {
4912 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4913 frecpx<float>(vform, dst, src);
4914 } else {
4915 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4916 frecpx<double>(vform, dst, src);
4917 }
4918 return dst;
4919 }
4920
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4921 LogicVRegister Simulator::scvtf(VectorFormat vform,
4922 LogicVRegister dst,
4923 const LogicVRegister& src,
4924 int fbits,
4925 FPRounding round) {
4926 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4927 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4928 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4929 dst.SetFloat<float>(i, result);
4930 } else {
4931 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4932 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4933 dst.SetFloat<double>(i, result);
4934 }
4935 }
4936 return dst;
4937 }
4938
4939
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4940 LogicVRegister Simulator::ucvtf(VectorFormat vform,
4941 LogicVRegister dst,
4942 const LogicVRegister& src,
4943 int fbits,
4944 FPRounding round) {
4945 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4946 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4947 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4948 dst.SetFloat<float>(i, result);
4949 } else {
4950 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4951 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4952 dst.SetFloat<double>(i, result);
4953 }
4954 }
4955 return dst;
4956 }
4957
4958
4959 } // namespace aarch64
4960 } // namespace vixl
4961
4962 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
4963