1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/execution/arm64/simulator-arm64.h"
6
7 #if defined(USE_SIMULATOR)
8
9 #include <cmath>
10
11 namespace v8 {
12 namespace internal {
13
14 namespace {
15
16 // See FPRound for a description of this function.
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)17 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
18 FPRounding round_mode) {
19 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
20 sign, exponent, mantissa, round_mode);
21 return bit_cast<double>(bits);
22 }
23
24 // See FPRound for a description of this function.
FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)25 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
26 FPRounding round_mode) {
27 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
28 sign, exponent, mantissa, round_mode);
29 return bit_cast<float>(bits);
30 }
31
32 // See FPRound for a description of this function.
FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)33 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
34 uint64_t mantissa, FPRounding round_mode) {
35 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
36 sign, exponent, mantissa, round_mode);
37 }
38
39 } // namespace
40
FixedToDouble(int64_t src,int fbits,FPRounding round)41 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
42 if (src >= 0) {
43 return UFixedToDouble(src, fbits, round);
44 } else if (src == INT64_MIN) {
45 return -UFixedToDouble(src, fbits, round);
46 } else {
47 return -UFixedToDouble(-src, fbits, round);
48 }
49 }
50
UFixedToDouble(uint64_t src,int fbits,FPRounding round)51 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
52 // An input of 0 is a special case because the result is effectively
53 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
54 if (src == 0) {
55 return 0.0;
56 }
57
58 // Calculate the exponent. The highest significant bit will have the value
59 // 2^exponent.
60 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
61 const int64_t exponent = highest_significant_bit - fbits;
62
63 return FPRoundToDouble(0, exponent, src, round);
64 }
65
FixedToFloat(int64_t src,int fbits,FPRounding round)66 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
67 if (src >= 0) {
68 return UFixedToFloat(src, fbits, round);
69 } else if (src == INT64_MIN) {
70 return -UFixedToFloat(src, fbits, round);
71 } else {
72 return -UFixedToFloat(-src, fbits, round);
73 }
74 }
75
UFixedToFloat(uint64_t src,int fbits,FPRounding round)76 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
77 // An input of 0 is a special case because the result is effectively
78 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
79 if (src == 0) {
80 return 0.0f;
81 }
82
83 // Calculate the exponent. The highest significant bit will have the value
84 // 2^exponent.
85 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
86 const int32_t exponent = highest_significant_bit - fbits;
87
88 return FPRoundToFloat(0, exponent, src, round);
89 }
90
FPToDouble(float value)91 double Simulator::FPToDouble(float value) {
92 switch (std::fpclassify(value)) {
93 case FP_NAN: {
94 if (IsSignallingNaN(value)) {
95 FPProcessException();
96 }
97 if (DN()) return kFP64DefaultNaN;
98
99 // Convert NaNs as the processor would:
100 // - The sign is propagated.
101 // - The mantissa is transferred entirely, except that the top bit is
102 // forced to '1', making the result a quiet NaN. The unused (low-order)
103 // mantissa bits are set to 0.
104 uint32_t raw = bit_cast<uint32_t>(value);
105
106 uint64_t sign = raw >> 31;
107 uint64_t exponent = (1 << kDoubleExponentBits) - 1;
108 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
109
110 // Unused low-order bits remain zero.
111 mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
112
113 // Force a quiet NaN.
114 mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
115
116 return double_pack(sign, exponent, mantissa);
117 }
118
119 case FP_ZERO:
120 case FP_NORMAL:
121 case FP_SUBNORMAL:
122 case FP_INFINITE: {
123 // All other inputs are preserved in a standard cast, because every value
124 // representable using an IEEE-754 float is also representable using an
125 // IEEE-754 double.
126 return static_cast<double>(value);
127 }
128 }
129
130 UNREACHABLE();
131 }
132
FPToFloat(float16 value)133 float Simulator::FPToFloat(float16 value) {
134 uint32_t sign = value >> 15;
135 uint32_t exponent =
136 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
137 kFloat16MantissaBits, value);
138 uint32_t mantissa =
139 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
140
141 switch (float16classify(value)) {
142 case FP_ZERO:
143 return (sign == 0) ? 0.0f : -0.0f;
144
145 case FP_INFINITE:
146 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
147
148 case FP_SUBNORMAL: {
149 // Calculate shift required to put mantissa into the most-significant bits
150 // of the destination mantissa.
151 int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
152
153 // Shift mantissa and discard implicit '1'.
154 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
155 mantissa &= (1 << kFloatMantissaBits) - 1;
156
157 // Adjust the exponent for the shift applied, and rebias.
158 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
159 break;
160 }
161
162 case FP_NAN: {
163 if (IsSignallingNaN(value)) {
164 FPProcessException();
165 }
166 if (DN()) return kFP32DefaultNaN;
167
168 // Convert NaNs as the processor would:
169 // - The sign is propagated.
170 // - The mantissa is transferred entirely, except that the top bit is
171 // forced to '1', making the result a quiet NaN. The unused (low-order)
172 // mantissa bits are set to 0.
173 exponent = (1 << kFloatExponentBits) - 1;
174
175 // Increase bits in mantissa, making low-order bits 0.
176 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
177 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.
178 break;
179 }
180
181 case FP_NORMAL: {
182 // Increase bits in mantissa, making low-order bits 0.
183 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
184
185 // Change exponent bias.
186 exponent += (kFloatExponentBias - kFloat16ExponentBias);
187 break;
188 }
189
190 default:
191 UNREACHABLE();
192 }
193 return float_pack(sign, exponent, mantissa);
194 }
195
FPToFloat16(float value,FPRounding round_mode)196 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
197 // Only the FPTieEven rounding mode is implemented.
198 DCHECK_EQ(round_mode, FPTieEven);
199 USE(round_mode);
200
201 int64_t sign = float_sign(value);
202 int64_t exponent =
203 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
204 uint32_t mantissa = float_mantissa(value);
205
206 switch (std::fpclassify(value)) {
207 case FP_NAN: {
208 if (IsSignallingNaN(value)) {
209 FPProcessException();
210 }
211 if (DN()) return kFP16DefaultNaN;
212
213 // Convert NaNs as the processor would:
214 // - The sign is propagated.
215 // - The mantissa is transferred as much as possible, except that the top
216 // bit is forced to '1', making the result a quiet NaN.
217 float16 result =
218 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
219 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
220 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
221 return result;
222 }
223
224 case FP_ZERO:
225 return (sign == 0) ? 0 : 0x8000;
226
227 case FP_INFINITE:
228 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
229
230 case FP_NORMAL:
231 case FP_SUBNORMAL: {
232 // Convert float-to-half as the processor would, assuming that FPCR.FZ
233 // (flush-to-zero) is not set.
234
235 // Add the implicit '1' bit to the mantissa.
236 mantissa += (1 << kFloatMantissaBits);
237 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
238 }
239 }
240
241 UNREACHABLE();
242 }
243
FPToFloat16(double value,FPRounding round_mode)244 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
245 // Only the FPTieEven rounding mode is implemented.
246 DCHECK_EQ(round_mode, FPTieEven);
247 USE(round_mode);
248
249 int64_t sign = double_sign(value);
250 int64_t exponent =
251 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
252 uint64_t mantissa = double_mantissa(value);
253
254 switch (std::fpclassify(value)) {
255 case FP_NAN: {
256 if (IsSignallingNaN(value)) {
257 FPProcessException();
258 }
259 if (DN()) return kFP16DefaultNaN;
260
261 // Convert NaNs as the processor would:
262 // - The sign is propagated.
263 // - The mantissa is transferred as much as possible, except that the top
264 // bit is forced to '1', making the result a quiet NaN.
265 float16 result =
266 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
267 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
268 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
269 return result;
270 }
271
272 case FP_ZERO:
273 return (sign == 0) ? 0 : 0x8000;
274
275 case FP_INFINITE:
276 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
277
278 case FP_NORMAL:
279 case FP_SUBNORMAL: {
280 // Convert double-to-half as the processor would, assuming that FPCR.FZ
281 // (flush-to-zero) is not set.
282
283 // Add the implicit '1' bit to the mantissa.
284 mantissa += (UINT64_C(1) << kDoubleMantissaBits);
285 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
286 }
287 }
288
289 UNREACHABLE();
290 }
291
FPToFloat(double value,FPRounding round_mode)292 float Simulator::FPToFloat(double value, FPRounding round_mode) {
293 // Only the FPTieEven rounding mode is implemented.
294 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
295 USE(round_mode);
296
297 switch (std::fpclassify(value)) {
298 case FP_NAN: {
299 if (IsSignallingNaN(value)) {
300 FPProcessException();
301 }
302 if (DN()) return kFP32DefaultNaN;
303
304 // Convert NaNs as the processor would:
305 // - The sign is propagated.
306 // - The mantissa is transferred as much as possible, except that the
307 // top bit is forced to '1', making the result a quiet NaN.
308
309 uint64_t raw = bit_cast<uint64_t>(value);
310
311 uint32_t sign = raw >> 63;
312 uint32_t exponent = (1 << 8) - 1;
313 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
314 50, kDoubleMantissaBits - kFloatMantissaBits, raw));
315 mantissa |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.
316
317 return float_pack(sign, exponent, mantissa);
318 }
319
320 case FP_ZERO:
321 case FP_INFINITE: {
322 // In a C++ cast, any value representable in the target type will be
323 // unchanged. This is always the case for +/-0.0 and infinities.
324 return static_cast<float>(value);
325 }
326
327 case FP_NORMAL:
328 case FP_SUBNORMAL: {
329 // Convert double-to-float as the processor would, assuming that FPCR.FZ
330 // (flush-to-zero) is not set.
331 uint32_t sign = double_sign(value);
332 int64_t exponent =
333 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
334 uint64_t mantissa = double_mantissa(value);
335 if (std::fpclassify(value) == FP_NORMAL) {
336 // For normal FP values, add the hidden bit.
337 mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
338 }
339 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
340 }
341 }
342
343 UNREACHABLE();
344 }
345
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)346 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
347 dst.ClearForWrite(vform);
348 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
349 dst.ReadUintFromMem(vform, i, addr);
350 addr += LaneSizeInBytesFromFormat(vform);
351 }
352 }
353
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)354 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
355 uint64_t addr) {
356 dst.ReadUintFromMem(vform, index, addr);
357 }
358
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)359 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
360 dst.ClearForWrite(vform);
361 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
362 dst.ReadUintFromMem(vform, i, addr);
363 }
364 }
365
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)366 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
367 LogicVRegister dst2, uint64_t addr1) {
368 dst1.ClearForWrite(vform);
369 dst2.ClearForWrite(vform);
370 int esize = LaneSizeInBytesFromFormat(vform);
371 uint64_t addr2 = addr1 + esize;
372 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
373 dst1.ReadUintFromMem(vform, i, addr1);
374 dst2.ReadUintFromMem(vform, i, addr2);
375 addr1 += 2 * esize;
376 addr2 += 2 * esize;
377 }
378 }
379
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)380 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
381 LogicVRegister dst2, int index, uint64_t addr1) {
382 dst1.ClearForWrite(vform);
383 dst2.ClearForWrite(vform);
384 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
385 dst1.ReadUintFromMem(vform, index, addr1);
386 dst2.ReadUintFromMem(vform, index, addr2);
387 }
388
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)389 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
390 LogicVRegister dst2, uint64_t addr) {
391 dst1.ClearForWrite(vform);
392 dst2.ClearForWrite(vform);
393 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
394 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
395 dst1.ReadUintFromMem(vform, i, addr);
396 dst2.ReadUintFromMem(vform, i, addr2);
397 }
398 }
399
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)400 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
401 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
402 dst1.ClearForWrite(vform);
403 dst2.ClearForWrite(vform);
404 dst3.ClearForWrite(vform);
405 int esize = LaneSizeInBytesFromFormat(vform);
406 uint64_t addr2 = addr1 + esize;
407 uint64_t addr3 = addr2 + esize;
408 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
409 dst1.ReadUintFromMem(vform, i, addr1);
410 dst2.ReadUintFromMem(vform, i, addr2);
411 dst3.ReadUintFromMem(vform, i, addr3);
412 addr1 += 3 * esize;
413 addr2 += 3 * esize;
414 addr3 += 3 * esize;
415 }
416 }
417
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)418 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
419 LogicVRegister dst2, LogicVRegister dst3, int index,
420 uint64_t addr1) {
421 dst1.ClearForWrite(vform);
422 dst2.ClearForWrite(vform);
423 dst3.ClearForWrite(vform);
424 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
425 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
426 dst1.ReadUintFromMem(vform, index, addr1);
427 dst2.ReadUintFromMem(vform, index, addr2);
428 dst3.ReadUintFromMem(vform, index, addr3);
429 }
430
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)431 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
432 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
433 dst1.ClearForWrite(vform);
434 dst2.ClearForWrite(vform);
435 dst3.ClearForWrite(vform);
436 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
437 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
438 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
439 dst1.ReadUintFromMem(vform, i, addr);
440 dst2.ReadUintFromMem(vform, i, addr2);
441 dst3.ReadUintFromMem(vform, i, addr3);
442 }
443 }
444
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)445 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
446 LogicVRegister dst2, LogicVRegister dst3,
447 LogicVRegister dst4, uint64_t addr1) {
448 dst1.ClearForWrite(vform);
449 dst2.ClearForWrite(vform);
450 dst3.ClearForWrite(vform);
451 dst4.ClearForWrite(vform);
452 int esize = LaneSizeInBytesFromFormat(vform);
453 uint64_t addr2 = addr1 + esize;
454 uint64_t addr3 = addr2 + esize;
455 uint64_t addr4 = addr3 + esize;
456 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
457 dst1.ReadUintFromMem(vform, i, addr1);
458 dst2.ReadUintFromMem(vform, i, addr2);
459 dst3.ReadUintFromMem(vform, i, addr3);
460 dst4.ReadUintFromMem(vform, i, addr4);
461 addr1 += 4 * esize;
462 addr2 += 4 * esize;
463 addr3 += 4 * esize;
464 addr4 += 4 * esize;
465 }
466 }
467
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)468 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
469 LogicVRegister dst2, LogicVRegister dst3,
470 LogicVRegister dst4, int index, uint64_t addr1) {
471 dst1.ClearForWrite(vform);
472 dst2.ClearForWrite(vform);
473 dst3.ClearForWrite(vform);
474 dst4.ClearForWrite(vform);
475 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
476 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
477 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
478 dst1.ReadUintFromMem(vform, index, addr1);
479 dst2.ReadUintFromMem(vform, index, addr2);
480 dst3.ReadUintFromMem(vform, index, addr3);
481 dst4.ReadUintFromMem(vform, index, addr4);
482 }
483
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)484 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
485 LogicVRegister dst2, LogicVRegister dst3,
486 LogicVRegister dst4, uint64_t addr) {
487 dst1.ClearForWrite(vform);
488 dst2.ClearForWrite(vform);
489 dst3.ClearForWrite(vform);
490 dst4.ClearForWrite(vform);
491 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
492 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
493 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
494 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
495 dst1.ReadUintFromMem(vform, i, addr);
496 dst2.ReadUintFromMem(vform, i, addr2);
497 dst3.ReadUintFromMem(vform, i, addr3);
498 dst4.ReadUintFromMem(vform, i, addr4);
499 }
500 }
501
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)502 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
503 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
504 src.WriteUintToMem(vform, i, addr);
505 addr += LaneSizeInBytesFromFormat(vform);
506 }
507 }
508
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)509 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
510 uint64_t addr) {
511 src.WriteUintToMem(vform, index, addr);
512 }
513
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)514 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
515 uint64_t addr) {
516 int esize = LaneSizeInBytesFromFormat(vform);
517 uint64_t addr2 = addr + esize;
518 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
519 dst.WriteUintToMem(vform, i, addr);
520 dst2.WriteUintToMem(vform, i, addr2);
521 addr += 2 * esize;
522 addr2 += 2 * esize;
523 }
524 }
525
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)526 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
527 int index, uint64_t addr) {
528 int esize = LaneSizeInBytesFromFormat(vform);
529 dst.WriteUintToMem(vform, index, addr);
530 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
531 }
532
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)533 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
534 LogicVRegister dst3, uint64_t addr) {
535 int esize = LaneSizeInBytesFromFormat(vform);
536 uint64_t addr2 = addr + esize;
537 uint64_t addr3 = addr2 + esize;
538 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
539 dst.WriteUintToMem(vform, i, addr);
540 dst2.WriteUintToMem(vform, i, addr2);
541 dst3.WriteUintToMem(vform, i, addr3);
542 addr += 3 * esize;
543 addr2 += 3 * esize;
544 addr3 += 3 * esize;
545 }
546 }
547
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)548 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
549 LogicVRegister dst3, int index, uint64_t addr) {
550 int esize = LaneSizeInBytesFromFormat(vform);
551 dst.WriteUintToMem(vform, index, addr);
552 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
553 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
554 }
555
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)556 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
557 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
558 int esize = LaneSizeInBytesFromFormat(vform);
559 uint64_t addr2 = addr + esize;
560 uint64_t addr3 = addr2 + esize;
561 uint64_t addr4 = addr3 + esize;
562 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
563 dst.WriteUintToMem(vform, i, addr);
564 dst2.WriteUintToMem(vform, i, addr2);
565 dst3.WriteUintToMem(vform, i, addr3);
566 dst4.WriteUintToMem(vform, i, addr4);
567 addr += 4 * esize;
568 addr2 += 4 * esize;
569 addr3 += 4 * esize;
570 addr4 += 4 * esize;
571 }
572 }
573
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)574 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
575 LogicVRegister dst3, LogicVRegister dst4, int index,
576 uint64_t addr) {
577 int esize = LaneSizeInBytesFromFormat(vform);
578 dst.WriteUintToMem(vform, index, addr);
579 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
580 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
581 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
582 }
583
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)584 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
585 const LogicVRegister& src1,
586 const LogicVRegister& src2, Condition cond) {
587 dst.ClearForWrite(vform);
588 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
589 int64_t sa = src1.Int(vform, i);
590 int64_t sb = src2.Int(vform, i);
591 uint64_t ua = src1.Uint(vform, i);
592 uint64_t ub = src2.Uint(vform, i);
593 bool result = false;
594 switch (cond) {
595 case eq:
596 result = (ua == ub);
597 break;
598 case ge:
599 result = (sa >= sb);
600 break;
601 case gt:
602 result = (sa > sb);
603 break;
604 case hi:
605 result = (ua > ub);
606 break;
607 case hs:
608 result = (ua >= ub);
609 break;
610 case lt:
611 result = (sa < sb);
612 break;
613 case le:
614 result = (sa <= sb);
615 break;
616 default:
617 UNREACHABLE();
618 }
619 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
620 }
621 return dst;
622 }
623
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)624 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
625 const LogicVRegister& src1, int imm,
626 Condition cond) {
627 SimVRegister temp;
628 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
629 return cmp(vform, dst, src1, imm_reg, cond);
630 }
631
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)632 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
633 const LogicVRegister& src1,
634 const LogicVRegister& src2) {
635 dst.ClearForWrite(vform);
636 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
637 uint64_t ua = src1.Uint(vform, i);
638 uint64_t ub = src2.Uint(vform, i);
639 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
640 }
641 return dst;
642 }
643
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)644 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
645 const LogicVRegister& src1,
646 const LogicVRegister& src2) {
647 int lane_size = LaneSizeInBitsFromFormat(vform);
648 dst.ClearForWrite(vform);
649 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
650 // Test for unsigned saturation.
651 uint64_t ua = src1.UintLeftJustified(vform, i);
652 uint64_t ub = src2.UintLeftJustified(vform, i);
653 uint64_t ur = ua + ub;
654 if (ur < ua) {
655 dst.SetUnsignedSat(i, true);
656 }
657
658 // Test for signed saturation.
659 bool pos_a = (ua >> 63) == 0;
660 bool pos_b = (ub >> 63) == 0;
661 bool pos_r = (ur >> 63) == 0;
662 // If the signs of the operands are the same, but different from the result,
663 // there was an overflow.
664 if ((pos_a == pos_b) && (pos_a != pos_r)) {
665 dst.SetSignedSat(i, pos_a);
666 }
667
668 dst.SetInt(vform, i, ur >> (64 - lane_size));
669 }
670 return dst;
671 }
672
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)673 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
674 const LogicVRegister& src1,
675 const LogicVRegister& src2) {
676 SimVRegister temp1, temp2;
677 uzp1(vform, temp1, src1, src2);
678 uzp2(vform, temp2, src1, src2);
679 add(vform, dst, temp1, temp2);
680 return dst;
681 }
682
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)683 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
684 const LogicVRegister& src1,
685 const LogicVRegister& src2) {
686 SimVRegister temp;
687 mul(vform, temp, src1, src2);
688 add(vform, dst, dst, temp);
689 return dst;
690 }
691
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)692 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
693 const LogicVRegister& src1,
694 const LogicVRegister& src2) {
695 SimVRegister temp;
696 mul(vform, temp, src1, src2);
697 sub(vform, dst, dst, temp);
698 return dst;
699 }
700
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)701 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
702 const LogicVRegister& src1,
703 const LogicVRegister& src2) {
704 dst.ClearForWrite(vform);
705 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
706 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
707 }
708 return dst;
709 }
710
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)711 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
712 const LogicVRegister& src1,
713 const LogicVRegister& src2, int index) {
714 SimVRegister temp;
715 VectorFormat indexform = VectorFormatFillQ(vform);
716 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
717 }
718
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)719 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
720 const LogicVRegister& src1,
721 const LogicVRegister& src2, int index) {
722 SimVRegister temp;
723 VectorFormat indexform = VectorFormatFillQ(vform);
724 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
725 }
726
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)727 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
728 const LogicVRegister& src1,
729 const LogicVRegister& src2, int index) {
730 SimVRegister temp;
731 VectorFormat indexform = VectorFormatFillQ(vform);
732 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
733 }
734
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)735 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
736 const LogicVRegister& src1,
737 const LogicVRegister& src2, int index) {
738 SimVRegister temp;
739 VectorFormat indexform =
740 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
741 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
742 }
743
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)744 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
745 const LogicVRegister& src1,
746 const LogicVRegister& src2, int index) {
747 SimVRegister temp;
748 VectorFormat indexform =
749 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
750 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
751 }
752
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)753 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
754 const LogicVRegister& src1,
755 const LogicVRegister& src2, int index) {
756 SimVRegister temp;
757 VectorFormat indexform =
758 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
759 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
760 }
761
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)762 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
763 const LogicVRegister& src1,
764 const LogicVRegister& src2, int index) {
765 SimVRegister temp;
766 VectorFormat indexform =
767 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
768 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
769 }
770
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)771 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
772 const LogicVRegister& src1,
773 const LogicVRegister& src2, int index) {
774 SimVRegister temp;
775 VectorFormat indexform =
776 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
777 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
778 }
779
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)780 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
781 const LogicVRegister& src1,
782 const LogicVRegister& src2, int index) {
783 SimVRegister temp;
784 VectorFormat indexform =
785 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
786 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
787 }
788
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)789 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
790 const LogicVRegister& src1,
791 const LogicVRegister& src2, int index) {
792 SimVRegister temp;
793 VectorFormat indexform =
794 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
795 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
796 }
797
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)798 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
799 const LogicVRegister& src1,
800 const LogicVRegister& src2, int index) {
801 SimVRegister temp;
802 VectorFormat indexform =
803 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
804 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
805 }
806
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
808 const LogicVRegister& src1,
809 const LogicVRegister& src2, int index) {
810 SimVRegister temp;
811 VectorFormat indexform =
812 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
813 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
814 }
815
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)816 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
817 const LogicVRegister& src1,
818 const LogicVRegister& src2, int index) {
819 SimVRegister temp;
820 VectorFormat indexform =
821 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
822 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
823 }
824
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)825 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
826 const LogicVRegister& src1,
827 const LogicVRegister& src2, int index) {
828 SimVRegister temp;
829 VectorFormat indexform =
830 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
831 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
832 }
833
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)834 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
835 const LogicVRegister& src1,
836 const LogicVRegister& src2, int index) {
837 SimVRegister temp;
838 VectorFormat indexform =
839 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
840 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
841 }
842
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)843 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
844 const LogicVRegister& src1,
845 const LogicVRegister& src2, int index) {
846 SimVRegister temp;
847 VectorFormat indexform =
848 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
849 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
850 }
851
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)852 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
853 const LogicVRegister& src1,
854 const LogicVRegister& src2, int index) {
855 SimVRegister temp;
856 VectorFormat indexform =
857 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
858 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
859 }
860
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)861 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
862 const LogicVRegister& src1,
863 const LogicVRegister& src2, int index) {
864 SimVRegister temp;
865 VectorFormat indexform =
866 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
867 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
868 }
869
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)870 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
871 const LogicVRegister& src1,
872 const LogicVRegister& src2, int index) {
873 SimVRegister temp;
874 VectorFormat indexform =
875 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
876 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
877 }
878
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)879 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
880 const LogicVRegister& src1,
881 const LogicVRegister& src2, int index) {
882 SimVRegister temp;
883 VectorFormat indexform =
884 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
885 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
886 }
887
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)888 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
889 const LogicVRegister& src1,
890 const LogicVRegister& src2, int index) {
891 SimVRegister temp;
892 VectorFormat indexform =
893 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
894 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
895 }
896
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)897 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
898 const LogicVRegister& src1,
899 const LogicVRegister& src2, int index) {
900 SimVRegister temp;
901 VectorFormat indexform = VectorFormatFillQ(vform);
902 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
903 }
904
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)905 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
906 const LogicVRegister& src1,
907 const LogicVRegister& src2, int index) {
908 SimVRegister temp;
909 VectorFormat indexform = VectorFormatFillQ(vform);
910 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
911 }
912
PolynomialMult(uint8_t op1,uint8_t op2)913 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
914 uint16_t result = 0;
915 uint16_t extended_op2 = op2;
916 for (int i = 0; i < 8; ++i) {
917 if ((op1 >> i) & 1) {
918 result = result ^ (extended_op2 << i);
919 }
920 }
921 return result;
922 }
923
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)924 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
925 const LogicVRegister& src1,
926 const LogicVRegister& src2) {
927 dst.ClearForWrite(vform);
928 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
929 dst.SetUint(vform, i,
930 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
931 }
932 return dst;
933 }
934
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)935 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
936 const LogicVRegister& src1,
937 const LogicVRegister& src2) {
938 VectorFormat vform_src = VectorFormatHalfWidth(vform);
939 dst.ClearForWrite(vform);
940 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
941 dst.SetUint(
942 vform, i,
943 PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
944 }
945 return dst;
946 }
947
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)948 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
949 const LogicVRegister& src1,
950 const LogicVRegister& src2) {
951 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
952 dst.ClearForWrite(vform);
953 int lane_count = LaneCountFromFormat(vform);
954 for (int i = 0; i < lane_count; i++) {
955 dst.SetUint(vform, i,
956 PolynomialMult(src1.Uint(vform_src, lane_count + i),
957 src2.Uint(vform_src, lane_count + i)));
958 }
959 return dst;
960 }
961
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)962 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
963 const LogicVRegister& src1,
964 const LogicVRegister& src2) {
965 int lane_size = LaneSizeInBitsFromFormat(vform);
966 dst.ClearForWrite(vform);
967 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
968 // Test for unsigned saturation.
969 uint64_t ua = src1.UintLeftJustified(vform, i);
970 uint64_t ub = src2.UintLeftJustified(vform, i);
971 uint64_t ur = ua - ub;
972 if (ub > ua) {
973 dst.SetUnsignedSat(i, false);
974 }
975
976 // Test for signed saturation.
977 bool pos_a = (ua >> 63) == 0;
978 bool pos_b = (ub >> 63) == 0;
979 bool pos_r = (ur >> 63) == 0;
980 // If the signs of the operands are different, and the sign of the first
981 // operand doesn't match the result, there was an overflow.
982 if ((pos_a != pos_b) && (pos_a != pos_r)) {
983 dst.SetSignedSat(i, pos_a);
984 }
985
986 dst.SetInt(vform, i, ur >> (64 - lane_size));
987 }
988 return dst;
989 }
990
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)991 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
992 const LogicVRegister& src1,
993 const LogicVRegister& src2) {
994 dst.ClearForWrite(vform);
995 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
996 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
997 }
998 return dst;
999 }
1000
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1001 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1002 const LogicVRegister& src1,
1003 const LogicVRegister& src2) {
1004 dst.ClearForWrite(vform);
1005 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1006 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1007 }
1008 return dst;
1009 }
1010
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1011 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1012 const LogicVRegister& src1,
1013 const LogicVRegister& src2) {
1014 dst.ClearForWrite(vform);
1015 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1016 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1017 }
1018 return dst;
1019 }
1020
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1021 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1022 const LogicVRegister& src1,
1023 const LogicVRegister& src2) {
1024 dst.ClearForWrite(vform);
1025 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1026 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1027 }
1028 return dst;
1029 }
1030
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1031 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1032 const LogicVRegister& src1,
1033 const LogicVRegister& src2) {
1034 dst.ClearForWrite(vform);
1035 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1036 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1037 }
1038 return dst;
1039 }
1040
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1041 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1042 const LogicVRegister& src, uint64_t imm) {
1043 uint64_t result[16];
1044 int laneCount = LaneCountFromFormat(vform);
1045 for (int i = 0; i < laneCount; ++i) {
1046 result[i] = src.Uint(vform, i) & ~imm;
1047 }
1048 dst.SetUintArray(vform, result);
1049 return dst;
1050 }
1051
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1052 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1053 const LogicVRegister& src1,
1054 const LogicVRegister& src2) {
1055 dst.ClearForWrite(vform);
1056 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1057 uint64_t operand1 = dst.Uint(vform, i);
1058 uint64_t operand2 = ~src2.Uint(vform, i);
1059 uint64_t operand3 = src1.Uint(vform, i);
1060 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1061 dst.SetUint(vform, i, result);
1062 }
1063 return dst;
1064 }
1065
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1066 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1067 const LogicVRegister& src1,
1068 const LogicVRegister& src2) {
1069 dst.ClearForWrite(vform);
1070 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1071 uint64_t operand1 = dst.Uint(vform, i);
1072 uint64_t operand2 = src2.Uint(vform, i);
1073 uint64_t operand3 = src1.Uint(vform, i);
1074 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1075 dst.SetUint(vform, i, result);
1076 }
1077 return dst;
1078 }
1079
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1080 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1081 const LogicVRegister& src1,
1082 const LogicVRegister& src2) {
1083 dst.ClearForWrite(vform);
1084 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1085 uint64_t operand1 = src2.Uint(vform, i);
1086 uint64_t operand2 = dst.Uint(vform, i);
1087 uint64_t operand3 = src1.Uint(vform, i);
1088 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1089 dst.SetUint(vform, i, result);
1090 }
1091 return dst;
1092 }
1093
SMinMax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1094 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1095 const LogicVRegister& src1,
1096 const LogicVRegister& src2, bool max) {
1097 dst.ClearForWrite(vform);
1098 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1099 int64_t src1_val = src1.Int(vform, i);
1100 int64_t src2_val = src2.Int(vform, i);
1101 int64_t dst_val;
1102 if (max) {
1103 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1104 } else {
1105 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1106 }
1107 dst.SetInt(vform, i, dst_val);
1108 }
1109 return dst;
1110 }
1111
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1112 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1113 const LogicVRegister& src1,
1114 const LogicVRegister& src2) {
1115 return SMinMax(vform, dst, src1, src2, true);
1116 }
1117
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1118 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1119 const LogicVRegister& src1,
1120 const LogicVRegister& src2) {
1121 return SMinMax(vform, dst, src1, src2, false);
1122 }
1123
SMinMaxP(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1124 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1125 const LogicVRegister& src1,
1126 const LogicVRegister& src2, bool max) {
1127 int lanes = LaneCountFromFormat(vform);
1128 int64_t result[kMaxLanesPerVector];
1129 const LogicVRegister* src = &src1;
1130 for (int j = 0; j < 2; j++) {
1131 for (int i = 0; i < lanes; i += 2) {
1132 int64_t first_val = src->Int(vform, i);
1133 int64_t second_val = src->Int(vform, i + 1);
1134 int64_t dst_val;
1135 if (max) {
1136 dst_val = (first_val > second_val) ? first_val : second_val;
1137 } else {
1138 dst_val = (first_val < second_val) ? first_val : second_val;
1139 }
1140 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1141 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1142 }
1143 src = &src2;
1144 }
1145 dst.SetIntArray(vform, result);
1146 return dst;
1147 }
1148
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1149 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1150 const LogicVRegister& src1,
1151 const LogicVRegister& src2) {
1152 return SMinMaxP(vform, dst, src1, src2, true);
1153 }
1154
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1155 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1156 const LogicVRegister& src1,
1157 const LogicVRegister& src2) {
1158 return SMinMaxP(vform, dst, src1, src2, false);
1159 }
1160
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1161 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1162 const LogicVRegister& src) {
1163 DCHECK_EQ(vform, kFormatD);
1164
1165 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1166 dst.ClearForWrite(vform);
1167 dst.SetUint(vform, 0, dst_val);
1168 return dst;
1169 }
1170
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1171 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1172 const LogicVRegister& src) {
1173 VectorFormat vform_dst =
1174 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1175
1176 int64_t dst_val = 0;
1177 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1178 dst_val += src.Int(vform, i);
1179 }
1180
1181 dst.ClearForWrite(vform_dst);
1182 dst.SetInt(vform_dst, 0, dst_val);
1183 return dst;
1184 }
1185
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1186 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1187 const LogicVRegister& src) {
1188 VectorFormat vform_dst =
1189 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1190
1191 int64_t dst_val = 0;
1192 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1193 dst_val += src.Int(vform, i);
1194 }
1195
1196 dst.ClearForWrite(vform_dst);
1197 dst.SetInt(vform_dst, 0, dst_val);
1198 return dst;
1199 }
1200
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1201 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1202 const LogicVRegister& src) {
1203 VectorFormat vform_dst =
1204 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1205
1206 uint64_t dst_val = 0;
1207 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1208 dst_val += src.Uint(vform, i);
1209 }
1210
1211 dst.ClearForWrite(vform_dst);
1212 dst.SetUint(vform_dst, 0, dst_val);
1213 return dst;
1214 }
1215
SMinMaxV(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1216 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1217 const LogicVRegister& src, bool max) {
1218 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1219 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1220 int64_t src_val = src.Int(vform, i);
1221 if (max) {
1222 dst_val = (src_val > dst_val) ? src_val : dst_val;
1223 } else {
1224 dst_val = (src_val < dst_val) ? src_val : dst_val;
1225 }
1226 }
1227 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1228 dst.SetInt(vform, 0, dst_val);
1229 return dst;
1230 }
1231
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1232 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1233 const LogicVRegister& src) {
1234 SMinMaxV(vform, dst, src, true);
1235 return dst;
1236 }
1237
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1238 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1239 const LogicVRegister& src) {
1240 SMinMaxV(vform, dst, src, false);
1241 return dst;
1242 }
1243
UMinMax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1244 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1245 const LogicVRegister& src1,
1246 const LogicVRegister& src2, bool max) {
1247 dst.ClearForWrite(vform);
1248 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1249 uint64_t src1_val = src1.Uint(vform, i);
1250 uint64_t src2_val = src2.Uint(vform, i);
1251 uint64_t dst_val;
1252 if (max) {
1253 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1254 } else {
1255 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1256 }
1257 dst.SetUint(vform, i, dst_val);
1258 }
1259 return dst;
1260 }
1261
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1262 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1263 const LogicVRegister& src1,
1264 const LogicVRegister& src2) {
1265 return UMinMax(vform, dst, src1, src2, true);
1266 }
1267
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1268 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1269 const LogicVRegister& src1,
1270 const LogicVRegister& src2) {
1271 return UMinMax(vform, dst, src1, src2, false);
1272 }
1273
UMinMaxP(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1274 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1275 const LogicVRegister& src1,
1276 const LogicVRegister& src2, bool max) {
1277 int lanes = LaneCountFromFormat(vform);
1278 uint64_t result[kMaxLanesPerVector];
1279 const LogicVRegister* src = &src1;
1280 for (int j = 0; j < 2; j++) {
1281 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1282 uint64_t first_val = src->Uint(vform, i);
1283 uint64_t second_val = src->Uint(vform, i + 1);
1284 uint64_t dst_val;
1285 if (max) {
1286 dst_val = (first_val > second_val) ? first_val : second_val;
1287 } else {
1288 dst_val = (first_val < second_val) ? first_val : second_val;
1289 }
1290 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1291 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1292 }
1293 src = &src2;
1294 }
1295 dst.SetUintArray(vform, result);
1296 return dst;
1297 }
1298
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1299 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1300 const LogicVRegister& src1,
1301 const LogicVRegister& src2) {
1302 return UMinMaxP(vform, dst, src1, src2, true);
1303 }
1304
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1305 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1306 const LogicVRegister& src1,
1307 const LogicVRegister& src2) {
1308 return UMinMaxP(vform, dst, src1, src2, false);
1309 }
1310
UMinMaxV(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1311 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1312 const LogicVRegister& src, bool max) {
1313 uint64_t dst_val = max ? 0 : UINT64_MAX;
1314 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1315 uint64_t src_val = src.Uint(vform, i);
1316 if (max) {
1317 dst_val = (src_val > dst_val) ? src_val : dst_val;
1318 } else {
1319 dst_val = (src_val < dst_val) ? src_val : dst_val;
1320 }
1321 }
1322 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1323 dst.SetUint(vform, 0, dst_val);
1324 return dst;
1325 }
1326
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1327 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1328 const LogicVRegister& src) {
1329 UMinMaxV(vform, dst, src, true);
1330 return dst;
1331 }
1332
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1333 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1334 const LogicVRegister& src) {
1335 UMinMaxV(vform, dst, src, false);
1336 return dst;
1337 }
1338
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1339 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1340 const LogicVRegister& src, int shift) {
1341 DCHECK_GE(shift, 0);
1342 SimVRegister temp;
1343 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1344 return ushl(vform, dst, src, shiftreg);
1345 }
1346
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1347 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1348 const LogicVRegister& src, int shift) {
1349 DCHECK_GE(shift, 0);
1350 SimVRegister temp1, temp2;
1351 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1352 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1353 return sshl(vform, dst, extendedreg, shiftreg);
1354 }
1355
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1356 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1357 const LogicVRegister& src, int shift) {
1358 DCHECK_GE(shift, 0);
1359 SimVRegister temp1, temp2;
1360 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1361 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1362 return sshl(vform, dst, extendedreg, shiftreg);
1363 }
1364
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1365 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1366 const LogicVRegister& src) {
1367 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1368 return sshll(vform, dst, src, shift);
1369 }
1370
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1371 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1372 const LogicVRegister& src) {
1373 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1374 return sshll2(vform, dst, src, shift);
1375 }
1376
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1377 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1378 const LogicVRegister& src, int shift) {
1379 DCHECK_GE(shift, 0);
1380 SimVRegister temp1, temp2;
1381 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1382 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1383 return ushl(vform, dst, extendedreg, shiftreg);
1384 }
1385
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1386 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1387 const LogicVRegister& src, int shift) {
1388 DCHECK_GE(shift, 0);
1389 SimVRegister temp1, temp2;
1390 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1391 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1392 return ushl(vform, dst, extendedreg, shiftreg);
1393 }
1394
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1395 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1396 const LogicVRegister& src, int shift) {
1397 dst.ClearForWrite(vform);
1398 int laneCount = LaneCountFromFormat(vform);
1399 for (int i = 0; i < laneCount; i++) {
1400 uint64_t src_lane = src.Uint(vform, i);
1401 uint64_t dst_lane = dst.Uint(vform, i);
1402 uint64_t shifted = src_lane << shift;
1403 uint64_t mask = MaxUintFromFormat(vform) << shift;
1404 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1405 }
1406 return dst;
1407 }
1408
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1409 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1410 const LogicVRegister& src, int shift) {
1411 DCHECK_GE(shift, 0);
1412 SimVRegister temp;
1413 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1414 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1415 }
1416
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1417 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1418 const LogicVRegister& src, int shift) {
1419 DCHECK_GE(shift, 0);
1420 SimVRegister temp;
1421 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1422 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1423 }
1424
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1425 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1426 const LogicVRegister& src, int shift) {
1427 DCHECK_GE(shift, 0);
1428 SimVRegister temp;
1429 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1430 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1431 }
1432
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1433 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1434 const LogicVRegister& src, int shift) {
1435 dst.ClearForWrite(vform);
1436 int laneCount = LaneCountFromFormat(vform);
1437 DCHECK((shift > 0) &&
1438 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1439 for (int i = 0; i < laneCount; i++) {
1440 uint64_t src_lane = src.Uint(vform, i);
1441 uint64_t dst_lane = dst.Uint(vform, i);
1442 uint64_t shifted;
1443 uint64_t mask;
1444 if (shift == 64) {
1445 shifted = 0;
1446 mask = 0;
1447 } else {
1448 shifted = src_lane >> shift;
1449 mask = MaxUintFromFormat(vform) >> shift;
1450 }
1451 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1452 }
1453 return dst;
1454 }
1455
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1456 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1457 const LogicVRegister& src, int shift) {
1458 DCHECK_GE(shift, 0);
1459 SimVRegister temp;
1460 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1461 return ushl(vform, dst, src, shiftreg);
1462 }
1463
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1464 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1465 const LogicVRegister& src, int shift) {
1466 DCHECK_GE(shift, 0);
1467 SimVRegister temp;
1468 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1469 return sshl(vform, dst, src, shiftreg);
1470 }
1471
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1472 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1473 const LogicVRegister& src, int shift) {
1474 SimVRegister temp;
1475 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1476 return add(vform, dst, dst, shifted_reg);
1477 }
1478
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1479 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1480 const LogicVRegister& src, int shift) {
1481 SimVRegister temp;
1482 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1483 return add(vform, dst, dst, shifted_reg);
1484 }
1485
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1486 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1487 const LogicVRegister& src, int shift) {
1488 SimVRegister temp;
1489 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1490 return add(vform, dst, dst, shifted_reg);
1491 }
1492
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1493 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1494 const LogicVRegister& src, int shift) {
1495 SimVRegister temp;
1496 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1497 return add(vform, dst, dst, shifted_reg);
1498 }
1499
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1500 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1501 const LogicVRegister& src) {
1502 uint64_t result[16];
1503 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1504 int laneCount = LaneCountFromFormat(vform);
1505 for (int i = 0; i < laneCount; i++) {
1506 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1507 }
1508
1509 dst.SetUintArray(vform, result);
1510 return dst;
1511 }
1512
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1513 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1514 const LogicVRegister& src) {
1515 uint64_t result[16];
1516 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1517 int laneCount = LaneCountFromFormat(vform);
1518 for (int i = 0; i < laneCount; i++) {
1519 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1520 }
1521
1522 dst.SetUintArray(vform, result);
1523 return dst;
1524 }
1525
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1526 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1527 const LogicVRegister& src) {
1528 uint64_t result[16];
1529 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1530 int laneCount = LaneCountFromFormat(vform);
1531 for (int i = 0; i < laneCount; i++) {
1532 uint64_t value = src.Uint(vform, i);
1533 result[i] = 0;
1534 for (int j = 0; j < laneSizeInBits; j++) {
1535 result[i] += (value & 1);
1536 value >>= 1;
1537 }
1538 }
1539
1540 dst.SetUintArray(vform, result);
1541 return dst;
1542 }
1543
// Per-lane signed shift of src1 by a signed amount taken from the matching
// lane of src2: a positive amount shifts left, a negative amount shifts right.
// Along the way the signed/unsigned saturation state and the rounding state of
// each lane are recorded on dst.
LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // The shift amount is narrowed to a signed 8-bit value.
    int8_t shift_val = src2.Int(vform, i);
    // The left-justified copy is used only for the saturation checks below.
    int64_t lj_src_val = src1.IntLeftJustified(vform, i);

    // Set signed saturation state.
    if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
        (lj_src_val != 0)) {
      dst.SetSignedSat(i, lj_src_val >= 0);
    }

    // Set unsigned saturation state.
    if (lj_src_val < 0) {
      dst.SetUnsignedSat(i, false);
    } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
               (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    int64_t src_val = src1.Int(vform, i);
    bool src_is_negative = src_val < 0;
    if (shift_val > 63) {
      // Left shift by 64 or more: every bit is shifted out.
      dst.SetInt(vform, i, 0);
    } else if (shift_val < -63) {
      // Right shift by 64 or more: only copies of the sign bit remain.
      dst.SetRounding(i, src_is_negative);
      dst.SetInt(vform, i, src_is_negative ? -1 : 0);
    } else {
      // Use unsigned types for shifts, as behaviour is undefined for signed
      // lhs.
      uint64_t usrc_val = static_cast<uint64_t>(src_val);

      if (shift_val < 0) {
        // Convert to right shift.
        shift_val = -shift_val;

        // Set rounding state by testing most-significant bit shifted out.
        // Rounding only needed on right shifts.
        if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }

        usrc_val >>= shift_val;

        if (src_is_negative) {
          // Simulate sign-extension.
          usrc_val |= (~UINT64_C(0) << (64 - shift_val));
        }
      } else {
        usrc_val <<= shift_val;
      }
      dst.SetUint(vform, i, usrc_val);
    }
  }
  return dst;
}
1602
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1603 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
1604 const LogicVRegister& src1,
1605 const LogicVRegister& src2) {
1606 dst.ClearForWrite(vform);
1607 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1608 int8_t shift_val = src2.Int(vform, i);
1609 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1610
1611 // Set saturation state.
1612 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
1613 dst.SetUnsignedSat(i, true);
1614 }
1615
1616 uint64_t src_val = src1.Uint(vform, i);
1617 if ((shift_val > 63) || (shift_val < -64)) {
1618 dst.SetUint(vform, i, 0);
1619 } else {
1620 if (shift_val < 0) {
1621 // Set rounding state. Rounding only needed on right shifts.
1622 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1623 dst.SetRounding(i, true);
1624 }
1625
1626 if (shift_val == -64) {
1627 src_val = 0;
1628 } else {
1629 src_val >>= -shift_val;
1630 }
1631 } else {
1632 src_val <<= shift_val;
1633 }
1634 dst.SetUint(vform, i, src_val);
1635 }
1636 }
1637 return dst;
1638 }
1639
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1640 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1641 const LogicVRegister& src) {
1642 dst.ClearForWrite(vform);
1643 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1644 // Test for signed saturation.
1645 int64_t sa = src.Int(vform, i);
1646 if (sa == MinIntFromFormat(vform)) {
1647 dst.SetSignedSat(i, true);
1648 }
1649 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1650 }
1651 return dst;
1652 }
1653
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1654 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1655 const LogicVRegister& src) {
1656 dst.ClearForWrite(vform);
1657 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1658 int64_t sa = dst.IntLeftJustified(vform, i);
1659 uint64_t ub = src.UintLeftJustified(vform, i);
1660 uint64_t ur = sa + ub;
1661
1662 int64_t sr = bit_cast<int64_t>(ur);
1663 if (sr < sa) { // Test for signed positive saturation.
1664 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1665 } else {
1666 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1667 }
1668 }
1669 return dst;
1670 }
1671
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1672 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1673 const LogicVRegister& src) {
1674 dst.ClearForWrite(vform);
1675 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1676 uint64_t ua = dst.UintLeftJustified(vform, i);
1677 int64_t sb = src.IntLeftJustified(vform, i);
1678 uint64_t ur = ua + sb;
1679
1680 if ((sb > 0) && (ur <= ua)) {
1681 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
1682 } else if ((sb < 0) && (ur >= ua)) {
1683 dst.SetUint(vform, i, 0); // Negative saturation.
1684 } else {
1685 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1686 }
1687 }
1688 return dst;
1689 }
1690
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1691 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1692 const LogicVRegister& src) {
1693 dst.ClearForWrite(vform);
1694 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1695 // Test for signed saturation.
1696 int64_t sa = src.Int(vform, i);
1697 if (sa == MinIntFromFormat(vform)) {
1698 dst.SetSignedSat(i, true);
1699 }
1700 if (sa < 0) {
1701 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1702 } else {
1703 dst.SetInt(vform, i, sa);
1704 }
1705 }
1706 return dst;
1707 }
1708
ExtractNarrow(VectorFormat dstform,LogicVRegister dst,bool dstIsSigned,const LogicVRegister & src,bool srcIsSigned)1709 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
1710 LogicVRegister dst, bool dstIsSigned,
1711 const LogicVRegister& src,
1712 bool srcIsSigned) {
1713 bool upperhalf = false;
1714 VectorFormat srcform = kFormatUndefined;
1715 int64_t ssrc[8];
1716 uint64_t usrc[8];
1717
1718 switch (dstform) {
1719 case kFormat8B:
1720 upperhalf = false;
1721 srcform = kFormat8H;
1722 break;
1723 case kFormat16B:
1724 upperhalf = true;
1725 srcform = kFormat8H;
1726 break;
1727 case kFormat4H:
1728 upperhalf = false;
1729 srcform = kFormat4S;
1730 break;
1731 case kFormat8H:
1732 upperhalf = true;
1733 srcform = kFormat4S;
1734 break;
1735 case kFormat2S:
1736 upperhalf = false;
1737 srcform = kFormat2D;
1738 break;
1739 case kFormat4S:
1740 upperhalf = true;
1741 srcform = kFormat2D;
1742 break;
1743 case kFormatB:
1744 upperhalf = false;
1745 srcform = kFormatH;
1746 break;
1747 case kFormatH:
1748 upperhalf = false;
1749 srcform = kFormatS;
1750 break;
1751 case kFormatS:
1752 upperhalf = false;
1753 srcform = kFormatD;
1754 break;
1755 default:
1756 UNIMPLEMENTED();
1757 }
1758
1759 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1760 ssrc[i] = src.Int(srcform, i);
1761 usrc[i] = src.Uint(srcform, i);
1762 }
1763
1764 int offset;
1765 if (upperhalf) {
1766 offset = LaneCountFromFormat(dstform) / 2;
1767 } else {
1768 offset = 0;
1769 dst.ClearForWrite(dstform);
1770 }
1771
1772 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1773 // Test for signed saturation
1774 if (ssrc[i] > MaxIntFromFormat(dstform)) {
1775 dst.SetSignedSat(offset + i, true);
1776 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1777 dst.SetSignedSat(offset + i, false);
1778 }
1779
1780 // Test for unsigned saturation
1781 if (srcIsSigned) {
1782 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1783 dst.SetUnsignedSat(offset + i, true);
1784 } else if (ssrc[i] < 0) {
1785 dst.SetUnsignedSat(offset + i, false);
1786 }
1787 } else {
1788 if (usrc[i] > MaxUintFromFormat(dstform)) {
1789 dst.SetUnsignedSat(offset + i, true);
1790 }
1791 }
1792
1793 int64_t result;
1794 if (srcIsSigned) {
1795 result = ssrc[i] & MaxUintFromFormat(dstform);
1796 } else {
1797 result = usrc[i] & MaxUintFromFormat(dstform);
1798 }
1799
1800 if (dstIsSigned) {
1801 dst.SetInt(dstform, offset + i, result);
1802 } else {
1803 dst.SetUint(dstform, offset + i, result);
1804 }
1805 }
1806 return dst;
1807 }
1808
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1809 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1810 const LogicVRegister& src) {
1811 return ExtractNarrow(vform, dst, true, src, true);
1812 }
1813
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1814 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1815 const LogicVRegister& src) {
1816 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1817 }
1818
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1819 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1820 const LogicVRegister& src) {
1821 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1822 }
1823
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1824 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1825 const LogicVRegister& src) {
1826 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1827 }
1828
AbsDiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)1829 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1830 const LogicVRegister& src1,
1831 const LogicVRegister& src2, bool issigned) {
1832 dst.ClearForWrite(vform);
1833 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1834 if (issigned) {
1835 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1836 sr = sr > 0 ? sr : -sr;
1837 dst.SetInt(vform, i, sr);
1838 } else {
1839 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1840 sr = sr > 0 ? sr : -sr;
1841 dst.SetUint(vform, i, sr);
1842 }
1843 }
1844 return dst;
1845 }
1846
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1847 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1848 const LogicVRegister& src1,
1849 const LogicVRegister& src2) {
1850 SimVRegister temp;
1851 dst.ClearForWrite(vform);
1852 AbsDiff(vform, temp, src1, src2, true);
1853 add(vform, dst, dst, temp);
1854 return dst;
1855 }
1856
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1857 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1858 const LogicVRegister& src1,
1859 const LogicVRegister& src2) {
1860 SimVRegister temp;
1861 dst.ClearForWrite(vform);
1862 AbsDiff(vform, temp, src1, src2, false);
1863 add(vform, dst, dst, temp);
1864 return dst;
1865 }
1866
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1867 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1868 const LogicVRegister& src) {
1869 dst.ClearForWrite(vform);
1870 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1871 dst.SetUint(vform, i, ~src.Uint(vform, i));
1872 }
1873 return dst;
1874 }
1875
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1876 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1877 const LogicVRegister& src) {
1878 uint64_t result[16];
1879 int laneCount = LaneCountFromFormat(vform);
1880 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1881 uint64_t reversed_value;
1882 uint64_t value;
1883 for (int i = 0; i < laneCount; i++) {
1884 value = src.Uint(vform, i);
1885 reversed_value = 0;
1886 for (int j = 0; j < laneSizeInBits; j++) {
1887 reversed_value = (reversed_value << 1) | (value & 1);
1888 value >>= 1;
1889 }
1890 result[i] = reversed_value;
1891 }
1892
1893 dst.SetUintArray(vform, result);
1894 return dst;
1895 }
1896
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)1897 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1898 const LogicVRegister& src, int revSize) {
1899 uint64_t result[16];
1900 int laneCount = LaneCountFromFormat(vform);
1901 int laneSize = LaneSizeInBytesFromFormat(vform);
1902 int lanesPerLoop = revSize / laneSize;
1903 for (int i = 0; i < laneCount; i += lanesPerLoop) {
1904 for (int j = 0; j < lanesPerLoop; j++) {
1905 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1906 }
1907 }
1908 dst.SetUintArray(vform, result);
1909 return dst;
1910 }
1911
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1912 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1913 const LogicVRegister& src) {
1914 return rev(vform, dst, src, 2);
1915 }
1916
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1917 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1918 const LogicVRegister& src) {
1919 return rev(vform, dst, src, 4);
1920 }
1921
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1922 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1923 const LogicVRegister& src) {
1924 return rev(vform, dst, src, 8);
1925 }
1926
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)1927 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1928 const LogicVRegister& src, bool is_signed,
1929 bool do_accumulate) {
1930 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1931 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1932 DCHECK_LE(LaneCountFromFormat(vform), 8);
1933
1934 uint64_t result[8];
1935 int lane_count = LaneCountFromFormat(vform);
1936 for (int i = 0; i < lane_count; i++) {
1937 if (is_signed) {
1938 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1939 src.Int(vformsrc, 2 * i + 1));
1940 } else {
1941 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1942 }
1943 }
1944
1945 dst.ClearForWrite(vform);
1946 for (int i = 0; i < lane_count; ++i) {
1947 if (do_accumulate) {
1948 result[i] += dst.Uint(vform, i);
1949 }
1950 dst.SetUint(vform, i, result[i]);
1951 }
1952
1953 return dst;
1954 }
1955
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1956 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1957 const LogicVRegister& src) {
1958 return addlp(vform, dst, src, true, false);
1959 }
1960
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1961 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1962 const LogicVRegister& src) {
1963 return addlp(vform, dst, src, false, false);
1964 }
1965
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1966 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1967 const LogicVRegister& src) {
1968 return addlp(vform, dst, src, true, true);
1969 }
1970
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1971 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1972 const LogicVRegister& src) {
1973 return addlp(vform, dst, src, false, true);
1974 }
1975
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1976 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1977 const LogicVRegister& src1,
1978 const LogicVRegister& src2, int index) {
1979 uint8_t result[16];
1980 int laneCount = LaneCountFromFormat(vform);
1981 for (int i = 0; i < laneCount - index; ++i) {
1982 result[i] = src1.Uint(vform, i + index);
1983 }
1984 for (int i = 0; i < index; ++i) {
1985 result[laneCount - index + i] = src2.Uint(vform, i);
1986 }
1987 dst.ClearForWrite(vform);
1988 for (int i = 0; i < laneCount; ++i) {
1989 dst.SetUint(vform, i, result[i]);
1990 }
1991 return dst;
1992 }
1993
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)1994 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
1995 const LogicVRegister& src,
1996 int src_index) {
1997 int laneCount = LaneCountFromFormat(vform);
1998 uint64_t value = src.Uint(vform, src_index);
1999 dst.ClearForWrite(vform);
2000 for (int i = 0; i < laneCount; ++i) {
2001 dst.SetUint(vform, i, value);
2002 }
2003 return dst;
2004 }
2005
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2006 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2007 uint64_t imm) {
2008 int laneCount = LaneCountFromFormat(vform);
2009 uint64_t value = imm & MaxUintFromFormat(vform);
2010 dst.ClearForWrite(vform);
2011 for (int i = 0; i < laneCount; ++i) {
2012 dst.SetUint(vform, i, value);
2013 }
2014 return dst;
2015 }
2016
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2017 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2018 int dst_index, const LogicVRegister& src,
2019 int src_index) {
2020 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2021 return dst;
2022 }
2023
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2024 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2025 int dst_index, uint64_t imm) {
2026 uint64_t value = imm & MaxUintFromFormat(vform);
2027 dst.SetUint(vform, dst_index, value);
2028 return dst;
2029 }
2030
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2031 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2032 uint64_t imm) {
2033 int laneCount = LaneCountFromFormat(vform);
2034 dst.ClearForWrite(vform);
2035 for (int i = 0; i < laneCount; ++i) {
2036 dst.SetUint(vform, i, imm);
2037 }
2038 return dst;
2039 }
2040
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2041 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2042 uint64_t imm) {
2043 int laneCount = LaneCountFromFormat(vform);
2044 dst.ClearForWrite(vform);
2045 for (int i = 0; i < laneCount; ++i) {
2046 dst.SetUint(vform, i, ~imm);
2047 }
2048 return dst;
2049 }
2050
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2051 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2052 const LogicVRegister& src, uint64_t imm) {
2053 uint64_t result[16];
2054 int laneCount = LaneCountFromFormat(vform);
2055 for (int i = 0; i < laneCount; ++i) {
2056 result[i] = src.Uint(vform, i) | imm;
2057 }
2058 dst.SetUintArray(vform, result);
2059 return dst;
2060 }
2061
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2062 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2063 const LogicVRegister& src) {
2064 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2065
2066 dst.ClearForWrite(vform);
2067 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2068 dst.SetUint(vform, i, src.Uint(vform_half, i));
2069 }
2070 return dst;
2071 }
2072
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2073 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2074 const LogicVRegister& src) {
2075 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2076
2077 dst.ClearForWrite(vform);
2078 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2079 dst.SetInt(vform, i, src.Int(vform_half, i));
2080 }
2081 return dst;
2082 }
2083
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2084 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2085 const LogicVRegister& src) {
2086 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2087 int lane_count = LaneCountFromFormat(vform);
2088
2089 dst.ClearForWrite(vform);
2090 for (int i = 0; i < lane_count; i++) {
2091 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2092 }
2093 return dst;
2094 }
2095
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2096 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2097 const LogicVRegister& src) {
2098 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2099 int lane_count = LaneCountFromFormat(vform);
2100
2101 dst.ClearForWrite(vform);
2102 for (int i = 0; i < lane_count; i++) {
2103 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2104 }
2105 return dst;
2106 }
2107
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2108 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2109 const LogicVRegister& src, int shift) {
2110 SimVRegister temp;
2111 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2112 VectorFormat vform_dst = vform;
2113 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2114 return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2115 }
2116
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2117 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2118 const LogicVRegister& src, int shift) {
2119 SimVRegister temp;
2120 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2121 VectorFormat vformdst = vform;
2122 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2123 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2124 }
2125
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2126 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2127 const LogicVRegister& src, int shift) {
2128 SimVRegister temp;
2129 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2130 VectorFormat vformdst = vform;
2131 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2132 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2133 }
2134
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2135 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2136 const LogicVRegister& src, int shift) {
2137 SimVRegister temp;
2138 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2139 VectorFormat vformdst = vform;
2140 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2141 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2142 }
2143
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)2144 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2145 const LogicVRegister& ind,
2146 bool zero_out_of_bounds,
2147 const LogicVRegister* tab1,
2148 const LogicVRegister* tab2,
2149 const LogicVRegister* tab3,
2150 const LogicVRegister* tab4) {
2151 DCHECK_NOT_NULL(tab1);
2152 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2153 uint64_t result[kMaxLanesPerVector];
2154 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2155 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2156 }
2157 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2158 uint64_t j = ind.Uint(vform, i);
2159 int tab_idx = static_cast<int>(j >> 4);
2160 int j_idx = static_cast<int>(j & 15);
2161 if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) {
2162 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2163 }
2164 }
2165 dst.SetUintArray(vform, result);
2166 return dst;
2167 }
2168
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2169 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2170 const LogicVRegister& tab,
2171 const LogicVRegister& ind) {
2172 return Table(vform, dst, ind, true, &tab);
2173 }
2174
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2175 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2176 const LogicVRegister& tab,
2177 const LogicVRegister& tab2,
2178 const LogicVRegister& ind) {
2179 return Table(vform, dst, ind, true, &tab, &tab2);
2180 }
2181
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2182 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2183 const LogicVRegister& tab,
2184 const LogicVRegister& tab2,
2185 const LogicVRegister& tab3,
2186 const LogicVRegister& ind) {
2187 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2188 }
2189
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2190 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2191 const LogicVRegister& tab,
2192 const LogicVRegister& tab2,
2193 const LogicVRegister& tab3,
2194 const LogicVRegister& tab4,
2195 const LogicVRegister& ind) {
2196 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2197 }
2198
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2199 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2200 const LogicVRegister& tab,
2201 const LogicVRegister& ind) {
2202 return Table(vform, dst, ind, false, &tab);
2203 }
2204
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2205 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2206 const LogicVRegister& tab,
2207 const LogicVRegister& tab2,
2208 const LogicVRegister& ind) {
2209 return Table(vform, dst, ind, false, &tab, &tab2);
2210 }
2211
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2212 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2213 const LogicVRegister& tab,
2214 const LogicVRegister& tab2,
2215 const LogicVRegister& tab3,
2216 const LogicVRegister& ind) {
2217 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2218 }
2219
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2220 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2221 const LogicVRegister& tab,
2222 const LogicVRegister& tab2,
2223 const LogicVRegister& tab3,
2224 const LogicVRegister& tab4,
2225 const LogicVRegister& ind) {
2226 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2227 }
2228
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2229 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2230 const LogicVRegister& src, int shift) {
2231 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2232 }
2233
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2234 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2235 const LogicVRegister& src, int shift) {
2236 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2237 }
2238
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2239 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2240 const LogicVRegister& src, int shift) {
2241 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2242 }
2243
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2244 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2245 const LogicVRegister& src, int shift) {
2246 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2247 }
2248
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2249 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2250 const LogicVRegister& src, int shift) {
2251 SimVRegister temp;
2252 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2253 VectorFormat vformdst = vform;
2254 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2255 return sqxtn(vformdst, dst, shifted_src);
2256 }
2257
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2258 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2259 const LogicVRegister& src, int shift) {
2260 SimVRegister temp;
2261 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2262 VectorFormat vformdst = vform;
2263 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2264 return sqxtn(vformdst, dst, shifted_src);
2265 }
2266
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2267 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2268 const LogicVRegister& src, int shift) {
2269 SimVRegister temp;
2270 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2271 VectorFormat vformdst = vform;
2272 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2273 return sqxtn(vformdst, dst, shifted_src);
2274 }
2275
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2276 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2277 const LogicVRegister& src, int shift) {
2278 SimVRegister temp;
2279 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2280 VectorFormat vformdst = vform;
2281 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2282 return sqxtn(vformdst, dst, shifted_src);
2283 }
2284
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2285 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2286 const LogicVRegister& src, int shift) {
2287 SimVRegister temp;
2288 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2289 VectorFormat vformdst = vform;
2290 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2291 return sqxtun(vformdst, dst, shifted_src);
2292 }
2293
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2294 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2295 const LogicVRegister& src, int shift) {
2296 SimVRegister temp;
2297 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2298 VectorFormat vformdst = vform;
2299 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2300 return sqxtun(vformdst, dst, shifted_src);
2301 }
2302
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2303 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2304 const LogicVRegister& src, int shift) {
2305 SimVRegister temp;
2306 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2307 VectorFormat vformdst = vform;
2308 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2309 return sqxtun(vformdst, dst, shifted_src);
2310 }
2311
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2312 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2313 const LogicVRegister& src, int shift) {
2314 SimVRegister temp;
2315 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2316 VectorFormat vformdst = vform;
2317 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2318 return sqxtun(vformdst, dst, shifted_src);
2319 }
2320
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2321 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2322 const LogicVRegister& src1,
2323 const LogicVRegister& src2) {
2324 SimVRegister temp1, temp2;
2325 uxtl(vform, temp1, src1);
2326 uxtl(vform, temp2, src2);
2327 add(vform, dst, temp1, temp2);
2328 return dst;
2329 }
2330
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2331 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2332 const LogicVRegister& src1,
2333 const LogicVRegister& src2) {
2334 SimVRegister temp1, temp2;
2335 uxtl2(vform, temp1, src1);
2336 uxtl2(vform, temp2, src2);
2337 add(vform, dst, temp1, temp2);
2338 return dst;
2339 }
2340
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2341 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2342 const LogicVRegister& src1,
2343 const LogicVRegister& src2) {
2344 SimVRegister temp;
2345 uxtl(vform, temp, src2);
2346 add(vform, dst, src1, temp);
2347 return dst;
2348 }
2349
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2350 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2351 const LogicVRegister& src1,
2352 const LogicVRegister& src2) {
2353 SimVRegister temp;
2354 uxtl2(vform, temp, src2);
2355 add(vform, dst, src1, temp);
2356 return dst;
2357 }
2358
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2359 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2360 const LogicVRegister& src1,
2361 const LogicVRegister& src2) {
2362 SimVRegister temp1, temp2;
2363 sxtl(vform, temp1, src1);
2364 sxtl(vform, temp2, src2);
2365 add(vform, dst, temp1, temp2);
2366 return dst;
2367 }
2368
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2369 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2370 const LogicVRegister& src1,
2371 const LogicVRegister& src2) {
2372 SimVRegister temp1, temp2;
2373 sxtl2(vform, temp1, src1);
2374 sxtl2(vform, temp2, src2);
2375 add(vform, dst, temp1, temp2);
2376 return dst;
2377 }
2378
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2379 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2380 const LogicVRegister& src1,
2381 const LogicVRegister& src2) {
2382 SimVRegister temp;
2383 sxtl(vform, temp, src2);
2384 add(vform, dst, src1, temp);
2385 return dst;
2386 }
2387
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2388 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2389 const LogicVRegister& src1,
2390 const LogicVRegister& src2) {
2391 SimVRegister temp;
2392 sxtl2(vform, temp, src2);
2393 add(vform, dst, src1, temp);
2394 return dst;
2395 }
2396
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2397 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2398 const LogicVRegister& src1,
2399 const LogicVRegister& src2) {
2400 SimVRegister temp1, temp2;
2401 uxtl(vform, temp1, src1);
2402 uxtl(vform, temp2, src2);
2403 sub(vform, dst, temp1, temp2);
2404 return dst;
2405 }
2406
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2407 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2408 const LogicVRegister& src1,
2409 const LogicVRegister& src2) {
2410 SimVRegister temp1, temp2;
2411 uxtl2(vform, temp1, src1);
2412 uxtl2(vform, temp2, src2);
2413 sub(vform, dst, temp1, temp2);
2414 return dst;
2415 }
2416
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2417 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2418 const LogicVRegister& src1,
2419 const LogicVRegister& src2) {
2420 SimVRegister temp;
2421 uxtl(vform, temp, src2);
2422 sub(vform, dst, src1, temp);
2423 return dst;
2424 }
2425
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2426 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2427 const LogicVRegister& src1,
2428 const LogicVRegister& src2) {
2429 SimVRegister temp;
2430 uxtl2(vform, temp, src2);
2431 sub(vform, dst, src1, temp);
2432 return dst;
2433 }
2434
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2435 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2436 const LogicVRegister& src1,
2437 const LogicVRegister& src2) {
2438 SimVRegister temp1, temp2;
2439 sxtl(vform, temp1, src1);
2440 sxtl(vform, temp2, src2);
2441 sub(vform, dst, temp1, temp2);
2442 return dst;
2443 }
2444
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2445 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2446 const LogicVRegister& src1,
2447 const LogicVRegister& src2) {
2448 SimVRegister temp1, temp2;
2449 sxtl2(vform, temp1, src1);
2450 sxtl2(vform, temp2, src2);
2451 sub(vform, dst, temp1, temp2);
2452 return dst;
2453 }
2454
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2455 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2456 const LogicVRegister& src1,
2457 const LogicVRegister& src2) {
2458 SimVRegister temp;
2459 sxtl(vform, temp, src2);
2460 sub(vform, dst, src1, temp);
2461 return dst;
2462 }
2463
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2464 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2465 const LogicVRegister& src1,
2466 const LogicVRegister& src2) {
2467 SimVRegister temp;
2468 sxtl2(vform, temp, src2);
2469 sub(vform, dst, src1, temp);
2470 return dst;
2471 }
2472
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2473 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2474 const LogicVRegister& src1,
2475 const LogicVRegister& src2) {
2476 SimVRegister temp1, temp2;
2477 uxtl(vform, temp1, src1);
2478 uxtl(vform, temp2, src2);
2479 uaba(vform, dst, temp1, temp2);
2480 return dst;
2481 }
2482
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2483 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2484 const LogicVRegister& src1,
2485 const LogicVRegister& src2) {
2486 SimVRegister temp1, temp2;
2487 uxtl2(vform, temp1, src1);
2488 uxtl2(vform, temp2, src2);
2489 uaba(vform, dst, temp1, temp2);
2490 return dst;
2491 }
2492
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2493 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2494 const LogicVRegister& src1,
2495 const LogicVRegister& src2) {
2496 SimVRegister temp1, temp2;
2497 sxtl(vform, temp1, src1);
2498 sxtl(vform, temp2, src2);
2499 saba(vform, dst, temp1, temp2);
2500 return dst;
2501 }
2502
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2503 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2504 const LogicVRegister& src1,
2505 const LogicVRegister& src2) {
2506 SimVRegister temp1, temp2;
2507 sxtl2(vform, temp1, src1);
2508 sxtl2(vform, temp2, src2);
2509 saba(vform, dst, temp1, temp2);
2510 return dst;
2511 }
2512
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2513 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2514 const LogicVRegister& src1,
2515 const LogicVRegister& src2) {
2516 SimVRegister temp1, temp2;
2517 uxtl(vform, temp1, src1);
2518 uxtl(vform, temp2, src2);
2519 AbsDiff(vform, dst, temp1, temp2, false);
2520 return dst;
2521 }
2522
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2523 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2524 const LogicVRegister& src1,
2525 const LogicVRegister& src2) {
2526 SimVRegister temp1, temp2;
2527 uxtl2(vform, temp1, src1);
2528 uxtl2(vform, temp2, src2);
2529 AbsDiff(vform, dst, temp1, temp2, false);
2530 return dst;
2531 }
2532
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2533 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2534 const LogicVRegister& src1,
2535 const LogicVRegister& src2) {
2536 SimVRegister temp1, temp2;
2537 sxtl(vform, temp1, src1);
2538 sxtl(vform, temp2, src2);
2539 AbsDiff(vform, dst, temp1, temp2, true);
2540 return dst;
2541 }
2542
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2543 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2544 const LogicVRegister& src1,
2545 const LogicVRegister& src2) {
2546 SimVRegister temp1, temp2;
2547 sxtl2(vform, temp1, src1);
2548 sxtl2(vform, temp2, src2);
2549 AbsDiff(vform, dst, temp1, temp2, true);
2550 return dst;
2551 }
2552
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2553 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2554 const LogicVRegister& src1,
2555 const LogicVRegister& src2) {
2556 SimVRegister temp1, temp2;
2557 uxtl(vform, temp1, src1);
2558 uxtl(vform, temp2, src2);
2559 mul(vform, dst, temp1, temp2);
2560 return dst;
2561 }
2562
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2563 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2564 const LogicVRegister& src1,
2565 const LogicVRegister& src2) {
2566 SimVRegister temp1, temp2;
2567 uxtl2(vform, temp1, src1);
2568 uxtl2(vform, temp2, src2);
2569 mul(vform, dst, temp1, temp2);
2570 return dst;
2571 }
2572
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2573 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2574 const LogicVRegister& src1,
2575 const LogicVRegister& src2) {
2576 SimVRegister temp1, temp2;
2577 sxtl(vform, temp1, src1);
2578 sxtl(vform, temp2, src2);
2579 mul(vform, dst, temp1, temp2);
2580 return dst;
2581 }
2582
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2583 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2584 const LogicVRegister& src1,
2585 const LogicVRegister& src2) {
2586 SimVRegister temp1, temp2;
2587 sxtl2(vform, temp1, src1);
2588 sxtl2(vform, temp2, src2);
2589 mul(vform, dst, temp1, temp2);
2590 return dst;
2591 }
2592
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2593 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2594 const LogicVRegister& src1,
2595 const LogicVRegister& src2) {
2596 SimVRegister temp1, temp2;
2597 uxtl(vform, temp1, src1);
2598 uxtl(vform, temp2, src2);
2599 mls(vform, dst, temp1, temp2);
2600 return dst;
2601 }
2602
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2603 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2604 const LogicVRegister& src1,
2605 const LogicVRegister& src2) {
2606 SimVRegister temp1, temp2;
2607 uxtl2(vform, temp1, src1);
2608 uxtl2(vform, temp2, src2);
2609 mls(vform, dst, temp1, temp2);
2610 return dst;
2611 }
2612
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2613 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2614 const LogicVRegister& src1,
2615 const LogicVRegister& src2) {
2616 SimVRegister temp1, temp2;
2617 sxtl(vform, temp1, src1);
2618 sxtl(vform, temp2, src2);
2619 mls(vform, dst, temp1, temp2);
2620 return dst;
2621 }
2622
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2623 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2624 const LogicVRegister& src1,
2625 const LogicVRegister& src2) {
2626 SimVRegister temp1, temp2;
2627 sxtl2(vform, temp1, src1);
2628 sxtl2(vform, temp2, src2);
2629 mls(vform, dst, temp1, temp2);
2630 return dst;
2631 }
2632
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2633 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2634 const LogicVRegister& src1,
2635 const LogicVRegister& src2) {
2636 SimVRegister temp1, temp2;
2637 uxtl(vform, temp1, src1);
2638 uxtl(vform, temp2, src2);
2639 mla(vform, dst, temp1, temp2);
2640 return dst;
2641 }
2642
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2643 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2644 const LogicVRegister& src1,
2645 const LogicVRegister& src2) {
2646 SimVRegister temp1, temp2;
2647 uxtl2(vform, temp1, src1);
2648 uxtl2(vform, temp2, src2);
2649 mla(vform, dst, temp1, temp2);
2650 return dst;
2651 }
2652
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2653 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2654 const LogicVRegister& src1,
2655 const LogicVRegister& src2) {
2656 SimVRegister temp1, temp2;
2657 sxtl(vform, temp1, src1);
2658 sxtl(vform, temp2, src2);
2659 mla(vform, dst, temp1, temp2);
2660 return dst;
2661 }
2662
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2663 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2664 const LogicVRegister& src1,
2665 const LogicVRegister& src2) {
2666 SimVRegister temp1, temp2;
2667 sxtl2(vform, temp1, src1);
2668 sxtl2(vform, temp2, src2);
2669 mla(vform, dst, temp1, temp2);
2670 return dst;
2671 }
2672
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2673 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2674 const LogicVRegister& src1,
2675 const LogicVRegister& src2) {
2676 SimVRegister temp;
2677 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2678 return add(vform, dst, dst, product).SignedSaturate(vform);
2679 }
2680
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2681 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2682 const LogicVRegister& src1,
2683 const LogicVRegister& src2) {
2684 SimVRegister temp;
2685 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2686 return add(vform, dst, dst, product).SignedSaturate(vform);
2687 }
2688
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2689 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2690 const LogicVRegister& src1,
2691 const LogicVRegister& src2) {
2692 SimVRegister temp;
2693 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2694 return sub(vform, dst, dst, product).SignedSaturate(vform);
2695 }
2696
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2697 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2698 const LogicVRegister& src1,
2699 const LogicVRegister& src2) {
2700 SimVRegister temp;
2701 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2702 return sub(vform, dst, dst, product).SignedSaturate(vform);
2703 }
2704
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2705 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2706 const LogicVRegister& src1,
2707 const LogicVRegister& src2) {
2708 SimVRegister temp;
2709 LogicVRegister product = smull(vform, temp, src1, src2);
2710 return add(vform, dst, product, product).SignedSaturate(vform);
2711 }
2712
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2713 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2714 const LogicVRegister& src1,
2715 const LogicVRegister& src2) {
2716 SimVRegister temp;
2717 LogicVRegister product = smull2(vform, temp, src1, src2);
2718 return add(vform, dst, product, product).SignedSaturate(vform);
2719 }
2720
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)2721 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2722 const LogicVRegister& src1,
2723 const LogicVRegister& src2, bool round) {
2724 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2725 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2726 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2727
2728 int esize = LaneSizeInBitsFromFormat(vform);
2729 int round_const = round ? (1 << (esize - 2)) : 0;
2730 int64_t product;
2731
2732 dst.ClearForWrite(vform);
2733 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2734 product = src1.Int(vform, i) * src2.Int(vform, i);
2735 product += round_const;
2736 product = product >> (esize - 1);
2737
2738 if (product > MaxIntFromFormat(vform)) {
2739 product = MaxIntFromFormat(vform);
2740 } else if (product < MinIntFromFormat(vform)) {
2741 product = MinIntFromFormat(vform);
2742 }
2743 dst.SetInt(vform, i, product);
2744 }
2745 return dst;
2746 }
2747
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2748 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2749 const LogicVRegister& src1,
2750 const LogicVRegister& src2) {
2751 return sqrdmulh(vform, dst, src1, src2, false);
2752 }
2753
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2754 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2755 const LogicVRegister& src1,
2756 const LogicVRegister& src2) {
2757 SimVRegister temp;
2758 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2759 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2760 return dst;
2761 }
2762
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2763 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2764 const LogicVRegister& src1,
2765 const LogicVRegister& src2) {
2766 SimVRegister temp;
2767 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2768 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2769 return dst;
2770 }
2771
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2772 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2773 const LogicVRegister& src1,
2774 const LogicVRegister& src2) {
2775 SimVRegister temp;
2776 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2777 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2778 return dst;
2779 }
2780
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2781 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2782 const LogicVRegister& src1,
2783 const LogicVRegister& src2) {
2784 SimVRegister temp;
2785 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2786 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2787 return dst;
2788 }
2789
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2790 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2791 const LogicVRegister& src1,
2792 const LogicVRegister& src2) {
2793 SimVRegister temp;
2794 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2795 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2796 return dst;
2797 }
2798
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2799 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2800 const LogicVRegister& src1,
2801 const LogicVRegister& src2) {
2802 SimVRegister temp;
2803 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2804 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2805 return dst;
2806 }
2807
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2808 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2809 const LogicVRegister& src1,
2810 const LogicVRegister& src2) {
2811 SimVRegister temp;
2812 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2813 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2814 return dst;
2815 }
2816
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2817 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2818 const LogicVRegister& src1,
2819 const LogicVRegister& src2) {
2820 SimVRegister temp;
2821 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2822 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2823 return dst;
2824 }
2825
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2826 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2827 const LogicVRegister& src1,
2828 const LogicVRegister& src2) {
2829 uint64_t result[16];
2830 int laneCount = LaneCountFromFormat(vform);
2831 int pairs = laneCount / 2;
2832 for (int i = 0; i < pairs; ++i) {
2833 result[2 * i] = src1.Uint(vform, 2 * i);
2834 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2835 }
2836
2837 dst.SetUintArray(vform, result);
2838 return dst;
2839 }
2840
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2841 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2842 const LogicVRegister& src1,
2843 const LogicVRegister& src2) {
2844 uint64_t result[16];
2845 int laneCount = LaneCountFromFormat(vform);
2846 int pairs = laneCount / 2;
2847 for (int i = 0; i < pairs; ++i) {
2848 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2849 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2850 }
2851
2852 dst.SetUintArray(vform, result);
2853 return dst;
2854 }
2855
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2856 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2857 const LogicVRegister& src1,
2858 const LogicVRegister& src2) {
2859 uint64_t result[16];
2860 int laneCount = LaneCountFromFormat(vform);
2861 int pairs = laneCount / 2;
2862 for (int i = 0; i < pairs; ++i) {
2863 result[2 * i] = src1.Uint(vform, i);
2864 result[(2 * i) + 1] = src2.Uint(vform, i);
2865 }
2866
2867 dst.SetUintArray(vform, result);
2868 return dst;
2869 }
2870
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2871 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2872 const LogicVRegister& src1,
2873 const LogicVRegister& src2) {
2874 uint64_t result[16];
2875 int laneCount = LaneCountFromFormat(vform);
2876 int pairs = laneCount / 2;
2877 for (int i = 0; i < pairs; ++i) {
2878 result[2 * i] = src1.Uint(vform, pairs + i);
2879 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2880 }
2881
2882 dst.SetUintArray(vform, result);
2883 return dst;
2884 }
2885
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2886 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2887 const LogicVRegister& src1,
2888 const LogicVRegister& src2) {
2889 uint64_t result[32];
2890 int laneCount = LaneCountFromFormat(vform);
2891 for (int i = 0; i < laneCount; ++i) {
2892 result[i] = src1.Uint(vform, i);
2893 result[laneCount + i] = src2.Uint(vform, i);
2894 }
2895
2896 dst.ClearForWrite(vform);
2897 for (int i = 0; i < laneCount; ++i) {
2898 dst.SetUint(vform, i, result[2 * i]);
2899 }
2900 return dst;
2901 }
2902
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2903 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2904 const LogicVRegister& src1,
2905 const LogicVRegister& src2) {
2906 uint64_t result[32];
2907 int laneCount = LaneCountFromFormat(vform);
2908 for (int i = 0; i < laneCount; ++i) {
2909 result[i] = src1.Uint(vform, i);
2910 result[laneCount + i] = src2.Uint(vform, i);
2911 }
2912
2913 dst.ClearForWrite(vform);
2914 for (int i = 0; i < laneCount; ++i) {
2915 dst.SetUint(vform, i, result[(2 * i) + 1]);
2916 }
2917 return dst;
2918 }
2919
2920 template <typename T>
FPAdd(T op1,T op2)2921 T Simulator::FPAdd(T op1, T op2) {
2922 T result = FPProcessNaNs(op1, op2);
2923 if (std::isnan(result)) return result;
2924
2925 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2926 // inf + -inf returns the default NaN.
2927 FPProcessException();
2928 return FPDefaultNaN<T>();
2929 } else {
2930 // Other cases should be handled by standard arithmetic.
2931 return op1 + op2;
2932 }
2933 }
2934
2935 template <typename T>
FPSub(T op1,T op2)2936 T Simulator::FPSub(T op1, T op2) {
2937 // NaNs should be handled elsewhere.
2938 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2939
2940 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2941 // inf - inf returns the default NaN.
2942 FPProcessException();
2943 return FPDefaultNaN<T>();
2944 } else {
2945 // Other cases should be handled by standard arithmetic.
2946 return op1 - op2;
2947 }
2948 }
2949
2950 template <typename T>
FPMul(T op1,T op2)2951 T Simulator::FPMul(T op1, T op2) {
2952 // NaNs should be handled elsewhere.
2953 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2954
2955 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2956 // inf * 0.0 returns the default NaN.
2957 FPProcessException();
2958 return FPDefaultNaN<T>();
2959 } else {
2960 // Other cases should be handled by standard arithmetic.
2961 return op1 * op2;
2962 }
2963 }
2964
2965 template <typename T>
FPMulx(T op1,T op2)2966 T Simulator::FPMulx(T op1, T op2) {
2967 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2968 // inf * 0.0 returns +/-2.0.
2969 T two = 2.0;
2970 return std::copysign(1.0, op1) * std::copysign(1.0, op2) * two;
2971 }
2972 return FPMul(op1, op2);
2973 }
2974
2975 template <typename T>
FPMulAdd(T a,T op1,T op2)2976 T Simulator::FPMulAdd(T a, T op1, T op2) {
2977 T result = FPProcessNaNs3(a, op1, op2);
2978
2979 T sign_a = std::copysign(1.0, a);
2980 T sign_prod = std::copysign(1.0, op1) * std::copysign(1.0, op2);
2981 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2982 bool operation_generates_nan =
2983 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
2984 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
2985 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
2986
2987 if (std::isnan(result)) {
2988 // Generated NaNs override quiet NaNs propagated from a.
2989 if (operation_generates_nan && IsQuietNaN(a)) {
2990 FPProcessException();
2991 return FPDefaultNaN<T>();
2992 } else {
2993 return result;
2994 }
2995 }
2996
2997 // If the operation would produce a NaN, return the default NaN.
2998 if (operation_generates_nan) {
2999 FPProcessException();
3000 return FPDefaultNaN<T>();
3001 }
3002
3003 // Work around broken fma implementations for exact zero results: The sign of
3004 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3005 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3006 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3007 }
3008
3009 result = FusedMultiplyAdd(op1, op2, a);
3010 DCHECK(!std::isnan(result));
3011
3012 // Work around broken fma implementations for rounded zero results: If a is
3013 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3014 if ((a == 0.0) && (result == 0.0)) {
3015 return std::copysign(0.0, sign_prod);
3016 }
3017
3018 return result;
3019 }
3020
3021 template <typename T>
FPDiv(T op1,T op2)3022 T Simulator::FPDiv(T op1, T op2) {
3023 // NaNs should be handled elsewhere.
3024 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3025
3026 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3027 // inf / inf and 0.0 / 0.0 return the default NaN.
3028 FPProcessException();
3029 return FPDefaultNaN<T>();
3030 } else {
3031 if (op2 == 0.0) {
3032 FPProcessException();
3033 if (!std::isnan(op1)) {
3034 double op1_sign = std::copysign(1.0, op1);
3035 double op2_sign = std::copysign(1.0, op2);
3036 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3037 }
3038 }
3039
3040 // Other cases should be handled by standard arithmetic.
3041 return op1 / op2;
3042 }
3043 }
3044
3045 template <typename T>
FPSqrt(T op)3046 T Simulator::FPSqrt(T op) {
3047 if (std::isnan(op)) {
3048 return FPProcessNaN(op);
3049 } else if (op < 0.0) {
3050 FPProcessException();
3051 return FPDefaultNaN<T>();
3052 } else {
3053 return std::sqrt(op);
3054 }
3055 }
3056
3057 template <typename T>
FPMax(T a,T b)3058 T Simulator::FPMax(T a, T b) {
3059 T result = FPProcessNaNs(a, b);
3060 if (std::isnan(result)) return result;
3061
3062 if ((a == 0.0) && (b == 0.0) &&
3063 (std::copysign(1.0, a) != std::copysign(1.0, b))) {
3064 // a and b are zero, and the sign differs: return +0.0.
3065 return 0.0;
3066 } else {
3067 return (a > b) ? a : b;
3068 }
3069 }
3070
3071 template <typename T>
FPMaxNM(T a,T b)3072 T Simulator::FPMaxNM(T a, T b) {
3073 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3074 a = kFP64NegativeInfinity;
3075 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3076 b = kFP64NegativeInfinity;
3077 }
3078
3079 T result = FPProcessNaNs(a, b);
3080 return std::isnan(result) ? result : FPMax(a, b);
3081 }
3082
3083 template <typename T>
FPMin(T a,T b)3084 T Simulator::FPMin(T a, T b) {
3085 T result = FPProcessNaNs(a, b);
3086 if (std::isnan(result)) return result;
3087
3088 if ((a == 0.0) && (b == 0.0) &&
3089 (std::copysign(1.0, a) != std::copysign(1.0, b))) {
3090 // a and b are zero, and the sign differs: return -0.0.
3091 return -0.0;
3092 } else {
3093 return (a < b) ? a : b;
3094 }
3095 }
3096
3097 template <typename T>
FPMinNM(T a,T b)3098 T Simulator::FPMinNM(T a, T b) {
3099 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3100 a = kFP64PositiveInfinity;
3101 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3102 b = kFP64PositiveInfinity;
3103 }
3104
3105 T result = FPProcessNaNs(a, b);
3106 return std::isnan(result) ? result : FPMin(a, b);
3107 }
3108
3109 template <typename T>
FPRecipStepFused(T op1,T op2)3110 T Simulator::FPRecipStepFused(T op1, T op2) {
3111 const T two = 2.0;
3112 if ((std::isinf(op1) && (op2 == 0.0)) ||
3113 ((op1 == 0.0) && (std::isinf(op2)))) {
3114 return two;
3115 } else if (std::isinf(op1) || std::isinf(op2)) {
3116 // Return +inf if signs match, otherwise -inf.
3117 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3118 : kFP64NegativeInfinity;
3119 } else {
3120 return FusedMultiplyAdd(op1, op2, two);
3121 }
3122 }
3123
3124 template <typename T>
FPRSqrtStepFused(T op1,T op2)3125 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3126 const T one_point_five = 1.5;
3127 const T two = 2.0;
3128
3129 if ((std::isinf(op1) && (op2 == 0.0)) ||
3130 ((op1 == 0.0) && (std::isinf(op2)))) {
3131 return one_point_five;
3132 } else if (std::isinf(op1) || std::isinf(op2)) {
3133 // Return +inf if signs match, otherwise -inf.
3134 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3135 : kFP64NegativeInfinity;
3136 } else {
3137 // The multiply-add-halve operation must be fully fused, so avoid interim
3138 // rounding by checking which operand can be losslessly divided by two
3139 // before doing the multiply-add.
3140 if (std::isnormal(op1 / two)) {
3141 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3142 } else if (std::isnormal(op2 / two)) {
3143 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3144 } else {
3145 // Neither operand is normal after halving: the result is dominated by
3146 // the addition term, so just return that.
3147 return one_point_five;
3148 }
3149 }
3150 }
3151
FPRoundInt(double value,FPRounding round_mode)3152 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3153 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3154 (value == kFP64NegativeInfinity)) {
3155 return value;
3156 } else if (std::isnan(value)) {
3157 return FPProcessNaN(value);
3158 }
3159
3160 double int_result = std::floor(value);
3161 double error = value - int_result;
3162 switch (round_mode) {
3163 case FPTieAway: {
3164 // Take care of correctly handling the range ]-0.5, -0.0], which must
3165 // yield -0.0.
3166 if ((-0.5 < value) && (value < 0.0)) {
3167 int_result = -0.0;
3168
3169 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3170 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3171 // result is positive, round up.
3172 int_result++;
3173 }
3174 break;
3175 }
3176 case FPTieEven: {
3177 // Take care of correctly handling the range [-0.5, -0.0], which must
3178 // yield -0.0.
3179 if ((-0.5 <= value) && (value < 0.0)) {
3180 int_result = -0.0;
3181
3182 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3183 // result is odd, round up.
3184 } else if ((error > 0.5) ||
3185 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3186 int_result++;
3187 }
3188 break;
3189 }
3190 case FPZero: {
3191 // If value>0 then we take floor(value)
3192 // otherwise, ceil(value).
3193 if (value < 0) {
3194 int_result = ceil(value);
3195 }
3196 break;
3197 }
3198 case FPNegativeInfinity: {
3199 // We always use floor(value).
3200 break;
3201 }
3202 case FPPositiveInfinity: {
3203 // Take care of correctly handling the range ]-1.0, -0.0], which must
3204 // yield -0.0.
3205 if ((-1.0 < value) && (value < 0.0)) {
3206 int_result = -0.0;
3207
3208 // If the error is non-zero, round up.
3209 } else if (error > 0.0) {
3210 int_result++;
3211 }
3212 break;
3213 }
3214 default:
3215 UNIMPLEMENTED();
3216 }
3217 return int_result;
3218 }
3219
FPToInt32(double value,FPRounding rmode)3220 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3221 value = FPRoundInt(value, rmode);
3222 if (value >= kWMaxInt) {
3223 return kWMaxInt;
3224 } else if (value < kWMinInt) {
3225 return kWMinInt;
3226 }
3227 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3228 }
3229
FPToInt64(double value,FPRounding rmode)3230 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3231 value = FPRoundInt(value, rmode);
3232 if (value >= kXMaxInt) {
3233 return kXMaxInt;
3234 } else if (value < kXMinInt) {
3235 return kXMinInt;
3236 }
3237 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3238 }
3239
FPToUInt32(double value,FPRounding rmode)3240 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3241 value = FPRoundInt(value, rmode);
3242 if (value >= kWMaxUInt) {
3243 return kWMaxUInt;
3244 } else if (value < 0.0) {
3245 return 0;
3246 }
3247 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3248 }
3249
FPToUInt64(double value,FPRounding rmode)3250 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3251 value = FPRoundInt(value, rmode);
3252 if (value >= kXMaxUInt) {
3253 return kXMaxUInt;
3254 } else if (value < 0.0) {
3255 return 0;
3256 }
3257 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3258 }
3259
// Generates two definitions of Simulator::FN for every entry that
// NEON_FP3SAME_LIST passes to this macro:
//  - a template on the lane type T that clears the unused part of |dst| and
//    then, lane by lane, combines src1 and src2 with OP. When PROCNAN is true
//    FPProcessNaNs is consulted first and OP is only applied if that did not
//    produce a NaN; otherwise OP is applied directly.
//  - a non-template overload that picks the float instantiation when the lane
//    size equals kSRegSize and the double instantiation otherwise (the latter
//    is DCHECKed to be kDRegSize).
// The macro is undefined again right after the list has been expanded.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                      \
  template <typename T>                                                \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    dst.ClearForWrite(vform);                                          \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {             \
      T op1 = src1.Float<T>(i);                                        \
      T op2 = src2.Float<T>(i);                                        \
      T result;                                                        \
      if (PROCNAN) {                                                   \
        result = FPProcessNaNs(op1, op2);                              \
        if (!std::isnan(result)) {                                     \
          result = OP(op1, op2);                                       \
        }                                                              \
      } else {                                                         \
        result = OP(op1, op2);                                         \
      }                                                                \
      dst.SetFloat(i, result);                                         \
    }                                                                  \
    return dst;                                                        \
  }                                                                    \
                                                                       \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {               \
      FN<float>(vform, dst, src1, src2);                               \
    } else {                                                           \
      DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);          \
      FN<double>(vform, dst, src1, src2);                              \
    }                                                                  \
    return dst;                                                        \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3296
3297 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3298 const LogicVRegister& src1,
3299 const LogicVRegister& src2) {
3300 SimVRegister temp;
3301 LogicVRegister product = fmul(vform, temp, src1, src2);
3302 return fneg(vform, dst, product);
3303 }
3304
3305 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3306 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3307 const LogicVRegister& src1,
3308 const LogicVRegister& src2) {
3309 dst.ClearForWrite(vform);
3310 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3311 T op1 = -src1.Float<T>(i);
3312 T op2 = src2.Float<T>(i);
3313 T result = FPProcessNaNs(op1, op2);
3314 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3315 }
3316 return dst;
3317 }
3318
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3319 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3320 const LogicVRegister& src1,
3321 const LogicVRegister& src2) {
3322 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3323 frecps<float>(vform, dst, src1, src2);
3324 } else {
3325 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3326 frecps<double>(vform, dst, src1, src2);
3327 }
3328 return dst;
3329 }
3330
3331 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3332 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3333 const LogicVRegister& src1,
3334 const LogicVRegister& src2) {
3335 dst.ClearForWrite(vform);
3336 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3337 T op1 = -src1.Float<T>(i);
3338 T op2 = src2.Float<T>(i);
3339 T result = FPProcessNaNs(op1, op2);
3340 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3341 }
3342 return dst;
3343 }
3344
FPToFixedJS(double value)3345 int32_t Simulator::FPToFixedJS(double value) {
3346 // The Z-flag is set when the conversion from double precision floating-point
3347 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
3348 // outside the bounds of a 32-bit integer, or isn't an exact integer then the
3349 // Z-flag is unset.
3350 int Z = 1;
3351 int32_t result;
3352 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3353 (value == kFP64NegativeInfinity)) {
3354 // +/- zero and infinity all return zero, however -0 and +/- Infinity also
3355 // unset the Z-flag.
3356 result = 0.0;
3357 if ((value != 0.0) || std::signbit(value)) {
3358 Z = 0;
3359 }
3360 } else if (std::isnan(value)) {
3361 // NaN values unset the Z-flag and set the result to 0.
3362 result = 0;
3363 Z = 0;
3364 } else {
3365 // All other values are converted to an integer representation, rounded
3366 // toward zero.
3367 double int_result = std::floor(value);
3368 double error = value - int_result;
3369 if ((error != 0.0) && (int_result < 0.0)) {
3370 int_result++;
3371 }
3372 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
3373 // write a one-liner with std::round, but the behaviour on ties is incorrect
3374 // for our purposes.
3375 double mod_const = static_cast<double>(UINT64_C(1) << 32);
3376 double mod_error =
3377 (int_result / mod_const) - std::floor(int_result / mod_const);
3378 double constrained;
3379 if (mod_error == 0.5) {
3380 constrained = INT32_MIN;
3381 } else {
3382 constrained = int_result - mod_const * round(int_result / mod_const);
3383 }
3384 DCHECK(std::floor(constrained) == constrained);
3385 DCHECK(constrained >= INT32_MIN);
3386 DCHECK(constrained <= INT32_MAX);
3387 // Take the bottom 32 bits of the result as a 32-bit integer.
3388 result = static_cast<int32_t>(constrained);
3389 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
3390 (error != 0.0)) {
3391 // If the integer result is out of range or the conversion isn't exact,
3392 // take exception and unset the Z-flag.
3393 FPProcessException();
3394 Z = 0;
3395 }
3396 }
3397 nzcv().SetN(0);
3398 nzcv().SetZ(Z);
3399 nzcv().SetC(0);
3400 nzcv().SetV(0);
3401 return result;
3402 }
3403
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3404 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3405 const LogicVRegister& src1,
3406 const LogicVRegister& src2) {
3407 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3408 frsqrts<float>(vform, dst, src1, src2);
3409 } else {
3410 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3411 frsqrts<double>(vform, dst, src1, src2);
3412 }
3413 return dst;
3414 }
3415
3416 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3417 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3418 const LogicVRegister& src1,
3419 const LogicVRegister& src2, Condition cond) {
3420 dst.ClearForWrite(vform);
3421 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3422 bool result = false;
3423 T op1 = src1.Float<T>(i);
3424 T op2 = src2.Float<T>(i);
3425 T nan_result = FPProcessNaNs(op1, op2);
3426 if (!std::isnan(nan_result)) {
3427 switch (cond) {
3428 case eq:
3429 result = (op1 == op2);
3430 break;
3431 case ge:
3432 result = (op1 >= op2);
3433 break;
3434 case gt:
3435 result = (op1 > op2);
3436 break;
3437 case le:
3438 result = (op1 <= op2);
3439 break;
3440 case lt:
3441 result = (op1 < op2);
3442 break;
3443 default:
3444 UNREACHABLE();
3445 }
3446 }
3447 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3448 }
3449 return dst;
3450 }
3451
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3452 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3453 const LogicVRegister& src1,
3454 const LogicVRegister& src2, Condition cond) {
3455 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3456 fcmp<float>(vform, dst, src1, src2, cond);
3457 } else {
3458 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3459 fcmp<double>(vform, dst, src1, src2, cond);
3460 }
3461 return dst;
3462 }
3463
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)3464 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3465 const LogicVRegister& src, Condition cond) {
3466 SimVRegister temp;
3467 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3468 LogicVRegister zero_reg =
3469 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3470 fcmp<float>(vform, dst, src, zero_reg, cond);
3471 } else {
3472 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3473 LogicVRegister zero_reg =
3474 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3475 fcmp<double>(vform, dst, src, zero_reg, cond);
3476 }
3477 return dst;
3478 }
3479
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3480 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3481 const LogicVRegister& src1,
3482 const LogicVRegister& src2, Condition cond) {
3483 SimVRegister temp1, temp2;
3484 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3485 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3486 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3487 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3488 } else {
3489 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3490 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3491 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3492 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3493 }
3494 return dst;
3495 }
3496
3497 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3498 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3499 const LogicVRegister& src1,
3500 const LogicVRegister& src2) {
3501 dst.ClearForWrite(vform);
3502 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3503 T op1 = src1.Float<T>(i);
3504 T op2 = src2.Float<T>(i);
3505 T acc = dst.Float<T>(i);
3506 T result = FPMulAdd(acc, op1, op2);
3507 dst.SetFloat(i, result);
3508 }
3509 return dst;
3510 }
3511
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3512 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3513 const LogicVRegister& src1,
3514 const LogicVRegister& src2) {
3515 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3516 fmla<float>(vform, dst, src1, src2);
3517 } else {
3518 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3519 fmla<double>(vform, dst, src1, src2);
3520 }
3521 return dst;
3522 }
3523
3524 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3525 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3526 const LogicVRegister& src1,
3527 const LogicVRegister& src2) {
3528 dst.ClearForWrite(vform);
3529 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3530 T op1 = -src1.Float<T>(i);
3531 T op2 = src2.Float<T>(i);
3532 T acc = dst.Float<T>(i);
3533 T result = FPMulAdd(acc, op1, op2);
3534 dst.SetFloat(i, result);
3535 }
3536 return dst;
3537 }
3538
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3539 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3540 const LogicVRegister& src1,
3541 const LogicVRegister& src2) {
3542 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3543 fmls<float>(vform, dst, src1, src2);
3544 } else {
3545 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3546 fmls<double>(vform, dst, src1, src2);
3547 }
3548 return dst;
3549 }
3550
3551 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3552 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3553 const LogicVRegister& src) {
3554 dst.ClearForWrite(vform);
3555 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3556 T op = src.Float<T>(i);
3557 op = -op;
3558 dst.SetFloat(i, op);
3559 }
3560 return dst;
3561 }
3562
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3563 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3564 const LogicVRegister& src) {
3565 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3566 fneg<float>(vform, dst, src);
3567 } else {
3568 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3569 fneg<double>(vform, dst, src);
3570 }
3571 return dst;
3572 }
3573
3574 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3575 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3576 const LogicVRegister& src) {
3577 dst.ClearForWrite(vform);
3578 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3579 T op = src.Float<T>(i);
3580 if (std::copysign(1.0, op) < 0.0) {
3581 op = -op;
3582 }
3583 dst.SetFloat(i, op);
3584 }
3585 return dst;
3586 }
3587
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3588 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3589 const LogicVRegister& src) {
3590 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3591 fabs_<float>(vform, dst, src);
3592 } else {
3593 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3594 fabs_<double>(vform, dst, src);
3595 }
3596 return dst;
3597 }
3598
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3599 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3600 const LogicVRegister& src1,
3601 const LogicVRegister& src2) {
3602 SimVRegister temp;
3603 fsub(vform, temp, src1, src2);
3604 fabs_(vform, dst, temp);
3605 return dst;
3606 }
3607
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3608 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3609 const LogicVRegister& src) {
3610 dst.ClearForWrite(vform);
3611 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3612 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3613 float result = FPSqrt(src.Float<float>(i));
3614 dst.SetFloat(i, result);
3615 }
3616 } else {
3617 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3618 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3619 double result = FPSqrt(src.Float<double>(i));
3620 dst.SetFloat(i, result);
3621 }
3622 }
3623 return dst;
3624 }
3625
// Generates two overloads of Simulator::FNP for every entry that
// NEON_FPPAIRWISE_LIST passes to this macro:
//  - the vector form (src1, src2) runs uzp1 and uzp2 over the sources into two
//    scratch registers and then applies the lane-wise operation FN to them
//    (presumably uzp1/uzp2 gather the even- and odd-indexed elements so that
//    adjacent pairs line up lane by lane; confirm against uzp1/uzp2);
//  - the scalar form (src) applies OP to lanes 0 and 1 of |src| and stores the
//    result in lane 0 of |dst|. |vform| must be kFormatS (float lanes) or
//    kFormatD (double lanes, DCHECKed). ClearForWrite is called after the
//    result has been written.
// The macro is undefined again right after the list has been expanded.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                             \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
                                const LogicVRegister& src1,             \
                                const LogicVRegister& src2) {           \
    SimVRegister temp1, temp2;                                          \
    uzp1(vform, temp1, src1, src2);                                     \
    uzp2(vform, temp2, src1, src2);                                     \
    FN(vform, dst, temp1, temp2);                                       \
    return dst;                                                         \
  }                                                                     \
                                                                        \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
                                const LogicVRegister& src) {            \
    if (vform == kFormatS) {                                            \
      float result = OP(src.Float<float>(0), src.Float<float>(1));      \
      dst.SetFloat(0, result);                                          \
    } else {                                                            \
      DCHECK_EQ(vform, kFormatD);                                       \
      double result = OP(src.Float<double>(0), src.Float<double>(1));   \
      dst.SetFloat(0, result);                                          \
    }                                                                   \
    dst.ClearForWrite(vform);                                           \
    return dst;                                                         \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
3652
3653 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3654 const LogicVRegister& src, FPMinMaxOp Op) {
3655 DCHECK_EQ(vform, kFormat4S);
3656 USE(vform);
3657 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3658 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3659 float result = (this->*Op)(result1, result2);
3660 dst.ClearForWrite(kFormatS);
3661 dst.SetFloat<float>(0, result);
3662 return dst;
3663 }
3664
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3665 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3666 const LogicVRegister& src) {
3667 return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3668 }
3669
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3670 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3671 const LogicVRegister& src) {
3672 return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3673 }
3674
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3675 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3676 const LogicVRegister& src) {
3677 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3678 }
3679
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3680 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3681 const LogicVRegister& src) {
3682 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3683 }
3684
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3685 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3686 const LogicVRegister& src1,
3687 const LogicVRegister& src2, int index) {
3688 dst.ClearForWrite(vform);
3689 SimVRegister temp;
3690 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3691 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3692 fmul<float>(vform, dst, src1, index_reg);
3693 } else {
3694 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3695 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3696 fmul<double>(vform, dst, src1, index_reg);
3697 }
3698 return dst;
3699 }
3700
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3701 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3702 const LogicVRegister& src1,
3703 const LogicVRegister& src2, int index) {
3704 dst.ClearForWrite(vform);
3705 SimVRegister temp;
3706 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3707 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3708 fmla<float>(vform, dst, src1, index_reg);
3709 } else {
3710 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3711 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3712 fmla<double>(vform, dst, src1, index_reg);
3713 }
3714 return dst;
3715 }
3716
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3717 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3718 const LogicVRegister& src1,
3719 const LogicVRegister& src2, int index) {
3720 dst.ClearForWrite(vform);
3721 SimVRegister temp;
3722 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3723 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3724 fmls<float>(vform, dst, src1, index_reg);
3725 } else {
3726 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3727 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3728 fmls<double>(vform, dst, src1, index_reg);
3729 }
3730 return dst;
3731 }
3732
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3733 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3734 const LogicVRegister& src1,
3735 const LogicVRegister& src2, int index) {
3736 dst.ClearForWrite(vform);
3737 SimVRegister temp;
3738 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3739 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3740 fmulx<float>(vform, dst, src1, index_reg);
3741
3742 } else {
3743 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3744 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3745 fmulx<double>(vform, dst, src1, index_reg);
3746 }
3747 return dst;
3748 }
3749
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)3750 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3751 const LogicVRegister& src,
3752 FPRounding rounding_mode,
3753 bool inexact_exception) {
3754 dst.ClearForWrite(vform);
3755 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3756 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3757 float input = src.Float<float>(i);
3758 float rounded = FPRoundInt(input, rounding_mode);
3759 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3760 FPProcessException();
3761 }
3762 dst.SetFloat<float>(i, rounded);
3763 }
3764 } else {
3765 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3766 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3767 double input = src.Float<double>(i);
3768 double rounded = FPRoundInt(input, rounding_mode);
3769 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3770 FPProcessException();
3771 }
3772 dst.SetFloat<double>(i, rounded);
3773 }
3774 }
3775 return dst;
3776 }
3777
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)3778 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3779 const LogicVRegister& src,
3780 FPRounding rounding_mode, int fbits) {
3781 dst.ClearForWrite(vform);
3782 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3783 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3784 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3785 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3786 }
3787 } else {
3788 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3789 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3790 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3791 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3792 }
3793 }
3794 return dst;
3795 }
3796
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)3797 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3798 const LogicVRegister& src,
3799 FPRounding rounding_mode, int fbits) {
3800 dst.ClearForWrite(vform);
3801 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3802 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3803 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3804 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3805 }
3806 } else {
3807 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3808 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3809 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3810 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3811 }
3812 }
3813 return dst;
3814 }
3815
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3816 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3817 const LogicVRegister& src) {
3818 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3819 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3820 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3821 }
3822 } else {
3823 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3824 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3825 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3826 }
3827 }
3828 return dst;
3829 }
3830
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3831 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3832 const LogicVRegister& src) {
3833 int lane_count = LaneCountFromFormat(vform);
3834 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3835 for (int i = 0; i < lane_count; i++) {
3836 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3837 }
3838 } else {
3839 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3840 for (int i = 0; i < lane_count; i++) {
3841 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3842 }
3843 }
3844 return dst;
3845 }
3846
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3847 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3848 const LogicVRegister& src) {
3849 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3850 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3851 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3852 }
3853 } else {
3854 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3855 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3856 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3857 }
3858 }
3859 return dst;
3860 }
3861
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3862 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3863 const LogicVRegister& src) {
3864 int lane_count = LaneCountFromFormat(vform) / 2;
3865 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3866 for (int i = lane_count - 1; i >= 0; i--) {
3867 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3868 }
3869 } else {
3870 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3871 for (int i = lane_count - 1; i >= 0; i--) {
3872 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3873 }
3874 }
3875 return dst;
3876 }
3877
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3878 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3879 const LogicVRegister& src) {
3880 dst.ClearForWrite(vform);
3881 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3882 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3883 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3884 }
3885 return dst;
3886 }
3887
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3888 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3889 const LogicVRegister& src) {
3890 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3891 int lane_count = LaneCountFromFormat(vform) / 2;
3892 for (int i = lane_count - 1; i >= 0; i--) {
3893 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3894 }
3895 return dst;
3896 }
3897
3898 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)3899 double Simulator::recip_sqrt_estimate(double a) {
3900 int q0, q1, s;
3901 double r;
3902 if (a < 0.5) {
3903 q0 = static_cast<int>(a * 512.0);
3904 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3905 } else {
3906 q1 = static_cast<int>(a * 256.0);
3907 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3908 }
3909 s = static_cast<int>(256.0 * r + 0.5);
3910 return static_cast<double>(s) / 256.0;
3911 }
3912
3913 namespace {
3914
Bits(uint64_t val,int start_bit,int end_bit)3915 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3916 return unsigned_bitextract_64(start_bit, end_bit, val);
3917 }
3918
3919 } // anonymous namespace
3920
3921 template <typename T>
FPRecipSqrtEstimate(T op)3922 T Simulator::FPRecipSqrtEstimate(T op) {
3923 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3924 "T must be a float or double");
3925
3926 if (std::isnan(op)) {
3927 return FPProcessNaN(op);
3928 } else if (op == 0.0) {
3929 if (std::copysign(1.0, op) < 0.0) {
3930 return kFP64NegativeInfinity;
3931 } else {
3932 return kFP64PositiveInfinity;
3933 }
3934 } else if (std::copysign(1.0, op) < 0.0) {
3935 FPProcessException();
3936 return FPDefaultNaN<T>();
3937 } else if (std::isinf(op)) {
3938 return 0.0;
3939 } else {
3940 uint64_t fraction;
3941 int32_t exp, result_exp;
3942
3943 if (sizeof(T) == sizeof(float)) {
3944 exp = static_cast<int32_t>(float_exp(op));
3945 fraction = float_mantissa(op);
3946 fraction <<= 29;
3947 } else {
3948 exp = static_cast<int32_t>(double_exp(op));
3949 fraction = double_mantissa(op);
3950 }
3951
3952 if (exp == 0) {
3953 while (Bits(fraction, 51, 51) == 0) {
3954 fraction = Bits(fraction, 50, 0) << 1;
3955 exp -= 1;
3956 }
3957 fraction = Bits(fraction, 50, 0) << 1;
3958 }
3959
3960 double scaled;
3961 if (Bits(exp, 0, 0) == 0) {
3962 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3963 } else {
3964 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3965 }
3966
3967 if (sizeof(T) == sizeof(float)) {
3968 result_exp = (380 - exp) / 2;
3969 } else {
3970 result_exp = (3068 - exp) / 2;
3971 }
3972
3973 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3974
3975 if (sizeof(T) == sizeof(float)) {
3976 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3977 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3978 return float_pack(0, exp_bits, est_bits);
3979 } else {
3980 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3981 }
3982 }
3983 }
3984
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3985 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3986 const LogicVRegister& src) {
3987 dst.ClearForWrite(vform);
3988 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3989 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3990 float input = src.Float<float>(i);
3991 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3992 }
3993 } else {
3994 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3995 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3996 double input = src.Float<double>(i);
3997 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3998 }
3999 }
4000 return dst;
4001 }
4002
4003 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)4004 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4005 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
4006 "T must be a float or double");
4007 uint32_t sign;
4008
4009 if (sizeof(T) == sizeof(float)) {
4010 sign = float_sign(op);
4011 } else {
4012 sign = double_sign(op);
4013 }
4014
4015 if (std::isnan(op)) {
4016 return FPProcessNaN(op);
4017 } else if (std::isinf(op)) {
4018 return (sign == 1) ? -0.0 : 0.0;
4019 } else if (op == 0.0) {
4020 FPProcessException(); // FPExc_DivideByZero exception.
4021 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4022 } else if (((sizeof(T) == sizeof(float)) &&
4023 (std::fabs(op) < std::pow(2.0, -128.0))) ||
4024 ((sizeof(T) == sizeof(double)) &&
4025 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4026 bool overflow_to_inf = false;
4027 switch (rounding) {
4028 case FPTieEven:
4029 overflow_to_inf = true;
4030 break;
4031 case FPPositiveInfinity:
4032 overflow_to_inf = (sign == 0);
4033 break;
4034 case FPNegativeInfinity:
4035 overflow_to_inf = (sign == 1);
4036 break;
4037 case FPZero:
4038 overflow_to_inf = false;
4039 break;
4040 default:
4041 break;
4042 }
4043 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
4044 if (overflow_to_inf) {
4045 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4046 } else {
4047 // Return FPMaxNormal(sign).
4048 if (sizeof(T) == sizeof(float)) {
4049 return float_pack(sign, 0xFE, 0x07FFFFF);
4050 } else {
4051 return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl);
4052 }
4053 }
4054 } else {
4055 uint64_t fraction;
4056 int32_t exp, result_exp;
4057 uint32_t sign;
4058
4059 if (sizeof(T) == sizeof(float)) {
4060 sign = float_sign(op);
4061 exp = static_cast<int32_t>(float_exp(op));
4062 fraction = float_mantissa(op);
4063 fraction <<= 29;
4064 } else {
4065 sign = double_sign(op);
4066 exp = static_cast<int32_t>(double_exp(op));
4067 fraction = double_mantissa(op);
4068 }
4069
4070 if (exp == 0) {
4071 if (Bits(fraction, 51, 51) == 0) {
4072 exp -= 1;
4073 fraction = Bits(fraction, 49, 0) << 2;
4074 } else {
4075 fraction = Bits(fraction, 50, 0) << 1;
4076 }
4077 }
4078
4079 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4080
4081 if (sizeof(T) == sizeof(float)) {
4082 result_exp = 253 - exp;
4083 } else {
4084 result_exp = 2045 - exp;
4085 }
4086
4087 double estimate = recip_estimate(scaled);
4088
4089 fraction = double_mantissa(estimate);
4090 if (result_exp == 0) {
4091 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4092 } else if (result_exp == -1) {
4093 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4094 result_exp = 0;
4095 }
4096 if (sizeof(T) == sizeof(float)) {
4097 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4098 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4099 return float_pack(sign, exp_bits, frac_bits);
4100 } else {
4101 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4102 }
4103 }
4104 }
4105
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4106 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4107 const LogicVRegister& src, FPRounding round) {
4108 dst.ClearForWrite(vform);
4109 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4110 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4111 float input = src.Float<float>(i);
4112 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4113 }
4114 } else {
4115 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4116 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4117 double input = src.Float<double>(i);
4118 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4119 }
4120 }
4121 return dst;
4122 }
4123
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4124 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4125 const LogicVRegister& src) {
4126 dst.ClearForWrite(vform);
4127 uint64_t operand;
4128 uint32_t result;
4129 double dp_operand, dp_result;
4130 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4131 operand = src.Uint(vform, i);
4132 if (operand <= 0x3FFFFFFF) {
4133 result = 0xFFFFFFFF;
4134 } else {
4135 dp_operand = operand * std::pow(2.0, -32);
4136 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4137 result = static_cast<uint32_t>(dp_result);
4138 }
4139 dst.SetUint(vform, i, result);
4140 }
4141 return dst;
4142 }
4143
4144 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)4145 double Simulator::recip_estimate(double a) {
4146 int q, s;
4147 double r;
4148 q = static_cast<int>(a * 512.0);
4149 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4150 s = static_cast<int>(256.0 * r + 0.5);
4151 return static_cast<double>(s) / 256.0;
4152 }
4153
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4154 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4155 const LogicVRegister& src) {
4156 dst.ClearForWrite(vform);
4157 uint64_t operand;
4158 uint32_t result;
4159 double dp_operand, dp_result;
4160 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4161 operand = src.Uint(vform, i);
4162 if (operand <= 0x7FFFFFFF) {
4163 result = 0xFFFFFFFF;
4164 } else {
4165 dp_operand = operand * std::pow(2.0, -32);
4166 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4167 result = static_cast<uint32_t>(dp_result);
4168 }
4169 dst.SetUint(vform, i, result);
4170 }
4171 return dst;
4172 }
4173
4174 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4175 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4176 const LogicVRegister& src) {
4177 dst.ClearForWrite(vform);
4178 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4179 T op = src.Float<T>(i);
4180 T result;
4181 if (std::isnan(op)) {
4182 result = FPProcessNaN(op);
4183 } else {
4184 int exp;
4185 uint32_t sign;
4186 if (sizeof(T) == sizeof(float)) {
4187 sign = float_sign(op);
4188 exp = static_cast<int>(float_exp(op));
4189 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4190 result = float_pack(sign, exp, 0);
4191 } else {
4192 sign = double_sign(op);
4193 exp = static_cast<int>(double_exp(op));
4194 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4195 result = double_pack(sign, exp, 0);
4196 }
4197 }
4198 dst.SetFloat(i, result);
4199 }
4200 return dst;
4201 }
4202
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4203 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4204 const LogicVRegister& src) {
4205 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4206 frecpx<float>(vform, dst, src);
4207 } else {
4208 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4209 frecpx<double>(vform, dst, src);
4210 }
4211 return dst;
4212 }
4213
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4214 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4215 const LogicVRegister& src, int fbits,
4216 FPRounding round) {
4217 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4218 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4219 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4220 dst.SetFloat<float>(i, result);
4221 } else {
4222 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4223 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4224 dst.SetFloat<double>(i, result);
4225 }
4226 }
4227 return dst;
4228 }
4229
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4230 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4231 const LogicVRegister& src, int fbits,
4232 FPRounding round) {
4233 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4234 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4235 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4236 dst.SetFloat<float>(i, result);
4237 } else {
4238 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4239 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4240 dst.SetFloat<double>(i, result);
4241 }
4242 }
4243 return dst;
4244 }
4245
4246 } // namespace internal
4247 } // namespace v8
4248
4249 #endif // USE_SIMULATOR
4250