1 // Copyright 2015, ARM Limited 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27 #ifdef VIXL_INCLUDE_SIMULATOR 28 29 #include <cmath> 30 #include "vixl/a64/simulator-a64.h" 31 32 namespace vixl { 33 FPDefaultNaN()34 template<> double Simulator::FPDefaultNaN<double>() { 35 return kFP64DefaultNaN; 36 } 37 38 FPDefaultNaN()39 template<> float Simulator::FPDefaultNaN<float>() { 40 return kFP32DefaultNaN; 41 } 42 43 // See FPRound for a description of this function. 
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)44 static inline double FPRoundToDouble(int64_t sign, int64_t exponent, 45 uint64_t mantissa, FPRounding round_mode) { 46 int64_t bits = 47 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign, 48 exponent, 49 mantissa, 50 round_mode); 51 return rawbits_to_double(bits); 52 } 53 54 55 // See FPRound for a description of this function. FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)56 static inline float FPRoundToFloat(int64_t sign, int64_t exponent, 57 uint64_t mantissa, FPRounding round_mode) { 58 int32_t bits = 59 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign, 60 exponent, 61 mantissa, 62 round_mode); 63 return rawbits_to_float(bits); 64 } 65 66 67 // See FPRound for a description of this function. FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)68 static inline float16 FPRoundToFloat16(int64_t sign, 69 int64_t exponent, 70 uint64_t mantissa, 71 FPRounding round_mode) { 72 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>( 73 sign, exponent, mantissa, round_mode); 74 } 75 76 FixedToDouble(int64_t src,int fbits,FPRounding round)77 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { 78 if (src >= 0) { 79 return UFixedToDouble(src, fbits, round); 80 } else { 81 // This works for all negative values, including INT64_MIN. 82 return -UFixedToDouble(-src, fbits, round); 83 } 84 } 85 86 UFixedToDouble(uint64_t src,int fbits,FPRounding round)87 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { 88 // An input of 0 is a special case because the result is effectively 89 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 90 if (src == 0) { 91 return 0.0; 92 } 93 94 // Calculate the exponent. The highest significant bit will have the value 95 // 2^exponent. 
96 const int highest_significant_bit = 63 - CountLeadingZeros(src); 97 const int64_t exponent = highest_significant_bit - fbits; 98 99 return FPRoundToDouble(0, exponent, src, round); 100 } 101 102 FixedToFloat(int64_t src,int fbits,FPRounding round)103 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { 104 if (src >= 0) { 105 return UFixedToFloat(src, fbits, round); 106 } else { 107 // This works for all negative values, including INT64_MIN. 108 return -UFixedToFloat(-src, fbits, round); 109 } 110 } 111 112 UFixedToFloat(uint64_t src,int fbits,FPRounding round)113 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { 114 // An input of 0 is a special case because the result is effectively 115 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 116 if (src == 0) { 117 return 0.0f; 118 } 119 120 // Calculate the exponent. The highest significant bit will have the value 121 // 2^exponent. 122 const int highest_significant_bit = 63 - CountLeadingZeros(src); 123 const int32_t exponent = highest_significant_bit - fbits; 124 125 return FPRoundToFloat(0, exponent, src, round); 126 } 127 128 FPToDouble(float value)129 double Simulator::FPToDouble(float value) { 130 switch (std::fpclassify(value)) { 131 case FP_NAN: { 132 if (IsSignallingNaN(value)) { 133 FPProcessException(); 134 } 135 if (DN()) return kFP64DefaultNaN; 136 137 // Convert NaNs as the processor would: 138 // - The sign is propagated. 139 // - The payload (mantissa) is transferred entirely, except that the top 140 // bit is forced to '1', making the result a quiet NaN. The unused 141 // (low-order) payload bits are set to 0. 142 uint32_t raw = float_to_rawbits(value); 143 144 uint64_t sign = raw >> 31; 145 uint64_t exponent = (1 << 11) - 1; 146 uint64_t payload = unsigned_bitextract_64(21, 0, raw); 147 payload <<= (52 - 23); // The unused low-order bits should be 0. 148 payload |= (UINT64_C(1) << 51); // Force a quiet NaN. 
149 150 return rawbits_to_double((sign << 63) | (exponent << 52) | payload); 151 } 152 153 case FP_ZERO: 154 case FP_NORMAL: 155 case FP_SUBNORMAL: 156 case FP_INFINITE: { 157 // All other inputs are preserved in a standard cast, because every value 158 // representable using an IEEE-754 float is also representable using an 159 // IEEE-754 double. 160 return static_cast<double>(value); 161 } 162 } 163 164 VIXL_UNREACHABLE(); 165 return static_cast<double>(value); 166 } 167 168 FPToFloat(float16 value)169 float Simulator::FPToFloat(float16 value) { 170 uint32_t sign = value >> 15; 171 uint32_t exponent = unsigned_bitextract_32( 172 kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits, 173 value); 174 uint32_t mantissa = unsigned_bitextract_32( 175 kFloat16MantissaBits - 1, 0, value); 176 177 switch (float16classify(value)) { 178 case FP_ZERO: 179 return (sign == 0) ? 0.0f : -0.0f; 180 181 case FP_INFINITE: 182 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; 183 184 case FP_SUBNORMAL: { 185 // Calculate shift required to put mantissa into the most-significant bits 186 // of the destination mantissa. 187 int shift = CountLeadingZeros(mantissa << (32 - 10)); 188 189 // Shift mantissa and discard implicit '1'. 190 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; 191 mantissa &= (1 << kFloatMantissaBits) - 1; 192 193 // Adjust the exponent for the shift applied, and rebias. 194 exponent = exponent - shift + (-15 + 127); 195 break; 196 } 197 198 case FP_NAN: 199 if (IsSignallingNaN(value)) { 200 FPProcessException(); 201 } 202 if (DN()) return kFP32DefaultNaN; 203 204 // Convert NaNs as the processor would: 205 // - The sign is propagated. 206 // - The payload (mantissa) is transferred entirely, except that the top 207 // bit is forced to '1', making the result a quiet NaN. The unused 208 // (low-order) payload bits are set to 0. 
209 exponent = (1 << kFloatExponentBits) - 1; 210 211 // Increase bits in mantissa, making low-order bits 0. 212 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 213 mantissa |= 1 << 22; // Force a quiet NaN. 214 break; 215 216 case FP_NORMAL: 217 // Increase bits in mantissa, making low-order bits 0. 218 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 219 220 // Change exponent bias. 221 exponent += (-15 + 127); 222 break; 223 224 default: VIXL_UNREACHABLE(); 225 } 226 return rawbits_to_float((sign << 31) | 227 (exponent << kFloatMantissaBits) | 228 mantissa); 229 } 230 231 FPToFloat16(float value,FPRounding round_mode)232 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { 233 // Only the FPTieEven rounding mode is implemented. 234 VIXL_ASSERT(round_mode == FPTieEven); 235 USE(round_mode); 236 237 uint32_t raw = float_to_rawbits(value); 238 int32_t sign = raw >> 31; 239 int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127; 240 uint32_t mantissa = unsigned_bitextract_32(22, 0, raw); 241 242 switch (std::fpclassify(value)) { 243 case FP_NAN: { 244 if (IsSignallingNaN(value)) { 245 FPProcessException(); 246 } 247 if (DN()) return kFP16DefaultNaN; 248 249 // Convert NaNs as the processor would: 250 // - The sign is propagated. 251 // - The payload (mantissa) is transferred as much as possible, except 252 // that the top bit is forced to '1', making the result a quiet NaN. 253 float16 result = (sign == 0) ? kFP16PositiveInfinity 254 : kFP16NegativeInfinity; 255 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); 256 result |= (1 << 9); // Force a quiet NaN; 257 return result; 258 } 259 260 case FP_ZERO: 261 return (sign == 0) ? 0 : 0x8000; 262 263 case FP_INFINITE: 264 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 265 266 case FP_NORMAL: 267 case FP_SUBNORMAL: { 268 // Convert float-to-half as the processor would, assuming that FPCR.FZ 269 // (flush-to-zero) is not set. 
270 271 // Add the implicit '1' bit to the mantissa. 272 mantissa += (1 << 23); 273 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 274 } 275 } 276 277 VIXL_UNREACHABLE(); 278 return 0; 279 } 280 281 FPToFloat16(double value,FPRounding round_mode)282 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { 283 // Only the FPTieEven rounding mode is implemented. 284 VIXL_ASSERT(round_mode == FPTieEven); 285 USE(round_mode); 286 287 uint64_t raw = double_to_rawbits(value); 288 int32_t sign = raw >> 63; 289 int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023; 290 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw); 291 292 switch (std::fpclassify(value)) { 293 case FP_NAN: { 294 if (IsSignallingNaN(value)) { 295 FPProcessException(); 296 } 297 if (DN()) return kFP16DefaultNaN; 298 299 // Convert NaNs as the processor would: 300 // - The sign is propagated. 301 // - The payload (mantissa) is transferred as much as possible, except 302 // that the top bit is forced to '1', making the result a quiet NaN. 303 float16 result = (sign == 0) ? kFP16PositiveInfinity 304 : kFP16NegativeInfinity; 305 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); 306 result |= (1 << 9); // Force a quiet NaN; 307 return result; 308 } 309 310 case FP_ZERO: 311 return (sign == 0) ? 0 : 0x8000; 312 313 case FP_INFINITE: 314 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 315 316 case FP_NORMAL: 317 case FP_SUBNORMAL: { 318 // Convert double-to-half as the processor would, assuming that FPCR.FZ 319 // (flush-to-zero) is not set. 320 321 // Add the implicit '1' bit to the mantissa. 322 mantissa += (UINT64_C(1) << 52); 323 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 324 } 325 } 326 327 VIXL_UNREACHABLE(); 328 return 0; 329 } 330 331 FPToFloat(double value,FPRounding round_mode)332 float Simulator::FPToFloat(double value, FPRounding round_mode) { 333 // Only the FPTieEven rounding mode is implemented. 
334 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); 335 USE(round_mode); 336 337 switch (std::fpclassify(value)) { 338 case FP_NAN: { 339 if (IsSignallingNaN(value)) { 340 FPProcessException(); 341 } 342 if (DN()) return kFP32DefaultNaN; 343 344 // Convert NaNs as the processor would: 345 // - The sign is propagated. 346 // - The payload (mantissa) is transferred as much as possible, except 347 // that the top bit is forced to '1', making the result a quiet NaN. 348 uint64_t raw = double_to_rawbits(value); 349 350 uint32_t sign = raw >> 63; 351 uint32_t exponent = (1 << 8) - 1; 352 uint32_t payload = 353 static_cast<uint32_t>(unsigned_bitextract_64(50, 52 - 23, raw)); 354 payload |= (1 << 22); // Force a quiet NaN. 355 356 return rawbits_to_float((sign << 31) | (exponent << 23) | payload); 357 } 358 359 case FP_ZERO: 360 case FP_INFINITE: { 361 // In a C++ cast, any value representable in the target type will be 362 // unchanged. This is always the case for +/-0.0 and infinities. 363 return static_cast<float>(value); 364 } 365 366 case FP_NORMAL: 367 case FP_SUBNORMAL: { 368 // Convert double-to-float as the processor would, assuming that FPCR.FZ 369 // (flush-to-zero) is not set. 370 uint64_t raw = double_to_rawbits(value); 371 // Extract the IEEE-754 double components. 372 uint32_t sign = raw >> 63; 373 // Extract the exponent and remove the IEEE-754 encoding bias. 374 int32_t exponent = 375 static_cast<int32_t>(unsigned_bitextract_64(62, 52, raw)) - 1023; 376 // Extract the mantissa and add the implicit '1' bit. 
377 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw); 378 if (std::fpclassify(value) == FP_NORMAL) { 379 mantissa |= (UINT64_C(1) << 52); 380 } 381 return FPRoundToFloat(sign, exponent, mantissa, round_mode); 382 } 383 } 384 385 VIXL_UNREACHABLE(); 386 return value; 387 } 388 389 ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)390 void Simulator::ld1(VectorFormat vform, 391 LogicVRegister dst, 392 uint64_t addr) { 393 dst.ClearForWrite(vform); 394 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 395 dst.ReadUintFromMem(vform, i, addr); 396 addr += LaneSizeInBytesFromFormat(vform); 397 } 398 } 399 400 ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)401 void Simulator::ld1(VectorFormat vform, 402 LogicVRegister dst, 403 int index, 404 uint64_t addr) { 405 dst.ReadUintFromMem(vform, index, addr); 406 } 407 408 ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)409 void Simulator::ld1r(VectorFormat vform, 410 LogicVRegister dst, 411 uint64_t addr) { 412 dst.ClearForWrite(vform); 413 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 414 dst.ReadUintFromMem(vform, i, addr); 415 } 416 } 417 418 ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)419 void Simulator::ld2(VectorFormat vform, 420 LogicVRegister dst1, 421 LogicVRegister dst2, 422 uint64_t addr1) { 423 dst1.ClearForWrite(vform); 424 dst2.ClearForWrite(vform); 425 int esize = LaneSizeInBytesFromFormat(vform); 426 uint64_t addr2 = addr1 + esize; 427 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 428 dst1.ReadUintFromMem(vform, i, addr1); 429 dst2.ReadUintFromMem(vform, i, addr2); 430 addr1 += 2 * esize; 431 addr2 += 2 * esize; 432 } 433 } 434 435 ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)436 void Simulator::ld2(VectorFormat vform, 437 LogicVRegister dst1, 438 LogicVRegister dst2, 439 int index, 440 uint64_t addr1) { 441 dst1.ClearForWrite(vform); 442 dst2.ClearForWrite(vform); 443 
uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 444 dst1.ReadUintFromMem(vform, index, addr1); 445 dst2.ReadUintFromMem(vform, index, addr2); 446 } 447 448 ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)449 void Simulator::ld2r(VectorFormat vform, 450 LogicVRegister dst1, 451 LogicVRegister dst2, 452 uint64_t addr) { 453 dst1.ClearForWrite(vform); 454 dst2.ClearForWrite(vform); 455 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 456 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 457 dst1.ReadUintFromMem(vform, i, addr); 458 dst2.ReadUintFromMem(vform, i, addr2); 459 } 460 } 461 462 ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)463 void Simulator::ld3(VectorFormat vform, 464 LogicVRegister dst1, 465 LogicVRegister dst2, 466 LogicVRegister dst3, 467 uint64_t addr1) { 468 dst1.ClearForWrite(vform); 469 dst2.ClearForWrite(vform); 470 dst3.ClearForWrite(vform); 471 int esize = LaneSizeInBytesFromFormat(vform); 472 uint64_t addr2 = addr1 + esize; 473 uint64_t addr3 = addr2 + esize; 474 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 475 dst1.ReadUintFromMem(vform, i, addr1); 476 dst2.ReadUintFromMem(vform, i, addr2); 477 dst3.ReadUintFromMem(vform, i, addr3); 478 addr1 += 3 * esize; 479 addr2 += 3 * esize; 480 addr3 += 3 * esize; 481 } 482 } 483 484 ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)485 void Simulator::ld3(VectorFormat vform, 486 LogicVRegister dst1, 487 LogicVRegister dst2, 488 LogicVRegister dst3, 489 int index, 490 uint64_t addr1) { 491 dst1.ClearForWrite(vform); 492 dst2.ClearForWrite(vform); 493 dst3.ClearForWrite(vform); 494 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 495 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 496 dst1.ReadUintFromMem(vform, index, addr1); 497 dst2.ReadUintFromMem(vform, index, addr2); 498 dst3.ReadUintFromMem(vform, index, 
addr3); 499 } 500 501 ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)502 void Simulator::ld3r(VectorFormat vform, 503 LogicVRegister dst1, 504 LogicVRegister dst2, 505 LogicVRegister dst3, 506 uint64_t addr) { 507 dst1.ClearForWrite(vform); 508 dst2.ClearForWrite(vform); 509 dst3.ClearForWrite(vform); 510 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 511 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 512 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 513 dst1.ReadUintFromMem(vform, i, addr); 514 dst2.ReadUintFromMem(vform, i, addr2); 515 dst3.ReadUintFromMem(vform, i, addr3); 516 } 517 } 518 519 ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)520 void Simulator::ld4(VectorFormat vform, 521 LogicVRegister dst1, 522 LogicVRegister dst2, 523 LogicVRegister dst3, 524 LogicVRegister dst4, 525 uint64_t addr1) { 526 dst1.ClearForWrite(vform); 527 dst2.ClearForWrite(vform); 528 dst3.ClearForWrite(vform); 529 dst4.ClearForWrite(vform); 530 int esize = LaneSizeInBytesFromFormat(vform); 531 uint64_t addr2 = addr1 + esize; 532 uint64_t addr3 = addr2 + esize; 533 uint64_t addr4 = addr3 + esize; 534 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 535 dst1.ReadUintFromMem(vform, i, addr1); 536 dst2.ReadUintFromMem(vform, i, addr2); 537 dst3.ReadUintFromMem(vform, i, addr3); 538 dst4.ReadUintFromMem(vform, i, addr4); 539 addr1 += 4 * esize; 540 addr2 += 4 * esize; 541 addr3 += 4 * esize; 542 addr4 += 4 * esize; 543 } 544 } 545 546 ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)547 void Simulator::ld4(VectorFormat vform, 548 LogicVRegister dst1, 549 LogicVRegister dst2, 550 LogicVRegister dst3, 551 LogicVRegister dst4, 552 int index, 553 uint64_t addr1) { 554 dst1.ClearForWrite(vform); 555 dst2.ClearForWrite(vform); 556 
dst3.ClearForWrite(vform); 557 dst4.ClearForWrite(vform); 558 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 559 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 560 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 561 dst1.ReadUintFromMem(vform, index, addr1); 562 dst2.ReadUintFromMem(vform, index, addr2); 563 dst3.ReadUintFromMem(vform, index, addr3); 564 dst4.ReadUintFromMem(vform, index, addr4); 565 } 566 567 ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)568 void Simulator::ld4r(VectorFormat vform, 569 LogicVRegister dst1, 570 LogicVRegister dst2, 571 LogicVRegister dst3, 572 LogicVRegister dst4, 573 uint64_t addr) { 574 dst1.ClearForWrite(vform); 575 dst2.ClearForWrite(vform); 576 dst3.ClearForWrite(vform); 577 dst4.ClearForWrite(vform); 578 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 579 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 580 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 581 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 582 dst1.ReadUintFromMem(vform, i, addr); 583 dst2.ReadUintFromMem(vform, i, addr2); 584 dst3.ReadUintFromMem(vform, i, addr3); 585 dst4.ReadUintFromMem(vform, i, addr4); 586 } 587 } 588 589 st1(VectorFormat vform,LogicVRegister src,uint64_t addr)590 void Simulator::st1(VectorFormat vform, 591 LogicVRegister src, 592 uint64_t addr) { 593 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 594 src.WriteUintToMem(vform, i, addr); 595 addr += LaneSizeInBytesFromFormat(vform); 596 } 597 } 598 599 st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)600 void Simulator::st1(VectorFormat vform, 601 LogicVRegister src, 602 int index, 603 uint64_t addr) { 604 src.WriteUintToMem(vform, index, addr); 605 } 606 607 st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)608 void Simulator::st2(VectorFormat vform, 609 LogicVRegister dst, 610 LogicVRegister 
dst2, 611 uint64_t addr) { 612 int esize = LaneSizeInBytesFromFormat(vform); 613 uint64_t addr2 = addr + esize; 614 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 615 dst.WriteUintToMem(vform, i, addr); 616 dst2.WriteUintToMem(vform, i, addr2); 617 addr += 2 * esize; 618 addr2 += 2 * esize; 619 } 620 } 621 622 st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)623 void Simulator::st2(VectorFormat vform, 624 LogicVRegister dst, 625 LogicVRegister dst2, 626 int index, 627 uint64_t addr) { 628 int esize = LaneSizeInBytesFromFormat(vform); 629 dst.WriteUintToMem(vform, index, addr); 630 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 631 } 632 633 st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)634 void Simulator::st3(VectorFormat vform, 635 LogicVRegister dst, 636 LogicVRegister dst2, 637 LogicVRegister dst3, 638 uint64_t addr) { 639 int esize = LaneSizeInBytesFromFormat(vform); 640 uint64_t addr2 = addr + esize; 641 uint64_t addr3 = addr2 + esize; 642 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 643 dst.WriteUintToMem(vform, i, addr); 644 dst2.WriteUintToMem(vform, i, addr2); 645 dst3.WriteUintToMem(vform, i, addr3); 646 addr += 3 * esize; 647 addr2 += 3 * esize; 648 addr3 += 3 * esize; 649 } 650 } 651 652 st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)653 void Simulator::st3(VectorFormat vform, 654 LogicVRegister dst, 655 LogicVRegister dst2, 656 LogicVRegister dst3, 657 int index, 658 uint64_t addr) { 659 int esize = LaneSizeInBytesFromFormat(vform); 660 dst.WriteUintToMem(vform, index, addr); 661 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 662 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 663 } 664 665 st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)666 void Simulator::st4(VectorFormat vform, 667 LogicVRegister dst, 668 
LogicVRegister dst2, 669 LogicVRegister dst3, 670 LogicVRegister dst4, 671 uint64_t addr) { 672 int esize = LaneSizeInBytesFromFormat(vform); 673 uint64_t addr2 = addr + esize; 674 uint64_t addr3 = addr2 + esize; 675 uint64_t addr4 = addr3 + esize; 676 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 677 dst.WriteUintToMem(vform, i, addr); 678 dst2.WriteUintToMem(vform, i, addr2); 679 dst3.WriteUintToMem(vform, i, addr3); 680 dst4.WriteUintToMem(vform, i, addr4); 681 addr += 4 * esize; 682 addr2 += 4 * esize; 683 addr3 += 4 * esize; 684 addr4 += 4 * esize; 685 } 686 } 687 688 st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)689 void Simulator::st4(VectorFormat vform, 690 LogicVRegister dst, 691 LogicVRegister dst2, 692 LogicVRegister dst3, 693 LogicVRegister dst4, 694 int index, 695 uint64_t addr) { 696 int esize = LaneSizeInBytesFromFormat(vform); 697 dst.WriteUintToMem(vform, index, addr); 698 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 699 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 700 dst4.WriteUintToMem(vform, index, addr + 3 * esize); 701 } 702 703 cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)704 LogicVRegister Simulator::cmp(VectorFormat vform, 705 LogicVRegister dst, 706 const LogicVRegister& src1, 707 const LogicVRegister& src2, 708 Condition cond) { 709 dst.ClearForWrite(vform); 710 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 711 int64_t sa = src1.Int(vform, i); 712 int64_t sb = src2.Int(vform, i); 713 uint64_t ua = src1.Uint(vform, i); 714 uint64_t ub = src2.Uint(vform, i); 715 bool result = false; 716 switch (cond) { 717 case eq: result = (ua == ub); break; 718 case ge: result = (sa >= sb); break; 719 case gt: result = (sa > sb) ; break; 720 case hi: result = (ua > ub) ; break; 721 case hs: result = (ua >= ub); break; 722 case lt: result = (sa < sb) ; break; 723 case le: 
result = (sa <= sb); break; 724 default: VIXL_UNREACHABLE(); break; 725 } 726 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 727 } 728 return dst; 729 } 730 731 cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)732 LogicVRegister Simulator::cmp(VectorFormat vform, 733 LogicVRegister dst, 734 const LogicVRegister& src1, 735 int imm, 736 Condition cond) { 737 SimVRegister temp; 738 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); 739 return cmp(vform, dst, src1, imm_reg, cond); 740 } 741 742 cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)743 LogicVRegister Simulator::cmptst(VectorFormat vform, 744 LogicVRegister dst, 745 const LogicVRegister& src1, 746 const LogicVRegister& src2) { 747 dst.ClearForWrite(vform); 748 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 749 uint64_t ua = src1.Uint(vform, i); 750 uint64_t ub = src2.Uint(vform, i); 751 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); 752 } 753 return dst; 754 } 755 756 add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)757 LogicVRegister Simulator::add(VectorFormat vform, 758 LogicVRegister dst, 759 const LogicVRegister& src1, 760 const LogicVRegister& src2) { 761 dst.ClearForWrite(vform); 762 // TODO(all): consider assigning the result of LaneCountFromFormat to a local. 763 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 764 // Test for unsigned saturation. 765 uint64_t ua = src1.UintLeftJustified(vform, i); 766 uint64_t ub = src2.UintLeftJustified(vform, i); 767 uint64_t ur = ua + ub; 768 if (ur < ua) { 769 dst.SetUnsignedSat(i, true); 770 } 771 772 // Test for signed saturation. 
773 int64_t sa = src1.IntLeftJustified(vform, i); 774 int64_t sb = src2.IntLeftJustified(vform, i); 775 int64_t sr = sa + sb; 776 // If the signs of the operands are the same, but different from the result, 777 // there was an overflow. 778 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 779 dst.SetSignedSat(i, sa >= 0); 780 } 781 782 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i)); 783 } 784 return dst; 785 } 786 787 addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)788 LogicVRegister Simulator::addp(VectorFormat vform, 789 LogicVRegister dst, 790 const LogicVRegister& src1, 791 const LogicVRegister& src2) { 792 SimVRegister temp1, temp2; 793 uzp1(vform, temp1, src1, src2); 794 uzp2(vform, temp2, src1, src2); 795 add(vform, dst, temp1, temp2); 796 return dst; 797 } 798 799 mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)800 LogicVRegister Simulator::mla(VectorFormat vform, 801 LogicVRegister dst, 802 const LogicVRegister& src1, 803 const LogicVRegister& src2) { 804 SimVRegister temp; 805 mul(vform, temp, src1, src2); 806 add(vform, dst, dst, temp); 807 return dst; 808 } 809 810 mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)811 LogicVRegister Simulator::mls(VectorFormat vform, 812 LogicVRegister dst, 813 const LogicVRegister& src1, 814 const LogicVRegister& src2) { 815 SimVRegister temp; 816 mul(vform, temp, src1, src2); 817 sub(vform, dst, dst, temp); 818 return dst; 819 } 820 821 mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)822 LogicVRegister Simulator::mul(VectorFormat vform, 823 LogicVRegister dst, 824 const LogicVRegister& src1, 825 const LogicVRegister& src2) { 826 dst.ClearForWrite(vform); 827 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 828 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); 829 } 830 return 
dst; 831 } 832 833 mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)834 LogicVRegister Simulator::mul(VectorFormat vform, 835 LogicVRegister dst, 836 const LogicVRegister& src1, 837 const LogicVRegister& src2, 838 int index) { 839 SimVRegister temp; 840 VectorFormat indexform = VectorFormatFillQ(vform); 841 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); 842 } 843 844 mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)845 LogicVRegister Simulator::mla(VectorFormat vform, 846 LogicVRegister dst, 847 const LogicVRegister& src1, 848 const LogicVRegister& src2, 849 int index) { 850 SimVRegister temp; 851 VectorFormat indexform = VectorFormatFillQ(vform); 852 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); 853 } 854 855 mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)856 LogicVRegister Simulator::mls(VectorFormat vform, 857 LogicVRegister dst, 858 const LogicVRegister& src1, 859 const LogicVRegister& src2, 860 int index) { 861 SimVRegister temp; 862 VectorFormat indexform = VectorFormatFillQ(vform); 863 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); 864 } 865 866 smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)867 LogicVRegister Simulator::smull(VectorFormat vform, 868 LogicVRegister dst, 869 const LogicVRegister& src1, 870 const LogicVRegister& src2, 871 int index) { 872 SimVRegister temp; 873 VectorFormat indexform = 874 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 875 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 876 } 877 878 smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)879 LogicVRegister Simulator::smull2(VectorFormat vform, 880 LogicVRegister dst, 
881 const LogicVRegister& src1, 882 const LogicVRegister& src2, 883 int index) { 884 SimVRegister temp; 885 VectorFormat indexform = 886 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 887 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 888 } 889 890 umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)891 LogicVRegister Simulator::umull(VectorFormat vform, 892 LogicVRegister dst, 893 const LogicVRegister& src1, 894 const LogicVRegister& src2, 895 int index) { 896 SimVRegister temp; 897 VectorFormat indexform = 898 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 899 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 900 } 901 902 umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)903 LogicVRegister Simulator::umull2(VectorFormat vform, 904 LogicVRegister dst, 905 const LogicVRegister& src1, 906 const LogicVRegister& src2, 907 int index) { 908 SimVRegister temp; 909 VectorFormat indexform = 910 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 911 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 912 } 913 914 smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)915 LogicVRegister Simulator::smlal(VectorFormat vform, 916 LogicVRegister dst, 917 const LogicVRegister& src1, 918 const LogicVRegister& src2, 919 int index) { 920 SimVRegister temp; 921 VectorFormat indexform = 922 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 923 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 924 } 925 926 smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)927 LogicVRegister Simulator::smlal2(VectorFormat vform, 928 LogicVRegister dst, 929 const LogicVRegister& src1, 930 const LogicVRegister& src2, 931 int 
index) { 932 SimVRegister temp; 933 VectorFormat indexform = 934 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 935 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 936 } 937 938 umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)939 LogicVRegister Simulator::umlal(VectorFormat vform, 940 LogicVRegister dst, 941 const LogicVRegister& src1, 942 const LogicVRegister& src2, 943 int index) { 944 SimVRegister temp; 945 VectorFormat indexform = 946 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 947 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 948 } 949 950 umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)951 LogicVRegister Simulator::umlal2(VectorFormat vform, 952 LogicVRegister dst, 953 const LogicVRegister& src1, 954 const LogicVRegister& src2, 955 int index) { 956 SimVRegister temp; 957 VectorFormat indexform = 958 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 959 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 960 } 961 962 smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)963 LogicVRegister Simulator::smlsl(VectorFormat vform, 964 LogicVRegister dst, 965 const LogicVRegister& src1, 966 const LogicVRegister& src2, 967 int index) { 968 SimVRegister temp; 969 VectorFormat indexform = 970 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 971 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 972 } 973 974 smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)975 LogicVRegister Simulator::smlsl2(VectorFormat vform, 976 LogicVRegister dst, 977 const LogicVRegister& src1, 978 const LogicVRegister& src2, 979 int index) { 980 SimVRegister temp; 981 VectorFormat indexform = 982 
VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 983 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 984 } 985 986 umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)987 LogicVRegister Simulator::umlsl(VectorFormat vform, 988 LogicVRegister dst, 989 const LogicVRegister& src1, 990 const LogicVRegister& src2, 991 int index) { 992 SimVRegister temp; 993 VectorFormat indexform = 994 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 995 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 996 } 997 998 umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)999 LogicVRegister Simulator::umlsl2(VectorFormat vform, 1000 LogicVRegister dst, 1001 const LogicVRegister& src1, 1002 const LogicVRegister& src2, 1003 int index) { 1004 SimVRegister temp; 1005 VectorFormat indexform = 1006 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1007 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1008 } 1009 1010 sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1011 LogicVRegister Simulator::sqdmull(VectorFormat vform, 1012 LogicVRegister dst, 1013 const LogicVRegister& src1, 1014 const LogicVRegister& src2, 1015 int index) { 1016 SimVRegister temp; 1017 VectorFormat indexform = 1018 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1019 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1020 } 1021 1022 sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1023 LogicVRegister Simulator::sqdmull2(VectorFormat vform, 1024 LogicVRegister dst, 1025 const LogicVRegister& src1, 1026 const LogicVRegister& src2, 1027 int index) { 1028 SimVRegister temp; 1029 VectorFormat indexform = 1030 
VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1031 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1032 } 1033 1034 sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1035 LogicVRegister Simulator::sqdmlal(VectorFormat vform, 1036 LogicVRegister dst, 1037 const LogicVRegister& src1, 1038 const LogicVRegister& src2, 1039 int index) { 1040 SimVRegister temp; 1041 VectorFormat indexform = 1042 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1043 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1044 } 1045 1046 sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1047 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 1048 LogicVRegister dst, 1049 const LogicVRegister& src1, 1050 const LogicVRegister& src2, 1051 int index) { 1052 SimVRegister temp; 1053 VectorFormat indexform = 1054 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1055 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1056 } 1057 1058 sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1059 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 1060 LogicVRegister dst, 1061 const LogicVRegister& src1, 1062 const LogicVRegister& src2, 1063 int index) { 1064 SimVRegister temp; 1065 VectorFormat indexform = 1066 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1067 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1068 } 1069 1070 sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1071 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 1072 LogicVRegister dst, 1073 const LogicVRegister& src1, 1074 const LogicVRegister& src2, 1075 int index) { 1076 SimVRegister temp; 1077 VectorFormat indexform = 1078 
VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1079 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1080 } 1081 1082 sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1083 LogicVRegister Simulator::sqdmulh(VectorFormat vform, 1084 LogicVRegister dst, 1085 const LogicVRegister& src1, 1086 const LogicVRegister& src2, 1087 int index) { 1088 SimVRegister temp; 1089 VectorFormat indexform = VectorFormatFillQ(vform); 1090 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1091 } 1092 1093 sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1094 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 1095 LogicVRegister dst, 1096 const LogicVRegister& src1, 1097 const LogicVRegister& src2, 1098 int index) { 1099 SimVRegister temp; 1100 VectorFormat indexform = VectorFormatFillQ(vform); 1101 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1102 } 1103 1104 PolynomialMult(uint8_t op1,uint8_t op2)1105 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) { 1106 uint16_t result = 0; 1107 uint16_t extended_op2 = op2; 1108 for (int i = 0; i < 8; ++i) { 1109 if ((op1 >> i) & 1) { 1110 result = result ^ (extended_op2 << i); 1111 } 1112 } 1113 return result; 1114 } 1115 1116 pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1117 LogicVRegister Simulator::pmul(VectorFormat vform, 1118 LogicVRegister dst, 1119 const LogicVRegister& src1, 1120 const LogicVRegister& src2) { 1121 dst.ClearForWrite(vform); 1122 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1123 dst.SetUint(vform, i, 1124 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); 1125 } 1126 return dst; 1127 } 1128 1129 pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1130 LogicVRegister 
Simulator::pmull(VectorFormat vform, 1131 LogicVRegister dst, 1132 const LogicVRegister& src1, 1133 const LogicVRegister& src2) { 1134 VectorFormat vform_src = VectorFormatHalfWidth(vform); 1135 dst.ClearForWrite(vform); 1136 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1137 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i), 1138 src2.Uint(vform_src, i))); 1139 } 1140 return dst; 1141 } 1142 1143 pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1144 LogicVRegister Simulator::pmull2(VectorFormat vform, 1145 LogicVRegister dst, 1146 const LogicVRegister& src1, 1147 const LogicVRegister& src2) { 1148 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); 1149 dst.ClearForWrite(vform); 1150 int lane_count = LaneCountFromFormat(vform); 1151 for (int i = 0; i < lane_count; i++) { 1152 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i), 1153 src2.Uint(vform_src, lane_count + i))); 1154 } 1155 return dst; 1156 } 1157 1158 sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1159 LogicVRegister Simulator::sub(VectorFormat vform, 1160 LogicVRegister dst, 1161 const LogicVRegister& src1, 1162 const LogicVRegister& src2) { 1163 dst.ClearForWrite(vform); 1164 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1165 // Test for unsigned saturation. 1166 if (src2.Uint(vform, i) > src1.Uint(vform, i)) { 1167 dst.SetUnsignedSat(i, false); 1168 } 1169 1170 // Test for signed saturation. 1171 int64_t sa = src1.IntLeftJustified(vform, i); 1172 int64_t sb = src2.IntLeftJustified(vform, i); 1173 int64_t sr = sa - sb; 1174 // If the signs of the operands are different, and the sign of the first 1175 // operand doesn't match the result, there was an overflow. 
1176 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 1177 dst.SetSignedSat(i, sr < 0); 1178 } 1179 1180 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i)); 1181 } 1182 return dst; 1183 } 1184 1185 and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1186 LogicVRegister Simulator::and_(VectorFormat vform, 1187 LogicVRegister dst, 1188 const LogicVRegister& src1, 1189 const LogicVRegister& src2) { 1190 dst.ClearForWrite(vform); 1191 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1192 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); 1193 } 1194 return dst; 1195 } 1196 1197 orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1198 LogicVRegister Simulator::orr(VectorFormat vform, 1199 LogicVRegister dst, 1200 const LogicVRegister& src1, 1201 const LogicVRegister& src2) { 1202 dst.ClearForWrite(vform); 1203 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1204 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); 1205 } 1206 return dst; 1207 } 1208 1209 orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1210 LogicVRegister Simulator::orn(VectorFormat vform, 1211 LogicVRegister dst, 1212 const LogicVRegister& src1, 1213 const LogicVRegister& src2) { 1214 dst.ClearForWrite(vform); 1215 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1216 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); 1217 } 1218 return dst; 1219 } 1220 1221 eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1222 LogicVRegister Simulator::eor(VectorFormat vform, 1223 LogicVRegister dst, 1224 const LogicVRegister& src1, 1225 const LogicVRegister& src2) { 1226 dst.ClearForWrite(vform); 1227 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1228 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); 1229 } 1230 return 
dst; 1231 } 1232 1233 bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1234 LogicVRegister Simulator::bic(VectorFormat vform, 1235 LogicVRegister dst, 1236 const LogicVRegister& src1, 1237 const LogicVRegister& src2) { 1238 dst.ClearForWrite(vform); 1239 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1240 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); 1241 } 1242 return dst; 1243 } 1244 1245 bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1246 LogicVRegister Simulator::bic(VectorFormat vform, 1247 LogicVRegister dst, 1248 const LogicVRegister& src, 1249 uint64_t imm) { 1250 uint64_t result[16]; 1251 int laneCount = LaneCountFromFormat(vform); 1252 for (int i = 0; i < laneCount; ++i) { 1253 result[i] = src.Uint(vform, i) & ~imm; 1254 } 1255 dst.ClearForWrite(vform); 1256 for (int i = 0; i < laneCount; ++i) { 1257 dst.SetUint(vform, i, result[i]); 1258 } 1259 return dst; 1260 } 1261 1262 bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1263 LogicVRegister Simulator::bif(VectorFormat vform, 1264 LogicVRegister dst, 1265 const LogicVRegister& src1, 1266 const LogicVRegister& src2) { 1267 dst.ClearForWrite(vform); 1268 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1269 uint64_t operand1 = dst.Uint(vform, i); 1270 uint64_t operand2 = ~src2.Uint(vform, i); 1271 uint64_t operand3 = src1.Uint(vform, i); 1272 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1273 dst.SetUint(vform, i, result); 1274 } 1275 return dst; 1276 } 1277 1278 bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1279 LogicVRegister Simulator::bit(VectorFormat vform, 1280 LogicVRegister dst, 1281 const LogicVRegister& src1, 1282 const LogicVRegister& src2) { 1283 dst.ClearForWrite(vform); 1284 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1285 uint64_t operand1 = 
dst.Uint(vform, i); 1286 uint64_t operand2 = src2.Uint(vform, i); 1287 uint64_t operand3 = src1.Uint(vform, i); 1288 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1289 dst.SetUint(vform, i, result); 1290 } 1291 return dst; 1292 } 1293 1294 bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1295 LogicVRegister Simulator::bsl(VectorFormat vform, 1296 LogicVRegister dst, 1297 const LogicVRegister& src1, 1298 const LogicVRegister& src2) { 1299 dst.ClearForWrite(vform); 1300 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1301 uint64_t operand1 = src2.Uint(vform, i); 1302 uint64_t operand2 = dst.Uint(vform, i); 1303 uint64_t operand3 = src1.Uint(vform, i); 1304 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1305 dst.SetUint(vform, i, result); 1306 } 1307 return dst; 1308 } 1309 1310 sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1311 LogicVRegister Simulator::sminmax(VectorFormat vform, 1312 LogicVRegister dst, 1313 const LogicVRegister& src1, 1314 const LogicVRegister& src2, 1315 bool max) { 1316 dst.ClearForWrite(vform); 1317 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1318 int64_t src1_val = src1.Int(vform, i); 1319 int64_t src2_val = src2.Int(vform, i); 1320 int64_t dst_val; 1321 if (max == true) { 1322 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1323 } else { 1324 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1325 } 1326 dst.SetInt(vform, i, dst_val); 1327 } 1328 return dst; 1329 } 1330 1331 smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1332 LogicVRegister Simulator::smax(VectorFormat vform, 1333 LogicVRegister dst, 1334 const LogicVRegister& src1, 1335 const LogicVRegister& src2) { 1336 return sminmax(vform, dst, src1, src2, true); 1337 } 1338 1339 smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1340 LogicVRegister Simulator::smin(VectorFormat vform, 1341 LogicVRegister dst, 1342 const LogicVRegister& src1, 1343 const LogicVRegister& src2) { 1344 return sminmax(vform, dst, src1, src2, false); 1345 } 1346 1347 sminmaxp(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,bool max)1348 LogicVRegister Simulator::sminmaxp(VectorFormat vform, 1349 LogicVRegister dst, 1350 int dst_index, 1351 const LogicVRegister& src, 1352 bool max) { 1353 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1354 int64_t src1_val = src.Int(vform, i); 1355 int64_t src2_val = src.Int(vform, i + 1); 1356 int64_t dst_val; 1357 if (max == true) { 1358 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1359 } else { 1360 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1361 } 1362 dst.SetInt(vform, dst_index + (i >> 1), dst_val); 1363 } 1364 return dst; 1365 } 1366 1367 smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1368 LogicVRegister Simulator::smaxp(VectorFormat vform, 1369 LogicVRegister dst, 1370 const LogicVRegister& src1, 1371 const LogicVRegister& src2) { 1372 dst.ClearForWrite(vform); 1373 sminmaxp(vform, dst, 0, src1, true); 1374 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true); 1375 return dst; 1376 } 1377 1378 sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1379 LogicVRegister Simulator::sminp(VectorFormat vform, 1380 LogicVRegister dst, 1381 const LogicVRegister& src1, 1382 const LogicVRegister& src2) { 1383 dst.ClearForWrite(vform); 1384 sminmaxp(vform, dst, 0, src1, false); 1385 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false); 1386 return dst; 1387 } 1388 1389 addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1390 LogicVRegister Simulator::addp(VectorFormat vform, 1391 LogicVRegister dst, 1392 const LogicVRegister& src) { 1393 VIXL_ASSERT(vform == kFormatD); 1394 1395 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1); 1396 dst.ClearForWrite(vform); 1397 dst.SetInt(vform, 0, dst_val); 1398 return dst; 1399 } 1400 1401 addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1402 LogicVRegister Simulator::addv(VectorFormat vform, 1403 LogicVRegister dst, 1404 const LogicVRegister& src) { 1405 VectorFormat vform_dst 1406 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1407 1408 1409 int64_t dst_val = 0; 1410 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1411 dst_val += src.Int(vform, i); 1412 } 1413 1414 dst.ClearForWrite(vform_dst); 1415 dst.SetInt(vform_dst, 0, dst_val); 1416 return dst; 1417 } 1418 1419 saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1420 
// Signed widening add across lanes.
//
// Sums every lane of `src`, read as a signed integer in `vform`, and stores
// the total in lane 0 of `dst`. The destination is written with the scalar
// format whose lane size is twice the lane size of `vform`.
//
// All source lanes are read before `dst` is written, so `dst` may alias `src`.
LogicVRegister Simulator::saddlv(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  VectorFormat vform_dst =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

  int64_t dst_val = 0;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst_val += src.Int(vform, i);
  }

  dst.ClearForWrite(vform_dst);
  dst.SetInt(vform_dst, 0, dst_val);
  return dst;
}


// Unsigned widening add across lanes.
//
// Same as saddlv, except that the lanes of `src` are read as unsigned values
// and the total is stored with SetUint.
LogicVRegister Simulator::uaddlv(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  VectorFormat vform_dst =
      ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

  uint64_t dst_val = 0;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst_val += src.Uint(vform, i);
  }

  dst.ClearForWrite(vform_dst);
  dst.SetUint(vform_dst, 0, dst_val);
  return dst;
}


// Signed minimum/maximum across lanes.
//
// Reduces all lanes of `src` (read as signed integers in `vform`) to their
// maximum when `max` is true, or to their minimum otherwise. On return every
// lane of `dst` in `vform` is zero except lane 0, which holds the result.
//
// The reduction is completed before any lane of `dst` is written. Zeroing
// destination lanes while the source is still being read would feed zeroes
// into the reduction whenever `dst` and `src` refer to the same register.
LogicVRegister Simulator::sminmaxv(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   bool max) {
  const int lane_count = LaneCountFromFormat(vform);

  // Start from the identity of the selected operation.
  int64_t dst_val = max ? INT64_MIN : INT64_MAX;
  for (int i = 0; i < lane_count; i++) {
    int64_t src_val = src.Int(vform, i);
    if (max) {
      dst_val = (src_val > dst_val) ? src_val : dst_val;
    } else {
      dst_val = (src_val < dst_val) ? src_val : dst_val;
    }
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < lane_count; i++) {
    dst.SetInt(vform, i, 0);
  }
  dst.SetInt(vform, 0, dst_val);
  return dst;
}


// Signed maximum across lanes; see sminmaxv.
LogicVRegister Simulator::smaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  sminmaxv(vform, dst, src, true);
  return dst;
}


// Signed minimum across lanes; see sminmaxv.
LogicVRegister Simulator::sminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  sminmaxv(vform, dst, src, false);
  return dst;
}


// Lane-wise unsigned minimum/maximum.
//
// For each lane i of `vform`, writes to `dst` the larger (when `max` is true)
// or the smaller (otherwise) of the unsigned values in lane i of `src1` and
// `src2`. Lane i of both sources is read before lane i of `dst` is written.
LogicVRegister Simulator::uminmax(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool max) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t src1_val = src1.Uint(vform, i);
    uint64_t src2_val = src2.Uint(vform, i);
    uint64_t dst_val;
    if (max) {
      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
    } else {
      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
    }
    dst.SetUint(vform, i, dst_val);
  }
  return dst;
}


// Lane-wise unsigned maximum; see uminmax.
LogicVRegister Simulator::umax(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  return uminmax(vform, dst, src1, src2, true);
}


// Lane-wise unsigned minimum; see uminmax.
LogicVRegister Simulator::umin(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  return uminmax(vform, dst, src1, src2, false);
}


// Pairwise unsigned minimum/maximum of a single source register.
//
// Each adjacent pair of lanes (2k, 2k + 1) of `src` is reduced to its maximum
// (when `max` is true) or minimum (otherwise), and the result is written to
// lane `dst_index + k` of `dst`. Only those lanes of `dst` are written; this
// helper does not call ClearForWrite.
//
// Destination lanes are written while the source is still being walked, so a
// caller that makes several passes into the same `dst` must make sure `dst`
// does not alias a source that a later pass still has to read.
LogicVRegister Simulator::uminmaxp(VectorFormat vform,
                                   LogicVRegister dst,
                                   int dst_index,
                                   const LogicVRegister& src,
                                   bool max) {
  for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
    uint64_t src1_val = src.Uint(vform, i);
    uint64_t src2_val = src.Uint(vform, i + 1);
    uint64_t dst_val;
    if (max) {
      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
    } else {
      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
    }
    dst.SetUint(vform, dst_index + (i >> 1), dst_val);
  }
  return dst;
}


// Pairwise unsigned maximum.
//
// The lower half of the result holds the pairwise maxima of `src1` and the
// upper half holds the pairwise maxima of `src2`.
//
// Both halves are built in a scratch register and only then copied to `dst`.
// Writing the lower half straight into `dst` would overwrite the low lanes of
// `src2` before they are read whenever `dst` and `src2` are the same register.
LogicVRegister Simulator::umaxp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);

  SimVRegister temp;
  LogicVRegister pairs = uminmaxp(vform, temp, 0, src1, true);
  uminmaxp(vform, pairs, lane_count >> 1, src2, true);

  dst.ClearForWrite(vform);
  for (int i = 0; i < lane_count; i++) {
    dst.SetUint(vform, i, pairs.Uint(vform, i));
  }
  return dst;
}


// Pairwise unsigned minimum.
//
// The lower half of the result holds the pairwise minima of `src1` and the
// upper half holds the pairwise minima of `src2`. As in umaxp, the result is
// staged in a scratch register so that `dst` may alias either source.
LogicVRegister Simulator::uminp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);

  SimVRegister temp;
  LogicVRegister pairs = uminmaxp(vform, temp, 0, src1, false);
  uminmaxp(vform, pairs, lane_count >> 1, src2, false);

  dst.ClearForWrite(vform);
  for (int i = 0; i < lane_count; i++) {
    dst.SetUint(vform, i, pairs.Uint(vform, i));
  }
  return dst;
}


// Unsigned minimum/maximum across lanes.
//
// Reduces all lanes of `src` (read as unsigned integers in `vform`) to their
// maximum when `max` is true, or to their minimum otherwise. On return every
// lane of `dst` in `vform` is zero except lane 0, which holds the result.
//
// As in sminmaxv, the source is fully reduced before `dst` is modified so
// that the result is correct when `dst` and `src` are the same register.
LogicVRegister Simulator::uminmaxv(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src,
                                   bool max) {
  const int lane_count = LaneCountFromFormat(vform);

  // Start from the identity of the selected operation.
  uint64_t dst_val = max ? 0 : UINT64_MAX;
  for (int i = 0; i < lane_count; i++) {
    uint64_t src_val = src.Uint(vform, i);
    if (max) {
      dst_val = (src_val > dst_val) ? src_val : dst_val;
    } else {
      dst_val = (src_val < dst_val) ? src_val : dst_val;
    }
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < lane_count; i++) {
    dst.SetUint(vform, i, 0);
  }
  dst.SetUint(vform, 0, dst_val);
  return dst;
}


// Unsigned maximum across lanes; see uminmaxv.
LogicVRegister Simulator::umaxv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  uminmaxv(vform, dst, src, true);
  return dst;
}


// Unsigned minimum across lanes; see uminmaxv.
LogicVRegister Simulator::uminv(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  uminmaxv(vform, dst, src, false);
  return dst;
}


// Shift left by immediate.
//
// Builds a shift register from `shift` with dup_immediate and delegates to
// the register-shift form ushl. `shift` must be non-negative.
LogicVRegister Simulator::shl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister temp;
  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
  return ushl(vform, dst, src, shiftreg);
}


// Signed shift left long by immediate.
//
// Extends `src` with sxtl into a scratch register, then shifts the extended
// value left by `shift` using sshl. `shift` must be non-negative.
// NOTE(review): sxtl is defined elsewhere; presumably it sign-extends the
// lower half of `src` to the lane size of `vform` - confirm.
LogicVRegister Simulator::sshll(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister temp1, temp2;
  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
  LogicVRegister extendedreg = sxtl(vform, temp2, src);
  return sshl(vform, dst, extendedreg, shiftreg);
}


// Signed shift left long (second part) by immediate.
//
// Same as sshll, except that the source is extended with sxtl2.
// NOTE(review): sxtl2 is defined elsewhere; presumably it sign-extends the
// upper half of `src` - confirm.
LogicVRegister Simulator::sshll2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister temp1, temp2;
  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
  LogicVRegister extendedreg = sxtl2(vform, temp2, src);
  return sshl(vform, dst, extendedreg, shiftreg);
}
dst,const LogicVRegister & src)1640 LogicVRegister Simulator::shll(VectorFormat vform, 1641 LogicVRegister dst, 1642 const LogicVRegister& src) { 1643 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1644 return sshll(vform, dst, src, shift); 1645 } 1646 1647 shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1648 LogicVRegister Simulator::shll2(VectorFormat vform, 1649 LogicVRegister dst, 1650 const LogicVRegister& src) { 1651 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1652 return sshll2(vform, dst, src, shift); 1653 } 1654 1655 ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1656 LogicVRegister Simulator::ushll(VectorFormat vform, 1657 LogicVRegister dst, 1658 const LogicVRegister& src, 1659 int shift) { 1660 VIXL_ASSERT(shift >= 0); 1661 SimVRegister temp1, temp2; 1662 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1663 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1664 return ushl(vform, dst, extendedreg, shiftreg); 1665 } 1666 1667 ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1668 LogicVRegister Simulator::ushll2(VectorFormat vform, 1669 LogicVRegister dst, 1670 const LogicVRegister& src, 1671 int shift) { 1672 VIXL_ASSERT(shift >= 0); 1673 SimVRegister temp1, temp2; 1674 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1675 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1676 return ushl(vform, dst, extendedreg, shiftreg); 1677 } 1678 1679 sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1680 LogicVRegister Simulator::sli(VectorFormat vform, 1681 LogicVRegister dst, 1682 const LogicVRegister& src, 1683 int shift) { 1684 dst.ClearForWrite(vform); 1685 int laneCount = LaneCountFromFormat(vform); 1686 for (int i = 0; i < laneCount; i++) { 1687 uint64_t src_lane = src.Uint(vform, i); 1688 uint64_t dst_lane = dst.Uint(vform, i); 1689 uint64_t shifted = src_lane << shift; 1690 uint64_t mask = 
MaxUintFromFormat(vform) << shift; 1691 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1692 } 1693 return dst; 1694 } 1695 1696 sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1697 LogicVRegister Simulator::sqshl(VectorFormat vform, 1698 LogicVRegister dst, 1699 const LogicVRegister& src, 1700 int shift) { 1701 VIXL_ASSERT(shift >= 0); 1702 SimVRegister temp; 1703 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1704 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1705 } 1706 1707 uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1708 LogicVRegister Simulator::uqshl(VectorFormat vform, 1709 LogicVRegister dst, 1710 const LogicVRegister& src, 1711 int shift) { 1712 VIXL_ASSERT(shift >= 0); 1713 SimVRegister temp; 1714 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1715 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1716 } 1717 1718 sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1719 LogicVRegister Simulator::sqshlu(VectorFormat vform, 1720 LogicVRegister dst, 1721 const LogicVRegister& src, 1722 int shift) { 1723 VIXL_ASSERT(shift >= 0); 1724 SimVRegister temp; 1725 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1726 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1727 } 1728 1729 sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1730 LogicVRegister Simulator::sri(VectorFormat vform, 1731 LogicVRegister dst, 1732 const LogicVRegister& src, 1733 int shift) { 1734 dst.ClearForWrite(vform); 1735 int laneCount = LaneCountFromFormat(vform); 1736 VIXL_ASSERT((shift > 0) && 1737 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1738 for (int i = 0; i < laneCount; i++) { 1739 uint64_t src_lane = src.Uint(vform, i); 1740 uint64_t dst_lane = dst.Uint(vform, i); 1741 uint64_t shifted; 1742 uint64_t mask; 1743 if (shift == 64) { 1744 shifted = 0; 
1745 mask = 0; 1746 } else { 1747 shifted = src_lane >> shift; 1748 mask = MaxUintFromFormat(vform) >> shift; 1749 } 1750 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1751 } 1752 return dst; 1753 } 1754 1755 ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1756 LogicVRegister Simulator::ushr(VectorFormat vform, 1757 LogicVRegister dst, 1758 const LogicVRegister& src, 1759 int shift) { 1760 VIXL_ASSERT(shift >= 0); 1761 SimVRegister temp; 1762 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1763 return ushl(vform, dst, src, shiftreg); 1764 } 1765 1766 sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1767 LogicVRegister Simulator::sshr(VectorFormat vform, 1768 LogicVRegister dst, 1769 const LogicVRegister& src, 1770 int shift) { 1771 VIXL_ASSERT(shift >= 0); 1772 SimVRegister temp; 1773 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1774 return sshl(vform, dst, src, shiftreg); 1775 } 1776 1777 ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1778 LogicVRegister Simulator::ssra(VectorFormat vform, 1779 LogicVRegister dst, 1780 const LogicVRegister& src, 1781 int shift) { 1782 SimVRegister temp; 1783 LogicVRegister shifted_reg = sshr(vform, temp, src, shift); 1784 return add(vform, dst, dst, shifted_reg); 1785 } 1786 1787 usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1788 LogicVRegister Simulator::usra(VectorFormat vform, 1789 LogicVRegister dst, 1790 const LogicVRegister& src, 1791 int shift) { 1792 SimVRegister temp; 1793 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1794 return add(vform, dst, dst, shifted_reg); 1795 } 1796 1797 srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1798 LogicVRegister Simulator::srsra(VectorFormat vform, 1799 LogicVRegister dst, 1800 const LogicVRegister& src, 1801 int shift) { 1802 SimVRegister temp; 1803 LogicVRegister 
shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1804 return add(vform, dst, dst, shifted_reg); 1805 } 1806 1807 ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1808 LogicVRegister Simulator::ursra(VectorFormat vform, 1809 LogicVRegister dst, 1810 const LogicVRegister& src, 1811 int shift) { 1812 SimVRegister temp; 1813 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1814 return add(vform, dst, dst, shifted_reg); 1815 } 1816 1817 cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1818 LogicVRegister Simulator::cls(VectorFormat vform, 1819 LogicVRegister dst, 1820 const LogicVRegister& src) { 1821 uint64_t result[16]; 1822 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1823 int laneCount = LaneCountFromFormat(vform); 1824 for (int i = 0; i < laneCount; i++) { 1825 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); 1826 } 1827 1828 dst.ClearForWrite(vform); 1829 for (int i = 0; i < laneCount; ++i) { 1830 dst.SetUint(vform, i, result[i]); 1831 } 1832 return dst; 1833 } 1834 1835 clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1836 LogicVRegister Simulator::clz(VectorFormat vform, 1837 LogicVRegister dst, 1838 const LogicVRegister& src) { 1839 uint64_t result[16]; 1840 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1841 int laneCount = LaneCountFromFormat(vform); 1842 for (int i = 0; i < laneCount; i++) { 1843 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); 1844 } 1845 1846 dst.ClearForWrite(vform); 1847 for (int i = 0; i < laneCount; ++i) { 1848 dst.SetUint(vform, i, result[i]); 1849 } 1850 return dst; 1851 } 1852 1853 cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1854 LogicVRegister Simulator::cnt(VectorFormat vform, 1855 LogicVRegister dst, 1856 const LogicVRegister& src) { 1857 uint64_t result[16]; 1858 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1859 int laneCount = 
LaneCountFromFormat(vform); 1860 for (int i = 0; i < laneCount; i++) { 1861 uint64_t value = src.Uint(vform, i); 1862 result[i] = 0; 1863 for (int j = 0; j < laneSizeInBits; j++) { 1864 result[i] += (value & 1); 1865 value >>= 1; 1866 } 1867 } 1868 1869 dst.ClearForWrite(vform); 1870 for (int i = 0; i < laneCount; ++i) { 1871 dst.SetUint(vform, i, result[i]); 1872 } 1873 return dst; 1874 } 1875 1876 sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1877 LogicVRegister Simulator::sshl(VectorFormat vform, 1878 LogicVRegister dst, 1879 const LogicVRegister& src1, 1880 const LogicVRegister& src2) { 1881 dst.ClearForWrite(vform); 1882 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1883 int8_t shift_val = src2.Int(vform, i); 1884 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1885 1886 // Set signed saturation state. 1887 if ((shift_val > CountLeadingSignBits(lj_src_val)) && 1888 (lj_src_val != 0)) { 1889 dst.SetSignedSat(i, lj_src_val >= 0); 1890 } 1891 1892 // Set unsigned saturation state. 1893 if (lj_src_val < 0) { 1894 dst.SetUnsignedSat(i, false); 1895 } else if ((shift_val > CountLeadingZeros(lj_src_val)) && 1896 (lj_src_val != 0)) { 1897 dst.SetUnsignedSat(i, true); 1898 } 1899 1900 int64_t src_val = src1.Int(vform, i); 1901 if (shift_val > 63) { 1902 dst.SetInt(vform, i, 0); 1903 } else if (shift_val < -63) { 1904 dst.SetRounding(i, src_val < 0); 1905 dst.SetInt(vform, i, (src_val < 0) ? -1 : 0); 1906 } else { 1907 if (shift_val < 0) { 1908 // Set rounding state. Rounding only needed on right shifts. 
1909 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1910 dst.SetRounding(i, true); 1911 } 1912 src_val >>= -shift_val; 1913 } else { 1914 src_val <<= shift_val; 1915 } 1916 dst.SetInt(vform, i, src_val); 1917 } 1918 } 1919 return dst; 1920 } 1921 1922 ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1923 LogicVRegister Simulator::ushl(VectorFormat vform, 1924 LogicVRegister dst, 1925 const LogicVRegister& src1, 1926 const LogicVRegister& src2) { 1927 dst.ClearForWrite(vform); 1928 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1929 int8_t shift_val = src2.Int(vform, i); 1930 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1931 1932 // Set saturation state. 1933 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { 1934 dst.SetUnsignedSat(i, true); 1935 } 1936 1937 uint64_t src_val = src1.Uint(vform, i); 1938 if ((shift_val > 63) || (shift_val < -64)) { 1939 dst.SetUint(vform, i, 0); 1940 } else { 1941 if (shift_val < 0) { 1942 // Set rounding state. Rounding only needed on right shifts. 1943 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1944 dst.SetRounding(i, true); 1945 } 1946 1947 if (shift_val == -64) { 1948 src_val = 0; 1949 } else { 1950 src_val >>= -shift_val; 1951 } 1952 } else { 1953 src_val <<= shift_val; 1954 } 1955 dst.SetUint(vform, i, src_val); 1956 } 1957 } 1958 return dst; 1959 } 1960 1961 neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1962 LogicVRegister Simulator::neg(VectorFormat vform, 1963 LogicVRegister dst, 1964 const LogicVRegister& src) { 1965 dst.ClearForWrite(vform); 1966 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1967 // Test for signed saturation. 
1968 int64_t sa = src.Int(vform, i); 1969 if (sa == MinIntFromFormat(vform)) { 1970 dst.SetSignedSat(i, true); 1971 } 1972 dst.SetInt(vform, i, -sa); 1973 } 1974 return dst; 1975 } 1976 1977 suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1978 LogicVRegister Simulator::suqadd(VectorFormat vform, 1979 LogicVRegister dst, 1980 const LogicVRegister& src) { 1981 dst.ClearForWrite(vform); 1982 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1983 int64_t sa = dst.IntLeftJustified(vform, i); 1984 uint64_t ub = src.UintLeftJustified(vform, i); 1985 int64_t sr = sa + ub; 1986 1987 if (sr < sa) { // Test for signed positive saturation. 1988 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 1989 } else { 1990 dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i)); 1991 } 1992 } 1993 return dst; 1994 } 1995 1996 usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1997 LogicVRegister Simulator::usqadd(VectorFormat vform, 1998 LogicVRegister dst, 1999 const LogicVRegister& src) { 2000 dst.ClearForWrite(vform); 2001 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2002 uint64_t ua = dst.UintLeftJustified(vform, i); 2003 int64_t sb = src.IntLeftJustified(vform, i); 2004 uint64_t ur = ua + sb; 2005 2006 if ((sb > 0) && (ur <= ua)) { 2007 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 2008 } else if ((sb < 0) && (ur >= ua)) { 2009 dst.SetUint(vform, i, 0); // Negative saturation. 2010 } else { 2011 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); 2012 } 2013 } 2014 return dst; 2015 } 2016 2017 abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2018 LogicVRegister Simulator::abs(VectorFormat vform, 2019 LogicVRegister dst, 2020 const LogicVRegister& src) { 2021 dst.ClearForWrite(vform); 2022 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2023 // Test for signed saturation. 
2024 int64_t sa = src.Int(vform, i); 2025 if (sa == MinIntFromFormat(vform)) { 2026 dst.SetSignedSat(i, true); 2027 } 2028 if (sa < 0) { 2029 dst.SetInt(vform, i, -sa); 2030 } else { 2031 dst.SetInt(vform, i, sa); 2032 } 2033 } 2034 return dst; 2035 } 2036 2037 extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dstIsSigned,const LogicVRegister & src,bool srcIsSigned)2038 LogicVRegister Simulator::extractnarrow(VectorFormat dstform, 2039 LogicVRegister dst, 2040 bool dstIsSigned, 2041 const LogicVRegister& src, 2042 bool srcIsSigned) { 2043 bool upperhalf = false; 2044 VectorFormat srcform = kFormatUndefined; 2045 int64_t ssrc[8]; 2046 uint64_t usrc[8]; 2047 2048 switch (dstform) { 2049 case kFormat8B : upperhalf = false; srcform = kFormat8H; break; 2050 case kFormat16B: upperhalf = true; srcform = kFormat8H; break; 2051 case kFormat4H : upperhalf = false; srcform = kFormat4S; break; 2052 case kFormat8H : upperhalf = true; srcform = kFormat4S; break; 2053 case kFormat2S : upperhalf = false; srcform = kFormat2D; break; 2054 case kFormat4S : upperhalf = true; srcform = kFormat2D; break; 2055 case kFormatB : upperhalf = false; srcform = kFormatH; break; 2056 case kFormatH : upperhalf = false; srcform = kFormatS; break; 2057 case kFormatS : upperhalf = false; srcform = kFormatD; break; 2058 default:VIXL_UNIMPLEMENTED(); 2059 } 2060 2061 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2062 ssrc[i] = src.Int(srcform, i); 2063 usrc[i] = src.Uint(srcform, i); 2064 } 2065 2066 int offset; 2067 if (upperhalf) { 2068 offset = LaneCountFromFormat(dstform) / 2; 2069 } else { 2070 offset = 0; 2071 dst.ClearForWrite(dstform); 2072 } 2073 2074 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2075 // Test for signed saturation 2076 if (ssrc[i] > MaxIntFromFormat(dstform)) { 2077 dst.SetSignedSat(offset + i, true); 2078 } else if (ssrc[i] < MinIntFromFormat(dstform)) { 2079 dst.SetSignedSat(offset + i, false); 2080 } 2081 2082 // Test for unsigned saturation 
2083 if (srcIsSigned) { 2084 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 2085 dst.SetUnsignedSat(offset + i, true); 2086 } else if (ssrc[i] < 0) { 2087 dst.SetUnsignedSat(offset + i, false); 2088 } 2089 } else { 2090 if (usrc[i] > MaxUintFromFormat(dstform)) { 2091 dst.SetUnsignedSat(offset + i, true); 2092 } 2093 } 2094 2095 int64_t result; 2096 if (srcIsSigned) { 2097 result = ssrc[i] & MaxUintFromFormat(dstform); 2098 } else { 2099 result = usrc[i] & MaxUintFromFormat(dstform); 2100 } 2101 2102 if (dstIsSigned) { 2103 dst.SetInt(dstform, offset + i, result); 2104 } else { 2105 dst.SetUint(dstform, offset + i, result); 2106 } 2107 } 2108 return dst; 2109 } 2110 2111 xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2112 LogicVRegister Simulator::xtn(VectorFormat vform, 2113 LogicVRegister dst, 2114 const LogicVRegister& src) { 2115 return extractnarrow(vform, dst, true, src, true); 2116 } 2117 2118 sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2119 LogicVRegister Simulator::sqxtn(VectorFormat vform, 2120 LogicVRegister dst, 2121 const LogicVRegister& src) { 2122 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); 2123 } 2124 2125 sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2126 LogicVRegister Simulator::sqxtun(VectorFormat vform, 2127 LogicVRegister dst, 2128 const LogicVRegister& src) { 2129 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 2130 } 2131 2132 uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2133 LogicVRegister Simulator::uqxtn(VectorFormat vform, 2134 LogicVRegister dst, 2135 const LogicVRegister& src) { 2136 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 2137 } 2138 2139 absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)2140 LogicVRegister Simulator::absdiff(VectorFormat vform, 2141 
LogicVRegister dst, 2142 const LogicVRegister& src1, 2143 const LogicVRegister& src2, 2144 bool issigned) { 2145 dst.ClearForWrite(vform); 2146 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2147 if (issigned) { 2148 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); 2149 sr = sr > 0 ? sr : -sr; 2150 dst.SetInt(vform, i, sr); 2151 } else { 2152 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); 2153 sr = sr > 0 ? sr : -sr; 2154 dst.SetUint(vform, i, sr); 2155 } 2156 } 2157 return dst; 2158 } 2159 2160 saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2161 LogicVRegister Simulator::saba(VectorFormat vform, 2162 LogicVRegister dst, 2163 const LogicVRegister& src1, 2164 const LogicVRegister& src2) { 2165 SimVRegister temp; 2166 dst.ClearForWrite(vform); 2167 absdiff(vform, temp, src1, src2, true); 2168 add(vform, dst, dst, temp); 2169 return dst; 2170 } 2171 2172 uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2173 LogicVRegister Simulator::uaba(VectorFormat vform, 2174 LogicVRegister dst, 2175 const LogicVRegister& src1, 2176 const LogicVRegister& src2) { 2177 SimVRegister temp; 2178 dst.ClearForWrite(vform); 2179 absdiff(vform, temp, src1, src2, false); 2180 add(vform, dst, dst, temp); 2181 return dst; 2182 } 2183 2184 not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2185 LogicVRegister Simulator::not_(VectorFormat vform, 2186 LogicVRegister dst, 2187 const LogicVRegister& src) { 2188 dst.ClearForWrite(vform); 2189 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2190 dst.SetUint(vform, i, ~src.Uint(vform, i)); 2191 } 2192 return dst; 2193 } 2194 2195 rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2196 LogicVRegister Simulator::rbit(VectorFormat vform, 2197 LogicVRegister dst, 2198 const LogicVRegister& src) { 2199 uint64_t result[16]; 2200 int laneCount = LaneCountFromFormat(vform); 2201 int 
laneSizeInBits = LaneSizeInBitsFromFormat(vform); 2202 uint64_t reversed_value; 2203 uint64_t value; 2204 for (int i = 0; i < laneCount; i++) { 2205 value = src.Uint(vform, i); 2206 reversed_value = 0; 2207 for (int j = 0; j < laneSizeInBits; j++) { 2208 reversed_value = (reversed_value << 1) | (value & 1); 2209 value >>= 1; 2210 } 2211 result[i] = reversed_value; 2212 } 2213 2214 dst.ClearForWrite(vform); 2215 for (int i = 0; i < laneCount; ++i) { 2216 dst.SetUint(vform, i, result[i]); 2217 } 2218 return dst; 2219 } 2220 2221 rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)2222 LogicVRegister Simulator::rev(VectorFormat vform, 2223 LogicVRegister dst, 2224 const LogicVRegister& src, 2225 int revSize) { 2226 uint64_t result[16]; 2227 int laneCount = LaneCountFromFormat(vform); 2228 int laneSize = LaneSizeInBytesFromFormat(vform); 2229 int lanesPerLoop = revSize / laneSize; 2230 for (int i = 0; i < laneCount; i += lanesPerLoop) { 2231 for (int j = 0; j < lanesPerLoop; j++) { 2232 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); 2233 } 2234 } 2235 dst.ClearForWrite(vform); 2236 for (int i = 0; i < laneCount; ++i) { 2237 dst.SetUint(vform, i, result[i]); 2238 } 2239 return dst; 2240 } 2241 2242 rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2243 LogicVRegister Simulator::rev16(VectorFormat vform, 2244 LogicVRegister dst, 2245 const LogicVRegister& src) { 2246 return rev(vform, dst, src, 2); 2247 } 2248 2249 rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2250 LogicVRegister Simulator::rev32(VectorFormat vform, 2251 LogicVRegister dst, 2252 const LogicVRegister& src) { 2253 return rev(vform, dst, src, 4); 2254 } 2255 2256 rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2257 LogicVRegister Simulator::rev64(VectorFormat vform, 2258 LogicVRegister dst, 2259 const LogicVRegister& src) { 2260 return rev(vform, dst, src, 8); 2261 } 2262 2263 addlp(VectorFormat 
vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2264 LogicVRegister Simulator::addlp(VectorFormat vform, 2265 LogicVRegister dst, 2266 const LogicVRegister& src, 2267 bool is_signed, 2268 bool do_accumulate) { 2269 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 2270 2271 int64_t sr[16]; 2272 uint64_t ur[16]; 2273 2274 int laneCount = LaneCountFromFormat(vform); 2275 for (int i = 0; i < laneCount; ++i) { 2276 if (is_signed) { 2277 sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1); 2278 } else { 2279 ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 2280 } 2281 } 2282 2283 dst.ClearForWrite(vform); 2284 for (int i = 0; i < laneCount; ++i) { 2285 if (do_accumulate) { 2286 if (is_signed) { 2287 dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]); 2288 } else { 2289 dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]); 2290 } 2291 } else { 2292 if (is_signed) { 2293 dst.SetInt(vform, i, sr[i]); 2294 } else { 2295 dst.SetUint(vform, i, ur[i]); 2296 } 2297 } 2298 } 2299 2300 return dst; 2301 } 2302 2303 saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2304 LogicVRegister Simulator::saddlp(VectorFormat vform, 2305 LogicVRegister dst, 2306 const LogicVRegister& src) { 2307 return addlp(vform, dst, src, true, false); 2308 } 2309 2310 uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2311 LogicVRegister Simulator::uaddlp(VectorFormat vform, 2312 LogicVRegister dst, 2313 const LogicVRegister& src) { 2314 return addlp(vform, dst, src, false, false); 2315 } 2316 2317 sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2318 LogicVRegister Simulator::sadalp(VectorFormat vform, 2319 LogicVRegister dst, 2320 const LogicVRegister& src) { 2321 return addlp(vform, dst, src, true, true); 2322 } 2323 2324 uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2325 LogicVRegister Simulator::uadalp(VectorFormat vform, 2326 
LogicVRegister dst, 2327 const LogicVRegister& src) { 2328 return addlp(vform, dst, src, false, true); 2329 } 2330 2331 ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2332 LogicVRegister Simulator::ext(VectorFormat vform, 2333 LogicVRegister dst, 2334 const LogicVRegister& src1, 2335 const LogicVRegister& src2, 2336 int index) { 2337 uint8_t result[16]; 2338 int laneCount = LaneCountFromFormat(vform); 2339 for (int i = 0; i < laneCount - index; ++i) { 2340 result[i] = src1.Uint(vform, i + index); 2341 } 2342 for (int i = 0; i < index; ++i) { 2343 result[laneCount - index + i] = src2.Uint(vform, i); 2344 } 2345 dst.ClearForWrite(vform); 2346 for (int i = 0; i < laneCount; ++i) { 2347 dst.SetUint(vform, i, result[i]); 2348 } 2349 return dst; 2350 } 2351 2352 dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2353 LogicVRegister Simulator::dup_element(VectorFormat vform, 2354 LogicVRegister dst, 2355 const LogicVRegister& src, 2356 int src_index) { 2357 int laneCount = LaneCountFromFormat(vform); 2358 uint64_t value = src.Uint(vform, src_index); 2359 dst.ClearForWrite(vform); 2360 for (int i = 0; i < laneCount; ++i) { 2361 dst.SetUint(vform, i, value); 2362 } 2363 return dst; 2364 } 2365 2366 dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2367 LogicVRegister Simulator::dup_immediate(VectorFormat vform, 2368 LogicVRegister dst, 2369 uint64_t imm) { 2370 int laneCount = LaneCountFromFormat(vform); 2371 uint64_t value = imm & MaxUintFromFormat(vform); 2372 dst.ClearForWrite(vform); 2373 for (int i = 0; i < laneCount; ++i) { 2374 dst.SetUint(vform, i, value); 2375 } 2376 return dst; 2377 } 2378 2379 ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2380 LogicVRegister Simulator::ins_element(VectorFormat vform, 2381 LogicVRegister dst, 2382 int dst_index, 2383 const LogicVRegister& src, 2384 int 
src_index) { 2385 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2386 return dst; 2387 } 2388 2389 ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2390 LogicVRegister Simulator::ins_immediate(VectorFormat vform, 2391 LogicVRegister dst, 2392 int dst_index, 2393 uint64_t imm) { 2394 uint64_t value = imm & MaxUintFromFormat(vform); 2395 dst.SetUint(vform, dst_index, value); 2396 return dst; 2397 } 2398 2399 movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2400 LogicVRegister Simulator::movi(VectorFormat vform, 2401 LogicVRegister dst, 2402 uint64_t imm) { 2403 int laneCount = LaneCountFromFormat(vform); 2404 dst.ClearForWrite(vform); 2405 for (int i = 0; i < laneCount; ++i) { 2406 dst.SetUint(vform, i, imm); 2407 } 2408 return dst; 2409 } 2410 2411 mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2412 LogicVRegister Simulator::mvni(VectorFormat vform, 2413 LogicVRegister dst, 2414 uint64_t imm) { 2415 int laneCount = LaneCountFromFormat(vform); 2416 dst.ClearForWrite(vform); 2417 for (int i = 0; i < laneCount; ++i) { 2418 dst.SetUint(vform, i, ~imm); 2419 } 2420 return dst; 2421 } 2422 2423 orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2424 LogicVRegister Simulator::orr(VectorFormat vform, 2425 LogicVRegister dst, 2426 const LogicVRegister& src, 2427 uint64_t imm) { 2428 uint64_t result[16]; 2429 int laneCount = LaneCountFromFormat(vform); 2430 for (int i = 0; i < laneCount; ++i) { 2431 result[i] = src.Uint(vform, i) | imm; 2432 } 2433 dst.ClearForWrite(vform); 2434 for (int i = 0; i < laneCount; ++i) { 2435 dst.SetUint(vform, i, result[i]); 2436 } 2437 return dst; 2438 } 2439 2440 uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2441 LogicVRegister Simulator::uxtl(VectorFormat vform, 2442 LogicVRegister dst, 2443 const LogicVRegister& src) { 2444 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2445 2446 dst.ClearForWrite(vform); 2447 for (int i = 
0; i < LaneCountFromFormat(vform); i++) { 2448 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2449 } 2450 return dst; 2451 } 2452 2453 sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2454 LogicVRegister Simulator::sxtl(VectorFormat vform, 2455 LogicVRegister dst, 2456 const LogicVRegister& src) { 2457 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2458 2459 dst.ClearForWrite(vform); 2460 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2461 dst.SetInt(vform, i, src.Int(vform_half, i)); 2462 } 2463 return dst; 2464 } 2465 2466 uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2467 LogicVRegister Simulator::uxtl2(VectorFormat vform, 2468 LogicVRegister dst, 2469 const LogicVRegister& src) { 2470 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2471 int lane_count = LaneCountFromFormat(vform); 2472 2473 dst.ClearForWrite(vform); 2474 for (int i = 0; i < lane_count; i++) { 2475 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); 2476 } 2477 return dst; 2478 } 2479 2480 sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2481 LogicVRegister Simulator::sxtl2(VectorFormat vform, 2482 LogicVRegister dst, 2483 const LogicVRegister& src) { 2484 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2485 int lane_count = LaneCountFromFormat(vform); 2486 2487 dst.ClearForWrite(vform); 2488 for (int i = 0; i < lane_count; i++) { 2489 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2490 } 2491 return dst; 2492 } 2493 2494 shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2495 LogicVRegister Simulator::shrn(VectorFormat vform, 2496 LogicVRegister dst, 2497 const LogicVRegister& src, 2498 int shift) { 2499 SimVRegister temp; 2500 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2501 VectorFormat vform_dst = vform; 2502 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2503 return extractnarrow(vform_dst, dst, false, 
shifted_src, false); 2504 } 2505 2506 shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2507 LogicVRegister Simulator::shrn2(VectorFormat vform, 2508 LogicVRegister dst, 2509 const LogicVRegister& src, 2510 int shift) { 2511 SimVRegister temp; 2512 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2513 VectorFormat vformdst = vform; 2514 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2515 return extractnarrow(vformdst, dst, false, shifted_src, false); 2516 } 2517 2518 rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2519 LogicVRegister Simulator::rshrn(VectorFormat vform, 2520 LogicVRegister dst, 2521 const LogicVRegister& src, 2522 int shift) { 2523 SimVRegister temp; 2524 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2525 VectorFormat vformdst = vform; 2526 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2527 return extractnarrow(vformdst, dst, false, shifted_src, false); 2528 } 2529 2530 rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2531 LogicVRegister Simulator::rshrn2(VectorFormat vform, 2532 LogicVRegister dst, 2533 const LogicVRegister& src, 2534 int shift) { 2535 SimVRegister temp; 2536 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2537 VectorFormat vformdst = vform; 2538 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2539 return extractnarrow(vformdst, dst, false, shifted_src, false); 2540 } 2541 2542 tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2543 LogicVRegister Simulator::tbl(VectorFormat vform, 2544 LogicVRegister dst, 2545 const LogicVRegister& tab, 2546 const LogicVRegister& ind) { 2547 movi(vform, dst, 0); 2548 return tbx(vform, dst, tab, ind); 2549 } 2550 2551 tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & 
tab2,const LogicVRegister & ind)2552 LogicVRegister Simulator::tbl(VectorFormat vform, 2553 LogicVRegister dst, 2554 const LogicVRegister& tab, 2555 const LogicVRegister& tab2, 2556 const LogicVRegister& ind) { 2557 movi(vform, dst, 0); 2558 return tbx(vform, dst, tab, tab2, ind); 2559 } 2560 2561 tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2562 LogicVRegister Simulator::tbl(VectorFormat vform, 2563 LogicVRegister dst, 2564 const LogicVRegister& tab, 2565 const LogicVRegister& tab2, 2566 const LogicVRegister& tab3, 2567 const LogicVRegister& ind) { 2568 movi(vform, dst, 0); 2569 return tbx(vform, dst, tab, tab2, tab3, ind); 2570 } 2571 2572 tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2573 LogicVRegister Simulator::tbl(VectorFormat vform, 2574 LogicVRegister dst, 2575 const LogicVRegister& tab, 2576 const LogicVRegister& tab2, 2577 const LogicVRegister& tab3, 2578 const LogicVRegister& tab4, 2579 const LogicVRegister& ind) { 2580 movi(vform, dst, 0); 2581 return tbx(vform, dst, tab, tab2, tab3, tab4, ind); 2582 } 2583 2584 tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2585 LogicVRegister Simulator::tbx(VectorFormat vform, 2586 LogicVRegister dst, 2587 const LogicVRegister& tab, 2588 const LogicVRegister& ind) { 2589 dst.ClearForWrite(vform); 2590 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2591 uint64_t j = ind.Uint(vform, i); 2592 switch (j >> 4) { 2593 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2594 } 2595 } 2596 return dst; 2597 } 2598 2599 tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2600 LogicVRegister Simulator::tbx(VectorFormat vform, 2601 LogicVRegister 
dst, 2602 const LogicVRegister& tab, 2603 const LogicVRegister& tab2, 2604 const LogicVRegister& ind) { 2605 dst.ClearForWrite(vform); 2606 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2607 uint64_t j = ind.Uint(vform, i); 2608 switch (j >> 4) { 2609 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2610 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; 2611 } 2612 } 2613 return dst; 2614 } 2615 2616 tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2617 LogicVRegister Simulator::tbx(VectorFormat vform, 2618 LogicVRegister dst, 2619 const LogicVRegister& tab, 2620 const LogicVRegister& tab2, 2621 const LogicVRegister& tab3, 2622 const LogicVRegister& ind) { 2623 dst.ClearForWrite(vform); 2624 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2625 uint64_t j = ind.Uint(vform, i); 2626 switch (j >> 4) { 2627 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2628 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; 2629 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break; 2630 } 2631 } 2632 return dst; 2633 } 2634 2635 tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2636 LogicVRegister Simulator::tbx(VectorFormat vform, 2637 LogicVRegister dst, 2638 const LogicVRegister& tab, 2639 const LogicVRegister& tab2, 2640 const LogicVRegister& tab3, 2641 const LogicVRegister& tab4, 2642 const LogicVRegister& ind) { 2643 dst.ClearForWrite(vform); 2644 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2645 uint64_t j = ind.Uint(vform, i); 2646 switch (j >> 4) { 2647 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2648 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; 2649 case 2: dst.SetUint(vform, i, 
tab3.Uint(kFormat16B, j & 15)); break; 2650 case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break; 2651 } 2652 } 2653 return dst; 2654 } 2655 2656 uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2657 LogicVRegister Simulator::uqshrn(VectorFormat vform, 2658 LogicVRegister dst, 2659 const LogicVRegister& src, 2660 int shift) { 2661 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2662 } 2663 2664 uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2665 LogicVRegister Simulator::uqshrn2(VectorFormat vform, 2666 LogicVRegister dst, 2667 const LogicVRegister& src, 2668 int shift) { 2669 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2670 } 2671 2672 uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2673 LogicVRegister Simulator::uqrshrn(VectorFormat vform, 2674 LogicVRegister dst, 2675 const LogicVRegister& src, 2676 int shift) { 2677 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2678 } 2679 2680 uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2681 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, 2682 LogicVRegister dst, 2683 const LogicVRegister& src, 2684 int shift) { 2685 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2686 } 2687 2688 sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2689 LogicVRegister Simulator::sqshrn(VectorFormat vform, 2690 LogicVRegister dst, 2691 const LogicVRegister& src, 2692 int shift) { 2693 SimVRegister temp; 2694 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2695 VectorFormat vformdst = vform; 2696 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2697 return sqxtn(vformdst, dst, shifted_src); 2698 } 2699 2700 sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2701 LogicVRegister Simulator::sqshrn2(VectorFormat vform, 2702 LogicVRegister dst, 
2703 const LogicVRegister& src, 2704 int shift) { 2705 SimVRegister temp; 2706 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2707 VectorFormat vformdst = vform; 2708 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2709 return sqxtn(vformdst, dst, shifted_src); 2710 } 2711 2712 sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2713 LogicVRegister Simulator::sqrshrn(VectorFormat vform, 2714 LogicVRegister dst, 2715 const LogicVRegister& src, 2716 int shift) { 2717 SimVRegister temp; 2718 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2719 VectorFormat vformdst = vform; 2720 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2721 return sqxtn(vformdst, dst, shifted_src); 2722 } 2723 2724 sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2725 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, 2726 LogicVRegister dst, 2727 const LogicVRegister& src, 2728 int shift) { 2729 SimVRegister temp; 2730 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2731 VectorFormat vformdst = vform; 2732 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2733 return sqxtn(vformdst, dst, shifted_src); 2734 } 2735 2736 sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2737 LogicVRegister Simulator::sqshrun(VectorFormat vform, 2738 LogicVRegister dst, 2739 const LogicVRegister& src, 2740 int shift) { 2741 SimVRegister temp; 2742 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2743 VectorFormat vformdst = vform; 2744 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2745 return sqxtun(vformdst, dst, shifted_src); 2746 } 2747 2748 sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2749 LogicVRegister Simulator::sqshrun2(VectorFormat vform, 2750 LogicVRegister dst, 2751 const LogicVRegister& src, 2752 
int shift) { 2753 SimVRegister temp; 2754 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2755 VectorFormat vformdst = vform; 2756 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2757 return sqxtun(vformdst, dst, shifted_src); 2758 } 2759 2760 sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2761 LogicVRegister Simulator::sqrshrun(VectorFormat vform, 2762 LogicVRegister dst, 2763 const LogicVRegister& src, 2764 int shift) { 2765 SimVRegister temp; 2766 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2767 VectorFormat vformdst = vform; 2768 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2769 return sqxtun(vformdst, dst, shifted_src); 2770 } 2771 2772 sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2773 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, 2774 LogicVRegister dst, 2775 const LogicVRegister& src, 2776 int shift) { 2777 SimVRegister temp; 2778 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2779 VectorFormat vformdst = vform; 2780 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2781 return sqxtun(vformdst, dst, shifted_src); 2782 } 2783 2784 uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2785 LogicVRegister Simulator::uaddl(VectorFormat vform, 2786 LogicVRegister dst, 2787 const LogicVRegister& src1, 2788 const LogicVRegister& src2) { 2789 SimVRegister temp1, temp2; 2790 uxtl(vform, temp1, src1); 2791 uxtl(vform, temp2, src2); 2792 add(vform, dst, temp1, temp2); 2793 return dst; 2794 } 2795 2796 uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2797 LogicVRegister Simulator::uaddl2(VectorFormat vform, 2798 LogicVRegister dst, 2799 const LogicVRegister& src1, 2800 const LogicVRegister& src2) { 2801 SimVRegister temp1, temp2; 2802 
uxtl2(vform, temp1, src1); 2803 uxtl2(vform, temp2, src2); 2804 add(vform, dst, temp1, temp2); 2805 return dst; 2806 } 2807 2808 uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2809 LogicVRegister Simulator::uaddw(VectorFormat vform, 2810 LogicVRegister dst, 2811 const LogicVRegister& src1, 2812 const LogicVRegister& src2) { 2813 SimVRegister temp; 2814 uxtl(vform, temp, src2); 2815 add(vform, dst, src1, temp); 2816 return dst; 2817 } 2818 2819 uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2820 LogicVRegister Simulator::uaddw2(VectorFormat vform, 2821 LogicVRegister dst, 2822 const LogicVRegister& src1, 2823 const LogicVRegister& src2) { 2824 SimVRegister temp; 2825 uxtl2(vform, temp, src2); 2826 add(vform, dst, src1, temp); 2827 return dst; 2828 } 2829 2830 saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2831 LogicVRegister Simulator::saddl(VectorFormat vform, 2832 LogicVRegister dst, 2833 const LogicVRegister& src1, 2834 const LogicVRegister& src2) { 2835 SimVRegister temp1, temp2; 2836 sxtl(vform, temp1, src1); 2837 sxtl(vform, temp2, src2); 2838 add(vform, dst, temp1, temp2); 2839 return dst; 2840 } 2841 2842 saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2843 LogicVRegister Simulator::saddl2(VectorFormat vform, 2844 LogicVRegister dst, 2845 const LogicVRegister& src1, 2846 const LogicVRegister& src2) { 2847 SimVRegister temp1, temp2; 2848 sxtl2(vform, temp1, src1); 2849 sxtl2(vform, temp2, src2); 2850 add(vform, dst, temp1, temp2); 2851 return dst; 2852 } 2853 2854 saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2855 LogicVRegister Simulator::saddw(VectorFormat vform, 2856 LogicVRegister dst, 2857 const LogicVRegister& src1, 2858 const LogicVRegister& src2) { 2859 SimVRegister temp; 2860 sxtl(vform, 
temp, src2); 2861 add(vform, dst, src1, temp); 2862 return dst; 2863 } 2864 2865 saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2866 LogicVRegister Simulator::saddw2(VectorFormat vform, 2867 LogicVRegister dst, 2868 const LogicVRegister& src1, 2869 const LogicVRegister& src2) { 2870 SimVRegister temp; 2871 sxtl2(vform, temp, src2); 2872 add(vform, dst, src1, temp); 2873 return dst; 2874 } 2875 2876 usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2877 LogicVRegister Simulator::usubl(VectorFormat vform, 2878 LogicVRegister dst, 2879 const LogicVRegister& src1, 2880 const LogicVRegister& src2) { 2881 SimVRegister temp1, temp2; 2882 uxtl(vform, temp1, src1); 2883 uxtl(vform, temp2, src2); 2884 sub(vform, dst, temp1, temp2); 2885 return dst; 2886 } 2887 2888 usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2889 LogicVRegister Simulator::usubl2(VectorFormat vform, 2890 LogicVRegister dst, 2891 const LogicVRegister& src1, 2892 const LogicVRegister& src2) { 2893 SimVRegister temp1, temp2; 2894 uxtl2(vform, temp1, src1); 2895 uxtl2(vform, temp2, src2); 2896 sub(vform, dst, temp1, temp2); 2897 return dst; 2898 } 2899 2900 usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2901 LogicVRegister Simulator::usubw(VectorFormat vform, 2902 LogicVRegister dst, 2903 const LogicVRegister& src1, 2904 const LogicVRegister& src2) { 2905 SimVRegister temp; 2906 uxtl(vform, temp, src2); 2907 sub(vform, dst, src1, temp); 2908 return dst; 2909 } 2910 2911 usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2912 LogicVRegister Simulator::usubw2(VectorFormat vform, 2913 LogicVRegister dst, 2914 const LogicVRegister& src1, 2915 const LogicVRegister& src2) { 2916 SimVRegister temp; 2917 uxtl2(vform, temp, src2); 2918 sub(vform, dst, src1, 
temp); 2919 return dst; 2920 } 2921 2922 ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2923 LogicVRegister Simulator::ssubl(VectorFormat vform, 2924 LogicVRegister dst, 2925 const LogicVRegister& src1, 2926 const LogicVRegister& src2) { 2927 SimVRegister temp1, temp2; 2928 sxtl(vform, temp1, src1); 2929 sxtl(vform, temp2, src2); 2930 sub(vform, dst, temp1, temp2); 2931 return dst; 2932 } 2933 2934 ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2935 LogicVRegister Simulator::ssubl2(VectorFormat vform, 2936 LogicVRegister dst, 2937 const LogicVRegister& src1, 2938 const LogicVRegister& src2) { 2939 SimVRegister temp1, temp2; 2940 sxtl2(vform, temp1, src1); 2941 sxtl2(vform, temp2, src2); 2942 sub(vform, dst, temp1, temp2); 2943 return dst; 2944 } 2945 2946 ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2947 LogicVRegister Simulator::ssubw(VectorFormat vform, 2948 LogicVRegister dst, 2949 const LogicVRegister& src1, 2950 const LogicVRegister& src2) { 2951 SimVRegister temp; 2952 sxtl(vform, temp, src2); 2953 sub(vform, dst, src1, temp); 2954 return dst; 2955 } 2956 2957 ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2958 LogicVRegister Simulator::ssubw2(VectorFormat vform, 2959 LogicVRegister dst, 2960 const LogicVRegister& src1, 2961 const LogicVRegister& src2) { 2962 SimVRegister temp; 2963 sxtl2(vform, temp, src2); 2964 sub(vform, dst, src1, temp); 2965 return dst; 2966 } 2967 2968 uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2969 LogicVRegister Simulator::uabal(VectorFormat vform, 2970 LogicVRegister dst, 2971 const LogicVRegister& src1, 2972 const LogicVRegister& src2) { 2973 SimVRegister temp1, temp2; 2974 uxtl(vform, temp1, src1); 2975 uxtl(vform, temp2, src2); 2976 uaba(vform, dst, temp1, 
temp2); 2977 return dst; 2978 } 2979 2980 uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2981 LogicVRegister Simulator::uabal2(VectorFormat vform, 2982 LogicVRegister dst, 2983 const LogicVRegister& src1, 2984 const LogicVRegister& src2) { 2985 SimVRegister temp1, temp2; 2986 uxtl2(vform, temp1, src1); 2987 uxtl2(vform, temp2, src2); 2988 uaba(vform, dst, temp1, temp2); 2989 return dst; 2990 } 2991 2992 sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2993 LogicVRegister Simulator::sabal(VectorFormat vform, 2994 LogicVRegister dst, 2995 const LogicVRegister& src1, 2996 const LogicVRegister& src2) { 2997 SimVRegister temp1, temp2; 2998 sxtl(vform, temp1, src1); 2999 sxtl(vform, temp2, src2); 3000 saba(vform, dst, temp1, temp2); 3001 return dst; 3002 } 3003 3004 sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3005 LogicVRegister Simulator::sabal2(VectorFormat vform, 3006 LogicVRegister dst, 3007 const LogicVRegister& src1, 3008 const LogicVRegister& src2) { 3009 SimVRegister temp1, temp2; 3010 sxtl2(vform, temp1, src1); 3011 sxtl2(vform, temp2, src2); 3012 saba(vform, dst, temp1, temp2); 3013 return dst; 3014 } 3015 3016 uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3017 LogicVRegister Simulator::uabdl(VectorFormat vform, 3018 LogicVRegister dst, 3019 const LogicVRegister& src1, 3020 const LogicVRegister& src2) { 3021 SimVRegister temp1, temp2; 3022 uxtl(vform, temp1, src1); 3023 uxtl(vform, temp2, src2); 3024 absdiff(vform, dst, temp1, temp2, false); 3025 return dst; 3026 } 3027 3028 uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3029 LogicVRegister Simulator::uabdl2(VectorFormat vform, 3030 LogicVRegister dst, 3031 const LogicVRegister& src1, 3032 const LogicVRegister& src2) { 3033 SimVRegister temp1, 
temp2; 3034 uxtl2(vform, temp1, src1); 3035 uxtl2(vform, temp2, src2); 3036 absdiff(vform, dst, temp1, temp2, false); 3037 return dst; 3038 } 3039 3040 sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3041 LogicVRegister Simulator::sabdl(VectorFormat vform, 3042 LogicVRegister dst, 3043 const LogicVRegister& src1, 3044 const LogicVRegister& src2) { 3045 SimVRegister temp1, temp2; 3046 sxtl(vform, temp1, src1); 3047 sxtl(vform, temp2, src2); 3048 absdiff(vform, dst, temp1, temp2, true); 3049 return dst; 3050 } 3051 3052 sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3053 LogicVRegister Simulator::sabdl2(VectorFormat vform, 3054 LogicVRegister dst, 3055 const LogicVRegister& src1, 3056 const LogicVRegister& src2) { 3057 SimVRegister temp1, temp2; 3058 sxtl2(vform, temp1, src1); 3059 sxtl2(vform, temp2, src2); 3060 absdiff(vform, dst, temp1, temp2, true); 3061 return dst; 3062 } 3063 3064 umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3065 LogicVRegister Simulator::umull(VectorFormat vform, 3066 LogicVRegister dst, 3067 const LogicVRegister& src1, 3068 const LogicVRegister& src2) { 3069 SimVRegister temp1, temp2; 3070 uxtl(vform, temp1, src1); 3071 uxtl(vform, temp2, src2); 3072 mul(vform, dst, temp1, temp2); 3073 return dst; 3074 } 3075 3076 umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3077 LogicVRegister Simulator::umull2(VectorFormat vform, 3078 LogicVRegister dst, 3079 const LogicVRegister& src1, 3080 const LogicVRegister& src2) { 3081 SimVRegister temp1, temp2; 3082 uxtl2(vform, temp1, src1); 3083 uxtl2(vform, temp2, src2); 3084 mul(vform, dst, temp1, temp2); 3085 return dst; 3086 } 3087 3088 smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3089 LogicVRegister Simulator::smull(VectorFormat vform, 3090 
LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister temp1, temp2;
  sxtl(vform, temp1, src1);
  sxtl(vform, temp2, src2);
  mul(vform, dst, temp1, temp2);
  return dst;
}


// Extends both sources with sxtl2() into scratch registers and multiplies
// the extended values into `dst` with mul().
LogicVRegister Simulator::smull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  mul(vform, dst, wide1, wide2);
  return dst;
}


// Extends both sources with uxtl() into scratch registers and passes them,
// together with `dst`, to mls().
LogicVRegister Simulator::umlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  mls(vform, dst, wide1, wide2);
  return dst;
}


// Extends both sources with uxtl2() into scratch registers and passes them,
// together with `dst`, to mls().
LogicVRegister Simulator::umlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  uxtl2(vform, wide1, src1);
  uxtl2(vform, wide2, src2);
  mls(vform, dst, wide1, wide2);
  return dst;
}


// Extends both sources with sxtl() into scratch registers and passes them,
// together with `dst`, to mls().
LogicVRegister Simulator::smlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  mls(vform, dst, wide1, wide2);
  return dst;
}

// Extends both sources with sxtl2() into scratch registers and passes them,
// together with `dst`, to mls().
LogicVRegister Simulator::smlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  mls(vform, dst, wide1, wide2);
  return dst;
}


// Extends both sources with uxtl() into scratch registers and passes them,
// together with `dst`, to mla().
LogicVRegister Simulator::umlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  mla(vform, dst, wide1, wide2);
  return dst;
}


// Extends both sources with uxtl2() into scratch registers and passes them,
// together with `dst`, to mla().
LogicVRegister Simulator::umlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  uxtl2(vform, wide1, src1);
  uxtl2(vform, wide2, src2);
  mla(vform, dst, wide1, wide2);
  return dst;
}


// Extends both sources with sxtl() into scratch registers and passes them,
// together with `dst`, to mla().
LogicVRegister Simulator::smlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  mla(vform, dst, wide1, wide2);
  return dst;
}


// Extends both sources with sxtl2() into scratch registers and passes them,
// together with `dst`, to mla().
LogicVRegister Simulator::smlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  mla(vform, dst, wide1, wide2);
  return dst;
}

3208 sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3209 LogicVRegister Simulator::sqdmlal(VectorFormat vform, 3210 LogicVRegister dst, 3211 const LogicVRegister& src1, 3212 const LogicVRegister& src2) { 3213 SimVRegister temp; 3214 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3215 return add(vform, dst, dst, product).SignedSaturate(vform); 3216 } 3217 3218 sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3219 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 3220 LogicVRegister dst, 3221 const LogicVRegister& src1, 3222 const LogicVRegister& src2) { 3223 SimVRegister temp; 3224 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3225 return add(vform, dst, dst, product).SignedSaturate(vform); 3226 } 3227 3228 sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3229 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 3230 LogicVRegister dst, 3231 const LogicVRegister& src1, 3232 const LogicVRegister& src2) { 3233 SimVRegister temp; 3234 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3235 return sub(vform, dst, dst, product).SignedSaturate(vform); 3236 } 3237 3238 sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3239 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 3240 LogicVRegister dst, 3241 const LogicVRegister& src1, 3242 const LogicVRegister& src2) { 3243 SimVRegister temp; 3244 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3245 return sub(vform, dst, dst, product).SignedSaturate(vform); 3246 } 3247 3248 sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3249 LogicVRegister Simulator::sqdmull(VectorFormat vform, 3250 LogicVRegister dst, 3251 const LogicVRegister& src1, 3252 const LogicVRegister& src2) { 3253 SimVRegister temp; 3254 
LogicVRegister product = smull(vform, temp, src1, src2); 3255 return add(vform, dst, product, product).SignedSaturate(vform); 3256 } 3257 3258 sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3259 LogicVRegister Simulator::sqdmull2(VectorFormat vform, 3260 LogicVRegister dst, 3261 const LogicVRegister& src1, 3262 const LogicVRegister& src2) { 3263 SimVRegister temp; 3264 LogicVRegister product = smull2(vform, temp, src1, src2); 3265 return add(vform, dst, product, product).SignedSaturate(vform); 3266 } 3267 3268 sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3269 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 3270 LogicVRegister dst, 3271 const LogicVRegister& src1, 3272 const LogicVRegister& src2, 3273 bool round) { 3274 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. 3275 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 3276 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. 3277 3278 int esize = LaneSizeInBitsFromFormat(vform); 3279 int round_const = round ? 
(1 << (esize - 2)) : 0; 3280 int64_t product; 3281 3282 dst.ClearForWrite(vform); 3283 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3284 product = src1.Int(vform, i) * src2.Int(vform, i); 3285 product += round_const; 3286 product = product >> (esize - 1); 3287 3288 if (product > MaxIntFromFormat(vform)) { 3289 product = MaxIntFromFormat(vform); 3290 } else if (product < MinIntFromFormat(vform)) { 3291 product = MinIntFromFormat(vform); 3292 } 3293 dst.SetInt(vform, i, product); 3294 } 3295 return dst; 3296 } 3297 3298 sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3299 LogicVRegister Simulator::sqdmulh(VectorFormat vform, 3300 LogicVRegister dst, 3301 const LogicVRegister& src1, 3302 const LogicVRegister& src2) { 3303 return sqrdmulh(vform, dst, src1, src2, false); 3304 } 3305 3306 addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3307 LogicVRegister Simulator::addhn(VectorFormat vform, 3308 LogicVRegister dst, 3309 const LogicVRegister& src1, 3310 const LogicVRegister& src2) { 3311 SimVRegister temp; 3312 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3313 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3314 return dst; 3315 } 3316 3317 addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3318 LogicVRegister Simulator::addhn2(VectorFormat vform, 3319 LogicVRegister dst, 3320 const LogicVRegister& src1, 3321 const LogicVRegister& src2) { 3322 SimVRegister temp; 3323 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3324 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3325 return dst; 3326 } 3327 3328 raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3329 LogicVRegister Simulator::raddhn(VectorFormat vform, 3330 LogicVRegister dst, 3331 const LogicVRegister& src1, 3332 const LogicVRegister& src2) { 
3333 SimVRegister temp; 3334 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3335 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3336 return dst; 3337 } 3338 3339 raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3340 LogicVRegister Simulator::raddhn2(VectorFormat vform, 3341 LogicVRegister dst, 3342 const LogicVRegister& src1, 3343 const LogicVRegister& src2) { 3344 SimVRegister temp; 3345 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3346 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3347 return dst; 3348 } 3349 3350 subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3351 LogicVRegister Simulator::subhn(VectorFormat vform, 3352 LogicVRegister dst, 3353 const LogicVRegister& src1, 3354 const LogicVRegister& src2) { 3355 SimVRegister temp; 3356 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3357 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3358 return dst; 3359 } 3360 3361 subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3362 LogicVRegister Simulator::subhn2(VectorFormat vform, 3363 LogicVRegister dst, 3364 const LogicVRegister& src1, 3365 const LogicVRegister& src2) { 3366 SimVRegister temp; 3367 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3368 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3369 return dst; 3370 } 3371 3372 rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3373 LogicVRegister Simulator::rsubhn(VectorFormat vform, 3374 LogicVRegister dst, 3375 const LogicVRegister& src1, 3376 const LogicVRegister& src2) { 3377 SimVRegister temp; 3378 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3379 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3380 return dst; 3381 } 3382 3383 rsubhn2(VectorFormat 
vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3384 LogicVRegister Simulator::rsubhn2(VectorFormat vform, 3385 LogicVRegister dst, 3386 const LogicVRegister& src1, 3387 const LogicVRegister& src2) { 3388 SimVRegister temp; 3389 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3390 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3391 return dst; 3392 } 3393 3394 trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3395 LogicVRegister Simulator::trn1(VectorFormat vform, 3396 LogicVRegister dst, 3397 const LogicVRegister& src1, 3398 const LogicVRegister& src2) { 3399 uint64_t result[16]; 3400 int laneCount = LaneCountFromFormat(vform); 3401 int pairs = laneCount / 2; 3402 for (int i = 0; i < pairs; ++i) { 3403 result[2 * i] = src1.Uint(vform, 2 * i); 3404 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 3405 } 3406 3407 dst.ClearForWrite(vform); 3408 for (int i = 0; i < laneCount; ++i) { 3409 dst.SetUint(vform, i, result[i]); 3410 } 3411 return dst; 3412 } 3413 3414 trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3415 LogicVRegister Simulator::trn2(VectorFormat vform, 3416 LogicVRegister dst, 3417 const LogicVRegister& src1, 3418 const LogicVRegister& src2) { 3419 uint64_t result[16]; 3420 int laneCount = LaneCountFromFormat(vform); 3421 int pairs = laneCount / 2; 3422 for (int i = 0; i < pairs; ++i) { 3423 result[2 * i] = src1.Uint(vform, (2 * i) + 1); 3424 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); 3425 } 3426 3427 dst.ClearForWrite(vform); 3428 for (int i = 0; i < laneCount; ++i) { 3429 dst.SetUint(vform, i, result[i]); 3430 } 3431 return dst; 3432 } 3433 3434 zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3435 LogicVRegister Simulator::zip1(VectorFormat vform, 3436 LogicVRegister dst, 3437 const LogicVRegister& src1, 3438 const 
LogicVRegister& src2) {
  uint64_t result[16];
  int laneCount = LaneCountFromFormat(vform);
  int pairs = laneCount / 2;
  for (int i = 0; i < pairs; ++i) {
    result[2 * i] = src1.Uint(vform, i);
    result[(2 * i) + 1] = src2.Uint(vform, i);
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < laneCount; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}


// Interleaves the upper halves of the sources: output lane 2i is src1 lane
// (pairs + i) and output lane 2i+1 is src2 lane (pairs + i), where `pairs` is
// half the lane count. All lanes are staged in a local buffer before `dst` is
// cleared and written.
LogicVRegister Simulator::zip2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  const int pair_count = lane_count / 2;
  uint64_t staged[16];
  for (int pair = 0; pair < pair_count; ++pair) {
    const int source_lane = pair_count + pair;
    staged[2 * pair] = src1.Uint(vform, source_lane);
    staged[(2 * pair) + 1] = src2.Uint(vform, source_lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}


// Concatenates the lanes of src1 followed by the lanes of src2 into a local
// buffer, then writes every even-indexed buffer entry to consecutive lanes of
// `dst`.
LogicVRegister Simulator::uzp1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  const int lane_count = LaneCountFromFormat(vform);
  uint64_t joined[32];
  for (int lane = 0; lane < lane_count; ++lane) {
    joined[lane] = src1.Uint(vform, lane);
    joined[lane_count + lane] = src2.Uint(vform, lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, joined[2 * lane]);
  }
  return dst;
}


LogicVRegister Simulator::uzp2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t result[32];
  int
laneCount = LaneCountFromFormat(vform); 3500 for (int i = 0; i < laneCount; ++i) { 3501 result[i] = src1.Uint(vform, i); 3502 result[laneCount + i] = src2.Uint(vform, i); 3503 } 3504 3505 dst.ClearForWrite(vform); 3506 for (int i = 0; i < laneCount; ++i) { 3507 dst.SetUint(vform, i, result[ (2 * i) + 1]); 3508 } 3509 return dst; 3510 } 3511 3512 3513 template <typename T> FPAdd(T op1,T op2)3514 T Simulator::FPAdd(T op1, T op2) { 3515 T result = FPProcessNaNs(op1, op2); 3516 if (std::isnan(result)) return result; 3517 3518 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { 3519 // inf + -inf returns the default NaN. 3520 FPProcessException(); 3521 return FPDefaultNaN<T>(); 3522 } else { 3523 // Other cases should be handled by standard arithmetic. 3524 return op1 + op2; 3525 } 3526 } 3527 3528 3529 template <typename T> FPSub(T op1,T op2)3530 T Simulator::FPSub(T op1, T op2) { 3531 // NaNs should be handled elsewhere. 3532 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3533 3534 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { 3535 // inf - inf returns the default NaN. 3536 FPProcessException(); 3537 return FPDefaultNaN<T>(); 3538 } else { 3539 // Other cases should be handled by standard arithmetic. 3540 return op1 - op2; 3541 } 3542 } 3543 3544 3545 template <typename T> FPMul(T op1,T op2)3546 T Simulator::FPMul(T op1, T op2) { 3547 // NaNs should be handled elsewhere. 3548 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3549 3550 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3551 // inf * 0.0 returns the default NaN. 3552 FPProcessException(); 3553 return FPDefaultNaN<T>(); 3554 } else { 3555 // Other cases should be handled by standard arithmetic. 3556 return op1 * op2; 3557 } 3558 } 3559 3560 3561 template<typename T> FPMulx(T op1,T op2)3562 T Simulator::FPMulx(T op1, T op2) { 3563 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3564 // inf * 0.0 returns +/-2.0. 
3565 T two = 2.0; 3566 return copysign(1.0, op1) * copysign(1.0, op2) * two; 3567 } 3568 return FPMul(op1, op2); 3569 } 3570 3571 3572 template<typename T> FPMulAdd(T a,T op1,T op2)3573 T Simulator::FPMulAdd(T a, T op1, T op2) { 3574 T result = FPProcessNaNs3(a, op1, op2); 3575 3576 T sign_a = copysign(1.0, a); 3577 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); 3578 bool isinf_prod = std::isinf(op1) || std::isinf(op2); 3579 bool operation_generates_nan = 3580 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 3581 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf 3582 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf 3583 3584 if (std::isnan(result)) { 3585 // Generated NaNs override quiet NaNs propagated from a. 3586 if (operation_generates_nan && IsQuietNaN(a)) { 3587 FPProcessException(); 3588 return FPDefaultNaN<T>(); 3589 } else { 3590 return result; 3591 } 3592 } 3593 3594 // If the operation would produce a NaN, return the default NaN. 3595 if (operation_generates_nan) { 3596 FPProcessException(); 3597 return FPDefaultNaN<T>(); 3598 } 3599 3600 // Work around broken fma implementations for exact zero results: The sign of 3601 // exact 0.0 results is positive unless both a and op1 * op2 are negative. 3602 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { 3603 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; 3604 } 3605 3606 result = FusedMultiplyAdd(op1, op2, a); 3607 VIXL_ASSERT(!std::isnan(result)); 3608 3609 // Work around broken fma implementations for rounded zero results: If a is 3610 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. 3611 if ((a == 0.0) && (result == 0.0)) { 3612 return copysign(0.0, sign_prod); 3613 } 3614 3615 return result; 3616 } 3617 3618 3619 template <typename T> FPDiv(T op1,T op2)3620 T Simulator::FPDiv(T op1, T op2) { 3621 // NaNs should be handled elsewhere. 
3622 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3623 3624 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { 3625 // inf / inf and 0.0 / 0.0 return the default NaN. 3626 FPProcessException(); 3627 return FPDefaultNaN<T>(); 3628 } else { 3629 if (op2 == 0.0) FPProcessException(); 3630 3631 // Other cases should be handled by standard arithmetic. 3632 return op1 / op2; 3633 } 3634 } 3635 3636 3637 template <typename T> FPSqrt(T op)3638 T Simulator::FPSqrt(T op) { 3639 if (std::isnan(op)) { 3640 return FPProcessNaN(op); 3641 } else if (op < 0.0) { 3642 FPProcessException(); 3643 return FPDefaultNaN<T>(); 3644 } else { 3645 return sqrt(op); 3646 } 3647 } 3648 3649 3650 template <typename T> FPMax(T a,T b)3651 T Simulator::FPMax(T a, T b) { 3652 T result = FPProcessNaNs(a, b); 3653 if (std::isnan(result)) return result; 3654 3655 if ((a == 0.0) && (b == 0.0) && 3656 (copysign(1.0, a) != copysign(1.0, b))) { 3657 // a and b are zero, and the sign differs: return +0.0. 3658 return 0.0; 3659 } else { 3660 return (a > b) ? a : b; 3661 } 3662 } 3663 3664 3665 template <typename T> FPMaxNM(T a,T b)3666 T Simulator::FPMaxNM(T a, T b) { 3667 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3668 a = kFP64NegativeInfinity; 3669 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3670 b = kFP64NegativeInfinity; 3671 } 3672 3673 T result = FPProcessNaNs(a, b); 3674 return std::isnan(result) ? result : FPMax(a, b); 3675 } 3676 3677 3678 template <typename T> FPMin(T a,T b)3679 T Simulator::FPMin(T a, T b) { 3680 T result = FPProcessNaNs(a, b); 3681 if (std::isnan(result)) return result; 3682 3683 if ((a == 0.0) && (b == 0.0) && 3684 (copysign(1.0, a) != copysign(1.0, b))) { 3685 // a and b are zero, and the sign differs: return -0.0. 3686 return -0.0; 3687 } else { 3688 return (a < b) ? 
a : b; 3689 } 3690 } 3691 3692 3693 template <typename T> FPMinNM(T a,T b)3694 T Simulator::FPMinNM(T a, T b) { 3695 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3696 a = kFP64PositiveInfinity; 3697 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3698 b = kFP64PositiveInfinity; 3699 } 3700 3701 T result = FPProcessNaNs(a, b); 3702 return std::isnan(result) ? result : FPMin(a, b); 3703 } 3704 3705 3706 template <typename T> FPRecipStepFused(T op1,T op2)3707 T Simulator::FPRecipStepFused(T op1, T op2) { 3708 const T two = 2.0; 3709 if ((std::isinf(op1) && (op2 == 0.0)) 3710 || ((op1 == 0.0) && (std::isinf(op2)))) { 3711 return two; 3712 } else if (std::isinf(op1) || std::isinf(op2)) { 3713 // Return +inf if signs match, otherwise -inf. 3714 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3715 : kFP64NegativeInfinity; 3716 } else { 3717 return FusedMultiplyAdd(op1, op2, two); 3718 } 3719 } 3720 3721 3722 template <typename T> FPRSqrtStepFused(T op1,T op2)3723 T Simulator::FPRSqrtStepFused(T op1, T op2) { 3724 const T one_point_five = 1.5; 3725 const T two = 2.0; 3726 3727 if ((std::isinf(op1) && (op2 == 0.0)) 3728 || ((op1 == 0.0) && (std::isinf(op2)))) { 3729 return one_point_five; 3730 } else if (std::isinf(op1) || std::isinf(op2)) { 3731 // Return +inf if signs match, otherwise -inf. 3732 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3733 : kFP64NegativeInfinity; 3734 } else { 3735 // The multiply-add-halve operation must be fully fused, so avoid interim 3736 // rounding by checking which operand can be losslessly divided by two 3737 // before doing the multiply-add. 3738 if (std::isnormal(op1 / two)) { 3739 return FusedMultiplyAdd(op1 / two, op2, one_point_five); 3740 } else if (std::isnormal(op2 / two)) { 3741 return FusedMultiplyAdd(op1, op2 / two, one_point_five); 3742 } else { 3743 // Neither operand is normal after halving: the result is dominated by 3744 // the addition term, so just return that. 
3745 return one_point_five; 3746 } 3747 } 3748 } 3749 3750 FPRoundInt(double value,FPRounding round_mode)3751 double Simulator::FPRoundInt(double value, FPRounding round_mode) { 3752 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 3753 (value == kFP64NegativeInfinity)) { 3754 return value; 3755 } else if (std::isnan(value)) { 3756 return FPProcessNaN(value); 3757 } 3758 3759 double int_result = std::floor(value); 3760 double error = value - int_result; 3761 switch (round_mode) { 3762 case FPTieAway: { 3763 // Take care of correctly handling the range ]-0.5, -0.0], which must 3764 // yield -0.0. 3765 if ((-0.5 < value) && (value < 0.0)) { 3766 int_result = -0.0; 3767 3768 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { 3769 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3770 // result is positive, round up. 3771 int_result++; 3772 } 3773 break; 3774 } 3775 case FPTieEven: { 3776 // Take care of correctly handling the range [-0.5, -0.0], which must 3777 // yield -0.0. 3778 if ((-0.5 <= value) && (value < 0.0)) { 3779 int_result = -0.0; 3780 3781 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3782 // result is odd, round up. 3783 } else if ((error > 0.5) || 3784 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { 3785 int_result++; 3786 } 3787 break; 3788 } 3789 case FPZero: { 3790 // If value>0 then we take floor(value) 3791 // otherwise, ceil(value). 3792 if (value < 0) { 3793 int_result = ceil(value); 3794 } 3795 break; 3796 } 3797 case FPNegativeInfinity: { 3798 // We always use floor(value). 3799 break; 3800 } 3801 case FPPositiveInfinity: { 3802 // Take care of correctly handling the range ]-1.0, -0.0], which must 3803 // yield -0.0. 3804 if ((-1.0 < value) && (value < 0.0)) { 3805 int_result = -0.0; 3806 3807 // If the error is non-zero, round up. 
3808 } else if (error > 0.0) { 3809 int_result++; 3810 } 3811 break; 3812 } 3813 default: VIXL_UNIMPLEMENTED(); 3814 } 3815 return int_result; 3816 } 3817 3818 FPToInt32(double value,FPRounding rmode)3819 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 3820 value = FPRoundInt(value, rmode); 3821 if (value >= kWMaxInt) { 3822 return kWMaxInt; 3823 } else if (value < kWMinInt) { 3824 return kWMinInt; 3825 } 3826 return std::isnan(value) ? 0 : static_cast<int32_t>(value); 3827 } 3828 3829 FPToInt64(double value,FPRounding rmode)3830 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 3831 value = FPRoundInt(value, rmode); 3832 if (value >= kXMaxInt) { 3833 return kXMaxInt; 3834 } else if (value < kXMinInt) { 3835 return kXMinInt; 3836 } 3837 return std::isnan(value) ? 0 : static_cast<int64_t>(value); 3838 } 3839 3840 FPToUInt32(double value,FPRounding rmode)3841 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 3842 value = FPRoundInt(value, rmode); 3843 if (value >= kWMaxUInt) { 3844 return kWMaxUInt; 3845 } else if (value < 0.0) { 3846 return 0; 3847 } 3848 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); 3849 } 3850 3851 FPToUInt64(double value,FPRounding rmode)3852 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 3853 value = FPRoundInt(value, rmode); 3854 if (value >= kXMaxUInt) { 3855 return kXMaxUInt; 3856 } else if (value < 0.0) { 3857 return 0; 3858 } 3859 return std::isnan(value) ? 
0 : static_cast<uint64_t>(value); 3860 } 3861 3862 3863 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ 3864 template <typename T> \ 3865 LogicVRegister Simulator::FN(VectorFormat vform, \ 3866 LogicVRegister dst, \ 3867 const LogicVRegister& src1, \ 3868 const LogicVRegister& src2) { \ 3869 dst.ClearForWrite(vform); \ 3870 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ 3871 T op1 = src1.Float<T>(i); \ 3872 T op2 = src2.Float<T>(i); \ 3873 T result; \ 3874 if (PROCNAN) { \ 3875 result = FPProcessNaNs(op1, op2); \ 3876 if (!std::isnan(result)) { \ 3877 result = OP(op1, op2); \ 3878 } \ 3879 } else { \ 3880 result = OP(op1, op2); \ 3881 } \ 3882 dst.SetFloat(i, result); \ 3883 } \ 3884 return dst; \ 3885 } \ 3886 \ 3887 LogicVRegister Simulator::FN(VectorFormat vform, \ 3888 LogicVRegister dst, \ 3889 const LogicVRegister& src1, \ 3890 const LogicVRegister& src2) { \ 3891 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ 3892 FN<float>(vform, dst, src1, src2); \ 3893 } else { \ 3894 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ 3895 FN<double>(vform, dst, src1, src2); \ 3896 } \ 3897 return dst; \ 3898 } NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)3899 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) 3900 #undef DEFINE_NEON_FP_VECTOR_OP 3901 3902 3903 LogicVRegister Simulator::fnmul(VectorFormat vform, 3904 LogicVRegister dst, 3905 const LogicVRegister& src1, 3906 const LogicVRegister& src2) { 3907 SimVRegister temp; 3908 LogicVRegister product = fmul(vform, temp, src1, src2); 3909 return fneg(vform, dst, product); 3910 } 3911 3912 3913 template <typename T> frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3914 LogicVRegister Simulator::frecps(VectorFormat vform, 3915 LogicVRegister dst, 3916 const LogicVRegister& src1, 3917 const LogicVRegister& src2) { 3918 dst.ClearForWrite(vform); 3919 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3920 T op1 = -src1.Float<T>(i); 3921 T op2 = 
src2.Float<T>(i); 3922 T result = FPProcessNaNs(op1, op2); 3923 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2)); 3924 } 3925 return dst; 3926 } 3927 3928 frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3929 LogicVRegister Simulator::frecps(VectorFormat vform, 3930 LogicVRegister dst, 3931 const LogicVRegister& src1, 3932 const LogicVRegister& src2) { 3933 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3934 frecps<float>(vform, dst, src1, src2); 3935 } else { 3936 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3937 frecps<double>(vform, dst, src1, src2); 3938 } 3939 return dst; 3940 } 3941 3942 3943 template <typename T> frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3944 LogicVRegister Simulator::frsqrts(VectorFormat vform, 3945 LogicVRegister dst, 3946 const LogicVRegister& src1, 3947 const LogicVRegister& src2) { 3948 dst.ClearForWrite(vform); 3949 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3950 T op1 = -src1.Float<T>(i); 3951 T op2 = src2.Float<T>(i); 3952 T result = FPProcessNaNs(op1, op2); 3953 dst.SetFloat(i, std::isnan(result) ? 
result : FPRSqrtStepFused(op1, op2)); 3954 } 3955 return dst; 3956 } 3957 3958 frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3959 LogicVRegister Simulator::frsqrts(VectorFormat vform, 3960 LogicVRegister dst, 3961 const LogicVRegister& src1, 3962 const LogicVRegister& src2) { 3963 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3964 frsqrts<float>(vform, dst, src1, src2); 3965 } else { 3966 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3967 frsqrts<double>(vform, dst, src1, src2); 3968 } 3969 return dst; 3970 } 3971 3972 3973 template <typename T> fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3974 LogicVRegister Simulator::fcmp(VectorFormat vform, 3975 LogicVRegister dst, 3976 const LogicVRegister& src1, 3977 const LogicVRegister& src2, 3978 Condition cond) { 3979 dst.ClearForWrite(vform); 3980 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3981 bool result = false; 3982 T op1 = src1.Float<T>(i); 3983 T op2 = src2.Float<T>(i); 3984 T nan_result = FPProcessNaNs(op1, op2); 3985 if (!std::isnan(nan_result)) { 3986 switch (cond) { 3987 case eq: result = (op1 == op2); break; 3988 case ge: result = (op1 >= op2); break; 3989 case gt: result = (op1 > op2) ; break; 3990 case le: result = (op1 <= op2); break; 3991 case lt: result = (op1 < op2) ; break; 3992 default: VIXL_UNREACHABLE(); break; 3993 } 3994 } 3995 dst.SetUint(vform, i, result ? 
MaxUintFromFormat(vform) : 0); 3996 } 3997 return dst; 3998 } 3999 4000 fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4001 LogicVRegister Simulator::fcmp(VectorFormat vform, 4002 LogicVRegister dst, 4003 const LogicVRegister& src1, 4004 const LogicVRegister& src2, 4005 Condition cond) { 4006 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4007 fcmp<float>(vform, dst, src1, src2, cond); 4008 } else { 4009 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4010 fcmp<double>(vform, dst, src1, src2, cond); 4011 } 4012 return dst; 4013 } 4014 4015 fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)4016 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, 4017 LogicVRegister dst, 4018 const LogicVRegister& src, 4019 Condition cond) { 4020 SimVRegister temp; 4021 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4022 LogicVRegister zero_reg = dup_immediate(vform, temp, float_to_rawbits(0.0)); 4023 fcmp<float>(vform, dst, src, zero_reg, cond); 4024 } else { 4025 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4026 LogicVRegister zero_reg = dup_immediate(vform, temp, 4027 double_to_rawbits(0.0)); 4028 fcmp<double>(vform, dst, src, zero_reg, cond); 4029 } 4030 return dst; 4031 } 4032 4033 fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4034 LogicVRegister Simulator::fabscmp(VectorFormat vform, 4035 LogicVRegister dst, 4036 const LogicVRegister& src1, 4037 const LogicVRegister& src2, 4038 Condition cond) { 4039 SimVRegister temp1, temp2; 4040 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4041 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 4042 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 4043 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 4044 } else { 4045 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4046 LogicVRegister 
abs_src1 = fabs_<double>(vform, temp1, src1); 4047 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); 4048 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 4049 } 4050 return dst; 4051 } 4052 4053 4054 template <typename T> fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4055 LogicVRegister Simulator::fmla(VectorFormat vform, 4056 LogicVRegister dst, 4057 const LogicVRegister& src1, 4058 const LogicVRegister& src2) { 4059 dst.ClearForWrite(vform); 4060 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4061 T op1 = src1.Float<T>(i); 4062 T op2 = src2.Float<T>(i); 4063 T acc = dst.Float<T>(i); 4064 T result = FPMulAdd(acc, op1, op2); 4065 dst.SetFloat(i, result); 4066 } 4067 return dst; 4068 } 4069 4070 fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4071 LogicVRegister Simulator::fmla(VectorFormat vform, 4072 LogicVRegister dst, 4073 const LogicVRegister& src1, 4074 const LogicVRegister& src2) { 4075 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4076 fmla<float>(vform, dst, src1, src2); 4077 } else { 4078 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4079 fmla<double>(vform, dst, src1, src2); 4080 } 4081 return dst; 4082 } 4083 4084 4085 template <typename T> fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4086 LogicVRegister Simulator::fmls(VectorFormat vform, 4087 LogicVRegister dst, 4088 const LogicVRegister& src1, 4089 const LogicVRegister& src2) { 4090 dst.ClearForWrite(vform); 4091 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4092 T op1 = -src1.Float<T>(i); 4093 T op2 = src2.Float<T>(i); 4094 T acc = dst.Float<T>(i); 4095 T result = FPMulAdd(acc, op1, op2); 4096 dst.SetFloat(i, result); 4097 } 4098 return dst; 4099 } 4100 4101 fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4102 LogicVRegister 
Simulator::fmls(VectorFormat vform, 4103 LogicVRegister dst, 4104 const LogicVRegister& src1, 4105 const LogicVRegister& src2) { 4106 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4107 fmls<float>(vform, dst, src1, src2); 4108 } else { 4109 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4110 fmls<double>(vform, dst, src1, src2); 4111 } 4112 return dst; 4113 } 4114 4115 4116 template <typename T> fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4117 LogicVRegister Simulator::fneg(VectorFormat vform, 4118 LogicVRegister dst, 4119 const LogicVRegister& src) { 4120 dst.ClearForWrite(vform); 4121 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4122 T op = src.Float<T>(i); 4123 op = -op; 4124 dst.SetFloat(i, op); 4125 } 4126 return dst; 4127 } 4128 4129 fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4130 LogicVRegister Simulator::fneg(VectorFormat vform, 4131 LogicVRegister dst, 4132 const LogicVRegister& src) { 4133 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4134 fneg<float>(vform, dst, src); 4135 } else { 4136 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4137 fneg<double>(vform, dst, src); 4138 } 4139 return dst; 4140 } 4141 4142 4143 template <typename T> fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4144 LogicVRegister Simulator::fabs_(VectorFormat vform, 4145 LogicVRegister dst, 4146 const LogicVRegister& src) { 4147 dst.ClearForWrite(vform); 4148 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4149 T op = src.Float<T>(i); 4150 if (copysign(1.0, op) < 0.0) { 4151 op = -op; 4152 } 4153 dst.SetFloat(i, op); 4154 } 4155 return dst; 4156 } 4157 4158 fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4159 LogicVRegister Simulator::fabs_(VectorFormat vform, 4160 LogicVRegister dst, 4161 const LogicVRegister& src) { 4162 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4163 fabs_<float>(vform, dst, src); 4164 } else { 4165 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4166 fabs_<double>(vform, dst, src); 4167 } 4168 return dst; 4169 } 4170 4171 fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4172 LogicVRegister Simulator::fabd(VectorFormat vform, 4173 LogicVRegister dst, 4174 const LogicVRegister& src1, 4175 const LogicVRegister& src2) { 4176 SimVRegister temp; 4177 fsub(vform, temp, src1, src2); 4178 fabs_(vform, dst, temp); 4179 return dst; 4180 } 4181 4182 fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4183 LogicVRegister Simulator::fsqrt(VectorFormat vform, 4184 LogicVRegister dst, 4185 const LogicVRegister& src) { 4186 dst.ClearForWrite(vform); 4187 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4188 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4189 float result = FPSqrt(src.Float<float>(i)); 4190 dst.SetFloat(i, result); 4191 } 4192 } else { 4193 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4194 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4195 double result = FPSqrt(src.Float<double>(i)); 4196 dst.SetFloat(i, result); 4197 } 4198 } 4199 return dst; 4200 } 4201 4202 4203 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ 4204 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4205 LogicVRegister dst, \ 4206 const LogicVRegister& src1, \ 4207 const LogicVRegister& src2) { \ 4208 SimVRegister temp1, temp2; \ 4209 uzp1(vform, temp1, src1, src2); \ 4210 uzp2(vform, temp2, src1, src2); \ 4211 FN(vform, dst, temp1, temp2); \ 4212 return dst; \ 4213 } \ 4214 \ 4215 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4216 LogicVRegister dst, \ 4217 const LogicVRegister& src) { \ 4218 if (vform == kFormatS) { \ 4219 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ 4220 dst.SetFloat(0, result); \ 4221 } else { \ 4222 VIXL_ASSERT(vform == kFormatD); \ 4223 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ 4224 dst.SetFloat(0, result); \ 
4225 } \ 4226 dst.ClearForWrite(vform); \ 4227 return dst; \ 4228 } NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)4229 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) 4230 #undef DEFINE_NEON_FP_PAIR_OP 4231 4232 4233 LogicVRegister Simulator::fminmaxv(VectorFormat vform, 4234 LogicVRegister dst, 4235 const LogicVRegister& src, 4236 FPMinMaxOp Op) { 4237 VIXL_ASSERT(vform == kFormat4S); 4238 USE(vform); 4239 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); 4240 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); 4241 float result = (this->*Op)(result1, result2); 4242 dst.ClearForWrite(kFormatS); 4243 dst.SetFloat<float>(0, result); 4244 return dst; 4245 } 4246 4247 fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4248 LogicVRegister Simulator::fmaxv(VectorFormat vform, 4249 LogicVRegister dst, 4250 const LogicVRegister& src) { 4251 return fminmaxv(vform, dst, src, &Simulator::FPMax); 4252 } 4253 4254 fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4255 LogicVRegister Simulator::fminv(VectorFormat vform, 4256 LogicVRegister dst, 4257 const LogicVRegister& src) { 4258 return fminmaxv(vform, dst, src, &Simulator::FPMin); 4259 } 4260 4261 fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4262 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, 4263 LogicVRegister dst, 4264 const LogicVRegister& src) { 4265 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM); 4266 } 4267 4268 fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4269 LogicVRegister Simulator::fminnmv(VectorFormat vform, 4270 LogicVRegister dst, 4271 const LogicVRegister& src) { 4272 return fminmaxv(vform, dst, src, &Simulator::FPMinNM); 4273 } 4274 4275 fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4276 LogicVRegister Simulator::fmul(VectorFormat vform, 4277 LogicVRegister dst, 4278 const LogicVRegister& src1, 4279 
const LogicVRegister& src2, 4280 int index) { 4281 dst.ClearForWrite(vform); 4282 SimVRegister temp; 4283 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4284 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4285 fmul<float>(vform, dst, src1, index_reg); 4286 4287 } else { 4288 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4289 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4290 fmul<double>(vform, dst, src1, index_reg); 4291 } 4292 return dst; 4293 } 4294 4295 fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4296 LogicVRegister Simulator::fmla(VectorFormat vform, 4297 LogicVRegister dst, 4298 const LogicVRegister& src1, 4299 const LogicVRegister& src2, 4300 int index) { 4301 dst.ClearForWrite(vform); 4302 SimVRegister temp; 4303 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4304 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4305 fmla<float>(vform, dst, src1, index_reg); 4306 4307 } else { 4308 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4309 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4310 fmla<double>(vform, dst, src1, index_reg); 4311 } 4312 return dst; 4313 } 4314 4315 fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4316 LogicVRegister Simulator::fmls(VectorFormat vform, 4317 LogicVRegister dst, 4318 const LogicVRegister& src1, 4319 const LogicVRegister& src2, 4320 int index) { 4321 dst.ClearForWrite(vform); 4322 SimVRegister temp; 4323 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4324 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4325 fmls<float>(vform, dst, src1, index_reg); 4326 4327 } else { 4328 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4329 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4330 fmls<double>(vform, dst, src1, index_reg); 
4331 } 4332 return dst; 4333 } 4334 4335 fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4336 LogicVRegister Simulator::fmulx(VectorFormat vform, 4337 LogicVRegister dst, 4338 const LogicVRegister& src1, 4339 const LogicVRegister& src2, 4340 int index) { 4341 dst.ClearForWrite(vform); 4342 SimVRegister temp; 4343 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4344 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4345 fmulx<float>(vform, dst, src1, index_reg); 4346 4347 } else { 4348 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4349 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4350 fmulx<double>(vform, dst, src1, index_reg); 4351 } 4352 return dst; 4353 } 4354 4355 frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)4356 LogicVRegister Simulator::frint(VectorFormat vform, 4357 LogicVRegister dst, 4358 const LogicVRegister& src, 4359 FPRounding rounding_mode, 4360 bool inexact_exception) { 4361 dst.ClearForWrite(vform); 4362 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4363 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4364 float input = src.Float<float>(i); 4365 float rounded = FPRoundInt(input, rounding_mode); 4366 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4367 FPProcessException(); 4368 } 4369 dst.SetFloat<float>(i, rounded); 4370 } 4371 } else { 4372 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4373 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4374 double input = src.Float<double>(i); 4375 double rounded = FPRoundInt(input, rounding_mode); 4376 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4377 FPProcessException(); 4378 } 4379 dst.SetFloat<double>(i, rounded); 4380 } 4381 } 4382 return dst; 4383 } 4384 4385 fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & 
src,FPRounding rounding_mode,int fbits)4386 LogicVRegister Simulator::fcvts(VectorFormat vform, 4387 LogicVRegister dst, 4388 const LogicVRegister& src, 4389 FPRounding rounding_mode, 4390 int fbits) { 4391 dst.ClearForWrite(vform); 4392 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4393 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4394 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4395 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); 4396 } 4397 } else { 4398 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4399 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4400 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4401 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); 4402 } 4403 } 4404 return dst; 4405 } 4406 4407 fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4408 LogicVRegister Simulator::fcvtu(VectorFormat vform, 4409 LogicVRegister dst, 4410 const LogicVRegister& src, 4411 FPRounding rounding_mode, 4412 int fbits) { 4413 dst.ClearForWrite(vform); 4414 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4415 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4416 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4417 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); 4418 } 4419 } else { 4420 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4421 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4422 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4423 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); 4424 } 4425 } 4426 return dst; 4427 } 4428 4429 fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4430 LogicVRegister Simulator::fcvtl(VectorFormat vform, 4431 LogicVRegister dst, 4432 const LogicVRegister& src) { 4433 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4434 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4435 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); 
4436 } 4437 } else { 4438 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4439 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4440 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); 4441 } 4442 } 4443 return dst; 4444 } 4445 4446 fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4447 LogicVRegister Simulator::fcvtl2(VectorFormat vform, 4448 LogicVRegister dst, 4449 const LogicVRegister& src) { 4450 int lane_count = LaneCountFromFormat(vform); 4451 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4452 for (int i = 0; i < lane_count; i++) { 4453 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); 4454 } 4455 } else { 4456 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4457 for (int i = 0; i < lane_count; i++) { 4458 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); 4459 } 4460 } 4461 return dst; 4462 } 4463 4464 fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4465 LogicVRegister Simulator::fcvtn(VectorFormat vform, 4466 LogicVRegister dst, 4467 const LogicVRegister& src) { 4468 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4469 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4470 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); 4471 } 4472 } else { 4473 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4474 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4475 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); 4476 } 4477 } 4478 return dst; 4479 } 4480 4481 fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4482 LogicVRegister Simulator::fcvtn2(VectorFormat vform, 4483 LogicVRegister dst, 4484 const LogicVRegister& src) { 4485 int lane_count = LaneCountFromFormat(vform) / 2; 4486 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4487 for (int i = lane_count - 1; i >= 0; i--) { 4488 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); 4489 } 4490 } else { 4491 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4492 for (int i = lane_count - 1; i >= 0; i--) { 4493 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); 4494 } 4495 } 4496 return dst; 4497 } 4498 4499 fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4500 LogicVRegister Simulator::fcvtxn(VectorFormat vform, 4501 LogicVRegister dst, 4502 const LogicVRegister& src) { 4503 dst.ClearForWrite(vform); 4504 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4505 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4506 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4507 } 4508 return dst; 4509 } 4510 4511 fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4512 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, 4513 LogicVRegister dst, 4514 const LogicVRegister& src) { 4515 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4516 int lane_count = LaneCountFromFormat(vform) / 2; 4517 for (int i = lane_count - 1; i >= 0; i--) { 4518 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4519 } 4520 return dst; 4521 } 4522 4523 4524 // Based on reference C function recip_sqrt_estimate from ARM ARM. 
recip_sqrt_estimate(double a)4525 double Simulator::recip_sqrt_estimate(double a) { 4526 int q0, q1, s; 4527 double r; 4528 if (a < 0.5) { 4529 q0 = static_cast<int>(a * 512.0); 4530 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); 4531 } else { 4532 q1 = static_cast<int>(a * 256.0); 4533 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); 4534 } 4535 s = static_cast<int>(256.0 * r + 0.5); 4536 return static_cast<double>(s) / 256.0; 4537 } 4538 4539 Bits(uint64_t val,int start_bit,int end_bit)4540 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { 4541 return unsigned_bitextract_64(start_bit, end_bit, val); 4542 } 4543 4544 4545 template <typename T> FPRecipSqrtEstimate(T op)4546 T Simulator::FPRecipSqrtEstimate(T op) { 4547 if (std::isnan(op)) { 4548 return FPProcessNaN(op); 4549 } else if (op == 0.0) { 4550 if (copysign(1.0, op) < 0.0) { 4551 return kFP64NegativeInfinity; 4552 } else { 4553 return kFP64PositiveInfinity; 4554 } 4555 } else if (copysign(1.0, op) < 0.0) { 4556 FPProcessException(); 4557 return FPDefaultNaN<T>(); 4558 } else if (std::isinf(op)) { 4559 return 0.0; 4560 } else { 4561 uint64_t fraction; 4562 int exp, result_exp; 4563 4564 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4565 exp = float_exp(op); 4566 fraction = float_mantissa(op); 4567 fraction <<= 29; 4568 } else { 4569 exp = double_exp(op); 4570 fraction = double_mantissa(op); 4571 } 4572 4573 if (exp == 0) { 4574 while (Bits(fraction, 51, 51) == 0) { 4575 fraction = Bits(fraction, 50, 0) << 1; 4576 exp -= 1; 4577 } 4578 fraction = Bits(fraction, 50, 0) << 1; 4579 } 4580 4581 double scaled; 4582 if (Bits(exp, 0, 0) == 0) { 4583 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); 4584 } else { 4585 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44); 4586 } 4587 4588 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4589 result_exp = (380 - exp) / 2; 4590 } else { 4591 result_exp = (3068 - exp) / 2; 4592 } 4593 
4594 uint64_t estimate = double_to_rawbits(recip_sqrt_estimate(scaled)); 4595 4596 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4597 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4598 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); 4599 return float_pack(0, exp_bits, est_bits); 4600 } else { 4601 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); 4602 } 4603 } 4604 } 4605 4606 frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4607 LogicVRegister Simulator::frsqrte(VectorFormat vform, 4608 LogicVRegister dst, 4609 const LogicVRegister& src) { 4610 dst.ClearForWrite(vform); 4611 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4612 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4613 float input = src.Float<float>(i); 4614 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); 4615 } 4616 } else { 4617 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4618 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4619 double input = src.Float<double>(i); 4620 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); 4621 } 4622 } 4623 return dst; 4624 } 4625 4626 template <typename T> FPRecipEstimate(T op,FPRounding rounding)4627 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 4628 uint32_t sign; 4629 4630 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4631 sign = float_sign(op); 4632 } else { 4633 sign = double_sign(op); 4634 } 4635 4636 if (std::isnan(op)) { 4637 return FPProcessNaN(op); 4638 } else if (std::isinf(op)) { 4639 return (sign == 1) ? -0.0 : 0.0; 4640 } else if (op == 0.0) { 4641 FPProcessException(); // FPExc_DivideByZero exception. 4642 return (sign == 1) ? 
kFP64NegativeInfinity : kFP64PositiveInfinity; 4643 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof) 4644 (std::fabs(op) < std::pow(2.0, -128.0))) || 4645 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof) 4646 (std::fabs(op) < std::pow(2.0, -1024.0)))) { 4647 bool overflow_to_inf = false; 4648 switch (rounding) { 4649 case FPTieEven: overflow_to_inf = true; break; 4650 case FPPositiveInfinity: overflow_to_inf = (sign == 0); break; 4651 case FPNegativeInfinity: overflow_to_inf = (sign == 1); break; 4652 case FPZero: overflow_to_inf = false; break; 4653 default: break; 4654 } 4655 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 4656 if (overflow_to_inf) { 4657 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4658 } else { 4659 // Return FPMaxNormal(sign). 4660 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4661 return float_pack(sign, 0xfe, 0x07fffff); 4662 } else { 4663 return double_pack(sign, 0x7fe, 0x0fffffffffffffl); 4664 } 4665 } 4666 } else { 4667 uint64_t fraction; 4668 int exp, result_exp; 4669 uint32_t sign; 4670 4671 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4672 sign = float_sign(op); 4673 exp = float_exp(op); 4674 fraction = float_mantissa(op); 4675 fraction <<= 29; 4676 } else { 4677 sign = double_sign(op); 4678 exp = double_exp(op); 4679 fraction = double_mantissa(op); 4680 } 4681 4682 if (exp == 0) { 4683 if (Bits(fraction, 51, 51) == 0) { 4684 exp -= 1; 4685 fraction = Bits(fraction, 49, 0) << 2; 4686 } else { 4687 fraction = Bits(fraction, 50, 0) << 1; 4688 } 4689 } 4690 4691 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); 4692 4693 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4694 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. 4695 } else { 4696 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 
4697 } 4698 4699 double estimate = recip_estimate(scaled); 4700 4701 fraction = double_mantissa(estimate); 4702 if (result_exp == 0) { 4703 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); 4704 } else if (result_exp == -1) { 4705 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 4706 result_exp = 0; 4707 } 4708 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4709 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4710 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 4711 return float_pack(sign, exp_bits, frac_bits); 4712 } else { 4713 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 4714 } 4715 } 4716 } 4717 4718 frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4719 LogicVRegister Simulator::frecpe(VectorFormat vform, 4720 LogicVRegister dst, 4721 const LogicVRegister& src, 4722 FPRounding round) { 4723 dst.ClearForWrite(vform); 4724 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4725 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4726 float input = src.Float<float>(i); 4727 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 4728 } 4729 } else { 4730 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4731 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4732 double input = src.Float<double>(i); 4733 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 4734 } 4735 } 4736 return dst; 4737 } 4738 4739 ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4740 LogicVRegister Simulator::ursqrte(VectorFormat vform, 4741 LogicVRegister dst, 4742 const LogicVRegister& src) { 4743 dst.ClearForWrite(vform); 4744 uint64_t operand; 4745 uint32_t result; 4746 double dp_operand, dp_result; 4747 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4748 operand = src.Uint(vform, i); 4749 if (operand <= 0x3FFFFFFF) { 4750 result = 0xFFFFFFFF; 4751 } else { 4752 dp_operand = operand * std::pow(2.0, -32); 
4753 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 4754 result = static_cast<uint32_t>(dp_result); 4755 } 4756 dst.SetUint(vform, i, result); 4757 } 4758 return dst; 4759 } 4760 4761 4762 // Based on reference C function recip_estimate from ARM ARM. recip_estimate(double a)4763 double Simulator::recip_estimate(double a) { 4764 int q, s; 4765 double r; 4766 q = static_cast<int>(a * 512.0); 4767 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 4768 s = static_cast<int>(256.0 * r + 0.5); 4769 return static_cast<double>(s) / 256.0; 4770 } 4771 4772 urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4773 LogicVRegister Simulator::urecpe(VectorFormat vform, 4774 LogicVRegister dst, 4775 const LogicVRegister& src) { 4776 dst.ClearForWrite(vform); 4777 uint64_t operand; 4778 uint32_t result; 4779 double dp_operand, dp_result; 4780 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4781 operand = src.Uint(vform, i); 4782 if (operand <= 0x7FFFFFFF) { 4783 result = 0xFFFFFFFF; 4784 } else { 4785 dp_operand = operand * std::pow(2.0, -32); 4786 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 4787 result = static_cast<uint32_t>(dp_result); 4788 } 4789 dst.SetUint(vform, i, result); 4790 } 4791 return dst; 4792 } 4793 4794 template <typename T> frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4795 LogicVRegister Simulator::frecpx(VectorFormat vform, 4796 LogicVRegister dst, 4797 const LogicVRegister& src) { 4798 dst.ClearForWrite(vform); 4799 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4800 T op = src.Float<T>(i); 4801 T result; 4802 if (std::isnan(op)) { 4803 result = FPProcessNaN(op); 4804 } else { 4805 int exp; 4806 uint32_t sign; 4807 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4808 sign = float_sign(op); 4809 exp = float_exp(op); 4810 exp = (exp == 0) ? 
(0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 4811 result = float_pack(sign, exp, 0); 4812 } else { 4813 sign = double_sign(op); 4814 exp = double_exp(op); 4815 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 4816 result = double_pack(sign, exp, 0); 4817 } 4818 } 4819 dst.SetFloat(i, result); 4820 } 4821 return dst; 4822 } 4823 4824 frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4825 LogicVRegister Simulator::frecpx(VectorFormat vform, 4826 LogicVRegister dst, 4827 const LogicVRegister& src) { 4828 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4829 frecpx<float>(vform, dst, src); 4830 } else { 4831 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4832 frecpx<double>(vform, dst, src); 4833 } 4834 return dst; 4835 } 4836 scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4837 LogicVRegister Simulator::scvtf(VectorFormat vform, 4838 LogicVRegister dst, 4839 const LogicVRegister& src, 4840 int fbits, 4841 FPRounding round) { 4842 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4843 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4844 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 4845 dst.SetFloat<float>(i, result); 4846 } else { 4847 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4848 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 4849 dst.SetFloat<double>(i, result); 4850 } 4851 } 4852 return dst; 4853 } 4854 4855 ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4856 LogicVRegister Simulator::ucvtf(VectorFormat vform, 4857 LogicVRegister dst, 4858 const LogicVRegister& src, 4859 int fbits, 4860 FPRounding round) { 4861 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4862 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4863 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 4864 dst.SetFloat<float>(i, result); 4865 } else 
{ 4866 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4867 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 4868 dst.SetFloat<double>(i, result); 4869 } 4870 } 4871 return dst; 4872 } 4873 4874 4875 } // namespace vixl 4876 4877 #endif // VIXL_INCLUDE_SIMULATOR 4878