1 // Copyright 2013 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_ 6 #define V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_ 7 8 #include <deque> 9 #include <list> 10 #include <map> 11 #include <memory> 12 #include <vector> 13 14 #include "src/base/optional.h" 15 #include "src/codegen/arm64/constants-arm64.h" 16 #include "src/codegen/arm64/instructions-arm64.h" 17 #include "src/codegen/arm64/register-arm64.h" 18 #include "src/codegen/assembler.h" 19 #include "src/codegen/constant-pool.h" 20 #include "src/common/globals.h" 21 #include "src/utils/utils.h" 22 23 // Windows arm64 SDK defines mvn to NEON intrinsic neon_not which will not 24 // be used here. 25 #if defined(V8_OS_WIN) && defined(mvn) 26 #undef mvn 27 #endif 28 29 #if defined(V8_OS_WIN) 30 #include "src/diagnostics/unwinding-info-win64.h" 31 #endif // V8_OS_WIN 32 33 namespace v8 { 34 namespace internal { 35 36 class SafepointTableBuilder; 37 38 // ----------------------------------------------------------------------------- 39 // Immediates. 40 class Immediate { 41 public: 42 template <typename T> 43 inline explicit Immediate( 44 Handle<T> handle, RelocInfo::Mode mode = RelocInfo::FULL_EMBEDDED_OBJECT); 45 46 // This is allowed to be an implicit constructor because Immediate is 47 // a wrapper class that doesn't normally perform any type conversion. 48 template <typename T> 49 inline Immediate(T value); // NOLINT(runtime/explicit) 50 51 template <typename T> 52 inline Immediate(T value, RelocInfo::Mode rmode); 53 value()54 int64_t value() const { return value_; } rmode()55 RelocInfo::Mode rmode() const { return rmode_; } 56 57 private: 58 int64_t value_; 59 RelocInfo::Mode rmode_; 60 }; 61 62 // ----------------------------------------------------------------------------- 63 // Operands. 
64 constexpr int kSmiShift = kSmiTagSize + kSmiShiftSize; 65 constexpr uint64_t kSmiShiftMask = (1ULL << kSmiShift) - 1; 66 67 // Represents an operand in a machine instruction. 68 class Operand { 69 // TODO(all): If necessary, study more in details which methods 70 // TODO(all): should be inlined or not. 71 public: 72 // rm, {<shift> {#<shift_amount>}} 73 // where <shift> is one of {LSL, LSR, ASR, ROR}. 74 // <shift_amount> is uint6_t. 75 // This is allowed to be an implicit constructor because Operand is 76 // a wrapper class that doesn't normally perform any type conversion. 77 inline Operand(Register reg, Shift shift = LSL, 78 unsigned shift_amount = 0); // NOLINT(runtime/explicit) 79 80 // rm, <extend> {#<shift_amount>} 81 // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}. 82 // <shift_amount> is uint2_t. 83 inline Operand(Register reg, Extend extend, unsigned shift_amount = 0); 84 85 static Operand EmbeddedNumber(double number); // Smi or HeapNumber. 86 static Operand EmbeddedStringConstant(const StringConstantBase* str); 87 88 inline bool IsHeapObjectRequest() const; 89 inline HeapObjectRequest heap_object_request() const; 90 inline Immediate immediate_for_heap_object_request() const; 91 92 // Implicit constructor for all int types, ExternalReference, and Smi. 93 template <typename T> 94 inline Operand(T t); // NOLINT(runtime/explicit) 95 96 // Implicit constructor for int types. 97 template <typename T> 98 inline Operand(T t, RelocInfo::Mode rmode); 99 100 inline bool IsImmediate() const; 101 inline bool IsShiftedRegister() const; 102 inline bool IsExtendedRegister() const; 103 inline bool IsZero() const; 104 105 // This returns an LSL shift (<= 4) operand as an equivalent extend operand, 106 // which helps in the encoding of instructions that use the stack pointer. 107 inline Operand ToExtendedRegister() const; 108 109 // Returns new Operand adapted for using with W registers. 
110 inline Operand ToW() const; 111 112 inline Immediate immediate() const; 113 inline int64_t ImmediateValue() const; 114 inline RelocInfo::Mode ImmediateRMode() const; 115 inline Register reg() const; 116 inline Shift shift() const; 117 inline Extend extend() const; 118 inline unsigned shift_amount() const; 119 120 // Relocation information. 121 bool NeedsRelocation(const Assembler* assembler) const; 122 123 private: 124 base::Optional<HeapObjectRequest> heap_object_request_; 125 Immediate immediate_; 126 Register reg_; 127 Shift shift_; 128 Extend extend_; 129 unsigned shift_amount_; 130 }; 131 132 // MemOperand represents a memory operand in a load or store instruction. 133 class MemOperand { 134 public: 135 inline MemOperand(); 136 inline explicit MemOperand(Register base, int64_t offset = 0, 137 AddrMode addrmode = Offset); 138 inline explicit MemOperand(Register base, Register regoffset, 139 Shift shift = LSL, unsigned shift_amount = 0); 140 inline explicit MemOperand(Register base, Register regoffset, Extend extend, 141 unsigned shift_amount = 0); 142 inline explicit MemOperand(Register base, const Operand& offset, 143 AddrMode addrmode = Offset); 144 base()145 const Register& base() const { return base_; } regoffset()146 const Register& regoffset() const { return regoffset_; } offset()147 int64_t offset() const { return offset_; } addrmode()148 AddrMode addrmode() const { return addrmode_; } shift()149 Shift shift() const { return shift_; } extend()150 Extend extend() const { return extend_; } shift_amount()151 unsigned shift_amount() const { return shift_amount_; } 152 inline bool IsImmediateOffset() const; 153 inline bool IsRegisterOffset() const; 154 inline bool IsPreIndex() const; 155 inline bool IsPostIndex() const; 156 157 private: 158 Register base_; 159 Register regoffset_; 160 int64_t offset_; 161 AddrMode addrmode_; 162 Shift shift_; 163 Extend extend_; 164 unsigned shift_amount_; 165 }; 166 167 // 
----------------------------------------------------------------------------- 168 // Assembler. 169 170 class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { 171 public: 172 // Create an assembler. Instructions and relocation information are emitted 173 // into a buffer, with the instructions starting from the beginning and the 174 // relocation information starting from the end of the buffer. See CodeDesc 175 // for a detailed comment on the layout (globals.h). 176 // 177 // If the provided buffer is nullptr, the assembler allocates and grows its 178 // own buffer. Otherwise it takes ownership of the provided buffer. 179 explicit Assembler(const AssemblerOptions&, 180 std::unique_ptr<AssemblerBuffer> = {}); 181 182 ~Assembler() override; 183 184 void AbortedCodeGeneration() override; 185 186 // System functions --------------------------------------------------------- 187 // Start generating code from the beginning of the buffer, discarding any code 188 // and data that has already been emitted into the buffer. 189 // 190 // In order to avoid any accidental transfer of state, Reset DCHECKs that the 191 // constant pool is not blocked. 192 void Reset(); 193 194 // GetCode emits any pending (non-emitted) code and fills the descriptor desc. 195 static constexpr int kNoHandlerTable = 0; 196 static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr; 197 void GetCode(Isolate* isolate, CodeDesc* desc, 198 SafepointTableBuilder* safepoint_table_builder, 199 int handler_table_offset); 200 201 // Convenience wrapper for code without safepoint or handler tables. GetCode(Isolate * isolate,CodeDesc * desc)202 void GetCode(Isolate* isolate, CodeDesc* desc) { 203 GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable); 204 } 205 206 // Insert the smallest number of nop instructions 207 // possible to align the pc offset to a multiple 208 // of m. m must be a power of 2 (>= 4). 
209 void Align(int m); 210 // Insert the smallest number of zero bytes possible to align the pc offset 211 // to a mulitple of m. m must be a power of 2 (>= 2). 212 void DataAlign(int m); 213 // Aligns code to something that's optimal for a jump target for the platform. 214 void CodeTargetAlign(); 215 216 inline void Unreachable(); 217 218 // Label -------------------------------------------------------------------- 219 // Bind a label to the current pc. Note that labels can only be bound once, 220 // and if labels are linked to other instructions, they _must_ be bound 221 // before they go out of scope. 222 void bind(Label* label); 223 224 // RelocInfo and pools ------------------------------------------------------ 225 226 // Record relocation information for current pc_. 227 enum ConstantPoolMode { NEEDS_POOL_ENTRY, NO_POOL_ENTRY }; 228 void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0, 229 ConstantPoolMode constant_pool_mode = NEEDS_POOL_ENTRY); 230 231 // Generate a B immediate instruction with the corresponding relocation info. 232 // 'offset' is the immediate to encode in the B instruction (so it is the 233 // difference between the target and the PC of the instruction, divided by 234 // the instruction size). 235 void near_jump(int offset, RelocInfo::Mode rmode); 236 // Generate a BL immediate instruction with the corresponding relocation info. 237 // As for near_jump, 'offset' is the immediate to encode in the BL 238 // instruction. 239 void near_call(int offset, RelocInfo::Mode rmode); 240 // Generate a BL immediate instruction with the corresponding relocation info 241 // for the input HeapObjectRequest. 242 void near_call(HeapObjectRequest request); 243 244 // Return the address in the constant pool of the code target address used by 245 // the branch/call instruction at pc. 246 inline static Address target_pointer_address_at(Address pc); 247 248 // Read/Modify the code target address in the branch/call instruction at pc. 
249 // The isolate argument is unused (and may be nullptr) when skipping flushing. 250 inline static Address target_address_at(Address pc, Address constant_pool); 251 252 // Read/Modify the code target address in the branch/call instruction at pc. 253 inline static Tagged_t target_compressed_address_at(Address pc, 254 Address constant_pool); 255 inline static void set_target_address_at( 256 Address pc, Address constant_pool, Address target, 257 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); 258 259 inline static void set_target_compressed_address_at( 260 Address pc, Address constant_pool, Tagged_t target, 261 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); 262 263 // Returns the handle for the code object called at 'pc'. 264 // This might need to be temporarily encoded as an offset into code_targets_. 265 inline Handle<Code> code_target_object_handle_at(Address pc); 266 inline EmbeddedObjectIndex embedded_object_index_referenced_from(Address pc); 267 inline void set_embedded_object_index_referenced_from( 268 Address p, EmbeddedObjectIndex index); 269 // Returns the handle for the heap object referenced at 'pc'. 270 inline Handle<HeapObject> target_object_handle_at(Address pc); 271 272 // Returns the target address for a runtime function for the call encoded 273 // at 'pc'. 274 // Runtime entries can be temporarily encoded as the offset between the 275 // runtime function entrypoint and the code range start (stored in the 276 // code_range_start field), in order to be encodable as we generate the code, 277 // before it is moved into the code space. 278 inline Address runtime_entry_at(Address pc); 279 280 // This sets the branch destination. 'location' here can be either the pc of 281 // an immediate branch or the address of an entry in the constant pool. 282 // This is for calls and branches within generated code. 
283 inline static void deserialization_set_special_target_at(Address location, 284 Code code, 285 Address target); 286 287 // Get the size of the special target encoded at 'location'. 288 inline static int deserialization_special_target_size(Address location); 289 290 // This sets the internal reference at the pc. 291 inline static void deserialization_set_target_internal_reference_at( 292 Address pc, Address target, 293 RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); 294 295 // This value is used in the serialization process and must be zero for 296 // ARM64, as the code target is split across multiple instructions and does 297 // not exist separately in the code, so the serializer should not step 298 // forwards in memory after a target is resolved and written. 299 static constexpr int kSpecialTargetSize = 0; 300 301 // Size of the generated code in bytes SizeOfGeneratedCode()302 uint64_t SizeOfGeneratedCode() const { 303 DCHECK((pc_ >= buffer_start_) && (pc_ < (buffer_start_ + buffer_->size()))); 304 return pc_ - buffer_start_; 305 } 306 307 // Return the code size generated from label to the current position. SizeOfCodeGeneratedSince(const Label * label)308 uint64_t SizeOfCodeGeneratedSince(const Label* label) { 309 DCHECK(label->is_bound()); 310 DCHECK_GE(pc_offset(), label->pos()); 311 DCHECK_LT(pc_offset(), buffer_->size()); 312 return pc_offset() - label->pos(); 313 } 314 315 // Return the number of instructions generated from label to the 316 // current position. InstructionsGeneratedSince(const Label * label)317 uint64_t InstructionsGeneratedSince(const Label* label) { 318 return SizeOfCodeGeneratedSince(label) / kInstrSize; 319 } 320 321 static bool IsConstantPoolAt(Instruction* instr); 322 static int ConstantPoolSizeAt(Instruction* instr); 323 // See Assembler::CheckConstPool for more info. 324 void EmitPoolGuard(); 325 326 // Prevent veneer pool emission until EndBlockVeneerPool is called. 
327 // Call to this function can be nested but must be followed by an equal 328 // number of calls to EndBlockConstpool. 329 void StartBlockVeneerPool(); 330 331 // Resume constant pool emission. Need to be called as many time as 332 // StartBlockVeneerPool to have an effect. 333 void EndBlockVeneerPool(); 334 is_veneer_pool_blocked()335 bool is_veneer_pool_blocked() const { 336 return veneer_pool_blocked_nesting_ > 0; 337 } 338 339 // Record a deoptimization reason that can be used by a log or cpu profiler. 340 // Use --trace-deopt to enable. 341 void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position, 342 int id); 343 344 int buffer_space() const; 345 346 // Record the emission of a constant pool. 347 // 348 // The emission of constant and veneer pools depends on the size of the code 349 // generated and the number of RelocInfo recorded. 350 // The Debug mechanism needs to map code offsets between two versions of a 351 // function, compiled with and without debugger support (see for example 352 // Debug::PrepareForBreakPoints()). 353 // Compiling functions with debugger support generates additional code 354 // (DebugCodegen::GenerateSlot()). This may affect the emission of the pools 355 // and cause the version of the code with debugger support to have pools 356 // generated in different places. 357 // Recording the position and size of emitted pools allows to correctly 358 // compute the offset mappings between the different versions of a function in 359 // all situations. 360 // 361 // The parameter indicates the size of the pool (in bytes), including 362 // the marker and branch over the data. 363 void RecordConstPool(int size); 364 365 // Instruction set functions ------------------------------------------------ 366 367 // Branch / Jump instructions. 368 // For branches offsets are scaled, i.e. in instructions not in bytes. 369 // Branch to register. 370 void br(const Register& xn); 371 372 // Branch-link to register. 
373 void blr(const Register& xn); 374 375 // Branch to register with return hint. 376 void ret(const Register& xn = lr); 377 378 // Unconditional branch to label. 379 void b(Label* label); 380 381 // Conditional branch to label. 382 void b(Label* label, Condition cond); 383 384 // Unconditional branch to PC offset. 385 void b(int imm26); 386 387 // Conditional branch to PC offset. 388 void b(int imm19, Condition cond); 389 390 // Branch-link to label / pc offset. 391 void bl(Label* label); 392 void bl(int imm26); 393 394 // Compare and branch to label / pc offset if zero. 395 void cbz(const Register& rt, Label* label); 396 void cbz(const Register& rt, int imm19); 397 398 // Compare and branch to label / pc offset if not zero. 399 void cbnz(const Register& rt, Label* label); 400 void cbnz(const Register& rt, int imm19); 401 402 // Test bit and branch to label / pc offset if zero. 403 void tbz(const Register& rt, unsigned bit_pos, Label* label); 404 void tbz(const Register& rt, unsigned bit_pos, int imm14); 405 406 // Test bit and branch to label / pc offset if not zero. 407 void tbnz(const Register& rt, unsigned bit_pos, Label* label); 408 void tbnz(const Register& rt, unsigned bit_pos, int imm14); 409 410 // Address calculation instructions. 411 // Calculate a PC-relative address. Unlike for branches the offset in adr is 412 // unscaled (i.e. the result can be unaligned). 413 void adr(const Register& rd, Label* label); 414 void adr(const Register& rd, int imm21); 415 416 // Data Processing instructions. 417 // Add. 418 void add(const Register& rd, const Register& rn, const Operand& operand); 419 420 // Add and update status flags. 421 void adds(const Register& rd, const Register& rn, const Operand& operand); 422 423 // Compare negative. 424 void cmn(const Register& rn, const Operand& operand); 425 426 // Subtract. 427 void sub(const Register& rd, const Register& rn, const Operand& operand); 428 429 // Subtract and update status flags. 
430 void subs(const Register& rd, const Register& rn, const Operand& operand); 431 432 // Compare. 433 void cmp(const Register& rn, const Operand& operand); 434 435 // Negate. 436 void neg(const Register& rd, const Operand& operand); 437 438 // Negate and update status flags. 439 void negs(const Register& rd, const Operand& operand); 440 441 // Add with carry bit. 442 void adc(const Register& rd, const Register& rn, const Operand& operand); 443 444 // Add with carry bit and update status flags. 445 void adcs(const Register& rd, const Register& rn, const Operand& operand); 446 447 // Subtract with carry bit. 448 void sbc(const Register& rd, const Register& rn, const Operand& operand); 449 450 // Subtract with carry bit and update status flags. 451 void sbcs(const Register& rd, const Register& rn, const Operand& operand); 452 453 // Negate with carry bit. 454 void ngc(const Register& rd, const Operand& operand); 455 456 // Negate with carry bit and update status flags. 457 void ngcs(const Register& rd, const Operand& operand); 458 459 // Logical instructions. 460 // Bitwise and (A & B). 461 void and_(const Register& rd, const Register& rn, const Operand& operand); 462 463 // Bitwise and (A & B) and update status flags. 464 void ands(const Register& rd, const Register& rn, const Operand& operand); 465 466 // Bit test, and set flags. 467 void tst(const Register& rn, const Operand& operand); 468 469 // Bit clear (A & ~B). 470 void bic(const Register& rd, const Register& rn, const Operand& operand); 471 472 // Bit clear (A & ~B) and update status flags. 473 void bics(const Register& rd, const Register& rn, const Operand& operand); 474 475 // Bitwise and. 476 void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm); 477 478 // Bit clear immediate. 479 void bic(const VRegister& vd, const int imm8, const int left_shift = 0); 480 481 // Bit clear. 482 void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm); 483 484 // Bitwise insert if false. 
485 void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm); 486 487 // Bitwise insert if true. 488 void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm); 489 490 // Bitwise select. 491 void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 492 493 // Polynomial multiply. 494 void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 495 496 // Vector move immediate. 497 void movi(const VRegister& vd, const uint64_t imm, Shift shift = LSL, 498 const int shift_amount = 0); 499 500 // Bitwise not. 501 void mvn(const VRegister& vd, const VRegister& vn); 502 503 // Vector move inverted immediate. 504 void mvni(const VRegister& vd, const int imm8, Shift shift = LSL, 505 const int shift_amount = 0); 506 507 // Signed saturating accumulate of unsigned value. 508 void suqadd(const VRegister& vd, const VRegister& vn); 509 510 // Unsigned saturating accumulate of signed value. 511 void usqadd(const VRegister& vd, const VRegister& vn); 512 513 // Absolute value. 514 void abs(const VRegister& vd, const VRegister& vn); 515 516 // Signed saturating absolute value. 517 void sqabs(const VRegister& vd, const VRegister& vn); 518 519 // Negate. 520 void neg(const VRegister& vd, const VRegister& vn); 521 522 // Signed saturating negate. 523 void sqneg(const VRegister& vd, const VRegister& vn); 524 525 // Bitwise not. 526 void not_(const VRegister& vd, const VRegister& vn); 527 528 // Extract narrow. 529 void xtn(const VRegister& vd, const VRegister& vn); 530 531 // Extract narrow (second part). 532 void xtn2(const VRegister& vd, const VRegister& vn); 533 534 // Signed saturating extract narrow. 535 void sqxtn(const VRegister& vd, const VRegister& vn); 536 537 // Signed saturating extract narrow (second part). 538 void sqxtn2(const VRegister& vd, const VRegister& vn); 539 540 // Unsigned saturating extract narrow. 
541 void uqxtn(const VRegister& vd, const VRegister& vn); 542 543 // Unsigned saturating extract narrow (second part). 544 void uqxtn2(const VRegister& vd, const VRegister& vn); 545 546 // Signed saturating extract unsigned narrow. 547 void sqxtun(const VRegister& vd, const VRegister& vn); 548 549 // Signed saturating extract unsigned narrow (second part). 550 void sqxtun2(const VRegister& vd, const VRegister& vn); 551 552 // Move register to register. 553 void mov(const VRegister& vd, const VRegister& vn); 554 555 // Bitwise not or. 556 void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 557 558 // Bitwise exclusive or. 559 void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm); 560 561 // Bitwise or (A | B). 562 void orr(const Register& rd, const Register& rn, const Operand& operand); 563 564 // Bitwise or. 565 void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm); 566 567 // Bitwise or immediate. 568 void orr(const VRegister& vd, const int imm8, const int left_shift = 0); 569 570 // Bitwise nor (A | ~B). 571 void orn(const Register& rd, const Register& rn, const Operand& operand); 572 573 // Bitwise eor/xor (A ^ B). 574 void eor(const Register& rd, const Register& rn, const Operand& operand); 575 576 // Bitwise enor/xnor (A ^ ~B). 577 void eon(const Register& rd, const Register& rn, const Operand& operand); 578 579 // Logical shift left variable. 580 void lslv(const Register& rd, const Register& rn, const Register& rm); 581 582 // Logical shift right variable. 583 void lsrv(const Register& rd, const Register& rn, const Register& rm); 584 585 // Arithmetic shift right variable. 586 void asrv(const Register& rd, const Register& rn, const Register& rm); 587 588 // Rotate right variable. 589 void rorv(const Register& rd, const Register& rn, const Register& rm); 590 591 // Bitfield instructions. 592 // Bitfield move. 
593 void bfm(const Register& rd, const Register& rn, int immr, int imms); 594 595 // Signed bitfield move. 596 void sbfm(const Register& rd, const Register& rn, int immr, int imms); 597 598 // Unsigned bitfield move. 599 void ubfm(const Register& rd, const Register& rn, int immr, int imms); 600 601 // Bfm aliases. 602 // Bitfield insert. bfi(const Register & rd,const Register & rn,int lsb,int width)603 void bfi(const Register& rd, const Register& rn, int lsb, int width) { 604 DCHECK_GE(width, 1); 605 DCHECK(lsb + width <= rn.SizeInBits()); 606 bfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); 607 } 608 609 // Bitfield extract and insert low. bfxil(const Register & rd,const Register & rn,int lsb,int width)610 void bfxil(const Register& rd, const Register& rn, int lsb, int width) { 611 DCHECK_GE(width, 1); 612 DCHECK(lsb + width <= rn.SizeInBits()); 613 bfm(rd, rn, lsb, lsb + width - 1); 614 } 615 616 // Sbfm aliases. 617 // Arithmetic shift right. asr(const Register & rd,const Register & rn,int shift)618 void asr(const Register& rd, const Register& rn, int shift) { 619 DCHECK(shift < rd.SizeInBits()); 620 sbfm(rd, rn, shift, rd.SizeInBits() - 1); 621 } 622 623 // Signed bitfield insert in zero. sbfiz(const Register & rd,const Register & rn,int lsb,int width)624 void sbfiz(const Register& rd, const Register& rn, int lsb, int width) { 625 DCHECK_GE(width, 1); 626 DCHECK(lsb + width <= rn.SizeInBits()); 627 sbfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); 628 } 629 630 // Signed bitfield extract. sbfx(const Register & rd,const Register & rn,int lsb,int width)631 void sbfx(const Register& rd, const Register& rn, int lsb, int width) { 632 DCHECK_GE(width, 1); 633 DCHECK(lsb + width <= rn.SizeInBits()); 634 sbfm(rd, rn, lsb, lsb + width - 1); 635 } 636 637 // Signed extend byte. 
sxtb(const Register & rd,const Register & rn)638 void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); } 639 640 // Signed extend halfword. sxth(const Register & rd,const Register & rn)641 void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); } 642 643 // Signed extend word. sxtw(const Register & rd,const Register & rn)644 void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); } 645 646 // Ubfm aliases. 647 // Logical shift left. lsl(const Register & rd,const Register & rn,int shift)648 void lsl(const Register& rd, const Register& rn, int shift) { 649 int reg_size = rd.SizeInBits(); 650 DCHECK(shift < reg_size); 651 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1); 652 } 653 654 // Logical shift right. lsr(const Register & rd,const Register & rn,int shift)655 void lsr(const Register& rd, const Register& rn, int shift) { 656 DCHECK(shift < rd.SizeInBits()); 657 ubfm(rd, rn, shift, rd.SizeInBits() - 1); 658 } 659 660 // Unsigned bitfield insert in zero. ubfiz(const Register & rd,const Register & rn,int lsb,int width)661 void ubfiz(const Register& rd, const Register& rn, int lsb, int width) { 662 DCHECK_GE(width, 1); 663 DCHECK(lsb + width <= rn.SizeInBits()); 664 ubfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); 665 } 666 667 // Unsigned bitfield extract. ubfx(const Register & rd,const Register & rn,int lsb,int width)668 void ubfx(const Register& rd, const Register& rn, int lsb, int width) { 669 DCHECK_GE(width, 1); 670 DCHECK(lsb + width <= rn.SizeInBits()); 671 ubfm(rd, rn, lsb, lsb + width - 1); 672 } 673 674 // Unsigned extend byte. uxtb(const Register & rd,const Register & rn)675 void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); } 676 677 // Unsigned extend halfword. uxth(const Register & rd,const Register & rn)678 void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); } 679 680 // Unsigned extend word. 
uxtw(const Register & rd,const Register & rn)681 void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); } 682 683 // Extract. 684 void extr(const Register& rd, const Register& rn, const Register& rm, 685 int lsb); 686 687 // Conditional select: rd = cond ? rn : rm. 688 void csel(const Register& rd, const Register& rn, const Register& rm, 689 Condition cond); 690 691 // Conditional select increment: rd = cond ? rn : rm + 1. 692 void csinc(const Register& rd, const Register& rn, const Register& rm, 693 Condition cond); 694 695 // Conditional select inversion: rd = cond ? rn : ~rm. 696 void csinv(const Register& rd, const Register& rn, const Register& rm, 697 Condition cond); 698 699 // Conditional select negation: rd = cond ? rn : -rm. 700 void csneg(const Register& rd, const Register& rn, const Register& rm, 701 Condition cond); 702 703 // Conditional set: rd = cond ? 1 : 0. 704 void cset(const Register& rd, Condition cond); 705 706 // Conditional set minus: rd = cond ? -1 : 0. 707 void csetm(const Register& rd, Condition cond); 708 709 // Conditional increment: rd = cond ? rn + 1 : rn. 710 void cinc(const Register& rd, const Register& rn, Condition cond); 711 712 // Conditional invert: rd = cond ? ~rn : rn. 713 void cinv(const Register& rd, const Register& rn, Condition cond); 714 715 // Conditional negate: rd = cond ? -rn : rn. 716 void cneg(const Register& rd, const Register& rn, Condition cond); 717 718 // Extr aliases. ror(const Register & rd,const Register & rs,unsigned shift)719 void ror(const Register& rd, const Register& rs, unsigned shift) { 720 extr(rd, rs, rs, shift); 721 } 722 723 // Conditional comparison. 724 // Conditional compare negative. 725 void ccmn(const Register& rn, const Operand& operand, StatusFlags nzcv, 726 Condition cond); 727 728 // Conditional compare. 729 void ccmp(const Register& rn, const Operand& operand, StatusFlags nzcv, 730 Condition cond); 731 732 // Multiplication. 
733 // 32 x 32 -> 32-bit and 64 x 64 -> 64-bit multiply. 734 void mul(const Register& rd, const Register& rn, const Register& rm); 735 736 // 32 + 32 x 32 -> 32-bit and 64 + 64 x 64 -> 64-bit multiply accumulate. 737 void madd(const Register& rd, const Register& rn, const Register& rm, 738 const Register& ra); 739 740 // -(32 x 32) -> 32-bit and -(64 x 64) -> 64-bit multiply. 741 void mneg(const Register& rd, const Register& rn, const Register& rm); 742 743 // 32 - 32 x 32 -> 32-bit and 64 - 64 x 64 -> 64-bit multiply subtract. 744 void msub(const Register& rd, const Register& rn, const Register& rm, 745 const Register& ra); 746 747 // 32 x 32 -> 64-bit multiply. 748 void smull(const Register& rd, const Register& rn, const Register& rm); 749 750 // Xd = bits<127:64> of Xn * Xm. 751 void smulh(const Register& rd, const Register& rn, const Register& rm); 752 753 // Signed 32 x 32 -> 64-bit multiply and accumulate. 754 void smaddl(const Register& rd, const Register& rn, const Register& rm, 755 const Register& ra); 756 757 // Unsigned 32 x 32 -> 64-bit multiply and accumulate. 758 void umaddl(const Register& rd, const Register& rn, const Register& rm, 759 const Register& ra); 760 761 // Signed 32 x 32 -> 64-bit multiply and subtract. 762 void smsubl(const Register& rd, const Register& rn, const Register& rm, 763 const Register& ra); 764 765 // Unsigned 32 x 32 -> 64-bit multiply and subtract. 766 void umsubl(const Register& rd, const Register& rn, const Register& rm, 767 const Register& ra); 768 769 // Signed integer divide. 770 void sdiv(const Register& rd, const Register& rn, const Register& rm); 771 772 // Unsigned integer divide. 773 void udiv(const Register& rd, const Register& rn, const Register& rm); 774 775 // Bit count, bit reverse and endian reverse. 
776 void rbit(const Register& rd, const Register& rn); 777 void rev16(const Register& rd, const Register& rn); 778 void rev32(const Register& rd, const Register& rn); 779 void rev(const Register& rd, const Register& rn); 780 void clz(const Register& rd, const Register& rn); 781 void cls(const Register& rd, const Register& rn); 782 783 // Pointer Authentication Code for Instruction address, using key B, with 784 // address in x17 and modifier in x16 [Armv8.3]. 785 void pacib1716(); 786 787 // Pointer Authentication Code for Instruction address, using key B, with 788 // address in LR and modifier in SP [Armv8.3]. 789 void pacibsp(); 790 791 // Authenticate Instruction address, using key B, with address in x17 and 792 // modifier in x16 [Armv8.3]. 793 void autib1716(); 794 795 // Authenticate Instruction address, using key B, with address in LR and 796 // modifier in SP [Armv8.3]. 797 void autibsp(); 798 799 // Memory instructions. 800 801 // Load integer or FP register. 802 void ldr(const CPURegister& rt, const MemOperand& src); 803 804 // Store integer or FP register. 805 void str(const CPURegister& rt, const MemOperand& dst); 806 807 // Load word with sign extension. 808 void ldrsw(const Register& rt, const MemOperand& src); 809 810 // Load byte. 811 void ldrb(const Register& rt, const MemOperand& src); 812 813 // Store byte. 814 void strb(const Register& rt, const MemOperand& dst); 815 816 // Load byte with sign extension. 817 void ldrsb(const Register& rt, const MemOperand& src); 818 819 // Load half-word. 820 void ldrh(const Register& rt, const MemOperand& src); 821 822 // Store half-word. 823 void strh(const Register& rt, const MemOperand& dst); 824 825 // Load half-word with sign extension. 826 void ldrsh(const Register& rt, const MemOperand& src); 827 828 // Load integer or FP register pair. 829 void ldp(const CPURegister& rt, const CPURegister& rt2, 830 const MemOperand& src); 831 832 // Store integer or FP register pair. 
833 void stp(const CPURegister& rt, const CPURegister& rt2, 834 const MemOperand& dst); 835 836 // Load word pair with sign extension. 837 void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src); 838 839 // Load literal to register from a pc relative address. 840 void ldr_pcrel(const CPURegister& rt, int imm19); 841 842 // Load literal to register. 843 void ldr(const CPURegister& rt, const Immediate& imm); 844 void ldr(const CPURegister& rt, const Operand& operand); 845 846 // Load-acquire word. 847 void ldar(const Register& rt, const Register& rn); 848 849 // Load-acquire exclusive word. 850 void ldaxr(const Register& rt, const Register& rn); 851 852 // Store-release word. 853 void stlr(const Register& rt, const Register& rn); 854 855 // Store-release exclusive word. 856 void stlxr(const Register& rs, const Register& rt, const Register& rn); 857 858 // Load-acquire byte. 859 void ldarb(const Register& rt, const Register& rn); 860 861 // Load-acquire exclusive byte. 862 void ldaxrb(const Register& rt, const Register& rn); 863 864 // Store-release byte. 865 void stlrb(const Register& rt, const Register& rn); 866 867 // Store-release exclusive byte. 868 void stlxrb(const Register& rs, const Register& rt, const Register& rn); 869 870 // Load-acquire half-word. 871 void ldarh(const Register& rt, const Register& rn); 872 873 // Load-acquire exclusive half-word. 874 void ldaxrh(const Register& rt, const Register& rn); 875 876 // Store-release half-word. 877 void stlrh(const Register& rt, const Register& rn); 878 879 // Store-release exclusive half-word. 880 void stlxrh(const Register& rs, const Register& rt, const Register& rn); 881 882 // Move instructions. The default shift of -1 indicates that the move 883 // instruction will calculate an appropriate 16-bit immediate and left shift 884 // that is equal to the 64-bit immediate argument. If an explicit left shift 885 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value. 
//
// For movk, an explicit shift can be used to indicate which half word should
// be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
// half word with zero, whereas movk(x0, 0, 48) will overwrite the
// most-significant.

// Move wide and keep: writes the 16-bit immediate into one half-word of rd,
// leaving the other bits unchanged.
void movk(const Register& rd, uint64_t imm, int shift = -1) {
  MoveWide(rd, imm, shift, MOVK);
}

// Move wide with NOT (MOVN): rd receives the bitwise inverse of the shifted
// 16-bit immediate (see Arm ARM for exact semantics).
void movn(const Register& rd, uint64_t imm, int shift = -1) {
  MoveWide(rd, imm, shift, MOVN);
}

// Move wide with zero (MOVZ): rd receives the shifted 16-bit immediate, all
// other bits cleared.
void movz(const Register& rd, uint64_t imm, int shift = -1) {
  MoveWide(rd, imm, shift, MOVZ);
}

// Misc instructions.

// Monitor debug-mode breakpoint.
void brk(int code);

// Halting debug-mode breakpoint.
void hlt(int code);

// Move register to register.
void mov(const Register& rd, const Register& rn);

// Move NOT(operand) to register.
void mvn(const Register& rd, const Operand& operand);

// System instructions.

// Move to register from system register.
void mrs(const Register& rt, SystemRegister sysreg);

// Move from register to system register.
void msr(SystemRegister sysreg, const Register& rt);

// System hint.
void hint(SystemHint code);

// Data memory barrier.
void dmb(BarrierDomain domain, BarrierType type);

// Data synchronization barrier.
void dsb(BarrierDomain domain, BarrierType type);

// Instruction synchronization barrier.
void isb();

// Conditional speculation barrier.
void csdb();

// Branch target identification.
void bti(BranchTargetIdentifier id);

// No-op: the architectural NOP, emitted via the hint mechanism.
void nop() { hint(NOP); }

// Different nop operations are used by the code generator to detect certain
// states of the generated code.
enum NopMarkerTypes {
  DEBUG_BREAK_NOP,
  INTERRUPT_CODE_NOP,
  ADR_FAR_NOP,
  // Range delimiters for the marker values above.
  FIRST_NOP_MARKER = DEBUG_BREAK_NOP,
  LAST_NOP_MARKER = ADR_FAR_NOP
};

// Emit the marker nop selected by |n| (see NopMarkerTypes).
void nop(NopMarkerTypes n);

// Add.
void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned halving add.
void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Subtract.
void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed halving add.
void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Multiply by scalar element.
void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm,
         int vm_index);

// Multiply-add by scalar element.
void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm,
         int vm_index);

// Multiply-subtract by scalar element.
void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm,
         int vm_index);

// Signed long multiply-add by scalar element.
void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

// Signed long multiply-add by scalar element (second part).
void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
            int vm_index);

// Unsigned long multiply-add by scalar element.
void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

// Unsigned long multiply-add by scalar element (second part).
void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
            int vm_index);

// Signed long multiply-sub by scalar element.
void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

// Signed long multiply-sub by scalar element (second part).
1005 void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1006 int vm_index); 1007 1008 // Unsigned long multiply-sub by scalar element. 1009 void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1010 int vm_index); 1011 1012 // Unsigned long multiply-sub by scalar element (second part). 1013 void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1014 int vm_index); 1015 1016 // Signed long multiply by scalar element. 1017 void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1018 int vm_index); 1019 1020 // Signed long multiply by scalar element (second part). 1021 void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1022 int vm_index); 1023 1024 // Unsigned long multiply by scalar element. 1025 void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1026 int vm_index); 1027 1028 // Unsigned long multiply by scalar element (second part). 1029 void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1030 int vm_index); 1031 1032 // Add narrow returning high half. 1033 void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1034 1035 // Add narrow returning high half (second part). 1036 void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1037 1038 // Signed saturating double long multiply by element. 1039 void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1040 int vm_index); 1041 1042 // Signed saturating double long multiply by element (second part). 1043 void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1044 int vm_index); 1045 1046 // Signed saturating doubling long multiply-add by element. 1047 void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1048 int vm_index); 1049 1050 // Signed saturating doubling long multiply-add by element (second part). 
1051 void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1052 int vm_index); 1053 1054 // Signed saturating doubling long multiply-sub by element. 1055 void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1056 int vm_index); 1057 1058 // Signed saturating doubling long multiply-sub by element (second part). 1059 void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1060 int vm_index); 1061 1062 // Compare bitwise to zero. 1063 void cmeq(const VRegister& vd, const VRegister& vn, int value); 1064 1065 // Compare signed greater than or equal to zero. 1066 void cmge(const VRegister& vd, const VRegister& vn, int value); 1067 1068 // Compare signed greater than zero. 1069 void cmgt(const VRegister& vd, const VRegister& vn, int value); 1070 1071 // Compare signed less than or equal to zero. 1072 void cmle(const VRegister& vd, const VRegister& vn, int value); 1073 1074 // Compare signed less than zero. 1075 void cmlt(const VRegister& vd, const VRegister& vn, int value); 1076 1077 // Unsigned rounding halving add. 1078 void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1079 1080 // Compare equal. 1081 void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1082 1083 // Compare signed greater than or equal. 1084 void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1085 1086 // Compare signed greater than. 1087 void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1088 1089 // Compare unsigned higher. 1090 void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1091 1092 // Compare unsigned higher or same. 1093 void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1094 1095 // Compare bitwise test bits nonzero. 1096 void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1097 1098 // Signed shift left by register. 
1099 void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1100 1101 // Unsigned shift left by register. 1102 void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1103 1104 // Signed saturating doubling long multiply-subtract. 1105 void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1106 1107 // Signed saturating doubling long multiply-subtract (second part). 1108 void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1109 1110 // Signed saturating doubling long multiply. 1111 void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1112 1113 // Signed saturating doubling long multiply (second part). 1114 void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1115 1116 // Signed saturating doubling multiply returning high half. 1117 void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1118 1119 // Signed saturating rounding doubling multiply returning high half. 1120 void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1121 1122 // Signed saturating doubling multiply element returning high half. 1123 void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1124 int vm_index); 1125 1126 // Signed saturating rounding doubling multiply element returning high half. 1127 void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1128 int vm_index); 1129 1130 // Unsigned long multiply long. 1131 void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1132 1133 // Unsigned long multiply (second part). 1134 void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1135 1136 // Rounding add narrow returning high half. 1137 void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1138 1139 // Subtract narrow returning high half. 
1140 void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1141 1142 // Subtract narrow returning high half (second part). 1143 void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1144 1145 // Rounding add narrow returning high half (second part). 1146 void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1147 1148 // Rounding subtract narrow returning high half. 1149 void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1150 1151 // Rounding subtract narrow returning high half (second part). 1152 void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1153 1154 // Signed saturating shift left by register. 1155 void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1156 1157 // Unsigned saturating shift left by register. 1158 void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1159 1160 // Signed rounding shift left by register. 1161 void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1162 1163 // Unsigned rounding shift left by register. 1164 void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1165 1166 // Signed saturating rounding shift left by register. 1167 void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1168 1169 // Unsigned saturating rounding shift left by register. 1170 void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1171 1172 // Signed absolute difference. 1173 void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1174 1175 // Unsigned absolute difference and accumulate. 1176 void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1177 1178 // Shift left by immediate and insert. 1179 void sli(const VRegister& vd, const VRegister& vn, int shift); 1180 1181 // Shift right by immediate and insert. 
1182 void sri(const VRegister& vd, const VRegister& vn, int shift); 1183 1184 // Signed maximum. 1185 void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1186 1187 // Signed pairwise maximum. 1188 void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1189 1190 // Add across vector. 1191 void addv(const VRegister& vd, const VRegister& vn); 1192 1193 // Signed add long across vector. 1194 void saddlv(const VRegister& vd, const VRegister& vn); 1195 1196 // Unsigned add long across vector. 1197 void uaddlv(const VRegister& vd, const VRegister& vn); 1198 1199 // FP maximum number across vector. 1200 void fmaxnmv(const VRegister& vd, const VRegister& vn); 1201 1202 // FP maximum across vector. 1203 void fmaxv(const VRegister& vd, const VRegister& vn); 1204 1205 // FP minimum number across vector. 1206 void fminnmv(const VRegister& vd, const VRegister& vn); 1207 1208 // FP minimum across vector. 1209 void fminv(const VRegister& vd, const VRegister& vn); 1210 1211 // Signed maximum across vector. 1212 void smaxv(const VRegister& vd, const VRegister& vn); 1213 1214 // Signed minimum. 1215 void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1216 1217 // Signed minimum pairwise. 1218 void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1219 1220 // Signed minimum across vector. 1221 void sminv(const VRegister& vd, const VRegister& vn); 1222 1223 // One-element structure store from one register. 1224 void st1(const VRegister& vt, const MemOperand& src); 1225 1226 // One-element structure store from two registers. 1227 void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1228 1229 // One-element structure store from three registers. 1230 void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1231 const MemOperand& src); 1232 1233 // One-element structure store from four registers. 
1234 void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1235 const VRegister& vt4, const MemOperand& src); 1236 1237 // One-element single structure store from one lane. 1238 void st1(const VRegister& vt, int lane, const MemOperand& src); 1239 1240 // Two-element structure store from two registers. 1241 void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1242 1243 // Two-element single structure store from two lanes. 1244 void st2(const VRegister& vt, const VRegister& vt2, int lane, 1245 const MemOperand& src); 1246 1247 // Three-element structure store from three registers. 1248 void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1249 const MemOperand& src); 1250 1251 // Three-element single structure store from three lanes. 1252 void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1253 int lane, const MemOperand& src); 1254 1255 // Four-element structure store from four registers. 1256 void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1257 const VRegister& vt4, const MemOperand& src); 1258 1259 // Four-element single structure store from four lanes. 1260 void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1261 const VRegister& vt4, int lane, const MemOperand& src); 1262 1263 // Unsigned add long. 1264 void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1265 1266 // Unsigned add long (second part). 1267 void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1268 1269 // Unsigned add wide. 1270 void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1271 1272 // Unsigned add wide (second part). 1273 void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1274 1275 // Signed add long. 1276 void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1277 1278 // Signed add long (second part). 
1279 void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1280 1281 // Signed add wide. 1282 void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1283 1284 // Signed add wide (second part). 1285 void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1286 1287 // Unsigned subtract long. 1288 void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1289 1290 // Unsigned subtract long (second part). 1291 void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1292 1293 // Unsigned subtract wide. 1294 void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1295 1296 // Signed subtract long. 1297 void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1298 1299 // Signed subtract long (second part). 1300 void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1301 1302 // Signed integer subtract wide. 1303 void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1304 1305 // Signed integer subtract wide (second part). 1306 void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1307 1308 // Unsigned subtract wide (second part). 1309 void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1310 1311 // Unsigned maximum. 1312 void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1313 1314 // Unsigned pairwise maximum. 1315 void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1316 1317 // Unsigned maximum across vector. 1318 void umaxv(const VRegister& vd, const VRegister& vn); 1319 1320 // Unsigned minimum. 1321 void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1322 1323 // Unsigned pairwise minimum. 1324 void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1325 1326 // Unsigned minimum across vector. 
1327 void uminv(const VRegister& vd, const VRegister& vn); 1328 1329 // Transpose vectors (primary). 1330 void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1331 1332 // Transpose vectors (secondary). 1333 void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1334 1335 // Unzip vectors (primary). 1336 void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1337 1338 // Unzip vectors (secondary). 1339 void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1340 1341 // Zip vectors (primary). 1342 void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1343 1344 // Zip vectors (secondary). 1345 void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1346 1347 // Signed shift right by immediate. 1348 void sshr(const VRegister& vd, const VRegister& vn, int shift); 1349 1350 // Unsigned shift right by immediate. 1351 void ushr(const VRegister& vd, const VRegister& vn, int shift); 1352 1353 // Signed rounding shift right by immediate. 1354 void srshr(const VRegister& vd, const VRegister& vn, int shift); 1355 1356 // Unsigned rounding shift right by immediate. 1357 void urshr(const VRegister& vd, const VRegister& vn, int shift); 1358 1359 // Signed shift right by immediate and accumulate. 1360 void ssra(const VRegister& vd, const VRegister& vn, int shift); 1361 1362 // Unsigned shift right by immediate and accumulate. 1363 void usra(const VRegister& vd, const VRegister& vn, int shift); 1364 1365 // Signed rounding shift right by immediate and accumulate. 1366 void srsra(const VRegister& vd, const VRegister& vn, int shift); 1367 1368 // Unsigned rounding shift right by immediate and accumulate. 1369 void ursra(const VRegister& vd, const VRegister& vn, int shift); 1370 1371 // Shift right narrow by immediate. 1372 void shrn(const VRegister& vd, const VRegister& vn, int shift); 1373 1374 // Shift right narrow by immediate (second part). 
1375 void shrn2(const VRegister& vd, const VRegister& vn, int shift); 1376 1377 // Rounding shift right narrow by immediate. 1378 void rshrn(const VRegister& vd, const VRegister& vn, int shift); 1379 1380 // Rounding shift right narrow by immediate (second part). 1381 void rshrn2(const VRegister& vd, const VRegister& vn, int shift); 1382 1383 // Unsigned saturating shift right narrow by immediate. 1384 void uqshrn(const VRegister& vd, const VRegister& vn, int shift); 1385 1386 // Unsigned saturating shift right narrow by immediate (second part). 1387 void uqshrn2(const VRegister& vd, const VRegister& vn, int shift); 1388 1389 // Unsigned saturating rounding shift right narrow by immediate. 1390 void uqrshrn(const VRegister& vd, const VRegister& vn, int shift); 1391 1392 // Unsigned saturating rounding shift right narrow by immediate (second part). 1393 void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift); 1394 1395 // Signed saturating shift right narrow by immediate. 1396 void sqshrn(const VRegister& vd, const VRegister& vn, int shift); 1397 1398 // Signed saturating shift right narrow by immediate (second part). 1399 void sqshrn2(const VRegister& vd, const VRegister& vn, int shift); 1400 1401 // Signed saturating rounded shift right narrow by immediate. 1402 void sqrshrn(const VRegister& vd, const VRegister& vn, int shift); 1403 1404 // Signed saturating rounded shift right narrow by immediate (second part). 1405 void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift); 1406 1407 // Signed saturating shift right unsigned narrow by immediate. 1408 void sqshrun(const VRegister& vd, const VRegister& vn, int shift); 1409 1410 // Signed saturating shift right unsigned narrow by immediate (second part). 1411 void sqshrun2(const VRegister& vd, const VRegister& vn, int shift); 1412 1413 // Signed sat rounded shift right unsigned narrow by immediate. 
1414 void sqrshrun(const VRegister& vd, const VRegister& vn, int shift); 1415 1416 // Signed sat rounded shift right unsigned narrow by immediate (second part). 1417 void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift); 1418 1419 // FP reciprocal step. 1420 void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1421 1422 // FP reciprocal estimate. 1423 void frecpe(const VRegister& vd, const VRegister& vn); 1424 1425 // FP reciprocal square root estimate. 1426 void frsqrte(const VRegister& vd, const VRegister& vn); 1427 1428 // FP reciprocal square root step. 1429 void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1430 1431 // Signed absolute difference and accumulate long. 1432 void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1433 1434 // Signed absolute difference and accumulate long (second part). 1435 void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1436 1437 // Unsigned absolute difference and accumulate long. 1438 void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1439 1440 // Unsigned absolute difference and accumulate long (second part). 1441 void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1442 1443 // Signed absolute difference long. 1444 void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1445 1446 // Signed absolute difference long (second part). 1447 void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1448 1449 // Unsigned absolute difference long. 1450 void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1451 1452 // Unsigned absolute difference long (second part). 1453 void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1454 1455 // Polynomial multiply long. 1456 void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1457 1458 // Polynomial multiply long (second part). 
1459 void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1460 1461 // Signed long multiply-add. 1462 void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1463 1464 // Signed long multiply-add (second part). 1465 void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1466 1467 // Unsigned long multiply-add. 1468 void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1469 1470 // Unsigned long multiply-add (second part). 1471 void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1472 1473 // Signed long multiply-sub. 1474 void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1475 1476 // Signed long multiply-sub (second part). 1477 void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1478 1479 // Unsigned long multiply-sub. 1480 void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1481 1482 // Unsigned long multiply-sub (second part). 1483 void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1484 1485 // Signed long multiply. 1486 void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1487 1488 // Signed long multiply (second part). 1489 void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1490 1491 // Signed saturating doubling long multiply-add. 1492 void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1493 1494 // Signed saturating doubling long multiply-add (second part). 1495 void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1496 1497 // Unsigned absolute difference. 1498 void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1499 1500 // Signed absolute difference and accumulate. 1501 void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1502 1503 // FP instructions. 1504 // Move immediate to FP register. 
1505 void fmov(const VRegister& fd, double imm); 1506 void fmov(const VRegister& fd, float imm); 1507 1508 // Move FP register to register. 1509 void fmov(const Register& rd, const VRegister& fn); 1510 1511 // Move register to FP register. 1512 void fmov(const VRegister& fd, const Register& rn); 1513 1514 // Move FP register to FP register. 1515 void fmov(const VRegister& fd, const VRegister& fn); 1516 1517 // Move 64-bit register to top half of 128-bit FP register. 1518 void fmov(const VRegister& vd, int index, const Register& rn); 1519 1520 // Move top half of 128-bit FP register to 64-bit register. 1521 void fmov(const Register& rd, const VRegister& vn, int index); 1522 1523 // FP add. 1524 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1525 1526 // FP subtract. 1527 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1528 1529 // FP multiply. 1530 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1531 1532 // FP compare equal to zero. 1533 void fcmeq(const VRegister& vd, const VRegister& vn, double imm); 1534 1535 // FP greater than zero. 1536 void fcmgt(const VRegister& vd, const VRegister& vn, double imm); 1537 1538 // FP greater than or equal to zero. 1539 void fcmge(const VRegister& vd, const VRegister& vn, double imm); 1540 1541 // FP less than or equal to zero. 1542 void fcmle(const VRegister& vd, const VRegister& vn, double imm); 1543 1544 // FP less than to zero. 1545 void fcmlt(const VRegister& vd, const VRegister& vn, double imm); 1546 1547 // FP absolute difference. 1548 void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1549 1550 // FP pairwise add vector. 1551 void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1552 1553 // FP pairwise add scalar. 1554 void faddp(const VRegister& vd, const VRegister& vn); 1555 1556 // FP pairwise maximum scalar. 
1557 void fmaxp(const VRegister& vd, const VRegister& vn); 1558 1559 // FP pairwise maximum number scalar. 1560 void fmaxnmp(const VRegister& vd, const VRegister& vn); 1561 1562 // FP pairwise minimum number scalar. 1563 void fminnmp(const VRegister& vd, const VRegister& vn); 1564 1565 // FP vector multiply accumulate. 1566 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1567 1568 // FP vector multiply subtract. 1569 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1570 1571 // FP vector multiply extended. 1572 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1573 1574 // FP absolute greater than or equal. 1575 void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1576 1577 // FP absolute greater than. 1578 void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1579 1580 // FP multiply by element. 1581 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1582 int vm_index); 1583 1584 // FP fused multiply-add to accumulator by element. 1585 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1586 int vm_index); 1587 1588 // FP fused multiply-sub from accumulator by element. 1589 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1590 int vm_index); 1591 1592 // FP multiply extended by element. 1593 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1594 int vm_index); 1595 1596 // FP compare equal. 1597 void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1598 1599 // FP greater than. 1600 void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1601 1602 // FP greater than or equal. 1603 void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1604 1605 // FP pairwise maximum vector. 1606 void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1607 1608 // FP pairwise minimum vector. 
1609 void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1610 1611 // FP pairwise minimum scalar. 1612 void fminp(const VRegister& vd, const VRegister& vn); 1613 1614 // FP pairwise maximum number vector. 1615 void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1616 1617 // FP pairwise minimum number vector. 1618 void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1619 1620 // FP fused multiply-add. 1621 void fmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1622 const VRegister& va); 1623 1624 // FP fused multiply-subtract. 1625 void fmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1626 const VRegister& va); 1627 1628 // FP fused multiply-add and negate. 1629 void fnmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1630 const VRegister& va); 1631 1632 // FP fused multiply-subtract and negate. 1633 void fnmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1634 const VRegister& va); 1635 1636 // FP multiply-negate scalar. 1637 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1638 1639 // FP reciprocal exponent scalar. 1640 void frecpx(const VRegister& vd, const VRegister& vn); 1641 1642 // FP divide. 1643 void fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1644 1645 // FP maximum. 1646 void fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1647 1648 // FP minimum. 1649 void fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1650 1651 // FP maximum. 1652 void fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1653 1654 // FP minimum. 1655 void fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1656 1657 // FP absolute. 1658 void fabs(const VRegister& vd, const VRegister& vn); 1659 1660 // FP negate. 1661 void fneg(const VRegister& vd, const VRegister& vn); 1662 1663 // FP square root. 
1664 void fsqrt(const VRegister& vd, const VRegister& vn); 1665 1666 // FP round to integer nearest with ties to away. 1667 void frinta(const VRegister& vd, const VRegister& vn); 1668 1669 // FP round to integer, implicit rounding. 1670 void frinti(const VRegister& vd, const VRegister& vn); 1671 1672 // FP round to integer toward minus infinity. 1673 void frintm(const VRegister& vd, const VRegister& vn); 1674 1675 // FP round to integer nearest with ties to even. 1676 void frintn(const VRegister& vd, const VRegister& vn); 1677 1678 // FP round to integer towards plus infinity. 1679 void frintp(const VRegister& vd, const VRegister& vn); 1680 1681 // FP round to integer, exact, implicit rounding. 1682 void frintx(const VRegister& vd, const VRegister& vn); 1683 1684 // FP round to integer towards zero. 1685 void frintz(const VRegister& vd, const VRegister& vn); 1686 1687 // FP compare registers. 1688 void fcmp(const VRegister& vn, const VRegister& vm); 1689 1690 // FP compare immediate. 1691 void fcmp(const VRegister& vn, double value); 1692 1693 // FP conditional compare. 1694 void fccmp(const VRegister& vn, const VRegister& vm, StatusFlags nzcv, 1695 Condition cond); 1696 1697 // FP conditional select. 1698 void fcsel(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1699 Condition cond); 1700 1701 // Common FP Convert functions. 1702 void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op); 1703 void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op); 1704 1705 // FP convert between precisions. 1706 void fcvt(const VRegister& vd, const VRegister& vn); 1707 1708 // FP convert to higher precision. 1709 void fcvtl(const VRegister& vd, const VRegister& vn); 1710 1711 // FP convert to higher precision (second part). 1712 void fcvtl2(const VRegister& vd, const VRegister& vn); 1713 1714 // FP convert to lower precision. 
1715 void fcvtn(const VRegister& vd, const VRegister& vn); 1716 1717 // FP convert to lower precision (second part). 1718 void fcvtn2(const VRegister& vd, const VRegister& vn); 1719 1720 // FP convert to lower precision, rounding to odd. 1721 void fcvtxn(const VRegister& vd, const VRegister& vn); 1722 1723 // FP convert to lower precision, rounding to odd (second part). 1724 void fcvtxn2(const VRegister& vd, const VRegister& vn); 1725 1726 // FP convert to signed integer, nearest with ties to away. 1727 void fcvtas(const Register& rd, const VRegister& vn); 1728 1729 // FP convert to unsigned integer, nearest with ties to away. 1730 void fcvtau(const Register& rd, const VRegister& vn); 1731 1732 // FP convert to signed integer, nearest with ties to away. 1733 void fcvtas(const VRegister& vd, const VRegister& vn); 1734 1735 // FP convert to unsigned integer, nearest with ties to away. 1736 void fcvtau(const VRegister& vd, const VRegister& vn); 1737 1738 // FP convert to signed integer, round towards -infinity. 1739 void fcvtms(const Register& rd, const VRegister& vn); 1740 1741 // FP convert to unsigned integer, round towards -infinity. 1742 void fcvtmu(const Register& rd, const VRegister& vn); 1743 1744 // FP convert to signed integer, round towards -infinity. 1745 void fcvtms(const VRegister& vd, const VRegister& vn); 1746 1747 // FP convert to unsigned integer, round towards -infinity. 1748 void fcvtmu(const VRegister& vd, const VRegister& vn); 1749 1750 // FP convert to signed integer, nearest with ties to even. 1751 void fcvtns(const Register& rd, const VRegister& vn); 1752 1753 // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3]. 1754 void fjcvtzs(const Register& rd, const VRegister& vn); 1755 1756 // FP convert to unsigned integer, nearest with ties to even. 1757 void fcvtnu(const Register& rd, const VRegister& vn); 1758 1759 // FP convert to signed integer, nearest with ties to even.
1760 void fcvtns(const VRegister& rd, const VRegister& vn); 1761 1762 // FP convert to unsigned integer, nearest with ties to even. 1763 void fcvtnu(const VRegister& rd, const VRegister& vn); 1764 1765 // FP convert to signed integer or fixed-point, round towards zero. 1766 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0); 1767 1768 // FP convert to unsigned integer or fixed-point, round towards zero. 1769 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0); 1770 1771 // FP convert to signed integer or fixed-point, round towards zero. 1772 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0); 1773 1774 // FP convert to unsigned integer or fixed-point, round towards zero. 1775 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0); 1776 1777 // FP convert to signed integer, round towards +infinity. 1778 void fcvtps(const Register& rd, const VRegister& vn); 1779 1780 // FP convert to unsigned integer, round towards +infinity. 1781 void fcvtpu(const Register& rd, const VRegister& vn); 1782 1783 // FP convert to signed integer, round towards +infinity. 1784 void fcvtps(const VRegister& vd, const VRegister& vn); 1785 1786 // FP convert to unsigned integer, round towards +infinity. 1787 void fcvtpu(const VRegister& vd, const VRegister& vn); 1788 1789 // Convert signed integer or fixed point to FP. 1790 void scvtf(const VRegister& fd, const Register& rn, int fbits = 0); 1791 1792 // Convert unsigned integer or fixed point to FP. 1793 void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0); 1794 1795 // Convert signed integer or fixed-point to FP. 1796 void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); 1797 1798 // Convert unsigned integer or fixed-point to FP. 1799 void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); 1800 1801 // Extract vector from pair of vectors. 
1802 void ext(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1803 int index); 1804 1805 // Duplicate vector element to vector or scalar. 1806 void dup(const VRegister& vd, const VRegister& vn, int vn_index); 1807 1808 // Duplicate general-purpose register to vector. 1809 void dup(const VRegister& vd, const Register& rn); 1810 1811 // Insert vector element from general-purpose register. 1812 void ins(const VRegister& vd, int vd_index, const Register& rn); 1813 1814 // Move general-purpose register to a vector element. 1815 void mov(const VRegister& vd, int vd_index, const Register& rn); 1816 1817 // Unsigned move vector element to general-purpose register. 1818 void umov(const Register& rd, const VRegister& vn, int vn_index); 1819 1820 // Move vector element to general-purpose register. 1821 void mov(const Register& rd, const VRegister& vn, int vn_index); 1822 1823 // Move vector element to scalar. 1824 void mov(const VRegister& vd, const VRegister& vn, int vn_index); 1825 1826 // Insert vector element from another vector element. 1827 void ins(const VRegister& vd, int vd_index, const VRegister& vn, 1828 int vn_index); 1829 1830 // Move vector element to another vector element. 1831 void mov(const VRegister& vd, int vd_index, const VRegister& vn, 1832 int vn_index); 1833 1834 // Signed move vector element to general-purpose register. 1835 void smov(const Register& rd, const VRegister& vn, int vn_index); 1836 1837 // One-element structure load to one register. 1838 void ld1(const VRegister& vt, const MemOperand& src); 1839 1840 // One-element structure load to two registers. 1841 void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1842 1843 // One-element structure load to three registers. 1844 void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1845 const MemOperand& src); 1846 1847 // One-element structure load to four registers. 
1848 void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1849 const VRegister& vt4, const MemOperand& src); 1850 1851 // One-element single structure load to one lane. 1852 void ld1(const VRegister& vt, int lane, const MemOperand& src); 1853 1854 // One-element single structure load to all lanes. 1855 void ld1r(const VRegister& vt, const MemOperand& src); 1856 1857 // Two-element structure load. 1858 void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1859 1860 // Two-element single structure load to one lane. 1861 void ld2(const VRegister& vt, const VRegister& vt2, int lane, 1862 const MemOperand& src); 1863 1864 // Two-element single structure load to all lanes. 1865 void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1866 1867 // Three-element structure load. 1868 void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1869 const MemOperand& src); 1870 1871 // Three-element single structure load to one lane. 1872 void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1873 int lane, const MemOperand& src); 1874 1875 // Three-element single structure load to all lanes. 1876 void ld3r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1877 const MemOperand& src); 1878 1879 // Four-element structure load. 1880 void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1881 const VRegister& vt4, const MemOperand& src); 1882 1883 // Four-element single structure load to one lane. 1884 void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1885 const VRegister& vt4, int lane, const MemOperand& src); 1886 1887 // Four-element single structure load to all lanes. 1888 void ld4r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1889 const VRegister& vt4, const MemOperand& src); 1890 1891 // Count leading sign bits. 
1892 void cls(const VRegister& vd, const VRegister& vn); 1893 1894 // Count leading zero bits (vector). 1895 void clz(const VRegister& vd, const VRegister& vn); 1896 1897 // Population count per byte. 1898 void cnt(const VRegister& vd, const VRegister& vn); 1899 1900 // Reverse bit order. 1901 void rbit(const VRegister& vd, const VRegister& vn); 1902 1903 // Reverse elements in 16-bit halfwords. 1904 void rev16(const VRegister& vd, const VRegister& vn); 1905 1906 // Reverse elements in 32-bit words. 1907 void rev32(const VRegister& vd, const VRegister& vn); 1908 1909 // Reverse elements in 64-bit doublewords. 1910 void rev64(const VRegister& vd, const VRegister& vn); 1911 1912 // Unsigned reciprocal square root estimate. 1913 void ursqrte(const VRegister& vd, const VRegister& vn); 1914 1915 // Unsigned reciprocal estimate. 1916 void urecpe(const VRegister& vd, const VRegister& vn); 1917 1918 // Signed pairwise long add and accumulate. 1919 void sadalp(const VRegister& vd, const VRegister& vn); 1920 1921 // Signed pairwise long add. 1922 void saddlp(const VRegister& vd, const VRegister& vn); 1923 1924 // Unsigned pairwise long add. 1925 void uaddlp(const VRegister& vd, const VRegister& vn); 1926 1927 // Unsigned pairwise long add and accumulate. 1928 void uadalp(const VRegister& vd, const VRegister& vn); 1929 1930 // Shift left by immediate. 1931 void shl(const VRegister& vd, const VRegister& vn, int shift); 1932 1933 // Signed saturating shift left by immediate. 1934 void sqshl(const VRegister& vd, const VRegister& vn, int shift); 1935 1936 // Signed saturating shift left unsigned by immediate. 1937 void sqshlu(const VRegister& vd, const VRegister& vn, int shift); 1938 1939 // Unsigned saturating shift left by immediate. 1940 void uqshl(const VRegister& vd, const VRegister& vn, int shift); 1941 1942 // Signed shift left long by immediate. 
1943 void sshll(const VRegister& vd, const VRegister& vn, int shift); 1944 1945 // Signed shift left long by immediate (second part). 1946 void sshll2(const VRegister& vd, const VRegister& vn, int shift); 1947 1948 // Signed extend long. 1949 void sxtl(const VRegister& vd, const VRegister& vn); 1950 1951 // Signed extend long (second part). 1952 void sxtl2(const VRegister& vd, const VRegister& vn); 1953 1954 // Unsigned shift left long by immediate. 1955 void ushll(const VRegister& vd, const VRegister& vn, int shift); 1956 1957 // Unsigned shift left long by immediate (second part). 1958 void ushll2(const VRegister& vd, const VRegister& vn, int shift); 1959 1960 // Shift left long by element size. 1961 void shll(const VRegister& vd, const VRegister& vn, int shift); 1962 1963 // Shift left long by element size (second part). 1964 void shll2(const VRegister& vd, const VRegister& vn, int shift); 1965 1966 // Unsigned extend long. 1967 void uxtl(const VRegister& vd, const VRegister& vn); 1968 1969 // Unsigned extend long (second part). 1970 void uxtl2(const VRegister& vd, const VRegister& vn); 1971 1972 // Signed rounding halving add. 1973 void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1974 1975 // Unsigned halving sub. 1976 void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1977 1978 // Signed halving sub. 1979 void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1980 1981 // Unsigned saturating add. 1982 void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1983 1984 // Signed saturating add. 1985 void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1986 1987 // Unsigned saturating subtract. 1988 void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1989 1990 // Signed saturating subtract. 1991 void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1992 1993 // Add pairwise. 
1994 void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1995 1996 // Add pair of elements scalar. 1997 void addp(const VRegister& vd, const VRegister& vn); 1998 1999 // Multiply-add to accumulator. 2000 void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2001 2002 // Multiply-subtract to accumulator. 2003 void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2004 2005 // Multiply. 2006 void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2007 2008 // Table lookup from one register. 2009 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2010 2011 // Table lookup from two registers. 2012 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2013 const VRegister& vm); 2014 2015 // Table lookup from three registers. 2016 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2017 const VRegister& vn3, const VRegister& vm); 2018 2019 // Table lookup from four registers. 2020 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2021 const VRegister& vn3, const VRegister& vn4, const VRegister& vm); 2022 2023 // Table lookup extension from one register. 2024 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2025 2026 // Table lookup extension from two registers. 2027 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2028 const VRegister& vm); 2029 2030 // Table lookup extension from three registers. 2031 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2032 const VRegister& vn3, const VRegister& vm); 2033 2034 // Table lookup extension from four registers. 2035 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2036 const VRegister& vn3, const VRegister& vn4, const VRegister& vm); 2037 2038 // Instruction functions used only for test, debug, and patching. 2039 // Emit raw instructions in the instruction stream. 
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 8 bits of data in the instruction stream.
  void dc8(uint8_t data) { EmitData(&data, sizeof(data)); }

  // Emit 32 bits of data in the instruction stream.
  void dc32(uint32_t data) { EmitData(&data, sizeof(data)); }

  // Emit 64 bits of data in the instruction stream.
  void dc64(uint64_t data) { EmitData(&data, sizeof(data)); }

  // Emit an address in the instruction stream.
  void dcptr(Label* label);

  // Copy a string into the instruction stream, including the terminating
  // '\0' character. The instruction pointer (pc_) is then aligned correctly
  // for subsequent instructions.
  void EmitStringData(const char* string);

  // Pseudo-instructions ------------------------------------------------------

  // Parameters are described in arm64/instructions-arm64.h.
  void debug(const char* message, uint32_t code, Instr params = BREAK);

  // Required by V8. These forward to the dc* emitters above; dp() emits
  // pointer-sized data as 64 bits, which is correct for the 64-bit arm64
  // target this assembler serves.
  void dd(uint32_t data) { dc32(data); }
  void db(uint8_t data) { dc8(data); }
  void dq(uint64_t data) { dc64(data); }
  void dp(uintptr_t data) { dc64(data); }

  // Code generation helpers --------------------------------------------------

  // The current emission point, viewed as an Instruction*.
  Instruction* pc() const { return Instruction::Cast(pc_); }

  // The instruction at the given byte offset from the start of the buffer.
  Instruction* InstructionAt(ptrdiff_t offset) const {
    return reinterpret_cast<Instruction*>(buffer_start_ + offset);
  }

  // Byte offset of |instr| from the start of the buffer.
  ptrdiff_t InstructionOffset(Instruction* instr) const {
    return reinterpret_cast<byte*>(instr) - buffer_start_;
  }

  // Register encoding.
  // Standard register-field encoders. Each DCHECKs that the register is not
  // the stack pointer (sp is tracked with the internal code
  // kSPRegInternalCode; presumably because its architectural field encoding
  // aliases the zero register). The zero register is allowed here unless
  // stated otherwise.
  static Instr Rd(CPURegister rd) {
    DCHECK_NE(rd.code(), kSPRegInternalCode);
    return rd.code() << Rd_offset;
  }

  static Instr Rn(CPURegister rn) {
    DCHECK_NE(rn.code(), kSPRegInternalCode);
    return rn.code() << Rn_offset;
  }

  static Instr Rm(CPURegister rm) {
    DCHECK_NE(rm.code(), kSPRegInternalCode);
    return rm.code() << Rm_offset;
  }

  // Like Rm, but additionally disallow the zero register.
  static Instr RmNot31(CPURegister rm) {
    DCHECK_NE(rm.code(), kSPRegInternalCode);
    DCHECK(!rm.IsZero());
    return Rm(rm);
  }

  static Instr Ra(CPURegister ra) {
    DCHECK_NE(ra.code(), kSPRegInternalCode);
    return ra.code() << Ra_offset;
  }

  static Instr Rt(CPURegister rt) {
    DCHECK_NE(rt.code(), kSPRegInternalCode);
    return rt.code() << Rt_offset;
  }

  static Instr Rt2(CPURegister rt2) {
    DCHECK_NE(rt2.code(), kSPRegInternalCode);
    return rt2.code() << Rt2_offset;
  }

  static Instr Rs(CPURegister rs) {
    DCHECK_NE(rs.code(), kSPRegInternalCode);
    return rs.code() << Rs_offset;
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register. The register code is masked with kRegCodeMask
  // to strip the internal sp marker down to the architectural field value.
  static Instr RdSP(Register rd) {
    DCHECK(!rd.IsZero());
    return (rd.code() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    DCHECK(!rn.IsZero());
    return (rn.code() & kRegCodeMask) << Rn_offset;
  }

  // Flags encoding.
  inline static Instr Flags(FlagsUpdate S);
  inline static Instr Cond(Condition cond);

  // PC-relative address encoding.
  inline static Instr ImmPCRelAddress(int imm21);

  // Branch encoding.
2144 inline static Instr ImmUncondBranch(int imm26); 2145 inline static Instr ImmCondBranch(int imm19); 2146 inline static Instr ImmCmpBranch(int imm19); 2147 inline static Instr ImmTestBranch(int imm14); 2148 inline static Instr ImmTestBranchBit(unsigned bit_pos); 2149 2150 // Data Processing encoding. 2151 inline static Instr SF(Register rd); 2152 inline static Instr ImmAddSub(int imm); 2153 inline static Instr ImmS(unsigned imms, unsigned reg_size); 2154 inline static Instr ImmR(unsigned immr, unsigned reg_size); 2155 inline static Instr ImmSetBits(unsigned imms, unsigned reg_size); 2156 inline static Instr ImmRotate(unsigned immr, unsigned reg_size); 2157 inline static Instr ImmLLiteral(int imm19); 2158 inline static Instr BitN(unsigned bitn, unsigned reg_size); 2159 inline static Instr ShiftDP(Shift shift); 2160 inline static Instr ImmDPShift(unsigned amount); 2161 inline static Instr ExtendMode(Extend extend); 2162 inline static Instr ImmExtendShift(unsigned left_shift); 2163 inline static Instr ImmCondCmp(unsigned imm); 2164 inline static Instr Nzcv(StatusFlags nzcv); 2165 2166 static bool IsImmAddSub(int64_t immediate); 2167 static bool IsImmLogical(uint64_t value, unsigned width, unsigned* n, 2168 unsigned* imm_s, unsigned* imm_r); 2169 2170 // MemOperand offset encoding. 2171 inline static Instr ImmLSUnsigned(int imm12); 2172 inline static Instr ImmLS(int imm9); 2173 inline static Instr ImmLSPair(int imm7, unsigned size); 2174 inline static Instr ImmShiftLS(unsigned shift_amount); 2175 inline static Instr ImmException(int imm16); 2176 inline static Instr ImmSystemRegister(int imm15); 2177 inline static Instr ImmHint(int imm7); 2178 inline static Instr ImmBarrierDomain(int imm2); 2179 inline static Instr ImmBarrierType(int imm2); 2180 inline static unsigned CalcLSDataSize(LoadStoreOp op); 2181 2182 // Instruction bits for vector format in data processing operations. 
VFormat(VRegister vd)2183 static Instr VFormat(VRegister vd) { 2184 if (vd.Is64Bits()) { 2185 switch (vd.LaneCount()) { 2186 case 2: 2187 return NEON_2S; 2188 case 4: 2189 return NEON_4H; 2190 case 8: 2191 return NEON_8B; 2192 default: 2193 UNREACHABLE(); 2194 } 2195 } else { 2196 DCHECK(vd.Is128Bits()); 2197 switch (vd.LaneCount()) { 2198 case 2: 2199 return NEON_2D; 2200 case 4: 2201 return NEON_4S; 2202 case 8: 2203 return NEON_8H; 2204 case 16: 2205 return NEON_16B; 2206 default: 2207 UNREACHABLE(); 2208 } 2209 } 2210 } 2211 2212 // Instruction bits for vector format in floating point data processing 2213 // operations. FPFormat(VRegister vd)2214 static Instr FPFormat(VRegister vd) { 2215 if (vd.LaneCount() == 1) { 2216 // Floating point scalar formats. 2217 DCHECK(vd.Is32Bits() || vd.Is64Bits()); 2218 return vd.Is64Bits() ? FP64 : FP32; 2219 } 2220 2221 // Two lane floating point vector formats. 2222 if (vd.LaneCount() == 2) { 2223 DCHECK(vd.Is64Bits() || vd.Is128Bits()); 2224 return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S; 2225 } 2226 2227 // Four lane floating point vector format. 2228 DCHECK((vd.LaneCount() == 4) && vd.Is128Bits()); 2229 return NEON_FP_4S; 2230 } 2231 2232 // Instruction bits for vector format in load and store operations. LSVFormat(VRegister vd)2233 static Instr LSVFormat(VRegister vd) { 2234 if (vd.Is64Bits()) { 2235 switch (vd.LaneCount()) { 2236 case 1: 2237 return LS_NEON_1D; 2238 case 2: 2239 return LS_NEON_2S; 2240 case 4: 2241 return LS_NEON_4H; 2242 case 8: 2243 return LS_NEON_8B; 2244 default: 2245 UNREACHABLE(); 2246 } 2247 } else { 2248 DCHECK(vd.Is128Bits()); 2249 switch (vd.LaneCount()) { 2250 case 2: 2251 return LS_NEON_2D; 2252 case 4: 2253 return LS_NEON_4S; 2254 case 8: 2255 return LS_NEON_8H; 2256 case 16: 2257 return LS_NEON_16B; 2258 default: 2259 UNREACHABLE(); 2260 } 2261 } 2262 } 2263 2264 // Instruction bits for scalar format in data processing operations. 
SFormat(VRegister vd)2265 static Instr SFormat(VRegister vd) { 2266 DCHECK(vd.IsScalar()); 2267 switch (vd.SizeInBytes()) { 2268 case 1: 2269 return NEON_B; 2270 case 2: 2271 return NEON_H; 2272 case 4: 2273 return NEON_S; 2274 case 8: 2275 return NEON_D; 2276 default: 2277 UNREACHABLE(); 2278 } 2279 } 2280 ImmNEONHLM(int index,int num_bits)2281 static Instr ImmNEONHLM(int index, int num_bits) { 2282 int h, l, m; 2283 if (num_bits == 3) { 2284 DCHECK(is_uint3(index)); 2285 h = (index >> 2) & 1; 2286 l = (index >> 1) & 1; 2287 m = (index >> 0) & 1; 2288 } else if (num_bits == 2) { 2289 DCHECK(is_uint2(index)); 2290 h = (index >> 1) & 1; 2291 l = (index >> 0) & 1; 2292 m = 0; 2293 } else { 2294 DCHECK(is_uint1(index) && (num_bits == 1)); 2295 h = (index >> 0) & 1; 2296 l = 0; 2297 m = 0; 2298 } 2299 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset); 2300 } 2301 ImmNEONExt(int imm4)2302 static Instr ImmNEONExt(int imm4) { 2303 DCHECK(is_uint4(imm4)); 2304 return imm4 << ImmNEONExt_offset; 2305 } 2306 ImmNEON5(Instr format,int index)2307 static Instr ImmNEON5(Instr format, int index) { 2308 DCHECK(is_uint4(index)); 2309 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 2310 int imm5 = (index << (s + 1)) | (1 << s); 2311 return imm5 << ImmNEON5_offset; 2312 } 2313 ImmNEON4(Instr format,int index)2314 static Instr ImmNEON4(Instr format, int index) { 2315 DCHECK(is_uint4(index)); 2316 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 2317 int imm4 = index << s; 2318 return imm4 << ImmNEON4_offset; 2319 } 2320 ImmNEONabcdefgh(int imm8)2321 static Instr ImmNEONabcdefgh(int imm8) { 2322 DCHECK(is_uint8(imm8)); 2323 Instr instr; 2324 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset; 2325 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset; 2326 return instr; 2327 } 2328 NEONCmode(int cmode)2329 static Instr NEONCmode(int cmode) { 2330 DCHECK(is_uint4(cmode)); 2331 return cmode << NEONCmode_offset; 2332 } 2333 
  static Instr NEONModImmOp(int op) {
    DCHECK(is_uint1(op));
    return op << NEONModImmOp_offset;
  }

  // Predicates for what each load/store immediate form can encode.
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmLSScaled(int64_t offset, unsigned size);
  static bool IsImmLLiteral(int64_t offset);

  // Move immediates encoding.
  inline static Instr ImmMoveWide(int imm);
  inline static Instr ShiftMoveWide(int shift);

  // FP Immediates.
  static Instr ImmFP(double imm);
  static Instr ImmNEONFP(double imm);
  inline static Instr FPScale(unsigned scale);

  // FP register type.
  inline static Instr FPType(VRegister fd);

  // Unused on this architecture.
  void MaybeEmitOutOfLineConstantPool() {}

  // Unconditionally emit the constant pool, without (resp. with) a branch
  // jumping over it.
  void ForceConstantPoolEmissionWithoutJump() {
    constpool_.Check(Emission::kForced, Jump::kOmitted);
  }
  void ForceConstantPoolEmissionWithJump() {
    constpool_.Check(Emission::kForced, Jump::kRequired);
  }
  // Check if the const pool needs to be emitted while pretending that {margin}
  // more bytes of instructions have already been emitted.
  void EmitConstPoolWithJumpIfNeeded(size_t margin = 0) {
    constpool_.Check(Emission::kIfNeeded, Jump::kRequired, margin);
  }

  // Returns true if we should emit a veneer as soon as possible for a branch
  // which can at most reach to specified pc.
  bool ShouldEmitVeneer(int max_reachable_pc,
                        size_t margin = kVeneerDistanceMargin);
  bool ShouldEmitVeneers(size_t margin = kVeneerDistanceMargin) {
    return ShouldEmitVeneer(unresolved_branches_first_limit(), margin);
  }

  // The maximum code size generated for a veneer. Currently one branch
  // instruction. This is for code size checking purposes, and can be extended
  // in the future for example if we decide to add nops between the veneers.
  static constexpr int kMaxVeneerCodeSize = 1 * kInstrSize;

  void RecordVeneerPool(int location_offset, int size);
  // Emits veneers for branches that are approaching their maximum range.
  // If need_protection is true, the veneers are protected by a branch jumping
  // over the code.
  void EmitVeneers(bool force_emit, bool need_protection,
                   size_t margin = kVeneerDistanceMargin);
  void EmitVeneersGuard() { EmitPoolGuard(); }
  // Checks whether veneers need to be emitted at this point.
  // If force_emit is set, a veneer is generated for *all* unresolved branches.
  void CheckVeneerPool(bool force_emit, bool require_jump,
                       size_t margin = kVeneerDistanceMargin);

  using BlockConstPoolScope = ConstantPool::BlockScope;

  // RAII-style scope that blocks both veneer and constant pool emission for
  // its lifetime (constant pool blocking is delegated to the nested
  // BlockConstPoolScope member; veneer blocking is started in the constructor
  // and ended in the destructor).
  class BlockPoolsScope {
   public:
    // Block veneer and constant pool. Emits pools if necessary to ensure that
    // {margin} more bytes can be emitted without triggering pool emission.
    explicit BlockPoolsScope(Assembler* assem, size_t margin = 0)
        : assem_(assem), block_const_pool_(assem, margin) {
      assem_->CheckVeneerPool(false, true, margin);
      assem_->StartBlockVeneerPool();
    }

    // Variant that forwards a PoolEmissionCheck policy to the constant pool
    // scope and performs no up-front veneer pool check.
    BlockPoolsScope(Assembler* assem, PoolEmissionCheck check)
        : assem_(assem), block_const_pool_(assem, check) {
      assem_->StartBlockVeneerPool();
    }
    ~BlockPoolsScope() { assem_->EndBlockVeneerPool(); }

   private:
    Assembler* assem_;
    BlockConstPoolScope block_const_pool_;
    DISALLOW_IMPLICIT_CONSTRUCTORS(BlockPoolsScope);
  };

#if defined(V8_OS_WIN)
  win64_unwindinfo::XdataEncoder* GetXdataEncoder() {
    return xdata_encoder_.get();
  }

  win64_unwindinfo::BuiltinUnwindInfo GetUnwindInfo() const;
#endif

 protected:
  inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const;

  // Shared emission helpers used by the public instruction methods above.
  void LoadStore(const CPURegister& rt, const MemOperand& addr, LoadStoreOp op);
  void LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
                     const MemOperand& addr, LoadStorePairOp op);
  void LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt, int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt, const MemOperand& addr,
                             Instr op);

  static bool IsImmLSPair(int64_t offset, unsigned size);

  void Logical(const Register& rd, const Register& rn, const Operand& operand,
               LogicalOp op);
  void LogicalImmediate(const Register&
rd, const Register& rn, unsigned n,
                        unsigned imm_s, unsigned imm_r, LogicalOp op);

  // Emit a conditional compare instruction with the given nzcv flags and
  // condition.
  void ConditionalCompare(const Register& rn, const Operand& operand,
                          StatusFlags nzcv, Condition cond,
                          ConditionalCompareOp op);
  // Whether |immediate| can be encoded in the immediate field of a
  // conditional compare instruction.
  static bool IsImmConditionalCompare(int64_t immediate);

  // Emit an add/subtract-with-carry instruction, optionally updating flags
  // according to |S|.
  void AddSubWithCarry(const Register& rd, const Register& rn,
                       const Operand& operand, FlagsUpdate S,
                       AddSubWithCarryOp op);

  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd, const Register& rn, Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd, const Register& rn, Extend extend,
                       unsigned left_shift);

  // Common emission path for add/subtract with an immediate, shifted-register
  // or extended-register operand.
  void AddSub(const Register& rd, const Register& rn, const Operand& operand,
              FlagsUpdate S, AddSubOp op);

  // Whether |imm| can be encoded as an FP immediate (e.g. for FMOV).
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static inline LoadStoreOp LoadOpFor(const CPURegister& rt);
  static inline LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                              const CPURegister& rt2);
  static inline LoadStoreOp StoreOpFor(const CPURegister& rt);
  static inline LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                               const CPURegister& rt2);
  static inline LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

  // Remove the specified branch from the unbound label link chain.
  // If available, a veneer for this label can be used for other branches in the
  // chain if the link chain cannot be fixed up without this branch.
  void RemoveBranchFromLabelLinkChain(Instruction* branch, Label* label,
                                      Instruction* label_veneer = nullptr);

 private:
  // Encode |imm| as an 8-bit FP immediate. Expects an encodable value (see
  // IsImmFP64).
  static uint32_t FPToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd, uint64_t imm, int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd, const Register& rn,
                               const Operand& operand, FlagsUpdate S, Instr op);
  void DataProcExtendedRegister(const Register& rd, const Register& rn,
                                const Operand& operand, FlagsUpdate S,
                                Instr op);
  void ConditionalSelect(const Register& rd, const Register& rn,
                         const Register& rm, Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd, const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd, const Register& rn,
                             const Register& rm, const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd, const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing2Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm,
                               FPDataProcessing2SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm, const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON emission helpers. Suffixes follow the NEON instruction-form naming
  // used in this file: "L" = long, "W" = wide, "HN" = high-narrow.
  void NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEON3Same(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd, const VRegister& vn,
                   const VRegister& vm, Instr op);
  void NEON3DifferentL(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
                        const VRegister& vm, NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
                      NEON2RegMiscOp vop, double value = 0.0);
  void NEON2RegMisc(const VRegister& vd, const VRegister& vn,
                    NEON2RegMiscOp vop, int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                NEONPermOp op);
  // By-element forms take an additional lane index |vm_index| into vm.
  void NEONFPByElement(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm, int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm, int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
                          NEONShiftImmediateOp op, int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
                              int shift, NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd, const VRegister& vn,
                               int shift, NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
  void NEONTable(const
VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEONTableOp op);

  // Compute the addressing-mode field for a NEON load/store-structure
  // instruction accessing |addr|.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Label helpers.

  // Return an offset for a label-referencing instruction, typically a branch.
  int LinkAndGetByteOffsetTo(Label* label);

  // This is the same as LinkAndGetByteOffsetTo, but return an offset
  // suitable for fields that take instruction offsets.
  inline int LinkAndGetInstructionOffsetTo(Label* label);

  // Sentinel offset marking the start (end of chain) of a label link chain.
  static constexpr int kStartOfLabelLinkChain = 0;

  // Verify that a label's link chain is intact.
  void CheckLabelLinkChain(Label const* label);

  // Emit the instruction at pc_.
  void Emit(Instr instruction) {
    STATIC_ASSERT(sizeof(*pc_) == 1);
    STATIC_ASSERT(sizeof(instruction) == kInstrSize);
    // The caller must have reserved space; CheckBuffer() below only prepares
    // for the *next* emission.
    DCHECK_LE(pc_ + sizeof(instruction), buffer_start_ + buffer_->size());

    memcpy(pc_, &instruction, sizeof(instruction));
    pc_ += sizeof(instruction);
    CheckBuffer();
  }

  // Emit data inline in the instruction stream.
  void EmitData(void const* data, unsigned size) {
    DCHECK_EQ(sizeof(*pc_), 1);
    DCHECK_LE(pc_ + size, buffer_start_ + buffer_->size());

    // TODO(all): Somehow register we have some data here. Then we can
    // disassemble it correctly.
    memcpy(pc_, data, size);
    pc_ += size;
    CheckBuffer();
  }

  // Buffer management: grow when space runs low, and the checks that
  // trigger growth and pool emission.
  void GrowBuffer();
  void CheckBufferSpace();
  void CheckBuffer();

  // Emission of the veneer pools may be blocked in some code sequences.
  int veneer_pool_blocked_nesting_;  // Block emission if this is not zero.
  // Relocation info generation.
  // Each relocation is encoded as a variable size value.
  static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize;
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

 protected:
  // Code generation.
  // The relocation writer's position is at least kGap bytes below the end of
  // the generated instructions. This is so that multi-instruction sequences do
  // not have to check for overflow. The same is true for writes of large
  // relocation info entries, and debug strings encoded in the instruction
  // stream.
  static constexpr int kGap = 64;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);

 public:
#ifdef DEBUG
  // Functions used for testing.
  size_t GetConstantPoolEntriesSizeForTesting() const {
    // Do not include branch over the pool.
    return constpool_.Entry32Count() * kInt32Size +
           constpool_.Entry64Count() * kInt64Size;
  }

  static size_t GetCheckConstPoolIntervalForTesting() {
    return ConstantPool::kCheckInterval;
  }

  static size_t GetApproxMaxDistToConstPoolForTesting() {
    return ConstantPool::kApproxDistToPool64;
  }
#endif

  // Record of a forward branch whose target label is not yet bound; used by
  // the veneer-emission machinery (see unresolved_branches_ below).
  class FarBranchInfo {
   public:
    FarBranchInfo(int offset, Label* label)
        : pc_offset_(offset), label_(label) {}
    // Offset of the branch in the code generation buffer.
    int pc_offset_;
    // The label branched to.
    Label* label_;
  };

 protected:
  // Information about unresolved (forward) branches.
  // The Assembler is only allowed to delete out-of-date information from here
  // after a label is bound. The MacroAssembler uses this information to
  // generate veneers.
  //
  // The second member gives information about the unresolved branch. The first
  // member of the pair is the maximum offset that the branch can reach in the
  // buffer. The map is sorted according to this reachable offset, allowing to
  // easily check when veneers need to be emitted.
  // Note that the maximum reachable offset (first member of the pairs) should
  // always be positive but has the same type as the return value for
  // pc_offset() for convenience.
  std::multimap<int, FarBranchInfo> unresolved_branches_;

  // We generate a veneer for a branch if we reach within this distance of the
  // limit of the range.
  static constexpr int kVeneerDistanceMargin = 1 * KB;
  // The factor of 2 is a finger in the air guess. With a default margin of
  // 1KB, that leaves us an additional 256 instructions to avoid generating a
  // protective branch.
  static constexpr int kVeneerNoProtectionFactor = 2;
  static constexpr int kVeneerDistanceCheckMargin =
      kVeneerNoProtectionFactor * kVeneerDistanceMargin;
  // Smallest maximum-reachable-offset among all unresolved branches, i.e. the
  // first program point at which some branch could go out of range.
  // Precondition: there is at least one unresolved branch.
  int unresolved_branches_first_limit() const {
    DCHECK(!unresolved_branches_.empty());
    return unresolved_branches_.begin()->first;
  }
  // This PC-offset of the next veneer pool check helps reduce the overhead
  // of checking for veneer pools.
  // It is maintained to the closest unresolved branch limit minus the maximum
  // veneer margin (or kMaxInt if there are no unresolved branches).
  int next_veneer_pool_check_;

#if defined(V8_OS_WIN)
  // Windows ARM64 only: encoder for unwind data (see
  // src/diagnostics/unwinding-info-win64.h).
  std::unique_ptr<win64_unwindinfo::XdataEncoder> xdata_encoder_;
#endif

 private:
  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512 * MB;

  // If a veneer is emitted for a branch instruction, that instruction must be
  // removed from the associated label's link chain so that the assembler does
  // not later attempt (likely unsuccessfully) to patch it to branch directly to
  // the label.
  void DeleteUnresolvedBranchInfoForLabel(Label* label);
  // This function deletes the information related to the label by traversing
  // the label chain, and for each PC-relative instruction in the chain checking
  // if pending unresolved information exists. Its complexity is proportional to
  // the length of the label chain.
  void DeleteUnresolvedBranchInfoForLabelTraverse(Label* label);

  // NOTE(review): name suggests this resolves pending HeapObjectRequests
  // (e.g. from Operand::EmbeddedNumber) once objects can be allocated on
  // |isolate|; confirm in assembler-arm64.cc.
  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  // The pending constant pool.
  ConstantPool constpool_;

  friend class EnsureSpace;
  friend class ConstantPool;
};

class PatchingAssembler : public Assembler {
 public:
  // Create an Assembler with a buffer starting at 'start'.
  // The buffer size is
  //   size of instructions to patch + kGap
  // Where kGap is the distance from which the Assembler tries to grow the
  // buffer.
  // If more or fewer instructions than expected are generated or if some
  // relocation information takes space in the buffer, the PatchingAssembler
  // will crash trying to grow the buffer.
  // Note that the instruction cache will not be flushed.
  PatchingAssembler(const AssemblerOptions& options, byte* start,
                    unsigned count)
      : Assembler(options,
                  ExternalAssemblerBuffer(start, count * kInstrSize + kGap)),
        block_constant_pool_emission_scope(this) {}

  ~PatchingAssembler() {
    // Verify we have generated the number of instruction we expected.
    DCHECK_EQ(pc_offset() + kGap, buffer_->size());
  }

  // See definition of PatchAdrFar() for details.
  static constexpr int kAdrFarPatchableNNops = 2;
  static constexpr int kAdrFarPatchableNInstrs = kAdrFarPatchableNNops + 2;
  void PatchAdrFar(int64_t target_offset);
  void PatchSubSp(uint32_t immediate);

 private:
  // Blocks constant pool emission for the whole lifetime of the patcher, so
  // no pool can be interleaved with the instructions being patched in.
  BlockPoolsScope block_constant_pool_emission_scope;
};

// RAII scope used before emitting: blocks pool emission and checks that the
// buffer has enough space (growing it if necessary via CheckBufferSpace).
class EnsureSpace {
 public:
  explicit EnsureSpace(Assembler* assembler) : block_pools_scope_(assembler) {
    assembler->CheckBufferSpace();
  }

 private:
  Assembler::BlockPoolsScope block_pools_scope_;
};

}  // namespace internal
}  // namespace v8

#endif  // V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_