1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
28 #define VIXL_AARCH64_ASSEMBLER_AARCH64_H_
29
30 #include "../assembler-base-vixl.h"
31 #include "../code-generation-scopes-vixl.h"
32 #include "../cpu-features.h"
33 #include "../globals-vixl.h"
34 #include "../invalset-vixl.h"
35 #include "../utils-vixl.h"
36 #include "operands-aarch64.h"
37
38 namespace vixl {
39 namespace aarch64 {
40
41 class LabelTestHelper; // Forward declaration.
42
43
// A position in the generated code, used as a branch target or PC-relative
// load target. A label is "bound" once its final buffer offset is known, and
// "linked" while instructions referring to it are still awaiting resolution.
class Label {
 public:
  // Labels start out unbound (negative location) with no links.
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  // A bound label has a non-negative offset into the code buffer.
  bool IsBound() const { return location_ >= 0; }
  // A linked label still has unresolved instructions referring to it.
  bool IsLinked() const { return !links_.empty(); }

  // The buffer offset this label is bound to (only meaningful once bound).
  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  // Tuning parameters for the InvalSet used to track links to this label.
  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor>
      LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  // Fix this label's position to `location` (an offset into the buffer).
  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  // Record that the instruction at buffer offset `instruction` refers to this
  // label and must be patched once the label is bound.
  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, ie. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

// It is not safe to copy labels, so disable the copy constructor and operator
// by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
159
160
161 class Assembler;
162 class LiteralPool;
163
164 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
165 // stream and loaded through a pc relative load. The same literal can be
166 // referred to by multiple instructions but a literal can only reside at one
167 // place in memory. A literal can be used by a load before or after being
168 // placed in memory.
169 //
170 // Internally an offset of 0 is associated with a literal which has been
171 // neither used nor placed. Then two possibilities arise:
172 // 1) the label is placed, the offset (stored as offset + 1) is used to
173 // resolve any subsequent load using the label.
174 // 2) the label is not placed and offset is the offset of the last load using
175 // the literal (stored as -offset -1). If multiple loads refer to this
176 // literal then the last load holds the offset of the preceding load and
177 // all loads form a chain. Once the offset is placed all the loads in the
178 // chain are resolved and future loads fall back to possibility 1.
// Untyped base class for pool literals. `size_` is one of kWRegSizeInBytes,
// kXRegSizeInBytes or kQRegSizeInBytes; the raw bits live in `low64_` (plus
// `high64_` for 128-bit literals). See the encoding description of `offset_`
// in the comment block above.
class RawLiteral {
 public:
  // Who is responsible for deleting this literal:
  // - kDeletedOnPlacementByPool: the pool deletes it once it has been placed.
  // - kDeletedOnPoolDestruction: the pool deletes it when the pool dies.
  // - kManuallyDeleted: the caller retains ownership.
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  // Size of the literal in bytes; always one of the three supported sizes.
  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  // Raw-bits accessors; each asserts that the literal has the matching size.
  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  // `offset_` encodes three states:
  //   0             -> neither used nor placed,
  //   offset + 1    -> placed at `offset`,
  //   -offset - 1   -> not placed; last used by the load at `offset`.
  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  // Buffer offset at which the literal was placed; only valid once placed.
  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  // Mark the literal as placed at `offset` (stored biased by +1).
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  // Offset of the most recent unresolved load of this literal.
  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  // Record a load of this (not yet placed) literal at `offset` (stored
  // negated and biased by -1).
  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  size_t size_;
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

 private:
  // The pool that will place (and possibly delete) this literal; may be NULL.
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};
289
290
291 template <typename T>
292 class Literal : public RawLiteral {
293 public:
294 explicit Literal(T value,
295 LiteralPool* literal_pool = NULL,
296 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(sizeof (value),literal_pool,ownership)297 : RawLiteral(sizeof(value), literal_pool, ownership) {
298 VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
299 UpdateValue(value);
300 }
301
302 Literal(T high64,
303 T low64,
304 LiteralPool* literal_pool = NULL,
305 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(kQRegSizeInBytes,literal_pool,ownership)306 : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
307 VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
308 UpdateValue(high64, low64);
309 }
310
~Literal()311 virtual ~Literal() {}
312
313 // Update the value of this literal, if necessary by rewriting the value in
314 // the pool.
315 // If the literal has already been placed in a literal pool, the address of
316 // the start of the code buffer must be provided, as the literal only knows it
317 // offset from there. This also allows patching the value after the code has
318 // been moved in memory.
319 void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
320 VIXL_ASSERT(sizeof(new_value) == size_);
321 memcpy(&low64_, &new_value, sizeof(new_value));
322 if (IsPlaced()) {
323 VIXL_ASSERT(code_buffer != NULL);
324 RewriteValueInCode(code_buffer);
325 }
326 }
327
328 void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
329 VIXL_ASSERT(sizeof(low64) == size_ / 2);
330 memcpy(&low64_, &low64, sizeof(low64));
331 memcpy(&high64_, &high64, sizeof(high64));
332 if (IsPlaced()) {
333 VIXL_ASSERT(code_buffer != NULL);
334 RewriteValueInCode(code_buffer);
335 }
336 }
337
338 void UpdateValue(T new_value, const Assembler* assembler);
339 void UpdateValue(T high64, T low64, const Assembler* assembler);
340
341 private:
RewriteValueInCode(uint8_t * code_buffer)342 void RewriteValueInCode(uint8_t* code_buffer) {
343 VIXL_ASSERT(IsPlaced());
344 VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
345 switch (GetSize()) {
346 case kSRegSizeInBytes:
347 *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
348 GetRawValue32();
349 break;
350 case kDRegSizeInBytes:
351 *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
352 GetRawValue64();
353 break;
354 default:
355 VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
356 uint64_t* base_address =
357 reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
358 *base_address = GetRawValue128Low64();
359 *(base_address + 1) = GetRawValue128High64();
360 }
361 }
362 };
363
364
// Control whether or not position-independent code should be emitted.
// This is the `pic` option accepted by the Assembler constructors;
// PositionIndependentCode is the default.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
383
384
// Control how scaled- and unscaled-offset loads and stores are generated.
// The `Prefer*` options may fall back to another addressing form when the
// preferred one cannot encode the operands; the `Require*` options do not.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
401
402
403 // Assembler.
404 class Assembler : public vixl::internal::AssemblerBase {
405 public:
  // Construct an Assembler managing its own code buffer.
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  // As above, with an initial buffer capacity in bytes.
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  // Construct an Assembler emitting into a caller-owned buffer of `capacity`
  // bytes.
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}

  // Upon destruction, the code will assert that one of the following is true:
  // * The Assembler object has not been used.
  // * Nothing has been emitted since the last Reset() call.
  // * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}
427
428 // System functions.
429
430 // Start generating code from the beginning of the buffer, discarding any code
431 // and data that has already been emitted into the buffer.
432 void Reset();
433
434 // Bind a label to the current PC.
435 void bind(Label* label);
436
437 // Bind a label to a specified offset from the start of the buffer.
438 void BindToOffset(Label* label, ptrdiff_t offset);
439
440 // Place a literal at the current PC.
441 void place(RawLiteral* literal);
442
443 VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
444 return GetCursorOffset();
445 }
446
447 VIXL_DEPRECATED("GetBuffer().GetCapacity()",
448 ptrdiff_t GetBufferEndOffset() const) {
449 return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
450 }
451 VIXL_DEPRECATED("GetBuffer().GetCapacity()",
452 ptrdiff_t BufferEndOffset() const) {
453 return GetBuffer().GetCapacity();
454 }
455
  // Return the address of a bound label.
  // T must be wide enough to hold a pointer (e.g. uintptr_t, Instruction*).
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }
463
  // Return a pointer to the instruction at byte offset `instruction_offset`
  // from the start of the code buffer.
  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
467 VIXL_DEPRECATED("GetInstructionAt",
468 Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
469 return GetInstructionAt(instruction_offset);
470 }
471
GetInstructionOffset(Instruction * instruction)472 ptrdiff_t GetInstructionOffset(Instruction* instruction) {
473 VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
474 ptrdiff_t offset =
475 instruction - GetBuffer()->GetStartAddress<Instruction*>();
476 VIXL_ASSERT((0 <= offset) &&
477 (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
478 return offset;
479 }
480 VIXL_DEPRECATED("GetInstructionOffset",
481 ptrdiff_t InstructionOffset(Instruction* instruction)) {
482 return GetInstructionOffset(instruction);
483 }
484
485 // Instruction set functions.
486
487 // Branch / Jump instructions.
488
489 // Branch to register.
490 void br(const Register& xn);
491
492 // Branch with link to register.
493 void blr(const Register& xn);
494
495 // Branch to register with return hint.
496 void ret(const Register& xn = lr);
497
498 // Branch to register, with pointer authentication. Using key A and a modifier
499 // of zero [Armv8.3].
500 void braaz(const Register& xn);
501
502 // Branch to register, with pointer authentication. Using key B and a modifier
503 // of zero [Armv8.3].
504 void brabz(const Register& xn);
505
506 // Branch with link to register, with pointer authentication. Using key A and
507 // a modifier of zero [Armv8.3].
508 void blraaz(const Register& xn);
509
510 // Branch with link to register, with pointer authentication. Using key B and
511 // a modifier of zero [Armv8.3].
512 void blrabz(const Register& xn);
513
514 // Return from subroutine, with pointer authentication. Using key A [Armv8.3].
515 void retaa();
516
517 // Return from subroutine, with pointer authentication. Using key B [Armv8.3].
518 void retab();
519
520 // Branch to register, with pointer authentication. Using key A [Armv8.3].
521 void braa(const Register& xn, const Register& xm);
522
523 // Branch to register, with pointer authentication. Using key B [Armv8.3].
524 void brab(const Register& xn, const Register& xm);
525
526 // Branch with link to register, with pointer authentication. Using key A
527 // [Armv8.3].
528 void blraa(const Register& xn, const Register& xm);
529
530 // Branch with link to register, with pointer authentication. Using key B
531 // [Armv8.3].
532 void blrab(const Register& xn, const Register& xm);
533
534 // Unconditional branch to label.
535 void b(Label* label);
536
537 // Conditional branch to label.
538 void b(Label* label, Condition cond);
539
540 // Unconditional branch to PC offset.
541 void b(int64_t imm26);
542
543 // Conditional branch to PC offset.
544 void b(int64_t imm19, Condition cond);
545
546 // Branch with link to label.
547 void bl(Label* label);
548
549 // Branch with link to PC offset.
550 void bl(int64_t imm26);
551
552 // Compare and branch to label if zero.
553 void cbz(const Register& rt, Label* label);
554
555 // Compare and branch to PC offset if zero.
556 void cbz(const Register& rt, int64_t imm19);
557
558 // Compare and branch to label if not zero.
559 void cbnz(const Register& rt, Label* label);
560
561 // Compare and branch to PC offset if not zero.
562 void cbnz(const Register& rt, int64_t imm19);
563
564 // Table lookup from one register.
565 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
566
567 // Table lookup from two registers.
568 void tbl(const VRegister& vd,
569 const VRegister& vn,
570 const VRegister& vn2,
571 const VRegister& vm);
572
573 // Table lookup from three registers.
574 void tbl(const VRegister& vd,
575 const VRegister& vn,
576 const VRegister& vn2,
577 const VRegister& vn3,
578 const VRegister& vm);
579
580 // Table lookup from four registers.
581 void tbl(const VRegister& vd,
582 const VRegister& vn,
583 const VRegister& vn2,
584 const VRegister& vn3,
585 const VRegister& vn4,
586 const VRegister& vm);
587
588 // Table lookup extension from one register.
589 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
590
591 // Table lookup extension from two registers.
592 void tbx(const VRegister& vd,
593 const VRegister& vn,
594 const VRegister& vn2,
595 const VRegister& vm);
596
597 // Table lookup extension from three registers.
598 void tbx(const VRegister& vd,
599 const VRegister& vn,
600 const VRegister& vn2,
601 const VRegister& vn3,
602 const VRegister& vm);
603
604 // Table lookup extension from four registers.
605 void tbx(const VRegister& vd,
606 const VRegister& vn,
607 const VRegister& vn2,
608 const VRegister& vn3,
609 const VRegister& vn4,
610 const VRegister& vm);
611
612 // Test bit and branch to label if zero.
613 void tbz(const Register& rt, unsigned bit_pos, Label* label);
614
615 // Test bit and branch to PC offset if zero.
616 void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);
617
618 // Test bit and branch to label if not zero.
619 void tbnz(const Register& rt, unsigned bit_pos, Label* label);
620
621 // Test bit and branch to PC offset if not zero.
622 void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);
623
624 // Address calculation instructions.
625 // Calculate a PC-relative address. Unlike for branches the offset in adr is
626 // unscaled (i.e. the result can be unaligned).
627
628 // Calculate the address of a label.
629 void adr(const Register& xd, Label* label);
630
631 // Calculate the address of a PC offset.
632 void adr(const Register& xd, int64_t imm21);
633
634 // Calculate the page address of a label.
635 void adrp(const Register& xd, Label* label);
636
637 // Calculate the page address of a PC offset.
638 void adrp(const Register& xd, int64_t imm21);
639
640 // Data Processing instructions.
641
642 // Add.
643 void add(const Register& rd, const Register& rn, const Operand& operand);
644
645 // Add and update status flags.
646 void adds(const Register& rd, const Register& rn, const Operand& operand);
647
648 // Compare negative.
649 void cmn(const Register& rn, const Operand& operand);
650
651 // Subtract.
652 void sub(const Register& rd, const Register& rn, const Operand& operand);
653
654 // Subtract and update status flags.
655 void subs(const Register& rd, const Register& rn, const Operand& operand);
656
657 // Compare.
658 void cmp(const Register& rn, const Operand& operand);
659
660 // Negate.
661 void neg(const Register& rd, const Operand& operand);
662
663 // Negate and update status flags.
664 void negs(const Register& rd, const Operand& operand);
665
666 // Add with carry bit.
667 void adc(const Register& rd, const Register& rn, const Operand& operand);
668
669 // Add with carry bit and update status flags.
670 void adcs(const Register& rd, const Register& rn, const Operand& operand);
671
672 // Subtract with carry bit.
673 void sbc(const Register& rd, const Register& rn, const Operand& operand);
674
675 // Subtract with carry bit and update status flags.
676 void sbcs(const Register& rd, const Register& rn, const Operand& operand);
677
678 // Rotate register right and insert into NZCV flags under the control of a
679 // mask [Armv8.4].
680 void rmif(const Register& xn, unsigned rotation, StatusFlags flags);
681
682 // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
683 void setf8(const Register& rn);
684
685 // Set NZCV flags from register, treated as an 16-bit value [Armv8.4].
686 void setf16(const Register& rn);
687
688 // Negate with carry bit.
689 void ngc(const Register& rd, const Operand& operand);
690
691 // Negate with carry bit and update status flags.
692 void ngcs(const Register& rd, const Operand& operand);
693
694 // Logical instructions.
695
696 // Bitwise and (A & B).
697 void and_(const Register& rd, const Register& rn, const Operand& operand);
698
699 // Bitwise and (A & B) and update status flags.
700 void ands(const Register& rd, const Register& rn, const Operand& operand);
701
702 // Bit test and set flags.
703 void tst(const Register& rn, const Operand& operand);
704
705 // Bit clear (A & ~B).
706 void bic(const Register& rd, const Register& rn, const Operand& operand);
707
708 // Bit clear (A & ~B) and update status flags.
709 void bics(const Register& rd, const Register& rn, const Operand& operand);
710
711 // Bitwise or (A | B).
712 void orr(const Register& rd, const Register& rn, const Operand& operand);
713
714 // Bitwise nor (A | ~B).
715 void orn(const Register& rd, const Register& rn, const Operand& operand);
716
717 // Bitwise eor/xor (A ^ B).
718 void eor(const Register& rd, const Register& rn, const Operand& operand);
719
720 // Bitwise enor/xnor (A ^ ~B).
721 void eon(const Register& rd, const Register& rn, const Operand& operand);
722
723 // Logical shift left by variable.
724 void lslv(const Register& rd, const Register& rn, const Register& rm);
725
726 // Logical shift right by variable.
727 void lsrv(const Register& rd, const Register& rn, const Register& rm);
728
729 // Arithmetic shift right by variable.
730 void asrv(const Register& rd, const Register& rn, const Register& rm);
731
732 // Rotate right by variable.
733 void rorv(const Register& rd, const Register& rn, const Register& rm);
734
735 // Bitfield instructions.
736
737 // Bitfield move.
738 void bfm(const Register& rd,
739 const Register& rn,
740 unsigned immr,
741 unsigned imms);
742
743 // Signed bitfield move.
744 void sbfm(const Register& rd,
745 const Register& rn,
746 unsigned immr,
747 unsigned imms);
748
749 // Unsigned bitfield move.
750 void ubfm(const Register& rd,
751 const Register& rn,
752 unsigned immr,
753 unsigned imms);
754
755 // Bfm aliases.
756
  // Bitfield insert.
  // Inserts the low `width` bits of rn into rd at position `lsb`, leaving the
  // other bits of rd unchanged. Alias of bfm.
  void bfi(const Register& rd,
           const Register& rn,
           unsigned lsb,
           unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    // bfm encodes the insert position as a right-rotation:
    // immr = (-lsb mod reg_size), imms = width - 1. reg_size is a power of
    // two, so the modulo reduction is done with a mask.
    bfm(rd,
        rn,
        (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
        width - 1);
  }
769
  // Bitfield extract and insert low.
  // Copies bits [lsb, lsb + width) of rn into the least-significant bits of
  // rd, leaving the remaining bits of rd unchanged. Alias of bfm.
  void bfxil(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd, rn, lsb, lsb + width - 1);
  }
779
  // Bitfield clear [Armv8.2].
  // Clears `width` bits of rd starting at `lsb`, by inserting from the
  // appropriately-sized zero register.
  void bfc(const Register& rd, unsigned lsb, unsigned width) {
    bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
  }
784
785 // Sbfm aliases.
786
  // Arithmetic shift right (sign-extending). Alias of sbfm.
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }
792
  // Signed bitfield insert with zero at right.
  // Inserts the low `width` bits of rn into rd at position `lsb`,
  // sign-extending above and zeroing below. Alias of sbfm.
  void sbfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    // immr = (-lsb mod reg_size), imms = width - 1; see bfi.
    sbfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }
805
  // Signed bitfield extract.
  // Extracts bits [lsb, lsb + width) of rn into the low bits of rd and
  // sign-extends the result. Alias of sbfm.
  void sbfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd, rn, lsb, lsb + width - 1);
  }
815
  // Signed extend byte: sign-extend bits [7:0] of rn into rd.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }

  // Signed extend halfword: sign-extend bits [15:0] of rn into rd.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }

  // Signed extend word: sign-extend bits [31:0] of rn into rd.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
824
825 // Ubfm aliases.
826
827 // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)828 void lsl(const Register& rd, const Register& rn, unsigned shift) {
829 unsigned reg_size = rd.GetSizeInBits();
830 VIXL_ASSERT(shift < reg_size);
831 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
832 }
833
  // Logical shift right (zero-extending). Alias of ubfm.
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }
839
840 // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)841 void ubfiz(const Register& rd,
842 const Register& rn,
843 unsigned lsb,
844 unsigned width) {
845 VIXL_ASSERT(width >= 1);
846 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
847 ubfm(rd,
848 rn,
849 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
850 width - 1);
851 }
852
  // Unsigned bitfield extract.
  // Extracts bits [lsb, lsb + width) of rn into the low bits of rd and
  // zero-extends the result. Alias of ubfm.
  void ubfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd, rn, lsb, lsb + width - 1);
  }
862
  // Unsigned extend byte: zero-extend bits [7:0] of rn into rd.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }

  // Unsigned extend halfword: zero-extend bits [15:0] of rn into rd.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }

  // Unsigned extend word: zero-extend bits [31:0] of rn into rd.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
871
872 // Extract.
873 void extr(const Register& rd,
874 const Register& rn,
875 const Register& rm,
876 unsigned lsb);
877
878 // Conditional select: rd = cond ? rn : rm.
879 void csel(const Register& rd,
880 const Register& rn,
881 const Register& rm,
882 Condition cond);
883
884 // Conditional select increment: rd = cond ? rn : rm + 1.
885 void csinc(const Register& rd,
886 const Register& rn,
887 const Register& rm,
888 Condition cond);
889
890 // Conditional select inversion: rd = cond ? rn : ~rm.
891 void csinv(const Register& rd,
892 const Register& rn,
893 const Register& rm,
894 Condition cond);
895
896 // Conditional select negation: rd = cond ? rn : -rm.
897 void csneg(const Register& rd,
898 const Register& rn,
899 const Register& rm,
900 Condition cond);
901
902 // Conditional set: rd = cond ? 1 : 0.
903 void cset(const Register& rd, Condition cond);
904
905 // Conditional set mask: rd = cond ? -1 : 0.
906 void csetm(const Register& rd, Condition cond);
907
908 // Conditional increment: rd = cond ? rn + 1 : rn.
909 void cinc(const Register& rd, const Register& rn, Condition cond);
910
911 // Conditional invert: rd = cond ? ~rn : rn.
912 void cinv(const Register& rd, const Register& rn, Condition cond);
913
914 // Conditional negate: rd = cond ? -rn : rn.
915 void cneg(const Register& rd, const Register& rn, Condition cond);
916
  // Rotate right.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    // ROR (immediate) is an alias of EXTR with both source registers equal:
    // extracting from the doubled value rs:rs yields rs rotated right by
    // `shift` bits.
    extr(rd, rs, rs, shift);
  }
921
922 // Conditional comparison.
923
924 // Conditional compare negative.
925 void ccmn(const Register& rn,
926 const Operand& operand,
927 StatusFlags nzcv,
928 Condition cond);
929
930 // Conditional compare.
931 void ccmp(const Register& rn,
932 const Operand& operand,
933 StatusFlags nzcv,
934 Condition cond);
935
936 // CRC-32 checksum from byte.
937 void crc32b(const Register& wd, const Register& wn, const Register& wm);
938
939 // CRC-32 checksum from half-word.
940 void crc32h(const Register& wd, const Register& wn, const Register& wm);
941
942 // CRC-32 checksum from word.
943 void crc32w(const Register& wd, const Register& wn, const Register& wm);
944
945 // CRC-32 checksum from double word.
946 void crc32x(const Register& wd, const Register& wn, const Register& xm);
947
  // CRC-32C checksum from byte.
949 void crc32cb(const Register& wd, const Register& wn, const Register& wm);
950
  // CRC-32C checksum from half-word.
952 void crc32ch(const Register& wd, const Register& wn, const Register& wm);
953
  // CRC-32C checksum from word.
955 void crc32cw(const Register& wd, const Register& wn, const Register& wm);
956
957 // CRC-32C checksum from double word.
958 void crc32cx(const Register& wd, const Register& wn, const Register& xm);
959
960 // Multiply.
961 void mul(const Register& rd, const Register& rn, const Register& rm);
962
963 // Negated multiply.
964 void mneg(const Register& rd, const Register& rn, const Register& rm);
965
966 // Signed long multiply: 32 x 32 -> 64-bit.
967 void smull(const Register& xd, const Register& wn, const Register& wm);
968
969 // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
970 void smulh(const Register& xd, const Register& xn, const Register& xm);
971
972 // Multiply and accumulate.
973 void madd(const Register& rd,
974 const Register& rn,
975 const Register& rm,
976 const Register& ra);
977
978 // Multiply and subtract.
979 void msub(const Register& rd,
980 const Register& rn,
981 const Register& rm,
982 const Register& ra);
983
984 // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
985 void smaddl(const Register& xd,
986 const Register& wn,
987 const Register& wm,
988 const Register& xa);
989
990 // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
991 void umaddl(const Register& xd,
992 const Register& wn,
993 const Register& wm,
994 const Register& xa);
995
  // Unsigned long multiply: 32 x 32 -> 64-bit.
  void umull(const Register& xd, const Register& wn, const Register& wm) {
    // UMULL is an alias of UMADDL with the addend fixed to the zero register.
    umaddl(xd, wn, wm, xzr);
  }
1000
1001 // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1002 void umulh(const Register& xd, const Register& xn, const Register& xm);
1003
1004 // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1005 void smsubl(const Register& xd,
1006 const Register& wn,
1007 const Register& wm,
1008 const Register& xa);
1009
1010 // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1011 void umsubl(const Register& xd,
1012 const Register& wn,
1013 const Register& wm,
1014 const Register& xa);
1015
1016 // Signed integer divide.
1017 void sdiv(const Register& rd, const Register& rn, const Register& rm);
1018
1019 // Unsigned integer divide.
1020 void udiv(const Register& rd, const Register& rn, const Register& rm);
1021
1022 // Bit reverse.
1023 void rbit(const Register& rd, const Register& rn);
1024
1025 // Reverse bytes in 16-bit half words.
1026 void rev16(const Register& rd, const Register& rn);
1027
1028 // Reverse bytes in 32-bit words.
1029 void rev32(const Register& xd, const Register& xn);
1030
1031 // Reverse bytes in 64-bit general purpose register, an alias for rev
1032 // [Armv8.2].
rev64(const Register & xd,const Register & xn)1033 void rev64(const Register& xd, const Register& xn) {
1034 VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
1035 rev(xd, xn);
1036 }
1037
1038 // Reverse bytes.
1039 void rev(const Register& rd, const Register& rn);
1040
1041 // Count leading zeroes.
1042 void clz(const Register& rd, const Register& rn);
1043
1044 // Count leading sign bits.
1045 void cls(const Register& rd, const Register& rn);
1046
1047 // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
1048 void pacia(const Register& xd, const Register& rn);
1049
1050 // Pointer Authentication Code for Instruction address, using key A and a
1051 // modifier of zero [Armv8.3].
1052 void paciza(const Register& xd);
1053
1054 // Pointer Authentication Code for Instruction address, using key A, with
1055 // address in x17 and modifier in x16 [Armv8.3].
1056 void pacia1716();
1057
1058 // Pointer Authentication Code for Instruction address, using key A, with
1059 // address in LR and modifier in SP [Armv8.3].
1060 void paciasp();
1061
1062 // Pointer Authentication Code for Instruction address, using key A, with
1063 // address in LR and a modifier of zero [Armv8.3].
1064 void paciaz();
1065
1066 // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
1067 void pacib(const Register& xd, const Register& xn);
1068
1069 // Pointer Authentication Code for Instruction address, using key B and a
1070 // modifier of zero [Armv8.3].
1071 void pacizb(const Register& xd);
1072
1073 // Pointer Authentication Code for Instruction address, using key B, with
1074 // address in x17 and modifier in x16 [Armv8.3].
1075 void pacib1716();
1076
1077 // Pointer Authentication Code for Instruction address, using key B, with
1078 // address in LR and modifier in SP [Armv8.3].
1079 void pacibsp();
1080
1081 // Pointer Authentication Code for Instruction address, using key B, with
1082 // address in LR and a modifier of zero [Armv8.3].
1083 void pacibz();
1084
1085 // Pointer Authentication Code for Data address, using key A [Armv8.3].
1086 void pacda(const Register& xd, const Register& xn);
1087
1088 // Pointer Authentication Code for Data address, using key A and a modifier of
1089 // zero [Armv8.3].
1090 void pacdza(const Register& xd);
1091
1092 // Pointer Authentication Code for Data address, using key A, with address in
1093 // x17 and modifier in x16 [Armv8.3].
1094 void pacda1716();
1095
1096 // Pointer Authentication Code for Data address, using key A, with address in
1097 // LR and modifier in SP [Armv8.3].
1098 void pacdasp();
1099
1100 // Pointer Authentication Code for Data address, using key A, with address in
1101 // LR and a modifier of zero [Armv8.3].
1102 void pacdaz();
1103
1104 // Pointer Authentication Code for Data address, using key B [Armv8.3].
1105 void pacdb(const Register& xd, const Register& xn);
1106
1107 // Pointer Authentication Code for Data address, using key B and a modifier of
1108 // zero [Armv8.3].
1109 void pacdzb(const Register& xd);
1110
1111 // Pointer Authentication Code for Data address, using key B, with address in
1112 // x17 and modifier in x16 [Armv8.3].
1113 void pacdb1716();
1114
1115 // Pointer Authentication Code for Data address, using key B, with address in
1116 // LR and modifier in SP [Armv8.3].
1117 void pacdbsp();
1118
1119 // Pointer Authentication Code for Data address, using key B, with address in
1120 // LR and a modifier of zero [Armv8.3].
1121 void pacdbz();
1122
1123 // Pointer Authentication Code, using Generic key [Armv8.3].
1124 void pacga(const Register& xd, const Register& xn, const Register& xm);
1125
1126 // Authenticate Instruction address, using key A [Armv8.3].
1127 void autia(const Register& xd, const Register& xn);
1128
1129 // Authenticate Instruction address, using key A and a modifier of zero
1130 // [Armv8.3].
1131 void autiza(const Register& xd);
1132
1133 // Authenticate Instruction address, using key A, with address in x17 and
1134 // modifier in x16 [Armv8.3].
1135 void autia1716();
1136
1137 // Authenticate Instruction address, using key A, with address in LR and
1138 // modifier in SP [Armv8.3].
1139 void autiasp();
1140
1141 // Authenticate Instruction address, using key A, with address in LR and a
1142 // modifier of zero [Armv8.3].
1143 void autiaz();
1144
1145 // Authenticate Instruction address, using key B [Armv8.3].
1146 void autib(const Register& xd, const Register& xn);
1147
1148 // Authenticate Instruction address, using key B and a modifier of zero
1149 // [Armv8.3].
1150 void autizb(const Register& xd);
1151
1152 // Authenticate Instruction address, using key B, with address in x17 and
1153 // modifier in x16 [Armv8.3].
1154 void autib1716();
1155
1156 // Authenticate Instruction address, using key B, with address in LR and
1157 // modifier in SP [Armv8.3].
1158 void autibsp();
1159
1160 // Authenticate Instruction address, using key B, with address in LR and a
1161 // modifier of zero [Armv8.3].
1162 void autibz();
1163
1164 // Authenticate Data address, using key A [Armv8.3].
1165 void autda(const Register& xd, const Register& xn);
1166
1167 // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
1168 void autdza(const Register& xd);
1169
1170 // Authenticate Data address, using key A, with address in x17 and modifier in
1171 // x16 [Armv8.3].
1172 void autda1716();
1173
1174 // Authenticate Data address, using key A, with address in LR and modifier in
1175 // SP [Armv8.3].
1176 void autdasp();
1177
1178 // Authenticate Data address, using key A, with address in LR and a modifier
1179 // of zero [Armv8.3].
1180 void autdaz();
1181
1182 // Authenticate Data address, using key B [Armv8.3].
1183 void autdb(const Register& xd, const Register& xn);
1184
1185 // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
1186 void autdzb(const Register& xd);
1187
1188 // Authenticate Data address, using key B, with address in x17 and modifier in
1189 // x16 [Armv8.3].
1190 void autdb1716();
1191
1192 // Authenticate Data address, using key B, with address in LR and modifier in
1193 // SP [Armv8.3].
1194 void autdbsp();
1195
1196 // Authenticate Data address, using key B, with address in LR and a modifier
1197 // of zero [Armv8.3].
1198 void autdbz();
1199
1200 // Strip Pointer Authentication Code of Data address [Armv8.3].
1201 void xpacd(const Register& xd);
1202
1203 // Strip Pointer Authentication Code of Instruction address [Armv8.3].
1204 void xpaci(const Register& xd);
1205
1206 // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
1207 void xpaclri();
1208
1209 // Memory instructions.
1210
1211 // Load integer or FP register.
1212 void ldr(const CPURegister& rt,
1213 const MemOperand& src,
1214 LoadStoreScalingOption option = PreferScaledOffset);
1215
1216 // Store integer or FP register.
1217 void str(const CPURegister& rt,
1218 const MemOperand& dst,
1219 LoadStoreScalingOption option = PreferScaledOffset);
1220
1221 // Load word with sign extension.
1222 void ldrsw(const Register& xt,
1223 const MemOperand& src,
1224 LoadStoreScalingOption option = PreferScaledOffset);
1225
1226 // Load byte.
1227 void ldrb(const Register& rt,
1228 const MemOperand& src,
1229 LoadStoreScalingOption option = PreferScaledOffset);
1230
1231 // Store byte.
1232 void strb(const Register& rt,
1233 const MemOperand& dst,
1234 LoadStoreScalingOption option = PreferScaledOffset);
1235
1236 // Load byte with sign extension.
1237 void ldrsb(const Register& rt,
1238 const MemOperand& src,
1239 LoadStoreScalingOption option = PreferScaledOffset);
1240
1241 // Load half-word.
1242 void ldrh(const Register& rt,
1243 const MemOperand& src,
1244 LoadStoreScalingOption option = PreferScaledOffset);
1245
1246 // Store half-word.
1247 void strh(const Register& rt,
1248 const MemOperand& dst,
1249 LoadStoreScalingOption option = PreferScaledOffset);
1250
1251 // Load half-word with sign extension.
1252 void ldrsh(const Register& rt,
1253 const MemOperand& src,
1254 LoadStoreScalingOption option = PreferScaledOffset);
1255
1256 // Load integer or FP register (with unscaled offset).
1257 void ldur(const CPURegister& rt,
1258 const MemOperand& src,
1259 LoadStoreScalingOption option = PreferUnscaledOffset);
1260
1261 // Store integer or FP register (with unscaled offset).
1262 void stur(const CPURegister& rt,
1263 const MemOperand& src,
1264 LoadStoreScalingOption option = PreferUnscaledOffset);
1265
  // Load word with sign extension (and unscaled offset).
1267 void ldursw(const Register& xt,
1268 const MemOperand& src,
1269 LoadStoreScalingOption option = PreferUnscaledOffset);
1270
1271 // Load byte (with unscaled offset).
1272 void ldurb(const Register& rt,
1273 const MemOperand& src,
1274 LoadStoreScalingOption option = PreferUnscaledOffset);
1275
1276 // Store byte (with unscaled offset).
1277 void sturb(const Register& rt,
1278 const MemOperand& dst,
1279 LoadStoreScalingOption option = PreferUnscaledOffset);
1280
1281 // Load byte with sign extension (and unscaled offset).
1282 void ldursb(const Register& rt,
1283 const MemOperand& src,
1284 LoadStoreScalingOption option = PreferUnscaledOffset);
1285
1286 // Load half-word (with unscaled offset).
1287 void ldurh(const Register& rt,
1288 const MemOperand& src,
1289 LoadStoreScalingOption option = PreferUnscaledOffset);
1290
1291 // Store half-word (with unscaled offset).
1292 void sturh(const Register& rt,
1293 const MemOperand& dst,
1294 LoadStoreScalingOption option = PreferUnscaledOffset);
1295
1296 // Load half-word with sign extension (and unscaled offset).
1297 void ldursh(const Register& rt,
1298 const MemOperand& src,
1299 LoadStoreScalingOption option = PreferUnscaledOffset);
1300
1301 // Load double-word with pointer authentication, using data key A and a
1302 // modifier of zero [Armv8.3].
1303 void ldraa(const Register& xt, const MemOperand& src);
1304
1305 // Load double-word with pointer authentication, using data key B and a
1306 // modifier of zero [Armv8.3].
1307 void ldrab(const Register& xt, const MemOperand& src);
1308
1309 // Load integer or FP register pair.
1310 void ldp(const CPURegister& rt,
1311 const CPURegister& rt2,
1312 const MemOperand& src);
1313
1314 // Store integer or FP register pair.
1315 void stp(const CPURegister& rt,
1316 const CPURegister& rt2,
1317 const MemOperand& dst);
1318
1319 // Load word pair with sign extension.
1320 void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);
1321
1322 // Load integer or FP register pair, non-temporal.
1323 void ldnp(const CPURegister& rt,
1324 const CPURegister& rt2,
1325 const MemOperand& src);
1326
1327 // Store integer or FP register pair, non-temporal.
1328 void stnp(const CPURegister& rt,
1329 const CPURegister& rt2,
1330 const MemOperand& dst);
1331
1332 // Load integer or FP register from literal pool.
1333 void ldr(const CPURegister& rt, RawLiteral* literal);
1334
1335 // Load word with sign extension from literal pool.
1336 void ldrsw(const Register& xt, RawLiteral* literal);
1337
1338 // Load integer or FP register from pc + imm19 << 2.
1339 void ldr(const CPURegister& rt, int64_t imm19);
1340
1341 // Load word with sign extension from pc + imm19 << 2.
1342 void ldrsw(const Register& xt, int64_t imm19);
1343
1344 // Store exclusive byte.
1345 void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1346
1347 // Store exclusive half-word.
1348 void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1349
1350 // Store exclusive register.
1351 void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1352
1353 // Load exclusive byte.
1354 void ldxrb(const Register& rt, const MemOperand& src);
1355
1356 // Load exclusive half-word.
1357 void ldxrh(const Register& rt, const MemOperand& src);
1358
1359 // Load exclusive register.
1360 void ldxr(const Register& rt, const MemOperand& src);
1361
1362 // Store exclusive register pair.
1363 void stxp(const Register& rs,
1364 const Register& rt,
1365 const Register& rt2,
1366 const MemOperand& dst);
1367
1368 // Load exclusive register pair.
1369 void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1370
1371 // Store-release exclusive byte.
1372 void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1373
1374 // Store-release exclusive half-word.
1375 void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1376
1377 // Store-release exclusive register.
1378 void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1379
1380 // Load-acquire exclusive byte.
1381 void ldaxrb(const Register& rt, const MemOperand& src);
1382
1383 // Load-acquire exclusive half-word.
1384 void ldaxrh(const Register& rt, const MemOperand& src);
1385
1386 // Load-acquire exclusive register.
1387 void ldaxr(const Register& rt, const MemOperand& src);
1388
1389 // Store-release exclusive register pair.
1390 void stlxp(const Register& rs,
1391 const Register& rt,
1392 const Register& rt2,
1393 const MemOperand& dst);
1394
1395 // Load-acquire exclusive register pair.
1396 void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1397
1398 // Store-release byte.
1399 void stlrb(const Register& rt, const MemOperand& dst);
1400
1401 // Store-release half-word.
1402 void stlrh(const Register& rt, const MemOperand& dst);
1403
1404 // Store-release register.
1405 void stlr(const Register& rt, const MemOperand& dst);
1406
1407 // Load-acquire byte.
1408 void ldarb(const Register& rt, const MemOperand& src);
1409
1410 // Load-acquire half-word.
1411 void ldarh(const Register& rt, const MemOperand& src);
1412
1413 // Load-acquire register.
1414 void ldar(const Register& rt, const MemOperand& src);
1415
1416 // Store LORelease byte [Armv8.1].
1417 void stllrb(const Register& rt, const MemOperand& dst);
1418
1419 // Store LORelease half-word [Armv8.1].
1420 void stllrh(const Register& rt, const MemOperand& dst);
1421
1422 // Store LORelease register [Armv8.1].
1423 void stllr(const Register& rt, const MemOperand& dst);
1424
  // Load LOAcquire byte [Armv8.1].
1426 void ldlarb(const Register& rt, const MemOperand& src);
1427
  // Load LOAcquire half-word [Armv8.1].
1429 void ldlarh(const Register& rt, const MemOperand& src);
1430
  // Load LOAcquire register [Armv8.1].
1432 void ldlar(const Register& rt, const MemOperand& src);
1433
1434 // Compare and Swap word or doubleword in memory [Armv8.1].
1435 void cas(const Register& rs, const Register& rt, const MemOperand& src);
1436
1437 // Compare and Swap word or doubleword in memory [Armv8.1].
1438 void casa(const Register& rs, const Register& rt, const MemOperand& src);
1439
1440 // Compare and Swap word or doubleword in memory [Armv8.1].
1441 void casl(const Register& rs, const Register& rt, const MemOperand& src);
1442
1443 // Compare and Swap word or doubleword in memory [Armv8.1].
1444 void casal(const Register& rs, const Register& rt, const MemOperand& src);
1445
1446 // Compare and Swap byte in memory [Armv8.1].
1447 void casb(const Register& rs, const Register& rt, const MemOperand& src);
1448
1449 // Compare and Swap byte in memory [Armv8.1].
1450 void casab(const Register& rs, const Register& rt, const MemOperand& src);
1451
1452 // Compare and Swap byte in memory [Armv8.1].
1453 void caslb(const Register& rs, const Register& rt, const MemOperand& src);
1454
1455 // Compare and Swap byte in memory [Armv8.1].
1456 void casalb(const Register& rs, const Register& rt, const MemOperand& src);
1457
1458 // Compare and Swap halfword in memory [Armv8.1].
1459 void cash(const Register& rs, const Register& rt, const MemOperand& src);
1460
1461 // Compare and Swap halfword in memory [Armv8.1].
1462 void casah(const Register& rs, const Register& rt, const MemOperand& src);
1463
1464 // Compare and Swap halfword in memory [Armv8.1].
1465 void caslh(const Register& rs, const Register& rt, const MemOperand& src);
1466
1467 // Compare and Swap halfword in memory [Armv8.1].
1468 void casalh(const Register& rs, const Register& rt, const MemOperand& src);
1469
1470 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1471 void casp(const Register& rs,
1472 const Register& rs2,
1473 const Register& rt,
1474 const Register& rt2,
1475 const MemOperand& src);
1476
1477 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1478 void caspa(const Register& rs,
1479 const Register& rs2,
1480 const Register& rt,
1481 const Register& rt2,
1482 const MemOperand& src);
1483
1484 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1485 void caspl(const Register& rs,
1486 const Register& rs2,
1487 const Register& rt,
1488 const Register& rt2,
1489 const MemOperand& src);
1490
1491 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1492 void caspal(const Register& rs,
1493 const Register& rs2,
1494 const Register& rt,
1495 const Register& rt2,
1496 const MemOperand& src);
1497
1498 // Store-release byte (with unscaled offset) [Armv8.4].
1499 void stlurb(const Register& rt, const MemOperand& dst);
1500
1501 // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
1502 void ldapurb(const Register& rt, const MemOperand& src);
1503
1504 // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
1505 void ldapursb(const Register& rt, const MemOperand& src);
1506
1507 // Store-release half-word (with unscaled offset) [Armv8.4].
1508 void stlurh(const Register& rt, const MemOperand& dst);
1509
1510 // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
1511 void ldapurh(const Register& rt, const MemOperand& src);
1512
1513 // Load-acquire RCpc Register signed half-word (with unscaled offset)
1514 // [Armv8.4].
1515 void ldapursh(const Register& rt, const MemOperand& src);
1516
1517 // Store-release word or double-word (with unscaled offset) [Armv8.4].
1518 void stlur(const Register& rt, const MemOperand& dst);
1519
1520 // Load-acquire RCpc Register word or double-word (with unscaled offset)
1521 // [Armv8.4].
1522 void ldapur(const Register& rt, const MemOperand& src);
1523
1524 // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
1525 void ldapursw(const Register& xt, const MemOperand& src);
1526
1527 // Atomic add on byte in memory [Armv8.1]
1528 void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
1529
1530 // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
1531 void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
1532
1533 // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
1534 void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
1535
1536 // Atomic add on byte in memory, with Load-acquire and Store-release semantics
1537 // [Armv8.1]
1538 void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
1539
1540 // Atomic add on halfword in memory [Armv8.1]
1541 void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
1542
1543 // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
1544 void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
1545
1546 // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
1547 void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
1548
1549 // Atomic add on halfword in memory, with Load-acquire and Store-release
1550 // semantics [Armv8.1]
1551 void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
1552
1553 // Atomic add on word or doubleword in memory [Armv8.1]
1554 void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
1555
1556 // Atomic add on word or doubleword in memory, with Load-acquire semantics
1557 // [Armv8.1]
1558 void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
1559
1560 // Atomic add on word or doubleword in memory, with Store-release semantics
1561 // [Armv8.1]
1562 void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
1563
1564 // Atomic add on word or doubleword in memory, with Load-acquire and
1565 // Store-release semantics [Armv8.1]
1566 void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
1567
1568 // Atomic bit clear on byte in memory [Armv8.1]
1569 void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
1570
1571 // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
1572 void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
1573
1574 // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
1575 void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
1576
1577 // Atomic bit clear on byte in memory, with Load-acquire and Store-release
1578 // semantics [Armv8.1]
1579 void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
1580
1581 // Atomic bit clear on halfword in memory [Armv8.1]
1582 void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
1583
1584 // Atomic bit clear on halfword in memory, with Load-acquire semantics
1585 // [Armv8.1]
1586 void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
1587
1588 // Atomic bit clear on halfword in memory, with Store-release semantics
1589 // [Armv8.1]
1590 void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
1591
1592 // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
1593 // semantics [Armv8.1]
1594 void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
1595
1596 // Atomic bit clear on word or doubleword in memory [Armv8.1]
1597 void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
1598
1599 // Atomic bit clear on word or doubleword in memory, with Load-acquire
1600 // semantics [Armv8.1]
1601 void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
1602
1603 // Atomic bit clear on word or doubleword in memory, with Store-release
1604 // semantics [Armv8.1]
1605 void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
1606
1607 // Atomic bit clear on word or doubleword in memory, with Load-acquire and
1608 // Store-release semantics [Armv8.1]
1609 void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
1610
1611 // Atomic exclusive OR on byte in memory [Armv8.1]
1612 void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
1613
1614 // Atomic exclusive OR on byte in memory, with Load-acquire semantics
1615 // [Armv8.1]
1616 void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
1617
1618 // Atomic exclusive OR on byte in memory, with Store-release semantics
1619 // [Armv8.1]
1620 void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
1621
1622 // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
1623 // semantics [Armv8.1]
1624 void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
1625
1626 // Atomic exclusive OR on halfword in memory [Armv8.1]
1627 void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
1628
1629 // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
1630 // [Armv8.1]
1631 void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
1632
1633 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1634 // [Armv8.1]
1635 void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
1636
1637 // Atomic exclusive OR on halfword in memory, with Load-acquire and
1638 // Store-release semantics [Armv8.1]
1639 void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
1640
1641 // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
1642 void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
1643
1644 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
1645 // semantics [Armv8.1]
1646 void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
1647
1648 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1649 // semantics [Armv8.1]
1650 void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
1651
1652 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
1653 // Store-release semantics [Armv8.1]
1654 void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1655
1656 // Atomic bit set on byte in memory [Armv8.1]
1657 void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
1658
1659 // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
1660 void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
1661
1662 // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
1663 void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
1664
1665 // Atomic bit set on byte in memory, with Load-acquire and Store-release
1666 // semantics [Armv8.1]
1667 void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
1668
1669 // Atomic bit set on halfword in memory [Armv8.1]
1670 void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
1671
1672 // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
1673 void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
1674
1675 // Atomic bit set on halfword in memory, with Store-release semantics
1676 // [Armv8.1]
1677 void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
1678
1679 // Atomic bit set on halfword in memory, with Load-acquire and Store-release
1680 // semantics [Armv8.1]
1681 void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
1682
1683 // Atomic bit set on word or doubleword in memory [Armv8.1]
1684 void ldset(const Register& rs, const Register& rt, const MemOperand& src);
1685
1686 // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
1687 // [Armv8.1]
1688 void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
1689
1690 // Atomic bit set on word or doubleword in memory, with Store-release
1691 // semantics [Armv8.1]
1692 void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
1693
1694 // Atomic bit set on word or doubleword in memory, with Load-acquire and
1695 // Store-release semantics [Armv8.1]
1696 void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
1697
1698 // Atomic signed maximum on byte in memory [Armv8.1]
1699 void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
1700
1701 // Atomic signed maximum on byte in memory, with Load-acquire semantics
1702 // [Armv8.1]
1703 void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
1704
1705 // Atomic signed maximum on byte in memory, with Store-release semantics
1706 // [Armv8.1]
1707 void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1708
1709 // Atomic signed maximum on byte in memory, with Load-acquire and
1710 // Store-release semantics [Armv8.1]
1711 void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1712
1713 // Atomic signed maximum on halfword in memory [Armv8.1]
1714 void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
1715
1716 // Atomic signed maximum on halfword in memory, with Load-acquire semantics
1717 // [Armv8.1]
1718 void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
1719
1720 // Atomic signed maximum on halfword in memory, with Store-release semantics
1721 // [Armv8.1]
1722 void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1723
1724 // Atomic signed maximum on halfword in memory, with Load-acquire and
1725 // Store-release semantics [Armv8.1]
1726 void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1727
1728 // Atomic signed maximum on word or doubleword in memory [Armv8.1]
1729 void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
1730
1731 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1732 // semantics [Armv8.1]
1733 void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
1734
1735 // Atomic signed maximum on word or doubleword in memory, with Store-release
1736 // semantics [Armv8.1]
1737 void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
1738
1739 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1740 // and Store-release semantics [Armv8.1]
1741 void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1742
1743 // Atomic signed minimum on byte in memory [Armv8.1]
1744 void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
1745
1746 // Atomic signed minimum on byte in memory, with Load-acquire semantics
1747 // [Armv8.1]
1748 void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
1749
1750 // Atomic signed minimum on byte in memory, with Store-release semantics
1751 // [Armv8.1]
1752 void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
1753
1754 // Atomic signed minimum on byte in memory, with Load-acquire and
1755 // Store-release semantics [Armv8.1]
1756 void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
1757
1758 // Atomic signed minimum on halfword in memory [Armv8.1]
1759 void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
1760
1761 // Atomic signed minimum on halfword in memory, with Load-acquire semantics
1762 // [Armv8.1]
1763 void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
1764
1765 // Atomic signed minimum on halfword in memory, with Store-release semantics
1766 // [Armv8.1]
1767 void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
1768
1769 // Atomic signed minimum on halfword in memory, with Load-acquire and
1770 // Store-release semantics [Armv8.1]
1771 void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
1772
1773 // Atomic signed minimum on word or doubleword in memory [Armv8.1]
1774 void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
1775
1776 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1777 // semantics [Armv8.1]
1778 void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
1779
1780 // Atomic signed minimum on word or doubleword in memory, with Store-release
1781 // semantics [Armv8.1]
1782 void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
1783
1784 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1785 // and Store-release semantics [Armv8.1]
1786 void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
1787
1788 // Atomic unsigned maximum on byte in memory [Armv8.1]
1789 void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
1790
1791 // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
1792 // [Armv8.1]
1793 void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
1794
1795 // Atomic unsigned maximum on byte in memory, with Store-release semantics
1796 // [Armv8.1]
1797 void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1798
1799 // Atomic unsigned maximum on byte in memory, with Load-acquire and
1800 // Store-release semantics [Armv8.1]
1801 void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1802
1803 // Atomic unsigned maximum on halfword in memory [Armv8.1]
1804 void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
1805
1806 // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
1807 // [Armv8.1]
1808 void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
1809
1810 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1811 // [Armv8.1]
1812 void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1813
1814 // Atomic unsigned maximum on halfword in memory, with Load-acquire and
1815 // Store-release semantics [Armv8.1]
1816 void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1817
1818 // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
1819 void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
1820
1821 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1822 // semantics [Armv8.1]
1823 void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
1824
1825 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1826 // semantics [Armv8.1]
1827 void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
1828
1829 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1830 // and Store-release semantics [Armv8.1]
1831 void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1832
1833 // Atomic unsigned minimum on byte in memory [Armv8.1]
1834 void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
1835
1836 // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
1837 // [Armv8.1]
1838 void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
1839
1840 // Atomic unsigned minimum on byte in memory, with Store-release semantics
1841 // [Armv8.1]
1842 void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
1843
1844 // Atomic unsigned minimum on byte in memory, with Load-acquire and
1845 // Store-release semantics [Armv8.1]
1846 void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
1847
1848 // Atomic unsigned minimum on halfword in memory [Armv8.1]
1849 void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
1850
1851 // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
1852 // [Armv8.1]
1853 void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
1854
1855 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1856 // [Armv8.1]
1857 void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
1858
1859 // Atomic unsigned minimum on halfword in memory, with Load-acquire and
1860 // Store-release semantics [Armv8.1]
1861 void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
1862
1863 // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
1864 void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
1865
1866 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1867 // semantics [Armv8.1]
1868 void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
1869
1870 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
1871 // semantics [Armv8.1]
1872 void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
1873
1874 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1875 // and Store-release semantics [Armv8.1]
1876 void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1877
1878 // Atomic add on byte in memory, without return. [Armv8.1]
1879 void staddb(const Register& rs, const MemOperand& src);
1880
1881 // Atomic add on byte in memory, with Store-release semantics and without
1882 // return. [Armv8.1]
1883 void staddlb(const Register& rs, const MemOperand& src);
1884
1885 // Atomic add on halfword in memory, without return. [Armv8.1]
1886 void staddh(const Register& rs, const MemOperand& src);
1887
1888 // Atomic add on halfword in memory, with Store-release semantics and without
1889 // return. [Armv8.1]
1890 void staddlh(const Register& rs, const MemOperand& src);
1891
1892 // Atomic add on word or doubleword in memory, without return. [Armv8.1]
1893 void stadd(const Register& rs, const MemOperand& src);
1894
1895 // Atomic add on word or doubleword in memory, with Store-release semantics
1896 // and without return. [Armv8.1]
1897 void staddl(const Register& rs, const MemOperand& src);
1898
1899 // Atomic bit clear on byte in memory, without return. [Armv8.1]
1900 void stclrb(const Register& rs, const MemOperand& src);
1901
1902 // Atomic bit clear on byte in memory, with Store-release semantics and
1903 // without return. [Armv8.1]
1904 void stclrlb(const Register& rs, const MemOperand& src);
1905
1906 // Atomic bit clear on halfword in memory, without return. [Armv8.1]
1907 void stclrh(const Register& rs, const MemOperand& src);
1908
1909 // Atomic bit clear on halfword in memory, with Store-release semantics and
1910 // without return. [Armv8.1]
1911 void stclrlh(const Register& rs, const MemOperand& src);
1912
1913 // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
1914 void stclr(const Register& rs, const MemOperand& src);
1915
1916 // Atomic bit clear on word or doubleword in memory, with Store-release
1917 // semantics and without return. [Armv8.1]
1918 void stclrl(const Register& rs, const MemOperand& src);
1919
1920 // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
1921 void steorb(const Register& rs, const MemOperand& src);
1922
1923 // Atomic exclusive OR on byte in memory, with Store-release semantics and
1924 // without return. [Armv8.1]
1925 void steorlb(const Register& rs, const MemOperand& src);
1926
1927 // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
1928 void steorh(const Register& rs, const MemOperand& src);
1929
1930 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1931 // and without return. [Armv8.1]
1932 void steorlh(const Register& rs, const MemOperand& src);
1933
1934 // Atomic exclusive OR on word or doubleword in memory, without return.
1935 // [Armv8.1]
1936 void steor(const Register& rs, const MemOperand& src);
1937
1938 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1939 // semantics and without return. [Armv8.1]
1940 void steorl(const Register& rs, const MemOperand& src);
1941
1942 // Atomic bit set on byte in memory, without return. [Armv8.1]
1943 void stsetb(const Register& rs, const MemOperand& src);
1944
1945 // Atomic bit set on byte in memory, with Store-release semantics and without
1946 // return. [Armv8.1]
1947 void stsetlb(const Register& rs, const MemOperand& src);
1948
1949 // Atomic bit set on halfword in memory, without return. [Armv8.1]
1950 void stseth(const Register& rs, const MemOperand& src);
1951
1952 // Atomic bit set on halfword in memory, with Store-release semantics and
1953 // without return. [Armv8.1]
1954 void stsetlh(const Register& rs, const MemOperand& src);
1955
1956 // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
1957 void stset(const Register& rs, const MemOperand& src);
1958
1959 // Atomic bit set on word or doubleword in memory, with Store-release
1960 // semantics and without return. [Armv8.1]
1961 void stsetl(const Register& rs, const MemOperand& src);
1962
1963 // Atomic signed maximum on byte in memory, without return. [Armv8.1]
1964 void stsmaxb(const Register& rs, const MemOperand& src);
1965
1966 // Atomic signed maximum on byte in memory, with Store-release semantics and
1967 // without return. [Armv8.1]
1968 void stsmaxlb(const Register& rs, const MemOperand& src);
1969
1970 // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
1971 void stsmaxh(const Register& rs, const MemOperand& src);
1972
1973 // Atomic signed maximum on halfword in memory, with Store-release semantics
1974 // and without return. [Armv8.1]
1975 void stsmaxlh(const Register& rs, const MemOperand& src);
1976
1977 // Atomic signed maximum on word or doubleword in memory, without return.
1978 // [Armv8.1]
1979 void stsmax(const Register& rs, const MemOperand& src);
1980
1981 // Atomic signed maximum on word or doubleword in memory, with Store-release
1982 // semantics and without return. [Armv8.1]
1983 void stsmaxl(const Register& rs, const MemOperand& src);
1984
1985 // Atomic signed minimum on byte in memory, without return. [Armv8.1]
1986 void stsminb(const Register& rs, const MemOperand& src);
1987
1988 // Atomic signed minimum on byte in memory, with Store-release semantics and
1989 // without return. [Armv8.1]
1990 void stsminlb(const Register& rs, const MemOperand& src);
1991
1992 // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
1993 void stsminh(const Register& rs, const MemOperand& src);
1994
1995 // Atomic signed minimum on halfword in memory, with Store-release semantics
1996 // and without return. [Armv8.1]
1997 void stsminlh(const Register& rs, const MemOperand& src);
1998
1999 // Atomic signed minimum on word or doubleword in memory, without return.
2000 // [Armv8.1]
2001 void stsmin(const Register& rs, const MemOperand& src);
2002
2003 // Atomic signed minimum on word or doubleword in memory, with Store-release
// semantics and without return. [Armv8.1]
2005 void stsminl(const Register& rs, const MemOperand& src);
2006
2007 // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
2008 void stumaxb(const Register& rs, const MemOperand& src);
2009
2010 // Atomic unsigned maximum on byte in memory, with Store-release semantics and
2011 // without return. [Armv8.1]
2012 void stumaxlb(const Register& rs, const MemOperand& src);
2013
2014 // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
2015 void stumaxh(const Register& rs, const MemOperand& src);
2016
2017 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
2018 // and without return. [Armv8.1]
2019 void stumaxlh(const Register& rs, const MemOperand& src);
2020
2021 // Atomic unsigned maximum on word or doubleword in memory, without return.
2022 // [Armv8.1]
2023 void stumax(const Register& rs, const MemOperand& src);
2024
2025 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
2026 // semantics and without return. [Armv8.1]
2027 void stumaxl(const Register& rs, const MemOperand& src);
2028
2029 // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
2030 void stuminb(const Register& rs, const MemOperand& src);
2031
2032 // Atomic unsigned minimum on byte in memory, with Store-release semantics and
2033 // without return. [Armv8.1]
2034 void stuminlb(const Register& rs, const MemOperand& src);
2035
2036 // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
2037 void stuminh(const Register& rs, const MemOperand& src);
2038
2039 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
2040 // and without return. [Armv8.1]
2041 void stuminlh(const Register& rs, const MemOperand& src);
2042
2043 // Atomic unsigned minimum on word or doubleword in memory, without return.
2044 // [Armv8.1]
2045 void stumin(const Register& rs, const MemOperand& src);
2046
2047 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
2048 // semantics and without return. [Armv8.1]
2049 void stuminl(const Register& rs, const MemOperand& src);
2050
2051 // Swap byte in memory [Armv8.1]
2052 void swpb(const Register& rs, const Register& rt, const MemOperand& src);
2053
2054 // Swap byte in memory, with Load-acquire semantics [Armv8.1]
2055 void swpab(const Register& rs, const Register& rt, const MemOperand& src);
2056
2057 // Swap byte in memory, with Store-release semantics [Armv8.1]
2058 void swplb(const Register& rs, const Register& rt, const MemOperand& src);
2059
2060 // Swap byte in memory, with Load-acquire and Store-release semantics
2061 // [Armv8.1]
2062 void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
2063
2064 // Swap halfword in memory [Armv8.1]
2065 void swph(const Register& rs, const Register& rt, const MemOperand& src);
2066
2067 // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
2068 void swpah(const Register& rs, const Register& rt, const MemOperand& src);
2069
2070 // Swap halfword in memory, with Store-release semantics [Armv8.1]
2071 void swplh(const Register& rs, const Register& rt, const MemOperand& src);
2072
2073 // Swap halfword in memory, with Load-acquire and Store-release semantics
2074 // [Armv8.1]
2075 void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
2076
2077 // Swap word or doubleword in memory [Armv8.1]
2078 void swp(const Register& rs, const Register& rt, const MemOperand& src);
2079
2080 // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
2081 void swpa(const Register& rs, const Register& rt, const MemOperand& src);
2082
2083 // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
2084 void swpl(const Register& rs, const Register& rt, const MemOperand& src);
2085
2086 // Swap word or doubleword in memory, with Load-acquire and Store-release
2087 // semantics [Armv8.1]
2088 void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2089
2090 // Load-Acquire RCpc Register byte [Armv8.3]
2091 void ldaprb(const Register& rt, const MemOperand& src);
2092
2093 // Load-Acquire RCpc Register halfword [Armv8.3]
2094 void ldaprh(const Register& rt, const MemOperand& src);
2095
2096 // Load-Acquire RCpc Register word or doubleword [Armv8.3]
2097 void ldapr(const Register& rt, const MemOperand& src);
2098
2099 // Prefetch memory.
2100 void prfm(PrefetchOperation op,
2101 const MemOperand& addr,
2102 LoadStoreScalingOption option = PreferScaledOffset);
2103
2104 // Prefetch memory (with unscaled offset).
2105 void prfum(PrefetchOperation op,
2106 const MemOperand& addr,
2107 LoadStoreScalingOption option = PreferUnscaledOffset);
2108
2109 // Prefetch memory in the literal pool.
2110 void prfm(PrefetchOperation op, RawLiteral* literal);
2111
2112 // Prefetch from pc + imm19 << 2.
2113 void prfm(PrefetchOperation op, int64_t imm19);
2114
2115 // Move instructions. The default shift of -1 indicates that the move
2116 // instruction will calculate an appropriate 16-bit immediate and left shift
2117 // that is equal to the 64-bit immediate argument. If an explicit left shift
2118 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
2119 //
2120 // For movk, an explicit shift can be used to indicate which half word should
// be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
2122 // half word with zero, whereas movk(x0, 0, 48) will overwrite the
2123 // most-significant.
2124
// Move immediate and keep (MOVK): write a 16-bit immediate into one
// half word of rd while leaving the other bits unchanged. The default
// shift of -1 asks MoveWide to derive an appropriate shift from imm;
// an explicit shift (0, 16, 32 or 48) selects the half word directly.
void movk(const Register& rd, uint64_t imm, int shift = -1) {
  MoveWide(rd, imm, shift, MOVK);
}
2129
// Move inverted immediate (MOVN). With the default shift of -1,
// MoveWide computes a suitable 16-bit immediate and left shift from
// the 64-bit imm; an explicit shift requires imm to be a 16-bit value.
void movn(const Register& rd, uint64_t imm, int shift = -1) {
  MoveWide(rd, imm, shift, MOVN);
}
2134
// Move immediate (MOVZ). With the default shift of -1, MoveWide
// computes a suitable 16-bit immediate and left shift from the 64-bit
// imm; an explicit shift requires imm to be a 16-bit value.
void movz(const Register& rd, uint64_t imm, int shift = -1) {
  MoveWide(rd, imm, shift, MOVZ);
}
2139
2140 // Misc instructions.
2141
2142 // Monitor debug-mode breakpoint.
2143 void brk(int code);
2144
2145 // Halting debug-mode breakpoint.
2146 void hlt(int code);
2147
2148 // Generate exception targeting EL1.
2149 void svc(int code);
2150
2151 // Generate undefined instruction exception.
2152 void udf(int code);
2153
2154 // Move register to register.
2155 void mov(const Register& rd, const Register& rn);
2156
2157 // Move inverted operand to register.
2158 void mvn(const Register& rd, const Operand& operand);
2159
2160 // System instructions.
2161
2162 // Move to register from system register.
2163 void mrs(const Register& xt, SystemRegister sysreg);
2164
2165 // Move from register to system register.
2166 void msr(SystemRegister sysreg, const Register& xt);
2167
2168 // Invert carry flag [Armv8.4].
2169 void cfinv();
2170
2171 // Convert floating-point condition flags from alternative format to Arm
2172 // format [Armv8.5].
2173 void xaflag();
2174
2175 // Convert floating-point condition flags from Arm format to alternative
2176 // format [Armv8.5].
2177 void axflag();
2178
2179 // System instruction.
2180 void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
2181
2182 // System instruction with pre-encoded op (op1:crn:crm:op2).
2183 void sys(int op, const Register& xt = xzr);
2184
2185 // System data cache operation.
2186 void dc(DataCacheOp op, const Register& rt);
2187
2188 // System instruction cache operation.
2189 void ic(InstructionCacheOp op, const Register& rt);
2190
2191 // System hint (named type).
2192 void hint(SystemHint code);
2193
2194 // System hint (numbered type).
2195 void hint(int imm7);
2196
2197 // Clear exclusive monitor.
2198 void clrex(int imm4 = 0xf);
2199
2200 // Data memory barrier.
2201 void dmb(BarrierDomain domain, BarrierType type);
2202
2203 // Data synchronization barrier.
2204 void dsb(BarrierDomain domain, BarrierType type);
2205
2206 // Instruction synchronization barrier.
2207 void isb();
2208
2209 // Error synchronization barrier.
2210 void esb();
2211
2212 // Conditional speculation dependency barrier.
2213 void csdb();
2214
// No-op: emitted via the hint mechanism as HINT #NOP.
void nop() { hint(NOP); }
2217
2218 // Branch target identification.
2219 void bti(BranchTargetIdentifier id);
2220
2221 // FP and NEON instructions.
2222
2223 // Move double precision immediate to FP register.
2224 void fmov(const VRegister& vd, double imm);
2225
2226 // Move single precision immediate to FP register.
2227 void fmov(const VRegister& vd, float imm);
2228
2229 // Move half precision immediate to FP register [Armv8.2].
2230 void fmov(const VRegister& vd, Float16 imm);
2231
2232 // Move FP register to register.
2233 void fmov(const Register& rd, const VRegister& fn);
2234
2235 // Move register to FP register.
2236 void fmov(const VRegister& vd, const Register& rn);
2237
2238 // Move FP register to FP register.
2239 void fmov(const VRegister& vd, const VRegister& fn);
2240
2241 // Move 64-bit register to top half of 128-bit FP register.
2242 void fmov(const VRegister& vd, int index, const Register& rn);
2243
2244 // Move top half of 128-bit FP register to 64-bit register.
2245 void fmov(const Register& rd, const VRegister& vn, int index);
2246
2247 // FP add.
2248 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2249
2250 // FP subtract.
2251 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2252
2253 // FP multiply.
2254 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2255
2256 // FP fused multiply-add.
2257 void fmadd(const VRegister& vd,
2258 const VRegister& vn,
2259 const VRegister& vm,
2260 const VRegister& va);
2261
2262 // FP fused multiply-subtract.
2263 void fmsub(const VRegister& vd,
2264 const VRegister& vn,
2265 const VRegister& vm,
2266 const VRegister& va);
2267
2268 // FP fused multiply-add and negate.
2269 void fnmadd(const VRegister& vd,
2270 const VRegister& vn,
2271 const VRegister& vm,
2272 const VRegister& va);
2273
2274 // FP fused multiply-subtract and negate.
2275 void fnmsub(const VRegister& vd,
2276 const VRegister& vn,
2277 const VRegister& vm,
2278 const VRegister& va);
2279
2280 // FP multiply-negate scalar.
2281 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2282
2283 // FP reciprocal exponent scalar.
2284 void frecpx(const VRegister& vd, const VRegister& vn);
2285
2286 // FP divide.
2287 void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2288
2289 // FP maximum.
2290 void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2291
2292 // FP minimum.
2293 void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2294
2295 // FP maximum number.
2296 void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2297
2298 // FP minimum number.
2299 void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2300
2301 // FP absolute.
2302 void fabs(const VRegister& vd, const VRegister& vn);
2303
2304 // FP negate.
2305 void fneg(const VRegister& vd, const VRegister& vn);
2306
2307 // FP square root.
2308 void fsqrt(const VRegister& vd, const VRegister& vn);
2309
2310 // FP round to integer, nearest with ties to away.
2311 void frinta(const VRegister& vd, const VRegister& vn);
2312
2313 // FP round to integer, implicit rounding.
2314 void frinti(const VRegister& vd, const VRegister& vn);
2315
2316 // FP round to integer, toward minus infinity.
2317 void frintm(const VRegister& vd, const VRegister& vn);
2318
2319 // FP round to integer, nearest with ties to even.
2320 void frintn(const VRegister& vd, const VRegister& vn);
2321
2322 // FP round to integer, toward plus infinity.
2323 void frintp(const VRegister& vd, const VRegister& vn);
2324
2325 // FP round to integer, exact, implicit rounding.
2326 void frintx(const VRegister& vd, const VRegister& vn);
2327
2328 // FP round to integer, towards zero.
2329 void frintz(const VRegister& vd, const VRegister& vn);
2330
2331 // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
2332 void frint32x(const VRegister& vd, const VRegister& vn);
2333
2334 // FP round to 32-bit integer, towards zero [Armv8.5].
2335 void frint32z(const VRegister& vd, const VRegister& vn);
2336
2337 // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
2338 void frint64x(const VRegister& vd, const VRegister& vn);
2339
2340 // FP round to 64-bit integer, towards zero [Armv8.5].
2341 void frint64z(const VRegister& vd, const VRegister& vn);
2342
2343 void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);
2344
2345 void FPCompareMacro(const VRegister& vn,
2346 const VRegister& vm,
2347 FPTrapFlags trap);
2348
2349 // FP compare registers.
2350 void fcmp(const VRegister& vn, const VRegister& vm);
2351
2352 // FP compare immediate.
2353 void fcmp(const VRegister& vn, double value);
2354
2355 void FPCCompareMacro(const VRegister& vn,
2356 const VRegister& vm,
2357 StatusFlags nzcv,
2358 Condition cond,
2359 FPTrapFlags trap);
2360
2361 // FP conditional compare.
2362 void fccmp(const VRegister& vn,
2363 const VRegister& vm,
2364 StatusFlags nzcv,
2365 Condition cond);
2366
2367 // FP signaling compare registers.
2368 void fcmpe(const VRegister& vn, const VRegister& vm);
2369
2370 // FP signaling compare immediate.
2371 void fcmpe(const VRegister& vn, double value);
2372
2373 // FP conditional signaling compare.
2374 void fccmpe(const VRegister& vn,
2375 const VRegister& vm,
2376 StatusFlags nzcv,
2377 Condition cond);
2378
2379 // FP conditional select.
2380 void fcsel(const VRegister& vd,
2381 const VRegister& vn,
2382 const VRegister& vm,
2383 Condition cond);
2384
2385 // Common FP Convert functions.
2386 void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
2387 void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2388 void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2389
2390 // FP convert between precisions.
2391 void fcvt(const VRegister& vd, const VRegister& vn);
2392
2393 // FP convert to higher precision.
2394 void fcvtl(const VRegister& vd, const VRegister& vn);
2395
2396 // FP convert to higher precision (second part).
2397 void fcvtl2(const VRegister& vd, const VRegister& vn);
2398
2399 // FP convert to lower precision.
2400 void fcvtn(const VRegister& vd, const VRegister& vn);
2401
// FP convert to lower precision (second part).
2403 void fcvtn2(const VRegister& vd, const VRegister& vn);
2404
2405 // FP convert to lower precision, rounding to odd.
2406 void fcvtxn(const VRegister& vd, const VRegister& vn);
2407
2408 // FP convert to lower precision, rounding to odd (second part).
2409 void fcvtxn2(const VRegister& vd, const VRegister& vn);
2410
2411 // FP convert to signed integer, nearest with ties to away.
2412 void fcvtas(const Register& rd, const VRegister& vn);
2413
2414 // FP convert to unsigned integer, nearest with ties to away.
2415 void fcvtau(const Register& rd, const VRegister& vn);
2416
2417 // FP convert to signed integer, nearest with ties to away.
2418 void fcvtas(const VRegister& vd, const VRegister& vn);
2419
2420 // FP convert to unsigned integer, nearest with ties to away.
2421 void fcvtau(const VRegister& vd, const VRegister& vn);
2422
2423 // FP convert to signed integer, round towards -infinity.
2424 void fcvtms(const Register& rd, const VRegister& vn);
2425
2426 // FP convert to unsigned integer, round towards -infinity.
2427 void fcvtmu(const Register& rd, const VRegister& vn);
2428
2429 // FP convert to signed integer, round towards -infinity.
2430 void fcvtms(const VRegister& vd, const VRegister& vn);
2431
2432 // FP convert to unsigned integer, round towards -infinity.
2433 void fcvtmu(const VRegister& vd, const VRegister& vn);
2434
2435 // FP convert to signed integer, nearest with ties to even.
2436 void fcvtns(const Register& rd, const VRegister& vn);
2437
2438 // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
2439 void fjcvtzs(const Register& rd, const VRegister& vn);
2440
2441 // FP convert to unsigned integer, nearest with ties to even.
2442 void fcvtnu(const Register& rd, const VRegister& vn);
2443
2444 // FP convert to signed integer, nearest with ties to even.
2445 void fcvtns(const VRegister& rd, const VRegister& vn);
2446
2447 // FP convert to unsigned integer, nearest with ties to even.
2448 void fcvtnu(const VRegister& rd, const VRegister& vn);
2449
  // FP convert to signed integer or fixed-point, round towards zero.
  // For the fcvtz/scvtf/ucvtf variants below, fbits is the number of
  // fractional bits of the fixed-point value; the default of 0 selects a
  // plain integer conversion.
  void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero.
  void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer or fixed-point, round towards zero.
  void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero.
  void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer, round towards +infinity.
  void fcvtps(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  void fcvtpu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards +infinity.
  void fcvtps(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  void fcvtpu(const VRegister& vd, const VRegister& vn);

  // Convert signed integer or fixed-point to FP.
  void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);

  // Convert signed integer or fixed-point to FP.
  void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2485
  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving subtract.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving subtract.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pairwise.
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements (scalar form).
  void addp(const VRegister& vd, const VRegister& vn);

  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2548
  // For the by-element forms below, vm_index selects the element (lane) of
  // vm that is used.

  // Multiply by scalar element.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply-subtract by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-subtract by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-subtract by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-subtract by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating doubling long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-subtract by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-subtract by element
  // (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);
2674
  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise equal to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);
2707
  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise and.
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or immediate (imm8 may be shifted left by left_shift bits).
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register.
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise or with the complement of the second operand (orn).
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise exclusive or (eor).
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate (imm8 may be shifted left by left_shift bits).
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2767
  // Vector move immediate. The immediate can optionally be shifted left by
  // shift_amount; shift gives the shift type (LSL by default).
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate (same optional shift as movi).
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not.
  void not_(const VRegister& vd, const VRegister& vn);

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);
2827
  // In the following, vd_index / vn_index select the destination / source
  // element (lane) within vd / vn.

  // Extract vector from pair of vectors (index gives the position at which
  // the extraction starts).
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar.
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element.
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);
2869
  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane (lane selects the element
  // of vt that is loaded).
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);
2947
  // Count leading sign bits.
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size.
  // NOTE(review): shift appears to be constrained to the element size in
  // bits — confirm against the SHLL instruction description.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
  void uxtl(const VRegister& vd, const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);
3034
  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);
3073
3074 // One-element structure store from one register.
3075 void st1(const VRegister& vt, const MemOperand& src);
3076
3077 // One-element structure store from two registers.
3078 void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3079
3080 // One-element structure store from three registers.
3081 void st1(const VRegister& vt,
3082 const VRegister& vt2,
3083 const VRegister& vt3,
3084 const MemOperand& src);
3085
3086 // One-element structure store from four registers.
3087 void st1(const VRegister& vt,
3088 const VRegister& vt2,
3089 const VRegister& vt3,
3090 const VRegister& vt4,
3091 const MemOperand& src);
3092
3093 // One-element single structure store from one lane.
3094 void st1(const VRegister& vt, int lane, const MemOperand& src);
3095
3096 // Two-element structure store from two registers.
3097 void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3098
3099 // Two-element single structure store from two lanes.
3100 void st2(const VRegister& vt,
3101 const VRegister& vt2,
3102 int lane,
3103 const MemOperand& src);
3104
3105 // Three-element structure store from three registers.
3106 void st3(const VRegister& vt,
3107 const VRegister& vt2,
3108 const VRegister& vt3,
3109 const MemOperand& src);
3110
3111 // Three-element single structure store from three lanes.
3112 void st3(const VRegister& vt,
3113 const VRegister& vt2,
3114 const VRegister& vt3,
3115 int lane,
3116 const MemOperand& src);
3117
3118 // Four-element structure store from four registers.
3119 void st4(const VRegister& vt,
3120 const VRegister& vt2,
3121 const VRegister& vt3,
3122 const VRegister& vt4,
3123 const MemOperand& src);
3124
3125 // Four-element single structure store from four lanes.
3126 void st4(const VRegister& vt,
3127 const VRegister& vt2,
3128 const VRegister& vt3,
3129 const VRegister& vt4,
3130 int lane,
3131 const MemOperand& src);
3132
  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum.
  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd, const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd, const VRegister& vn);

  // Transpose vectors (primary).
  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Transpose vectors (secondary).
  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (primary).
  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (secondary).
  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (primary).
  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (secondary).
  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3216
  // Signed shift right by immediate.
  void sshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (second part).
  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
3288
  // FP reciprocal step.
  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root step.
  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long.
  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long.
  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-subtract.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-subtract (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-subtract.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-subtract (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed dot product [Armv8.2].
  void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3387
3388 // Signed saturating rounding doubling multiply accumulate returning high
3389 // half [Armv8.1].
3390 void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3391
3392 // Unsigned dot product [Armv8.2].
3393 void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3394
3395 // Signed saturating rounding doubling multiply subtract returning high half
3396 // [Armv8.1].
3397 void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3398
3399 // Signed saturating doubling multiply element returning high half.
3400 void sqdmulh(const VRegister& vd,
3401 const VRegister& vn,
3402 const VRegister& vm,
3403 int vm_index);
3404
3405 // Signed saturating rounding doubling multiply element returning high half.
3406 void sqrdmulh(const VRegister& vd,
3407 const VRegister& vn,
3408 const VRegister& vm,
3409 int vm_index);
3410
3411 // Signed dot product by element [Armv8.2].
3412 void sdot(const VRegister& vd,
3413 const VRegister& vn,
3414 const VRegister& vm,
3415 int vm_index);
3416
3417 // Signed saturating rounding doubling multiply accumulate element returning
3418 // high half [Armv8.1].
3419 void sqrdmlah(const VRegister& vd,
3420 const VRegister& vn,
3421 const VRegister& vm,
3422 int vm_index);
3423
3424 // Unsigned dot product by element [Armv8.2].
3425 void udot(const VRegister& vd,
3426 const VRegister& vn,
3427 const VRegister& vm,
3428 int vm_index);
3429
3430 // Signed saturating rounding doubling multiply subtract element returning
3431 // high half [Armv8.1].
3432 void sqrdmlsh(const VRegister& vd,
3433 const VRegister& vn,
3434 const VRegister& vm,
3435 int vm_index);
3436
3437   // Unsigned long multiply.
3438 void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3439
3440 // Unsigned long multiply (second part).
3441 void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3442
3443 // Add narrow returning high half.
3444 void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3445
3446 // Add narrow returning high half (second part).
3447 void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3448
3449 // Rounding add narrow returning high half.
3450 void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3451
3452 // Rounding add narrow returning high half (second part).
3453 void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3454
3455 // Subtract narrow returning high half.
3456 void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3457
3458 // Subtract narrow returning high half (second part).
3459 void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3460
3461 // Rounding subtract narrow returning high half.
3462 void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3463
3464 // Rounding subtract narrow returning high half (second part).
3465 void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3466
3467 // FP vector multiply accumulate.
3468 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3469
3470 // FP fused multiply-add long to accumulator.
3471 void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3472
3473 // FP fused multiply-add long to accumulator (second part).
3474 void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3475
3476 // FP fused multiply-add long to accumulator by element.
3477 void fmlal(const VRegister& vd,
3478 const VRegister& vn,
3479 const VRegister& vm,
3480 int vm_index);
3481
3482 // FP fused multiply-add long to accumulator by element (second part).
3483 void fmlal2(const VRegister& vd,
3484 const VRegister& vn,
3485 const VRegister& vm,
3486 int vm_index);
3487
3488 // FP vector multiply subtract.
3489 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3490
3491 // FP fused multiply-subtract long to accumulator.
3492 void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3493
3494 // FP fused multiply-subtract long to accumulator (second part).
3495 void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3496
3497 // FP fused multiply-subtract long to accumulator by element.
3498 void fmlsl(const VRegister& vd,
3499 const VRegister& vn,
3500 const VRegister& vm,
3501 int vm_index);
3502
3503 // FP fused multiply-subtract long to accumulator by element (second part).
3504 void fmlsl2(const VRegister& vd,
3505 const VRegister& vn,
3506 const VRegister& vm,
3507 int vm_index);
3508
3509 // FP vector multiply extended.
3510 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3511
3512 // FP absolute greater than or equal.
3513 void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3514
3515 // FP absolute greater than.
3516 void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3517
3518 // FP multiply by element.
3519 void fmul(const VRegister& vd,
3520 const VRegister& vn,
3521 const VRegister& vm,
3522 int vm_index);
3523
3524 // FP fused multiply-add to accumulator by element.
3525 void fmla(const VRegister& vd,
3526 const VRegister& vn,
3527 const VRegister& vm,
3528 int vm_index);
3529
3530 // FP fused multiply-sub from accumulator by element.
3531 void fmls(const VRegister& vd,
3532 const VRegister& vn,
3533 const VRegister& vm,
3534 int vm_index);
3535
3536 // FP multiply extended by element.
3537 void fmulx(const VRegister& vd,
3538 const VRegister& vn,
3539 const VRegister& vm,
3540 int vm_index);
3541
3542 // FP compare equal.
3543 void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3544
3545 // FP greater than.
3546 void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3547
3548 // FP greater than or equal.
3549 void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3550
3551 // FP compare equal to zero.
3552 void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
3553
3554 // FP greater than zero.
3555 void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
3556
3557 // FP greater than or equal to zero.
3558 void fcmge(const VRegister& vd, const VRegister& vn, double imm);
3559
3560 // FP less than or equal to zero.
3561 void fcmle(const VRegister& vd, const VRegister& vn, double imm);
3562
3563   // FP less than zero.
3564 void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
3565
3566 // FP absolute difference.
3567 void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3568
3569 // FP pairwise add vector.
3570 void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3571
3572 // FP pairwise add scalar.
3573 void faddp(const VRegister& vd, const VRegister& vn);
3574
3575 // FP pairwise maximum vector.
3576 void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3577
3578 // FP pairwise maximum scalar.
3579 void fmaxp(const VRegister& vd, const VRegister& vn);
3580
3581 // FP pairwise minimum vector.
3582 void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3583
3584 // FP pairwise minimum scalar.
3585 void fminp(const VRegister& vd, const VRegister& vn);
3586
3587 // FP pairwise maximum number vector.
3588 void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3589
3590 // FP pairwise maximum number scalar.
3591 void fmaxnmp(const VRegister& vd, const VRegister& vn);
3592
3593 // FP pairwise minimum number vector.
3594 void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3595
3596 // FP pairwise minimum number scalar.
3597 void fminnmp(const VRegister& vd, const VRegister& vn);
3598
3599 // v8.3 complex numbers - note that these are only partial/helper functions
3600 // and must be used in series in order to perform full CN operations.
3601
3602 // FP complex multiply accumulate (by element) [Armv8.3].
3603 void fcmla(const VRegister& vd,
3604 const VRegister& vn,
3605 const VRegister& vm,
3606 int vm_index,
3607 int rot);
3608
3609 // FP complex multiply accumulate [Armv8.3].
3610 void fcmla(const VRegister& vd,
3611 const VRegister& vn,
3612 const VRegister& vm,
3613 int rot);
3614
3615 // FP complex add [Armv8.3].
3616 void fcadd(const VRegister& vd,
3617 const VRegister& vn,
3618 const VRegister& vm,
3619 int rot);
3620
3621 // Emit generic instructions.
3622
3623 // Emit raw instructions into the instruction stream.
dci(Instr raw_inst)3624 void dci(Instr raw_inst) { Emit(raw_inst); }
3625
3626 // Emit 32 bits of data into the instruction stream.
dc32(uint32_t data)3627 void dc32(uint32_t data) { dc(data); }
3628
3629 // Emit 64 bits of data into the instruction stream.
dc64(uint64_t data)3630 void dc64(uint64_t data) { dc(data); }
3631
3632 // Emit data in the instruction stream.
3633 template <typename T>
dc(T data)3634 void dc(T data) {
3635 VIXL_ASSERT(AllowAssembler());
3636 GetBuffer()->Emit<T>(data);
3637 }
3638
3639 // Copy a string into the instruction stream, including the terminating NULL
3640 // character. The instruction pointer is then aligned correctly for
3641 // subsequent instructions.
EmitString(const char * string)3642 void EmitString(const char* string) {
3643 VIXL_ASSERT(string != NULL);
3644 VIXL_ASSERT(AllowAssembler());
3645
3646 GetBuffer()->EmitString(string);
3647 GetBuffer()->Align();
3648 }
3649
3650 // Code generation helpers.
3651
3652 // Register encoding.
Rd(CPURegister rd)3653 static Instr Rd(CPURegister rd) {
3654 VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode);
3655 return rd.GetCode() << Rd_offset;
3656 }
3657
Rn(CPURegister rn)3658 static Instr Rn(CPURegister rn) {
3659 VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode);
3660 return rn.GetCode() << Rn_offset;
3661 }
3662
Rm(CPURegister rm)3663 static Instr Rm(CPURegister rm) {
3664 VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
3665 return rm.GetCode() << Rm_offset;
3666 }
3667
RmNot31(CPURegister rm)3668 static Instr RmNot31(CPURegister rm) {
3669 VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
3670 VIXL_ASSERT(!rm.IsZero());
3671 return Rm(rm);
3672 }
3673
Ra(CPURegister ra)3674 static Instr Ra(CPURegister ra) {
3675 VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode);
3676 return ra.GetCode() << Ra_offset;
3677 }
3678
Rt(CPURegister rt)3679 static Instr Rt(CPURegister rt) {
3680 VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode);
3681 return rt.GetCode() << Rt_offset;
3682 }
3683
Rt2(CPURegister rt2)3684 static Instr Rt2(CPURegister rt2) {
3685 VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode);
3686 return rt2.GetCode() << Rt2_offset;
3687 }
3688
Rs(CPURegister rs)3689 static Instr Rs(CPURegister rs) {
3690 VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode);
3691 return rs.GetCode() << Rs_offset;
3692 }
3693
3694 // These encoding functions allow the stack pointer to be encoded, and
3695 // disallow the zero register.
RdSP(Register rd)3696 static Instr RdSP(Register rd) {
3697 VIXL_ASSERT(!rd.IsZero());
3698 return (rd.GetCode() & kRegCodeMask) << Rd_offset;
3699 }
3700
RnSP(Register rn)3701 static Instr RnSP(Register rn) {
3702 VIXL_ASSERT(!rn.IsZero());
3703 return (rn.GetCode() & kRegCodeMask) << Rn_offset;
3704 }
3705
RmSP(Register rm)3706 static Instr RmSP(Register rm) {
3707 VIXL_ASSERT(!rm.IsZero());
3708 return (rm.GetCode() & kRegCodeMask) << Rm_offset;
3709 }
3710
3711 // Flags encoding.
Flags(FlagsUpdate S)3712 static Instr Flags(FlagsUpdate S) {
3713 if (S == SetFlags) {
3714 return 1 << FlagsUpdate_offset;
3715 } else if (S == LeaveFlags) {
3716 return 0 << FlagsUpdate_offset;
3717 }
3718 VIXL_UNREACHABLE();
3719 return 0;
3720 }
3721
Cond(Condition cond)3722 static Instr Cond(Condition cond) { return cond << Condition_offset; }
3723
3724 // PC-relative address encoding.
ImmPCRelAddress(int64_t imm21)3725 static Instr ImmPCRelAddress(int64_t imm21) {
3726 VIXL_ASSERT(IsInt21(imm21));
3727 Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
3728 Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
3729 Instr immlo = imm << ImmPCRelLo_offset;
3730 return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
3731 }
3732
3733 // Branch encoding.
ImmUncondBranch(int64_t imm26)3734 static Instr ImmUncondBranch(int64_t imm26) {
3735 VIXL_ASSERT(IsInt26(imm26));
3736 return TruncateToUint26(imm26) << ImmUncondBranch_offset;
3737 }
3738
ImmCondBranch(int64_t imm19)3739 static Instr ImmCondBranch(int64_t imm19) {
3740 VIXL_ASSERT(IsInt19(imm19));
3741 return TruncateToUint19(imm19) << ImmCondBranch_offset;
3742 }
3743
ImmCmpBranch(int64_t imm19)3744 static Instr ImmCmpBranch(int64_t imm19) {
3745 VIXL_ASSERT(IsInt19(imm19));
3746 return TruncateToUint19(imm19) << ImmCmpBranch_offset;
3747 }
3748
ImmTestBranch(int64_t imm14)3749 static Instr ImmTestBranch(int64_t imm14) {
3750 VIXL_ASSERT(IsInt14(imm14));
3751 return TruncateToUint14(imm14) << ImmTestBranch_offset;
3752 }
3753
ImmTestBranchBit(unsigned bit_pos)3754 static Instr ImmTestBranchBit(unsigned bit_pos) {
3755 VIXL_ASSERT(IsUint6(bit_pos));
3756 // Subtract five from the shift offset, as we need bit 5 from bit_pos.
3757 unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
3758 unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
3759 b5 &= ImmTestBranchBit5_mask;
3760 b40 &= ImmTestBranchBit40_mask;
3761 return b5 | b40;
3762 }
3763
3764 // Data Processing encoding.
SF(Register rd)3765 static Instr SF(Register rd) {
3766 return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
3767 }
3768
ImmAddSub(int imm)3769 static Instr ImmAddSub(int imm) {
3770 VIXL_ASSERT(IsImmAddSub(imm));
3771 if (IsUint12(imm)) { // No shift required.
3772 imm <<= ImmAddSub_offset;
3773 } else {
3774 imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
3775 }
3776 return imm;
3777 }
3778
ImmS(unsigned imms,unsigned reg_size)3779 static Instr ImmS(unsigned imms, unsigned reg_size) {
3780 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
3781 ((reg_size == kWRegSize) && IsUint5(imms)));
3782 USE(reg_size);
3783 return imms << ImmS_offset;
3784 }
3785
ImmR(unsigned immr,unsigned reg_size)3786 static Instr ImmR(unsigned immr, unsigned reg_size) {
3787 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
3788 ((reg_size == kWRegSize) && IsUint5(immr)));
3789 USE(reg_size);
3790 VIXL_ASSERT(IsUint6(immr));
3791 return immr << ImmR_offset;
3792 }
3793
ImmSetBits(unsigned imms,unsigned reg_size)3794 static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
3795 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3796 VIXL_ASSERT(IsUint6(imms));
3797 VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
3798 USE(reg_size);
3799 return imms << ImmSetBits_offset;
3800 }
3801
ImmRotate(unsigned immr,unsigned reg_size)3802 static Instr ImmRotate(unsigned immr, unsigned reg_size) {
3803 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3804 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
3805 ((reg_size == kWRegSize) && IsUint5(immr)));
3806 USE(reg_size);
3807 return immr << ImmRotate_offset;
3808 }
3809
ImmLLiteral(int64_t imm19)3810 static Instr ImmLLiteral(int64_t imm19) {
3811 VIXL_ASSERT(IsInt19(imm19));
3812 return TruncateToUint19(imm19) << ImmLLiteral_offset;
3813 }
3814
BitN(unsigned bitn,unsigned reg_size)3815 static Instr BitN(unsigned bitn, unsigned reg_size) {
3816 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3817 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
3818 USE(reg_size);
3819 return bitn << BitN_offset;
3820 }
3821
ShiftDP(Shift shift)3822 static Instr ShiftDP(Shift shift) {
3823 VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
3824 return shift << ShiftDP_offset;
3825 }
3826
ImmDPShift(unsigned amount)3827 static Instr ImmDPShift(unsigned amount) {
3828 VIXL_ASSERT(IsUint6(amount));
3829 return amount << ImmDPShift_offset;
3830 }
3831
ExtendMode(Extend extend)3832 static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }
3833
ImmExtendShift(unsigned left_shift)3834 static Instr ImmExtendShift(unsigned left_shift) {
3835 VIXL_ASSERT(left_shift <= 4);
3836 return left_shift << ImmExtendShift_offset;
3837 }
3838
ImmCondCmp(unsigned imm)3839 static Instr ImmCondCmp(unsigned imm) {
3840 VIXL_ASSERT(IsUint5(imm));
3841 return imm << ImmCondCmp_offset;
3842 }
3843
Nzcv(StatusFlags nzcv)3844 static Instr Nzcv(StatusFlags nzcv) {
3845 return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
3846 }
3847
3848 // MemOperand offset encoding.
ImmLSUnsigned(int64_t imm12)3849 static Instr ImmLSUnsigned(int64_t imm12) {
3850 VIXL_ASSERT(IsUint12(imm12));
3851 return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
3852 }
3853
ImmLS(int64_t imm9)3854 static Instr ImmLS(int64_t imm9) {
3855 VIXL_ASSERT(IsInt9(imm9));
3856 return TruncateToUint9(imm9) << ImmLS_offset;
3857 }
3858
ImmLSPair(int64_t imm7,unsigned access_size)3859 static Instr ImmLSPair(int64_t imm7, unsigned access_size) {
3860 VIXL_ASSERT(IsMultiple(imm7, 1 << access_size));
3861 int64_t scaled_imm7 = imm7 / (1 << access_size);
3862 VIXL_ASSERT(IsInt7(scaled_imm7));
3863 return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
3864 }
3865
ImmShiftLS(unsigned shift_amount)3866 static Instr ImmShiftLS(unsigned shift_amount) {
3867 VIXL_ASSERT(IsUint1(shift_amount));
3868 return shift_amount << ImmShiftLS_offset;
3869 }
3870
ImmLSPAC(int64_t imm10)3871 static Instr ImmLSPAC(int64_t imm10) {
3872 VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
3873 int64_t scaled_imm10 = imm10 / (1 << 3);
3874 VIXL_ASSERT(IsInt10(scaled_imm10));
3875 uint32_t s_bit = (scaled_imm10 >> 9) & 1;
3876 return (s_bit << ImmLSPACHi_offset) |
3877 (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
3878 }
3879
ImmPrefetchOperation(int imm5)3880 static Instr ImmPrefetchOperation(int imm5) {
3881 VIXL_ASSERT(IsUint5(imm5));
3882 return imm5 << ImmPrefetchOperation_offset;
3883 }
3884
ImmException(int imm16)3885 static Instr ImmException(int imm16) {
3886 VIXL_ASSERT(IsUint16(imm16));
3887 return imm16 << ImmException_offset;
3888 }
3889
ImmUdf(int imm16)3890 static Instr ImmUdf(int imm16) {
3891 VIXL_ASSERT(IsUint16(imm16));
3892 return imm16 << ImmUdf_offset;
3893 }
3894
ImmSystemRegister(int imm16)3895 static Instr ImmSystemRegister(int imm16) {
3896 VIXL_ASSERT(IsUint16(imm16));
3897 return imm16 << ImmSystemRegister_offset;
3898 }
3899
ImmRMIFRotation(int imm6)3900 static Instr ImmRMIFRotation(int imm6) {
3901 VIXL_ASSERT(IsUint6(imm6));
3902 return imm6 << ImmRMIFRotation_offset;
3903 }
3904
ImmHint(int imm7)3905 static Instr ImmHint(int imm7) {
3906 VIXL_ASSERT(IsUint7(imm7));
3907 return imm7 << ImmHint_offset;
3908 }
3909
CRm(int imm4)3910 static Instr CRm(int imm4) {
3911 VIXL_ASSERT(IsUint4(imm4));
3912 return imm4 << CRm_offset;
3913 }
3914
CRn(int imm4)3915 static Instr CRn(int imm4) {
3916 VIXL_ASSERT(IsUint4(imm4));
3917 return imm4 << CRn_offset;
3918 }
3919
SysOp(int imm14)3920 static Instr SysOp(int imm14) {
3921 VIXL_ASSERT(IsUint14(imm14));
3922 return imm14 << SysOp_offset;
3923 }
3924
ImmSysOp1(int imm3)3925 static Instr ImmSysOp1(int imm3) {
3926 VIXL_ASSERT(IsUint3(imm3));
3927 return imm3 << SysOp1_offset;
3928 }
3929
ImmSysOp2(int imm3)3930 static Instr ImmSysOp2(int imm3) {
3931 VIXL_ASSERT(IsUint3(imm3));
3932 return imm3 << SysOp2_offset;
3933 }
3934
ImmBarrierDomain(int imm2)3935 static Instr ImmBarrierDomain(int imm2) {
3936 VIXL_ASSERT(IsUint2(imm2));
3937 return imm2 << ImmBarrierDomain_offset;
3938 }
3939
ImmBarrierType(int imm2)3940 static Instr ImmBarrierType(int imm2) {
3941 VIXL_ASSERT(IsUint2(imm2));
3942 return imm2 << ImmBarrierType_offset;
3943 }
3944
3945 // Move immediates encoding.
ImmMoveWide(uint64_t imm)3946 static Instr ImmMoveWide(uint64_t imm) {
3947 VIXL_ASSERT(IsUint16(imm));
3948 return static_cast<Instr>(imm << ImmMoveWide_offset);
3949 }
3950
ShiftMoveWide(int64_t shift)3951 static Instr ShiftMoveWide(int64_t shift) {
3952 VIXL_ASSERT(IsUint2(shift));
3953 return static_cast<Instr>(shift << ShiftMoveWide_offset);
3954 }
3955
3956 // FP Immediates.
3957 static Instr ImmFP16(Float16 imm);
3958 static Instr ImmFP32(float imm);
3959 static Instr ImmFP64(double imm);
3960
3961 // FP register type.
FPType(VRegister fd)3962 static Instr FPType(VRegister fd) {
3963 VIXL_ASSERT(fd.IsScalar());
3964 switch (fd.GetSizeInBits()) {
3965 case 16:
3966 return FP16;
3967 case 32:
3968 return FP32;
3969 case 64:
3970 return FP64;
3971 default:
3972 VIXL_UNREACHABLE();
3973 return 0;
3974 }
3975 }
3976
FPScale(unsigned scale)3977 static Instr FPScale(unsigned scale) {
3978 VIXL_ASSERT(IsUint6(scale));
3979 return scale << FPScale_offset;
3980 }
3981
3982 // Immediate field checking helpers.
3983 static bool IsImmAddSub(int64_t immediate);
3984 static bool IsImmConditionalCompare(int64_t immediate);
3985 static bool IsImmFP16(Float16 imm);
3986 static bool IsImmFP32(float imm);
3987 static bool IsImmFP64(double imm);
3988 static bool IsImmLogical(uint64_t value,
3989 unsigned width,
3990 unsigned* n = NULL,
3991 unsigned* imm_s = NULL,
3992 unsigned* imm_r = NULL);
3993 static bool IsImmLSPair(int64_t offset, unsigned access_size);
3994 static bool IsImmLSScaled(int64_t offset, unsigned access_size);
3995 static bool IsImmLSUnscaled(int64_t offset);
3996 static bool IsImmMovn(uint64_t imm, unsigned reg_size);
3997 static bool IsImmMovz(uint64_t imm, unsigned reg_size);
3998
3999 // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)4000 static Instr VFormat(VRegister vd) {
4001 if (vd.Is64Bits()) {
4002 switch (vd.GetLanes()) {
4003 case 2:
4004 return NEON_2S;
4005 case 4:
4006 return NEON_4H;
4007 case 8:
4008 return NEON_8B;
4009 default:
4010 return 0xffffffff;
4011 }
4012 } else {
4013 VIXL_ASSERT(vd.Is128Bits());
4014 switch (vd.GetLanes()) {
4015 case 2:
4016 return NEON_2D;
4017 case 4:
4018 return NEON_4S;
4019 case 8:
4020 return NEON_8H;
4021 case 16:
4022 return NEON_16B;
4023 default:
4024 return 0xffffffff;
4025 }
4026 }
4027 }
4028
4029 // Instruction bits for vector format in floating point data processing
4030 // operations.
FPFormat(VRegister vd)4031 static Instr FPFormat(VRegister vd) {
4032 switch (vd.GetLanes()) {
4033 case 1:
4034 // Floating point scalar formats.
4035 switch (vd.GetSizeInBits()) {
4036 case 16:
4037 return FP16;
4038 case 32:
4039 return FP32;
4040 case 64:
4041 return FP64;
4042 default:
4043 VIXL_UNREACHABLE();
4044 }
4045 break;
4046 case 2:
4047 // Two lane floating point vector formats.
4048 switch (vd.GetSizeInBits()) {
4049 case 64:
4050 return NEON_FP_2S;
4051 case 128:
4052 return NEON_FP_2D;
4053 default:
4054 VIXL_UNREACHABLE();
4055 }
4056 break;
4057 case 4:
4058 // Four lane floating point vector formats.
4059 switch (vd.GetSizeInBits()) {
4060 case 64:
4061 return NEON_FP_4H;
4062 case 128:
4063 return NEON_FP_4S;
4064 default:
4065 VIXL_UNREACHABLE();
4066 }
4067 break;
4068 case 8:
4069 // Eight lane floating point vector format.
4070 VIXL_ASSERT(vd.Is128Bits());
4071 return NEON_FP_8H;
4072 default:
4073 VIXL_UNREACHABLE();
4074 return 0;
4075 }
4076 VIXL_UNREACHABLE();
4077 return 0;
4078 }
4079
4080 // Instruction bits for vector format in load and store operations.
LSVFormat(VRegister vd)4081 static Instr LSVFormat(VRegister vd) {
4082 if (vd.Is64Bits()) {
4083 switch (vd.GetLanes()) {
4084 case 1:
4085 return LS_NEON_1D;
4086 case 2:
4087 return LS_NEON_2S;
4088 case 4:
4089 return LS_NEON_4H;
4090 case 8:
4091 return LS_NEON_8B;
4092 default:
4093 return 0xffffffff;
4094 }
4095 } else {
4096 VIXL_ASSERT(vd.Is128Bits());
4097 switch (vd.GetLanes()) {
4098 case 2:
4099 return LS_NEON_2D;
4100 case 4:
4101 return LS_NEON_4S;
4102 case 8:
4103 return LS_NEON_8H;
4104 case 16:
4105 return LS_NEON_16B;
4106 default:
4107 return 0xffffffff;
4108 }
4109 }
4110 }
4111
4112 // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)4113 static Instr SFormat(VRegister vd) {
4114 VIXL_ASSERT(vd.GetLanes() == 1);
4115 switch (vd.GetSizeInBytes()) {
4116 case 1:
4117 return NEON_B;
4118 case 2:
4119 return NEON_H;
4120 case 4:
4121 return NEON_S;
4122 case 8:
4123 return NEON_D;
4124 default:
4125 return 0xffffffff;
4126 }
4127 }
4128
ImmNEONHLM(int index,int num_bits)4129 static Instr ImmNEONHLM(int index, int num_bits) {
4130 int h, l, m;
4131 if (num_bits == 3) {
4132 VIXL_ASSERT(IsUint3(index));
4133 h = (index >> 2) & 1;
4134 l = (index >> 1) & 1;
4135 m = (index >> 0) & 1;
4136 } else if (num_bits == 2) {
4137 VIXL_ASSERT(IsUint2(index));
4138 h = (index >> 1) & 1;
4139 l = (index >> 0) & 1;
4140 m = 0;
4141 } else {
4142 VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
4143 h = (index >> 0) & 1;
4144 l = 0;
4145 m = 0;
4146 }
4147 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
4148 }
4149
ImmRotFcadd(int rot)4150 static Instr ImmRotFcadd(int rot) {
4151 VIXL_ASSERT(rot == 90 || rot == 270);
4152 return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
4153 }
4154
ImmRotFcmlaSca(int rot)4155 static Instr ImmRotFcmlaSca(int rot) {
4156 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
4157 return (rot / 90) << ImmRotFcmlaSca_offset;
4158 }
4159
ImmRotFcmlaVec(int rot)4160 static Instr ImmRotFcmlaVec(int rot) {
4161 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
4162 return (rot / 90) << ImmRotFcmlaVec_offset;
4163 }
4164
ImmNEONExt(int imm4)4165 static Instr ImmNEONExt(int imm4) {
4166 VIXL_ASSERT(IsUint4(imm4));
4167 return imm4 << ImmNEONExt_offset;
4168 }
4169
ImmNEON5(Instr format,int index)4170 static Instr ImmNEON5(Instr format, int index) {
4171 VIXL_ASSERT(IsUint4(index));
4172 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
4173 int imm5 = (index << (s + 1)) | (1 << s);
4174 return imm5 << ImmNEON5_offset;
4175 }
4176
ImmNEON4(Instr format,int index)4177 static Instr ImmNEON4(Instr format, int index) {
4178 VIXL_ASSERT(IsUint4(index));
4179 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
4180 int imm4 = index << s;
4181 return imm4 << ImmNEON4_offset;
4182 }
4183
ImmNEONabcdefgh(int imm8)4184 static Instr ImmNEONabcdefgh(int imm8) {
4185 VIXL_ASSERT(IsUint8(imm8));
4186 Instr instr;
4187 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
4188 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
4189 return instr;
4190 }
4191
NEONCmode(int cmode)4192 static Instr NEONCmode(int cmode) {
4193 VIXL_ASSERT(IsUint4(cmode));
4194 return cmode << NEONCmode_offset;
4195 }
4196
NEONModImmOp(int op)4197 static Instr NEONModImmOp(int op) {
4198 VIXL_ASSERT(IsUint1(op));
4199 return op << NEONModImmOp_offset;
4200 }
4201
4202 // Size of the code generated since label to the current position.
GetSizeOfCodeGeneratedSince(Label * label)4203 size_t GetSizeOfCodeGeneratedSince(Label* label) const {
4204 VIXL_ASSERT(label->IsBound());
4205 return GetBuffer().GetOffsetFrom(label->GetLocation());
4206 }
4207 VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
4208 size_t SizeOfCodeGeneratedSince(Label* label) const) {
4209 return GetSizeOfCodeGeneratedSince(label);
4210 }
4211
4212 VIXL_DEPRECATED("GetBuffer().GetCapacity()",
4213 size_t GetBufferCapacity() const) {
4214 return GetBuffer().GetCapacity();
4215 }
4216 VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
4217 return GetBuffer().GetCapacity();
4218 }
4219
4220 VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
4221 size_t GetRemainingBufferSpace() const) {
4222 return GetBuffer().GetRemainingBytes();
4223 }
4224 VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
4225 size_t RemainingBufferSpace() const) {
4226 return GetBuffer().GetRemainingBytes();
4227 }
4228
GetPic()4229 PositionIndependentCodeOption GetPic() const { return pic_; }
4230 VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
4231 return GetPic();
4232 }
4233
GetCPUFeatures()4234 CPUFeatures* GetCPUFeatures() { return &cpu_features_; }
4235
SetCPUFeatures(const CPUFeatures & cpu_features)4236 void SetCPUFeatures(const CPUFeatures& cpu_features) {
4237 cpu_features_ = cpu_features;
4238 }
4239
AllowPageOffsetDependentCode()4240 bool AllowPageOffsetDependentCode() const {
4241 return (GetPic() == PageOffsetDependentCode) ||
4242 (GetPic() == PositionDependentCode);
4243 }
4244
AppropriateZeroRegFor(const CPURegister & reg)4245 static Register AppropriateZeroRegFor(const CPURegister& reg) {
4246 return reg.Is64Bits() ? Register(xzr) : Register(wzr);
4247 }
4248
 protected:
  // Emission helper for single-register loads and stores. `option` selects
  // the preferred encoding of immediate offsets (default: PreferScaledOffset).
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  // Emission helper for pointer-authenticated loads/stores of an X register.
  void LoadStorePAC(const Register& xt,
                    const MemOperand& addr,
                    LoadStorePACOp op);

  // Emission helper for register-pair loads and stores.
  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  // NEON structure load/store helpers (multi-structure and single-structure
  // forms, per the NEONLoadStore*StructOp operands).
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  // Emission helper for prefetch operations; `option` as for LoadStore.
  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // Emission helpers for logical (bitwise) operations.
  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
  // reports a bogus uninitialised warning then.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  // Emission helper for conditional-compare instructions.
  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  // Emission helper for add/subtract-with-carry; `S` controls flag updates.
  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);


  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  // Emission helper for add/subtract; `S` controls flag updates.
  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // Emission helper for NEON table-lookup instructions (TBL/TBX family).
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
  // Convenience pass-through for CPU feature checks: delegates to the
  // CPUFeatures instance owned by this assembler. Up to four features may be
  // queried at once; unspecified slots default to CPUFeatures::kNone.
  bool CPUHas(CPUFeatures::Feature feature0,
              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
    return cpu_features_.Has(feature0, feature1, feature2, feature3);
  }
4354
  // Determine whether the target CPU has the specified registers, based on the
  // currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the same
  // entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  // Check whether the enabled CPU features permit access to the given system
  // register.
  bool CPUHas(SystemRegister sysreg) const;

 private:
  // Encode a floating-point immediate as an 8-bit value (the imm8 field, as
  // the names indicate).
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON emission helpers, grouped by encoding class (see the *Op operand
  // types for the instruction families each one covers).
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  // Compute the addressing-mode field bits for NEON structure loads/stores.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    // An instruction word must be exactly one instruction slot wide.
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    // Direct emission must be permitted in the current code-generation scope.
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }
4538
  // Position-independence constraint under which code is generated; queried
  // through GetPic() (see AllowPageOffsetDependentCode()).
  PositionIndependentCodeOption pic_;

  // CPU features the generated code may rely on; exposed via GetCPUFeatures()
  // / SetCPUFeatures() and consulted by the CPUHas() checks.
  CPUFeatures cpu_features_;
};
4543
4544
4545 template <typename T>
UpdateValue(T new_value,const Assembler * assembler)4546 void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
4547 return UpdateValue(new_value,
4548 assembler->GetBuffer().GetStartAddress<uint8_t*>());
4549 }
4550
4551
4552 template <typename T>
UpdateValue(T high64,T low64,const Assembler * assembler)4553 void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
4554 return UpdateValue(high64,
4555 low64,
4556 assembler->GetBuffer().GetStartAddress<uint8_t*>());
4557 }
4558
4559
4560 } // namespace aarch64
4561
4562 // Required InvalSet template specialisations.
4563 // TODO: These template specialisations should not live in this file. Move
4564 // Label out of the aarch64 namespace in order to share its implementation
4565 // later.
4566 #define INVAL_SET_TEMPLATE_PARAMETERS \
4567 ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t, \
4568 aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
4569 aarch64::Label::kReclaimFactor
4570 template <>
GetKey(const ptrdiff_t & element)4571 inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
4572 const ptrdiff_t& element) {
4573 return element;
4574 }
4575 template <>
SetKey(ptrdiff_t * element,ptrdiff_t key)4576 inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
4577 ptrdiff_t key) {
4578 *element = key;
4579 }
4580 #undef INVAL_SET_TEMPLATE_PARAMETERS
4581
4582 } // namespace vixl
4583
4584 #endif // VIXL_AARCH64_ASSEMBLER_AARCH64_H_
4585