// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
#define VIXL_AARCH64_ASSEMBLER_AARCH64_H_

#include "../assembler-base-vixl.h"
#include "../code-generation-scopes-vixl.h"
#include "../cpu-features.h"
#include "../globals-vixl.h"
#include "../invalset-vixl.h"
#include "../utils-vixl.h"
#include "operands-aarch64.h"

namespace vixl {
namespace aarch64 {

class LabelTestHelper;  // Forward declaration.


class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destroyed.
    VIXL_ASSERT(!IsLinked());
  }

  bool IsBound() const { return location_ >= 0; }
  bool IsLinked() const { return !links_.empty(); }

  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor>
      LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average-case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, i.e. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

// It is not safe to copy labels, so disable the copy constructor and operator
// by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
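
// A minimal usage sketch for Label (illustrative only; `masm` is an
// Assembler, declared further below, driving a code buffer):
//
//   Label loop;
//   masm.bind(&loop);       // Bind the label to the current PC...
//   masm.sub(x0, x0, 1);
//   masm.cbnz(x0, &loop);   // ...and branch back to it.
//
// Branches may also be emitted before the label is bound; such branches are
// recorded in the label's link set and resolved when bind() is called.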


class Assembler;
class LiteralPool;

// A literal is a 32-bit or 64-bit piece of data stored in the instruction
// stream and loaded through a PC-relative load. The same literal can be
// referred to by multiple instructions, but a literal can only reside at one
// place in memory. A literal can be used by a load before or after being
// placed in memory.
//
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//   1) the literal is placed: the offset (stored as offset + 1) is used to
//      resolve any subsequent load using the literal.
//   2) the literal is not placed: offset is the offset of the last load using
//      the literal (stored as -offset - 1). If multiple loads refer to this
//      literal then the last load holds the offset of the preceding load and
//      all loads form a chain. Once the literal is placed, all the loads in
//      the chain are resolved and future loads fall back to possibility 1.
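//
// A worked example of this convention (arithmetic only): a literal placed at
// buffer offset 8 stores offset_ = 8 + 1 = 9, so GetOffset() returns 8. An
// unplaced literal whose last use is a load at offset 16 stores
// offset_ = -16 - 1 = -17, so GetLastUse() returns 16. An offset_ of 0
// therefore unambiguously means "neither used nor placed".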
class RawLiteral {
 public:
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they
  // are actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  size_t size_;
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};


template <typename T>
class Literal : public RawLiteral {
 public:
  explicit Literal(T value,
                   LiteralPool* literal_pool = NULL,
                   RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(sizeof(value), literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
    UpdateValue(value);
  }

  Literal(T high64,
          T low64,
          LiteralPool* literal_pool = NULL,
          RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
    UpdateValue(high64, low64);
  }

  virtual ~Literal() {}

  // Update the value of this literal, if necessary by rewriting the value in
  // the pool.
  // If the literal has already been placed in a literal pool, the address of
  // the start of the code buffer must be provided, as the literal only knows
  // its offset from there. This also allows patching the value after the code
  // has been moved in memory.
  void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(new_value) == size_);
    memcpy(&low64_, &new_value, sizeof(new_value));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(low64) == size_ / 2);
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T new_value, const Assembler* assembler);
  void UpdateValue(T high64, T low64, const Assembler* assembler);

 private:
  void RewriteValueInCode(uint8_t* code_buffer) {
    VIXL_ASSERT(IsPlaced());
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    switch (GetSize()) {
      case kSRegSizeInBytes:
        *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
            GetRawValue32();
        break;
      case kDRegSizeInBytes:
        *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
            GetRawValue64();
        break;
      default:
        VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
        uint64_t* base_address =
            reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
        *base_address = GetRawValue128Low64();
        *(base_address + 1) = GetRawValue128High64();
    }
  }
};
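
// A minimal usage sketch for Literal (illustrative only; `masm` is an
// Assembler, declared further below):
//
//   Literal<uint64_t> big_constant(0x1234567890abcdef);
//   masm.ldr(x0, &big_constant);  // PC-relative load, resolved on placement.
//   masm.place(&big_constant);    // Emit the literal into the stream.
//
// After placement the stored value can still be patched in place:
//
//   big_constant.UpdateValue(UINT64_C(42), &masm);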


// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
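
// For example (illustrative), an Assembler whose output may be copied
// wholesale to another page-aligned location, preserving 4KB page offsets,
// could be constructed with:
//
//   Assembler masm(PageOffsetDependentCode);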


// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
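
// For example (illustrative): `ldr(x0, MemOperand(x1, 8))` defaults to
// PreferScaledOffset and uses the scaled-immediate LDR encoding, while
// passing RequireUnscaledOffset selects the unscaled LDUR encoding, whose
// signed 9-bit immediate covers a much smaller offset range.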


// Assembler.
class Assembler : public vixl::internal::AssemblerBase {
 public:
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}

  // Upon destruction, the code will assert that one of the following is true:
  // * The Assembler object has not been used.
  // * Nothing has been emitted since the last Reset() call.
  // * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}

  // System functions.

  // Start generating code from the beginning of the buffer, discarding any
  // code and data that has already been emitted into the buffer.
  void Reset();

  // Bind a label to the current PC.
  void bind(Label* label);

  // Bind a label to a specified offset from the start of the buffer.
  void BindToOffset(Label* label, ptrdiff_t offset);

  // Place a literal at the current PC.
  void place(RawLiteral* literal);

  VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
    return GetCursorOffset();
  }

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t GetBufferEndOffset() const) {
    return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t BufferEndOffset() const) {
    return GetBuffer().GetCapacity();
  }

  // Return the address of a bound label.
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }
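
  // For example (an illustrative sketch, not from the source), after binding
  // a label the address of the generated code can be recovered for execution
  // or inspection:
  //
  //   Label entry;
  //   masm.bind(&entry);
  //   ...
  //   Instruction* start = masm.GetLabelAddress<Instruction*>(&entry);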

  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
  VIXL_DEPRECATED("GetInstructionAt",
                  Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
    return GetInstructionAt(instruction_offset);
  }

  ptrdiff_t GetInstructionOffset(Instruction* instruction) {
    VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
    ptrdiff_t offset =
        instruction - GetBuffer()->GetStartAddress<Instruction*>();
    VIXL_ASSERT((0 <= offset) &&
                (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
    return offset;
  }
  VIXL_DEPRECATED("GetInstructionOffset",
                  ptrdiff_t InstructionOffset(Instruction* instruction)) {
    return GetInstructionOffset(instruction);
  }

  // Instruction set functions.

  // Branch / Jump instructions.

  // Branch to register.
  void br(const Register& xn);

  // Branch with link to register.
  void blr(const Register& xn);

  // Branch to register with return hint.
  void ret(const Register& xn = lr);

  // Branch to register, with pointer authentication, using key A and a
  // modifier of zero [Armv8.3].
  void braaz(const Register& xn);

  // Branch to register, with pointer authentication, using key B and a
  // modifier of zero [Armv8.3].
  void brabz(const Register& xn);

  // Branch with link to register, with pointer authentication, using key A
  // and a modifier of zero [Armv8.3].
  void blraaz(const Register& xn);

  // Branch with link to register, with pointer authentication, using key B
  // and a modifier of zero [Armv8.3].
  void blrabz(const Register& xn);

  // Return from subroutine, with pointer authentication, using key A
  // [Armv8.3].
  void retaa();

  // Return from subroutine, with pointer authentication, using key B
  // [Armv8.3].
  void retab();

  // Branch to register, with pointer authentication, using key A [Armv8.3].
  void braa(const Register& xn, const Register& xm);

  // Branch to register, with pointer authentication, using key B [Armv8.3].
  void brab(const Register& xn, const Register& xm);

  // Branch with link to register, with pointer authentication, using key A
  // [Armv8.3].
  void blraa(const Register& xn, const Register& xm);

  // Branch with link to register, with pointer authentication, using key B
  // [Armv8.3].
  void blrab(const Register& xn, const Register& xm);

  // Unconditional branch to label.
  void b(Label* label);

  // Conditional branch to label.
  void b(Label* label, Condition cond);

  // Unconditional branch to PC offset.
  void b(int64_t imm26);

  // Conditional branch to PC offset.
  void b(int64_t imm19, Condition cond);

  // Branch with link to label.
  void bl(Label* label);

  // Branch with link to PC offset.
  void bl(int64_t imm26);

  // Compare and branch to label if zero.
  void cbz(const Register& rt, Label* label);

  // Compare and branch to PC offset if zero.
  void cbz(const Register& rt, int64_t imm19);

  // Compare and branch to label if not zero.
  void cbnz(const Register& rt, Label* label);

  // Compare and branch to PC offset if not zero.
  void cbnz(const Register& rt, int64_t imm19);

  // Table lookup from one register.
  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup from two registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup from three registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup from four registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Table lookup extension from one register.
  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup extension from two registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup extension from three registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup extension from four registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Test bit and branch to label if zero.
  void tbz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if zero.
  void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Test bit and branch to label if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Address calculation instructions.
  // Calculate a PC-relative address. Unlike branches, the offset in adr is
  // unscaled (i.e. the result can be unaligned).

  // Calculate the address of a label.
  void adr(const Register& xd, Label* label);

  // Calculate the address of a PC offset.
  void adr(const Register& xd, int64_t imm21);

  // Calculate the page address of a label.
  void adrp(const Register& xd, Label* label);

  // Calculate the page address of a PC offset.
  void adrp(const Register& xd, int64_t imm21);

  // Data Processing instructions.

  // Add.
  void add(const Register& rd, const Register& rn, const Operand& operand);

  // Add and update status flags.
  void adds(const Register& rd, const Register& rn, const Operand& operand);

  // Compare negative.
  void cmn(const Register& rn, const Operand& operand);

  // Subtract.
  void sub(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract and update status flags.
  void subs(const Register& rd, const Register& rn, const Operand& operand);

  // Compare.
  void cmp(const Register& rn, const Operand& operand);

  // Negate.
  void neg(const Register& rd, const Operand& operand);

  // Negate and update status flags.
  void negs(const Register& rd, const Operand& operand);

  // Add with carry bit.
  void adc(const Register& rd, const Register& rn, const Operand& operand);

  // Add with carry bit and update status flags.
  void adcs(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit.
  void sbc(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit and update status flags.
  void sbcs(const Register& rd, const Register& rn, const Operand& operand);

  // Rotate register right and insert into NZCV flags under the control of a
  // mask [Armv8.4].
  void rmif(const Register& xn, unsigned rotation, StatusFlags flags);

  // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
  void setf8(const Register& rn);

  // Set NZCV flags from register, treated as a 16-bit value [Armv8.4].
  void setf16(const Register& rn);

  // Negate with carry bit.
  void ngc(const Register& rd, const Operand& operand);

  // Negate with carry bit and update status flags.
  void ngcs(const Register& rd, const Operand& operand);

  // Logical instructions.

  // Bitwise and (A & B).
  void and_(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise and (A & B) and update status flags.
  void ands(const Register& rd, const Register& rn, const Operand& operand);

  // Bit test and set flags.
  void tst(const Register& rn, const Operand& operand);

  // Bit clear (A & ~B).
  void bic(const Register& rd, const Register& rn, const Operand& operand);

  // Bit clear (A & ~B) and update status flags.
  void bics(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or (A | B).
  void orr(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or-not (A | ~B).
  void orn(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor/xor (A ^ B).
  void eor(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor-not/xnor (A ^ ~B).
  void eon(const Register& rd, const Register& rn, const Operand& operand);

  // Logical shift left by variable.
  void lslv(const Register& rd, const Register& rn, const Register& rm);

  // Logical shift right by variable.
  void lsrv(const Register& rd, const Register& rn, const Register& rm);

  // Arithmetic shift right by variable.
  void asrv(const Register& rd, const Register& rn, const Register& rm);

  // Rotate right by variable.
  void rorv(const Register& rd, const Register& rn, const Register& rm);

  // Bitfield instructions.

  // Bitfield move.
  void bfm(const Register& rd,
           const Register& rn,
           unsigned immr,
           unsigned imms);

  // Signed bitfield move.
  void sbfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Unsigned bitfield move.
  void ubfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Bfm aliases.

  // Bitfield insert.
  void bfi(const Register& rd,
           const Register& rn,
           unsigned lsb,
           unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd,
        rn,
        (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
        width - 1);
  }
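
  // A worked example of the mapping above (arithmetic only): for the 32-bit
  // `bfi(w0, w1, 8, 4)`, immr is (32 - 8) & 31 = 24 and imms is 4 - 1 = 3,
  // so the alias emits `bfm(w0, w1, 24, 3)`.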

  // Bitfield extract and insert low.
  void bfxil(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd, rn, lsb, lsb + width - 1);
  }

  // Bitfield clear [Armv8.2].
  void bfc(const Register& rd, unsigned lsb, unsigned width) {
    bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
  }

  // Sbfm aliases.

  // Arithmetic shift right.
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Signed bitfield insert with zero at right.
  void sbfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Signed bitfield extract.
  void sbfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd, rn, lsb, lsb + width - 1);
  }

  // Signed extend byte.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }

  // Signed extend halfword.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }

  // Signed extend word.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }

  // Ubfm aliases.

  // Logical shift left.
  void lsl(const Register& rd, const Register& rn, unsigned shift) {
    unsigned reg_size = rd.GetSizeInBits();
    VIXL_ASSERT(shift < reg_size);
    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
  }
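
  // A worked example (arithmetic only): the 64-bit `lsl(x0, x1, 4)` maps to
  // `ubfm(x0, x1, (64 - 4) % 64, 64 - 4 - 1)`, i.e. `ubfm(x0, x1, 60, 59)`,
  // while `lsl(x0, x1, 0)` degenerates to `ubfm(x0, x1, 0, 63)`, a plain
  // 64-bit move.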

  // Logical shift right.
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Unsigned bitfield insert with zero at right.
  void ubfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Unsigned bitfield extract.
  void ubfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd, rn, lsb, lsb + width - 1);
  }

  // Unsigned extend byte.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }

  // Unsigned extend halfword.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }

  // Unsigned extend word.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }

  // Extract.
  void extr(const Register& rd,
            const Register& rn,
            const Register& rm,
            unsigned lsb);

  // Conditional select: rd = cond ? rn : rm.
  void csel(const Register& rd,
            const Register& rn,
            const Register& rm,
            Condition cond);

  // Conditional select increment: rd = cond ? rn : rm + 1.
  void csinc(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select inversion: rd = cond ? rn : ~rm.
  void csinv(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select negation: rd = cond ? rn : -rm.
  void csneg(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional set: rd = cond ? 1 : 0.
  void cset(const Register& rd, Condition cond);

  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);

  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);

  // Rotate right.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }
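
  // This works because extracting from a register pair whose halves are the
  // same register is exactly a rotate right; for example, `ror(x0, x1, 16)`
  // emits `extr(x0, x1, x1, 16)`.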

  // Conditional comparison.

  // Conditional compare negative.
  void ccmn(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // Conditional compare.
  void ccmp(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // CRC-32 checksum from byte.
  void crc32b(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from word.
  void crc32w(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from double word.
  void crc32x(const Register& wd, const Register& wn, const Register& xm);

  // CRC-32C checksum from byte.
  void crc32cb(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from half-word.
  void crc32ch(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from word.
  void crc32cw(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from double word.
  void crc32cx(const Register& wd, const Register& wn, const Register& xm);

  // Multiply.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // Negated multiply.
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // Signed long multiply: 32 x 32 -> 64-bit.
  void smull(const Register& xd, const Register& wn, const Register& wm);

  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
  void smulh(const Register& xd, const Register& xn, const Register& xm);

  // Multiply and accumulate.
  void madd(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Multiply and subtract.
  void msub(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void smaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void umaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply: 32 x 32 -> 64-bit.
  void umull(const Register& xd, const Register& wn, const Register& wm) {
    umaddl(xd, wn, wm, xzr);
  }

  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
  void umulh(const Register& xd, const Register& xn, const Register& xm);

  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void smsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void umsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit reverse.
  void rbit(const Register& rd, const Register& rn);

  // Reverse bytes in 16-bit half words.
  void rev16(const Register& rd, const Register& rn);

  // Reverse bytes in 32-bit words.
  void rev32(const Register& xd, const Register& xn);

  // Reverse bytes in 64-bit general purpose register, an alias for rev
  // [Armv8.2].
  void rev64(const Register& xd, const Register& xn) {
    VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
    rev(xd, xn);
  }

  // Reverse bytes.
  void rev(const Register& rd, const Register& rn);

  // Count leading zeroes.
  void clz(const Register& rd, const Register& rn);

  // Count leading sign bits.
  void cls(const Register& rd, const Register& rn);

  // Pointer Authentication Code for Instruction address, using key A
  // [Armv8.3].
  void pacia(const Register& xd, const Register& rn);

  // Pointer Authentication Code for Instruction address, using key A and a
  // modifier of zero [Armv8.3].
  void paciza(const Register& xd);

  // Pointer Authentication Code for Instruction address, using key A, with
  // address in x17 and modifier in x16 [Armv8.3].
  void pacia1716();

  // Pointer Authentication Code for Instruction address, using key A, with
  // address in LR and modifier in SP [Armv8.3].
  void paciasp();

  // Pointer Authentication Code for Instruction address, using key A, with
  // address in LR and a modifier of zero [Armv8.3].
  void paciaz();

  // Pointer Authentication Code for Instruction address, using key B
  // [Armv8.3].
  void pacib(const Register& xd, const Register& xn);

  // Pointer Authentication Code for Instruction address, using key B and a
  // modifier of zero [Armv8.3].
  void pacizb(const Register& xd);

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in x17 and modifier in x16 [Armv8.3].
  void pacib1716();

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in LR and modifier in SP [Armv8.3].
  void pacibsp();

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in LR and a modifier of zero [Armv8.3].
  void pacibz();

  // Pointer Authentication Code for Data address, using key A [Armv8.3].
  void pacda(const Register& xd, const Register& xn);

  // Pointer Authentication Code for Data address, using key A and a modifier
  // of zero [Armv8.3].
  void pacdza(const Register& xd);

  // Pointer Authentication Code for Data address, using key B [Armv8.3].
  void pacdb(const Register& xd, const Register& xn);

  // Pointer Authentication Code for Data address, using key B and a modifier
  // of zero [Armv8.3].
  void pacdzb(const Register& xd);

  // Pointer Authentication Code, using Generic key [Armv8.3].
  void pacga(const Register& xd, const Register& xn, const Register& xm);

  // Authenticate Instruction address, using key A [Armv8.3].
  void autia(const Register& xd, const Register& xn);

  // Authenticate Instruction address, using key A and a modifier of zero
  // [Armv8.3].
  void autiza(const Register& xd);

  // Authenticate Instruction address, using key A, with address in x17 and
  // modifier in x16 [Armv8.3].
  void autia1716();

  // Authenticate Instruction address, using key A, with address in LR and
  // modifier in SP [Armv8.3].
  void autiasp();

  // Authenticate Instruction address, using key A, with address in LR and a
  // modifier of zero [Armv8.3].
  void autiaz();

  // Authenticate Instruction address, using key B [Armv8.3].
  void autib(const Register& xd, const Register& xn);

  // Authenticate Instruction address, using key B and a modifier of zero
  // [Armv8.3].
  void autizb(const Register& xd);

  // Authenticate Instruction address, using key B, with address in x17 and
  // modifier in x16 [Armv8.3].
  void autib1716();

  // Authenticate Instruction address, using key B, with address in LR and
  // modifier in SP [Armv8.3].
  void autibsp();

  // Authenticate Instruction address, using key B, with address in LR and a
  // modifier of zero [Armv8.3].
  void autibz();

  // Authenticate Data address, using key A [Armv8.3].
  void autda(const Register& xd, const Register& xn);

  // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
  void autdza(const Register& xd);

  // Authenticate Data address, using key B [Armv8.3].
  void autdb(const Register& xd, const Register& xn);

  // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
  void autdzb(const Register& xd);

  // Strip Pointer Authentication Code of Data address [Armv8.3].
  void xpacd(const Register& xd);

  // Strip Pointer Authentication Code of Instruction address [Armv8.3].
  void xpaci(const Register& xd);

  // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
  void xpaclri();

  // Memory instructions.

  // Load integer or FP register.
  void ldr(const CPURegister& rt,
           const MemOperand& src,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Store integer or FP register.
  void str(const CPURegister& rt,
           const MemOperand& dst,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Load word with sign extension.
  void ldrsw(const Register& xt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte.
  void ldrb(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store byte.
  void strb(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte with sign extension.
  void ldrsb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word.
  void ldrh(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store half-word.
  void strh(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load integer or FP register (with unscaled offset).
  void ldur(const CPURegister& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store integer or FP register (with unscaled offset).
  void stur(const CPURegister& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load word with sign extension (and unscaled offset).
  void ldursw(const Register& xt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte (with unscaled offset).
  void ldurb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store byte (with unscaled offset).
  void sturb(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte with sign extension (and unscaled offset).
  void ldursb(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word (with unscaled offset).
  void ldurh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store half-word (with unscaled offset).
  void sturh(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word with sign extension (and unscaled offset).
  void ldursh(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load double-word with pointer authentication, using data key A and a
  // modifier of zero [Armv8.3].
  void ldraa(const Register& xt, const MemOperand& src);

  // Load double-word with pointer authentication, using data key B and a
  // modifier of zero [Armv8.3].
  void ldrab(const Register& xt, const MemOperand& src);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);

  // Load integer or FP register pair, non-temporal.
  void ldnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& src);

  // Store integer or FP register pair, non-temporal.
  void stnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& dst);

  // Load integer or FP register from literal pool.
  void ldr(const CPURegister& rt, RawLiteral* literal);

  // Load word with sign extension from literal pool.
  void ldrsw(const Register& xt, RawLiteral* literal);

  // Load integer or FP register from pc + imm19 << 2.
  void ldr(const CPURegister& rt, int64_t imm19);

  // Load word with sign extension from pc + imm19 << 2.
  void ldrsw(const Register& xt, int64_t imm19);

  // Store exclusive byte.
  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive half-word.
  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive register.
  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load exclusive byte.
  void ldxrb(const Register& rt, const MemOperand& src);

  // Load exclusive half-word.
  void ldxrh(const Register& rt, const MemOperand& src);

  // Load exclusive register.
  void ldxr(const Register& rt, const MemOperand& src);

  // Store exclusive register pair.
  void stxp(const Register& rs,
            const Register& rt,
            const Register& rt2,
            const MemOperand& dst);

  // Load exclusive register pair.
  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive register.
  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive register.
  void ldaxr(const Register& rt, const MemOperand& src);

  // Store-release exclusive register pair.
  void stlxp(const Register& rs,
             const Register& rt,
             const Register& rt2,
             const MemOperand& dst);

  // Load-acquire exclusive register pair.
  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
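
  // A minimal sketch of a load/store-exclusive retry loop built from the
  // instructions above (illustrative only; register choices are arbitrary):
  //
  //   Label retry;
  //   masm.bind(&retry);
  //   masm.ldaxr(w0, MemOperand(x1));      // Load-acquire exclusive.
  //   masm.add(w0, w0, 1);
  //   masm.stlxr(w2, w0, MemOperand(x1));  // w2 is 0 on success.
  //   masm.cbnz(w2, &retry);               // Retry if the exclusive failed.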

  // Store-release byte.
  void stlrb(const Register& rt, const MemOperand& dst);

  // Store-release half-word.
  void stlrh(const Register& rt, const MemOperand& dst);

  // Store-release register.
  void stlr(const Register& rt, const MemOperand& dst);

  // Load-acquire byte.
  void ldarb(const Register& rt, const MemOperand& src);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const MemOperand& src);

  // Load-acquire register.
  void ldar(const Register& rt, const MemOperand& src);

  // Store LORelease byte [Armv8.1].
  void stllrb(const Register& rt, const MemOperand& dst);

  // Store LORelease half-word [Armv8.1].
  void stllrh(const Register& rt, const MemOperand& dst);

  // Store LORelease register [Armv8.1].
  void stllr(const Register& rt, const MemOperand& dst);

  // Load LORelease byte [Armv8.1].
  void ldlarb(const Register& rt, const MemOperand& src);

  // Load LORelease half-word [Armv8.1].
  void ldlarh(const Register& rt, const MemOperand& src);

  // Load LORelease register [Armv8.1].
  void ldlar(const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory [Armv8.1].
  void cas(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1].
  void casa(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory, with Store-release
  // semantics [Armv8.1].
  void casl(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1].
  void casal(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory [Armv8.1].
  void casb(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory, with Load-acquire semantics [Armv8.1].
  void casab(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory, with Store-release semantics [Armv8.1].
  void caslb(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1].
  void casalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory [Armv8.1].
  void cash(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory, with Load-acquire semantics
  // [Armv8.1].
  void casah(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory, with Store-release semantics
  // [Armv8.1].
  void caslh(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1].
  void casalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
  void casp(const Register& rs,
            const Register& rs2,
            const Register& rt,
            const Register& rt2,
            const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory, with
  // Load-acquire semantics [Armv8.1].
  void caspa(const Register& rs,
             const Register& rs2,
             const Register& rt,
             const Register& rt2,
             const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory, with
  // Store-release semantics [Armv8.1].
  void caspl(const Register& rs,
             const Register& rs2,
             const Register& rt,
             const Register& rt2,
             const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory, with
  // Load-acquire and Store-release semantics [Armv8.1].
  void caspal(const Register& rs,
              const Register& rs2,
              const Register& rt,
              const Register& rt2,
              const MemOperand& src);
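
  // A minimal usage sketch (illustrative): atomically replace the value 0
  // with 1 at the address held in x1. On return, w0 holds the value that was
  // found in memory, so comparing it with the expected value reveals whether
  // the swap happened.
  //
  //   masm.movz(w0, 0);                     // Expected value.
  //   masm.movz(w2, 1);                     // Desired value.
  //   masm.casal(w0, w2, MemOperand(x1));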

  // Store-release byte (with unscaled offset) [Armv8.4].
  void stlurb(const Register& rt, const MemOperand& dst);

  // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
  void ldapurb(const Register& rt, const MemOperand& src);

  // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
  void ldapursb(const Register& rt, const MemOperand& src);

  // Store-release half-word (with unscaled offset) [Armv8.4].
  void stlurh(const Register& rt, const MemOperand& dst);

  // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
  void ldapurh(const Register& rt, const MemOperand& src);

  // Load-acquire RCpc Register signed half-word (with unscaled offset)
  // [Armv8.4].
  void ldapursh(const Register& rt, const MemOperand& src);

  // Store-release word or double-word (with unscaled offset) [Armv8.4].
  void stlur(const Register& rt, const MemOperand& dst);

  // Load-acquire RCpc Register word or double-word (with unscaled offset)
  // [Armv8.4].
  void ldapur(const Register& rt, const MemOperand& src);

  // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
  void ldapursw(const Register& xt, const MemOperand& src);

  // Atomic add on byte in memory [Armv8.1]
  void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
  void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
  void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on halfword in memory [Armv8.1]
  void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
  void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
  void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on word or doubleword in memory [Armv8.1]
  void ldadd(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on word or doubleword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldadda(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on word or doubleword in memory, with Store-release semantics
  // [Armv8.1]
  void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
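
  // A minimal usage sketch (illustrative): atomically add 1 to a 64-bit
  // counter at the address held in x1, fetching the previous value into x0.
  //
  //   masm.movz(x2, 1);
  //   masm.ldaddal(x2, x0, MemOperand(x1));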

  // Atomic bit clear on byte in memory [Armv8.1]
  void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
  void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
  void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on halfword in memory [Armv8.1]
  void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory [Armv8.1]
  void ldclr(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldclra(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldclral(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory [Armv8.1]
  void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Store-release semantics
  // [Armv8.1]
  void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory [Armv8.1]
  void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
  void ldeor(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldeora(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on byte in memory [Armv8.1]
  void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
  void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
  void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on halfword in memory [Armv8.1]
  void ldseth(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
  void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory [Armv8.1]
  void ldset(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldseta(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on byte in memory [Armv8.1]
  void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on byte in memory, with Store-release semantics
  // [Armv8.1]
  void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on byte in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on halfword in memory [Armv8.1]
  void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory [Armv8.1]
  void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, with Load-acquire
  // and Store-release semantics [Armv8.1]
  void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on byte in memory [Armv8.1]
  void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on byte in memory, with Store-release semantics
  // [Armv8.1]
  void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on byte in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on halfword in memory [Armv8.1]
  void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory [Armv8.1]
  void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, with Load-acquire
  // and Store-release semantics [Armv8.1]
  void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory [Armv8.1]
  void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, with Store-release semantics
  // [Armv8.1]
  void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory [Armv8.1]
  void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
  void ldumax(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
  // and Store-release semantics [Armv8.1]
  void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory [Armv8.1]
  void lduminb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void lduminab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, with Store-release semantics
  // [Armv8.1]
  void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory [Armv8.1]
  void lduminh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void lduminah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
  void ldumin(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldumina(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void lduminl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
  // and Store-release semantics [Armv8.1]
  void lduminal(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on byte in memory, without return. [Armv8.1]
  void staddb(const Register& rs, const MemOperand& src);

  // Atomic add on byte in memory, with Store-release semantics and without
  // return. [Armv8.1]
  void staddlb(const Register& rs, const MemOperand& src);

  // Atomic add on halfword in memory, without return. [Armv8.1]
  void staddh(const Register& rs, const MemOperand& src);

  // Atomic add on halfword in memory, with Store-release semantics and without
  // return. [Armv8.1]
  void staddlh(const Register& rs, const MemOperand& src);

  // Atomic add on word or doubleword in memory, without return. [Armv8.1]
  void stadd(const Register& rs, const MemOperand& src);

  // Atomic add on word or doubleword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void staddl(const Register& rs, const MemOperand& src);

  // Atomic bit clear on byte in memory, without return. [Armv8.1]
  void stclrb(const Register& rs, const MemOperand& src);

  // Atomic bit clear on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stclrlb(const Register& rs, const MemOperand& src);

  // Atomic bit clear on halfword in memory, without return. [Armv8.1]
  void stclrh(const Register& rs, const MemOperand& src);

  // Atomic bit clear on halfword in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stclrlh(const Register& rs, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
  void stclr(const Register& rs, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stclrl(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
  void steorb(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void steorlb(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
  void steorh(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void steorlh(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, without return.
  // [Armv8.1]
  void steor(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void steorl(const Register& rs, const MemOperand& src);

  // Atomic bit set on byte in memory, without return. [Armv8.1]
  void stsetb(const Register& rs, const MemOperand& src);

  // Atomic bit set on byte in memory, with Store-release semantics and without
  // return. [Armv8.1]
  void stsetlb(const Register& rs, const MemOperand& src);

  // Atomic bit set on halfword in memory, without return. [Armv8.1]
  void stseth(const Register& rs, const MemOperand& src);

  // Atomic bit set on halfword in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stsetlh(const Register& rs, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
  void stset(const Register& rs, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stsetl(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on byte in memory, without return. [Armv8.1]
  void stsmaxb(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stsmaxlb(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
  void stsmaxh(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void stsmaxlh(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, without return.
  // [Armv8.1]
  void stsmax(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stsmaxl(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on byte in memory, without return. [Armv8.1]
  void stsminb(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stsminlb(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
  void stsminh(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void stsminlh(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, without return.
  // [Armv8.1]
  void stsmin(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stsminl(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
  void stumaxb(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stumaxlb(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
  void stumaxh(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void stumaxlh(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, without return.
  // [Armv8.1]
  void stumax(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stumaxl(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
  void stuminb(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stuminlb(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
  void stuminh(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void stuminlh(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, without return.
  // [Armv8.1]
  void stumin(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stuminl(const Register& rs, const MemOperand& src);
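
  // Illustrative note: the st<op> forms above are the architectural aliases
  // of the corresponding ld<op> forms with the result register set to
  // xzr/wzr, for callers that do not need the value previously in memory.
  // Assuming an Assembler `masm`:
  //   masm.staddl(x0, MemOperand(x1));  // [x1] = [x1] + x0, Store-release.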

  // Swap byte in memory [Armv8.1]
  void swpb(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap byte in memory, with Load-acquire semantics [Armv8.1]
  void swpab(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap byte in memory, with Store-release semantics [Armv8.1]
  void swplb(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap byte in memory, with Load-acquire and Store-release semantics
  // [Armv8.1]
  void swpalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap halfword in memory [Armv8.1]
  void swph(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
  void swpah(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap halfword in memory, with Store-release semantics [Armv8.1]
  void swplh(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap halfword in memory, with Load-acquire and Store-release semantics
  // [Armv8.1]
  void swpalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap word or doubleword in memory [Armv8.1]
  void swp(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
  void swpa(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
  void swpl(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap word or doubleword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void swpal(const Register& rs, const Register& rt, const MemOperand& src);
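
  // Illustrative usage sketch: swp performs an atomic exchange, and swpal
  // adds acquire-release ordering. Assuming an Assembler `masm`:
  //   masm.swpal(x0, x1, MemOperand(x2));  // x1 = [x2]; [x2] = x0.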

  // Load-Acquire RCpc Register byte [Armv8.3]
  void ldaprb(const Register& rt, const MemOperand& src);

  // Load-Acquire RCpc Register halfword [Armv8.3]
  void ldaprh(const Register& rt, const MemOperand& src);

  // Load-Acquire RCpc Register word or doubleword [Armv8.3]
  void ldapr(const Register& rt, const MemOperand& src);

  // Prefetch memory.
  void prfm(PrefetchOperation op,
            const MemOperand& addr,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Prefetch memory (with unscaled offset).
  void prfum(PrefetchOperation op,
             const MemOperand& addr,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Prefetch memory in the literal pool.
  void prfm(PrefetchOperation op, RawLiteral* literal);

  // Prefetch from pc + imm19 << 2.
  void prfm(PrefetchOperation op, int64_t imm19);

  // Prefetch memory (allowing unallocated hints).
  void prfm(int op,
            const MemOperand& addr,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Prefetch memory (with unscaled offset, allowing unallocated hints).
  void prfum(int op,
             const MemOperand& addr,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Prefetch memory in the literal pool (allowing unallocated hints).
  void prfm(int op, RawLiteral* literal);

  // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
  void prfm(int op, int64_t imm19);
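
  // Illustrative usage sketch: a PrefetchOperation names the expected access
  // kind, cache level and retention policy. Assuming an Assembler `masm`:
  //   masm.prfm(PLDL1KEEP, MemOperand(x0, 64));  // Prefetch [x0 + 64] for
  //                                              // load, L1, temporal.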

  // Move instructions. The default shift of -1 indicates that the move
  // instruction will calculate an appropriate 16-bit immediate and left shift
  // such that the encoded result equals the 64-bit immediate argument. If an
  // explicit left shift is specified (0, 16, 32 or 48), the immediate must be
  // a 16-bit value.
  //
  // For movk, an explicit shift can be used to indicate which halfword should
  // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
  // halfword with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant halfword.

  // Move immediate and keep.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }

  // Move inverted immediate.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }

  // Move immediate.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }
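
  // Illustrative usage sketch of the shift behaviour described above,
  // assuming an Assembler `masm`: a constant can be materialised halfword by
  // halfword.
  //   masm.movz(x0, 0x1234, 16);  // x0 = 0x0000000012340000.
  //   masm.movk(x0, 0x5678, 0);   // x0 = 0x0000000012345678.
  // With the default shift of -1, movz(x0, 0x12340000) encodes the same
  // immediate and shift automatically.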

  // Misc instructions.

  // Monitor debug-mode breakpoint.
  void brk(int code);

  // Halting debug-mode breakpoint.
  void hlt(int code);

  // Generate exception targeting EL1.
  void svc(int code);

  // Generate undefined instruction exception.
  void udf(int code);

  // Move register to register.
  void mov(const Register& rd, const Register& rn);

  // Move inverted operand to register.
  void mvn(const Register& rd, const Operand& operand);

  // System instructions.

  // Move to register from system register.
  void mrs(const Register& xt, SystemRegister sysreg);

  // Move from register to system register.
  void msr(SystemRegister sysreg, const Register& xt);
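
  // Illustrative usage sketch: mrs/msr move between general-purpose and
  // system registers, e.g. to save and restore the condition flags. Assuming
  // an Assembler `masm`:
  //   masm.mrs(x0, NZCV);  // Read the flags.
  //   masm.msr(NZCV, x0);  // Write them back.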

  // Invert carry flag [Armv8.4].
  void cfinv();

  // Convert floating-point condition flags from alternative format to Arm
  // format [Armv8.5].
  void xaflag();

  // Convert floating-point condition flags from Arm format to alternative
  // format [Armv8.5].
  void axflag();

  // System instruction.
  void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);

  // System instruction with pre-encoded op (op1:crn:crm:op2).
  void sys(int op, const Register& xt = xzr);

  // System data cache operation.
  void dc(DataCacheOp op, const Register& rt);

  // System instruction cache operation.
  void ic(InstructionCacheOp op, const Register& rt);

  // System hint (named type).
  void hint(SystemHint code);

  // System hint (numbered type).
  void hint(int imm7);

  // Clear exclusive monitor.
  void clrex(int imm4 = 0xf);

  // Data memory barrier.
  void dmb(BarrierDomain domain, BarrierType type);

  // Data synchronization barrier.
  void dsb(BarrierDomain domain, BarrierType type);

  // Instruction synchronization barrier.
  void isb();
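
  // Illustrative usage sketch: the barriers take a domain and a type.
  // Assuming an Assembler `masm`, a full-system barrier ordering all memory
  // accesses is:
  //   masm.dmb(FullSystem, BarrierAll);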

  // Error synchronization barrier.
  void esb();

  // Conditional speculation dependency barrier.
  void csdb();

  // No-op.
  void nop() { hint(NOP); }

  // Branch target identification.
  void bti(BranchTargetIdentifier id);

  // FP and NEON instructions.

  // Move double precision immediate to FP register.
  void fmov(const VRegister& vd, double imm);

  // Move single precision immediate to FP register.
  void fmov(const VRegister& vd, float imm);

  // Move half precision immediate to FP register [Armv8.2].
  void fmov(const VRegister& vd, Float16 imm);

  // Move FP register to register.
  void fmov(const Register& rd, const VRegister& fn);

  // Move register to FP register.
  void fmov(const VRegister& vd, const Register& rn);

  // Move FP register to FP register.
  void fmov(const VRegister& vd, const VRegister& fn);

  // Move 64-bit register to top half of 128-bit FP register.
  void fmov(const VRegister& vd, int index, const Register& rn);

  // Move top half of 128-bit FP register to 64-bit register.
  void fmov(const Register& rd, const VRegister& vn, int index);
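
  // Illustrative usage sketch: fmov transfers raw bits, with no numeric
  // conversion. Assuming an Assembler `masm`:
  //   masm.fmov(d0, x0);  // Copy the bits of x0 into d0.
  //   masm.fmov(x1, d0);  // Copy them back to a general-purpose register.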

  // FP add.
  void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP subtract.
  void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP multiply.
  void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add.
  void fmadd(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             const VRegister& va);

  // FP fused multiply-subtract.
  void fmsub(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             const VRegister& va);

  // FP fused multiply-add and negate.
  void fnmadd(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              const VRegister& va);

  // FP fused multiply-subtract and negate.
  void fnmsub(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              const VRegister& va);

  // FP multiply-negate scalar.
  void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal exponent scalar.
  void frecpx(const VRegister& vd, const VRegister& vn);

  // FP divide.
  void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP maximum.
  void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP minimum.
  void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP maximum number.
  void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP minimum number.
  void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP absolute.
  void fabs(const VRegister& vd, const VRegister& vn);

  // FP negate.
  void fneg(const VRegister& vd, const VRegister& vn);

  // FP square root.
  void fsqrt(const VRegister& vd, const VRegister& vn);

  // FP round to integer, nearest with ties to away.
  void frinta(const VRegister& vd, const VRegister& vn);

  // FP round to integer, implicit rounding.
  void frinti(const VRegister& vd, const VRegister& vn);

  // FP round to integer, toward minus infinity.
  void frintm(const VRegister& vd, const VRegister& vn);

  // FP round to integer, nearest with ties to even.
  void frintn(const VRegister& vd, const VRegister& vn);

  // FP round to integer, toward plus infinity.
  void frintp(const VRegister& vd, const VRegister& vn);

  // FP round to integer, exact, implicit rounding.
  void frintx(const VRegister& vd, const VRegister& vn);

  // FP round to integer, towards zero.
  void frintz(const VRegister& vd, const VRegister& vn);

  // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
  void frint32x(const VRegister& vd, const VRegister& vn);

  // FP round to 32-bit integer, towards zero [Armv8.5].
  void frint32z(const VRegister& vd, const VRegister& vn);

  // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
  void frint64x(const VRegister& vd, const VRegister& vn);

  // FP round to 64-bit integer, towards zero [Armv8.5].
  void frint64z(const VRegister& vd, const VRegister& vn);

  void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);

  void FPCompareMacro(const VRegister& vn,
                      const VRegister& vm,
                      FPTrapFlags trap);

  // FP compare registers.
  void fcmp(const VRegister& vn, const VRegister& vm);

  // FP compare immediate.
  void fcmp(const VRegister& vn, double value);

  void FPCCompareMacro(const VRegister& vn,
                       const VRegister& vm,
                       StatusFlags nzcv,
                       Condition cond,
                       FPTrapFlags trap);

  // FP conditional compare.
  void fccmp(const VRegister& vn,
             const VRegister& vm,
             StatusFlags nzcv,
             Condition cond);

  // FP signaling compare registers.
  void fcmpe(const VRegister& vn, const VRegister& vm);

  // FP signaling compare immediate.
  void fcmpe(const VRegister& vn, double value);

  // FP conditional signaling compare.
  void fccmpe(const VRegister& vn,
              const VRegister& vm,
              StatusFlags nzcv,
              Condition cond);

  // FP conditional select.
  void fcsel(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             Condition cond);

  // Common FP Convert functions.
  void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
  void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);

  // FP convert between precisions.
  void fcvt(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision.
  void fcvtl(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision (second part).
  void fcvtl2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision.
  void fcvtn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision (second part).
  void fcvtn2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd.
  void fcvtxn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd (second part).
  void fcvtxn2(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
  void fcvtas(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away.
  void fcvtau(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
  void fcvtas(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away.
  void fcvtau(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, round towards -infinity.
  void fcvtms(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards -infinity.
  void fcvtmu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards -infinity.
  void fcvtms(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards -infinity.
  void fcvtmu(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to even.
  void fcvtns(const Register& rd, const VRegister& vn);

  // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
  void fjcvtzs(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to even.
  void fcvtnu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to even.
  void fcvtns(const VRegister& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to even.
  void fcvtnu(const VRegister& rd, const VRegister& vn);

  // FP convert to signed integer or fixed-point, round towards zero.
  void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero.
  void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer or fixed-point, round towards zero.
  void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero.
  void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer, round towards +infinity.
  void fcvtps(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  void fcvtpu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards +infinity.
  void fcvtps(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  void fcvtpu(const VRegister& vd, const VRegister& vn);

  // Convert signed integer or fixed point to FP.
  void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);

  // Convert unsigned integer or fixed point to FP.
  void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);

  // Convert signed integer or fixed-point to FP.
  void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
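
  // Illustrative usage sketch: a non-zero fbits selects a fixed-point
  // interpretation. Assuming an Assembler `masm`, converting a Q16.16
  // fixed-point value held in w0 to double:
  //   masm.scvtf(d0, w0, 16);  // d0 = (double)w0 / 65536.0.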

  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving sub.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving sub.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pairwise.
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements scalar.
  void addp(const VRegister& vd, const VRegister& vn);

  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply-sub by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-sub by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-sub by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-sub by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating double long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating double long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-sub by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-sub by element (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare equal to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);

  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise and.
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or immediate.
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register.
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise orn.
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise eor.
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate.
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Vector move immediate.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);
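
  // Illustrative usage sketch: movi/mvni replicate an encodable immediate
  // (optionally shifted) across every destination lane. Assuming an
  // Assembler `masm`:
  //   masm.movi(v0.V4S(), 0xff, LSL, 8);  // Each S lane = 0x0000ff00.
  //   masm.mvni(v1.V4S(), 0xff, LSL, 8);  // Each S lane = 0xffff00ff.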

  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not.
  void not_(const VRegister& vd, const VRegister& vn);

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);

  // Extract vector from pair of vectors.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar.
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element.
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);

  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);
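
  // Illustrative usage sketch: the structure loads de-interleave elements
  // across their destination registers, which must be consecutive. Assuming
  // an Assembler `masm`, interleaved (x, y) pairs at [x0] can be split with:
  //   masm.ld2(v0.V4S(), v1.V4S(), MemOperand(x0));  // v0 = xs, v1 = ys.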
2915
2916 // Count leading sign bits.
2917 void cls(const VRegister& vd, const VRegister& vn);
2918
2919 // Count leading zero bits (vector).
2920 void clz(const VRegister& vd, const VRegister& vn);
2921
2922 // Population count per byte.
2923 void cnt(const VRegister& vd, const VRegister& vn);
2924
2925 // Reverse bit order.
2926 void rbit(const VRegister& vd, const VRegister& vn);
2927
2928 // Reverse elements in 16-bit halfwords.
2929 void rev16(const VRegister& vd, const VRegister& vn);
2930
2931 // Reverse elements in 32-bit words.
2932 void rev32(const VRegister& vd, const VRegister& vn);
2933
2934 // Reverse elements in 64-bit doublewords.
2935 void rev64(const VRegister& vd, const VRegister& vn);
2936
2937 // Unsigned reciprocal square root estimate.
2938 void ursqrte(const VRegister& vd, const VRegister& vn);
2939
2940 // Unsigned reciprocal estimate.
2941 void urecpe(const VRegister& vd, const VRegister& vn);
2942
2943 // Signed pairwise long add.
2944 void saddlp(const VRegister& vd, const VRegister& vn);
2945
2946 // Unsigned pairwise long add.
2947 void uaddlp(const VRegister& vd, const VRegister& vn);
2948
2949 // Signed pairwise long add and accumulate.
2950 void sadalp(const VRegister& vd, const VRegister& vn);
2951
2952 // Unsigned pairwise long add and accumulate.
2953 void uadalp(const VRegister& vd, const VRegister& vn);
2954
2955 // Shift left by immediate.
2956 void shl(const VRegister& vd, const VRegister& vn, int shift);
2957
2958 // Signed saturating shift left by immediate.
2959 void sqshl(const VRegister& vd, const VRegister& vn, int shift);
2960
2961 // Signed saturating shift left unsigned by immediate.
2962 void sqshlu(const VRegister& vd, const VRegister& vn, int shift);
2963
2964 // Unsigned saturating shift left by immediate.
2965 void uqshl(const VRegister& vd, const VRegister& vn, int shift);
2966
2967 // Signed shift left long by immediate.
2968 void sshll(const VRegister& vd, const VRegister& vn, int shift);
2969
2970 // Signed shift left long by immediate (second part).
2971 void sshll2(const VRegister& vd, const VRegister& vn, int shift);
2972
2973 // Signed extend long.
2974 void sxtl(const VRegister& vd, const VRegister& vn);
2975
2976 // Signed extend long (second part).
2977 void sxtl2(const VRegister& vd, const VRegister& vn);
2978
2979 // Unsigned shift left long by immediate.
2980 void ushll(const VRegister& vd, const VRegister& vn, int shift);
2981
2982 // Unsigned shift left long by immediate (second part).
2983 void ushll2(const VRegister& vd, const VRegister& vn, int shift);
2984
2985 // Shift left long by element size.
2986 void shll(const VRegister& vd, const VRegister& vn, int shift);
2987
2988 // Shift left long by element size (second part).
2989 void shll2(const VRegister& vd, const VRegister& vn, int shift);
2990
2991 // Unsigned extend long.
2992 void uxtl(const VRegister& vd, const VRegister& vn);
2993
2994 // Unsigned extend long (second part).
2995 void uxtl2(const VRegister& vd, const VRegister& vn);
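
  // The extend-long forms above are aliases of the corresponding
  // shift-left-long instructions with a zero shift amount. A minimal sketch
  // (assuming an Assembler `masm` with `#define __ masm.`; both names are
  // illustrative, not part of this header):
  //
  //   __ sxtl(v0.V8H(), v1.V8B());      // Widen eight signed bytes...
  //   __ sshll(v0.V8H(), v1.V8B(), 0);  // ...identically to this.
  //   __ uxtl2(v2.V4S(), v3.V8H());     // Widen the upper four halfwords.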

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);

  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);

  // One-element structure store from one register.
  void st1(const VRegister& vt, const MemOperand& src);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure store from three registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& src);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);
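
  // The structure loads and stores above (ld1-ld4, st1-st4) use
  // consecutively-numbered registers. A minimal de-interleave/re-interleave
  // sketch (assuming `masm`/`__` as in the earlier sketch; illustrative
  // only):
  //
  //   // Split 16 RGB byte triples into one register per channel.
  //   __ ld3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x0));
  //   // Store them back, re-interleaving the three channels.
  //   __ st3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x1));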

  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum.
  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd, const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd, const VRegister& vn);

  // Transpose vectors (primary).
  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Transpose vectors (secondary).
  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (primary).
  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (secondary).
  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (primary).
  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (secondary).
  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed shift right by immediate.
  void sshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (second part).
  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
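
  // The narrowing shifts above write a half-width result; the "2" variants
  // fill the upper half of the destination. A minimal sketch narrowing eight
  // words to eight halfwords (assuming `masm`/`__` as in the earlier
  // sketches; illustrative only):
  //
  //   __ shrn(v0.V4H(), v1.V4S(), 8);   // Lower four lanes of the result.
  //   __ shrn2(v0.V8H(), v2.V4S(), 8);  // Upper four lanes of the result.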

  // FP reciprocal step.
  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root step.
  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long.
  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long.
  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed dot product [Armv8.2].
  void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply accumulate returning high
  // half [Armv8.1].
  void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned dot product [Armv8.2].
  void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply subtract returning high half
  // [Armv8.1].
  void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply element returning high half.
  void sqdmulh(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating rounding doubling multiply element returning high half.
  void sqrdmulh(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed dot product by element [Armv8.2].
  void sdot(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // Signed saturating rounding doubling multiply accumulate element returning
  // high half [Armv8.1].
  void sqrdmlah(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Unsigned dot product by element [Armv8.2].
  void udot(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // Signed saturating rounding doubling multiply subtract element returning
  // high half [Armv8.1].
  void sqrdmlsh(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);
  // Unsigned long multiply.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply (second part).
  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half.
  void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half (second part).
  void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half.
  void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half (second part).
  void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half.
  void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half (second part).
  void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half.
  void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half (second part).
  void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP vector multiply accumulate.
  void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add long to accumulator.
  void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add long to accumulator (second part).
  void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add long to accumulator by element.
  void fmlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP fused multiply-add long to accumulator by element (second part).
  void fmlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // FP vector multiply subtract.
  void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-subtract long to accumulator.
  void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-subtract long to accumulator (second part).
  void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-subtract long to accumulator by element.
  void fmlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP fused multiply-subtract long to accumulator by element (second part).
  void fmlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // FP vector multiply extended.
  void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP absolute greater than or equal.
  void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP absolute greater than.
  void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP multiply by element.
  void fmul(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-add to accumulator by element.
  void fmla(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-sub from accumulator by element.
  void fmls(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP multiply extended by element.
  void fmulx(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP compare equal.
  void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than.
  void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than or equal.
  void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP compare equal to zero.
  void fcmeq(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than zero.
  void fcmgt(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than or equal to zero.
  void fcmge(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than or equal to zero.
  void fcmle(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than zero.
  void fcmlt(const VRegister& vd, const VRegister& vn, double imm);

  // FP absolute difference.
  void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add vector.
  void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add scalar.
  void faddp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum vector.
  void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise maximum scalar.
  void fmaxp(const VRegister& vd, const VRegister& vn);

  // FP pairwise minimum vector.
  void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum scalar.
  void fminp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum number vector.
  void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise maximum number scalar.
  void fmaxnmp(const VRegister& vd, const VRegister& vn);

  // FP pairwise minimum number vector.
  void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum number scalar.
  void fminnmp(const VRegister& vd, const VRegister& vn);

  // The Armv8.3 complex-number instructions below are partial/helper
  // operations; they must be used in sequence to perform a full
  // complex-number operation (see the sketch after fcadd below).

  // FP complex multiply accumulate (by element) [Armv8.3].
  void fcmla(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index,
             int rot);

  // FP complex multiply accumulate [Armv8.3].
  void fcmla(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int rot);

  // FP complex add [Armv8.3].
  void fcadd(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int rot);
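
  // A full complex multiply-accumulate is built from two of the partial
  // operations above. A minimal sketch for interleaved (real, imag) pairs
  // (assuming `masm`/`__` as in the earlier sketches; illustrative only):
  //
  //   __ fcmla(v0.V4S(), v1.V4S(), v2.V4S(), 0);   // Terms using the real
  //                                                // parts of v1.
  //   __ fcmla(v0.V4S(), v1.V4S(), v2.V4S(), 90);  // Terms using the
  //                                                // imaginary parts of v1.
  //   // Together: v0 += v1 * v2, element-wise over complex pairs.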

  // Scalable Vector Extensions.

  // Absolute value (predicated).
  void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Add vectors (predicated).
  void add(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Add vectors (unpredicated).
  void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Add immediate (unpredicated).
  void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);

  // Add multiple of predicate register size to scalar register.
  void addpl(const Register& xd, const Register& xn, int imm6);

  // Add multiple of vector register size to scalar register.
  void addvl(const Register& xd, const Register& xn, int imm6);

  // Compute vector address.
  void adr(const ZRegister& zd, const SVEMemOperand& addr);

  // Bitwise AND predicates.
  void and_(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise AND vectors (predicated).
  void and_(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Bitwise AND with immediate (unpredicated).
  void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise AND vectors (unpredicated).
  void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise AND predicates.
  void ands(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise AND reduction to scalar.
  void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Arithmetic shift right by immediate (predicated).
  void asr(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           int shift);

  // Arithmetic shift right by 64-bit wide elements (predicated).
  void asr(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Arithmetic shift right by immediate (unpredicated).
  void asr(const ZRegister& zd, const ZRegister& zn, int shift);

  // Arithmetic shift right by 64-bit wide elements (unpredicated).
  void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Arithmetic shift right for divide by immediate (predicated).
  void asrd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            int shift);

  // Reversed arithmetic shift right by vector (predicated).
  void asrr(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Bitwise clear predicates.
  void bic(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise clear vectors (predicated).
  void bic(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Bitwise clear bits using immediate (unpredicated).
  void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise clear vectors (unpredicated).
  void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise clear predicates.
  void bics(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Break after first true condition.
  void brka(const PRegisterWithLaneSize& pd,
            const PRegister& pg,
            const PRegisterWithLaneSize& pn);

  // Break after first true condition.
  void brkas(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn);

  // Break before first true condition.
  void brkb(const PRegisterWithLaneSize& pd,
            const PRegister& pg,
            const PRegisterWithLaneSize& pn);

  // Break before first true condition.
  void brkbs(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn);

  // Propagate break to next partition.
  void brkn(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Propagate break to next partition.
  void brkns(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);

  // Break after first true condition, propagating from previous partition.
  void brkpa(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);

  // Break after first true condition, propagating from previous partition.
  void brkpas(const PRegisterWithLaneSize& pd,
              const PRegisterZ& pg,
              const PRegisterWithLaneSize& pn,
              const PRegisterWithLaneSize& pm);

  // Break before first true condition, propagating from previous partition.
  void brkpb(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);

  // Break before first true condition, propagating from previous partition.
  void brkpbs(const PRegisterWithLaneSize& pd,
              const PRegisterZ& pg,
              const PRegisterWithLaneSize& pn,
              const PRegisterWithLaneSize& pm);

  // Conditionally extract element after last to general-purpose register.
  void clasta(const Register& rd,
              const PRegister& pg,
              const Register& rn,
              const ZRegister& zm);

  // Conditionally extract element after last to SIMD&FP scalar register.
  void clasta(const VRegister& vd,
              const PRegister& pg,
              const VRegister& vn,
              const ZRegister& zm);

  // Conditionally extract element after last to vector register.
  void clasta(const ZRegister& zd,
              const PRegister& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Conditionally extract last element to general-purpose register.
  void clastb(const Register& rd,
              const PRegister& pg,
              const Register& rn,
              const ZRegister& zm);

  // Conditionally extract last element to SIMD&FP scalar register.
  void clastb(const VRegister& vd,
              const PRegister& pg,
              const VRegister& vn,
              const ZRegister& zm);

  // Conditionally extract last element to vector register.
  void clastb(const ZRegister& zd,
              const PRegister& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Count leading sign bits (predicated).
  void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Count leading zero bits (predicated).
  void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Compare vectors (predicated), with the comparison selected by cond.
  void cmp(Condition cond,
           const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Compare vector to 64-bit wide elements.
  void cmpeq(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmpeq(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare vector to 64-bit wide elements.
  void cmpge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmpge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare vector to 64-bit wide elements.
  void cmpgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmpgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare vector to 64-bit wide elements.
  void cmphi(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmphi(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             unsigned imm7);

  // Compare vector to 64-bit wide elements.
  void cmphs(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmphs(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             unsigned imm7);

  // Compare vector to 64-bit wide elements.
  void cmple(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmple(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare vector to 64-bit wide elements.
  void cmplo(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmplo(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             unsigned imm7);

  // Compare vector to 64-bit wide elements.
  void cmpls(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmpls(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             unsigned imm7);

  // Compare vector to 64-bit wide elements.
  void cmplt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmplt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare vector to 64-bit wide elements.
  void cmpne(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare vector to immediate.
  void cmpne(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Logically invert boolean condition in vector (predicated).
  void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Count non-zero bits (predicated).
  void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Set scalar to multiple of predicate constraint element count.
  void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);

  // Set scalar to multiple of predicate constraint element count.
  void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);

  // Set scalar to multiple of predicate constraint element count.
  void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);

  // Set scalar to active predicate element count.
  void cntp(const Register& xd,
            const PRegister& pg,
            const PRegisterWithLaneSize& pn);

  // Set scalar to multiple of predicate constraint element count.
  void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
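
  // A minimal element-count sketch (assuming `masm`/`__` as in the earlier
  // sketches; illustrative only):
  //
  //   __ cntb(x0);                // x0 = byte lanes per vector (VL in bytes).
  //   __ cntd(x1, SVE_ALL, 2);    // x1 = 2 * doubleword lanes per vector.
  //   __ cntp(x2, p0, p1.VnB());  // x2 = active byte lanes of p1, as
  //                               // governed by p0.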

  // Shuffle active elements of vector to the right and fill with zero.
  void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);

  // Copy signed integer immediate to vector elements (predicated).
  void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);

  // Copy general-purpose register to vector elements (predicated).
  void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);

  // Copy SIMD&FP scalar register to vector elements (predicated).
  void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);

  // Compare and terminate loop.
  void ctermeq(const Register& rn, const Register& rm);

  // Compare and terminate loop.
  void ctermne(const Register& rn, const Register& rm);

  // Decrement scalar by multiple of predicate constraint element count.
  void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement scalar by multiple of predicate constraint element count.
  void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement vector by multiple of predicate constraint element count.
  void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement scalar by multiple of predicate constraint element count.
  void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement vector by multiple of predicate constraint element count.
  void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement scalar by active predicate element count.
  void decp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Decrement vector by active predicate element count.
  void decp(const ZRegister& zdn, const PRegister& pg);

  // Decrement scalar by multiple of predicate constraint element count.
  void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement vector by multiple of predicate constraint element count.
  void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Broadcast general-purpose register to vector elements (unpredicated).
  void dup(const ZRegister& zd, const Register& xn);

  // Broadcast indexed element to vector (unpredicated).
  void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);

  // As for movz/movk/movn, if the default shift of -1 is specified to dup,
  // the assembler will pick an immediate and left shift whose combination is
  // equivalent to the immediate argument (see the sketch below). If an
  // explicit left shift is specified (0 or 8), the immediate must be a
  // signed 8-bit integer.

  // Broadcast signed immediate to vector elements (unpredicated).
  void dup(const ZRegister& zd, int imm8, int shift = -1);
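
  // A minimal sketch of the shift selection described above (assuming
  // `masm`/`__` as in the earlier sketches; illustrative only):
  //
  //   __ dup(z0.VnH(), 0x1100);   // Encoded as dup(z0.VnH(), 0x11, 8).
  //   __ dup(z1.VnH(), 0x11, 8);  // Explicit shift; same effect.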

  // Broadcast logical bitmask immediate to vector (unpredicated).
  void dupm(const ZRegister& zd, uint64_t imm);

  // Bitwise exclusive OR with inverted immediate (unpredicated).
  void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise exclusive OR predicates.
  void eor(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise exclusive OR vectors (predicated).
  void eor(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Bitwise exclusive OR with immediate (unpredicated).
  void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise exclusive OR vectors (unpredicated).
  void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise exclusive OR predicates.
  void eors(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise XOR reduction to scalar.
  void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Extract vector from pair of vectors.
  void ext(const ZRegister& zd,
           const ZRegister& zn,
           const ZRegister& zm,
           unsigned offset);

  // Floating-point absolute difference (predicated).
  void fabd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point absolute value (predicated).
  void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point absolute compare vectors.
  void facge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point absolute compare vectors.
  void facgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point add immediate (predicated).
  void fadd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point add vector (predicated).
  void fadd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point add vector (unpredicated).
  void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point add strictly-ordered reduction, accumulating in scalar.
  void fadda(const VRegister& vd,
             const PRegister& pg,
             const VRegister& vn,
             const ZRegister& zm);

  // Floating-point add recursive reduction to scalar.
  void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point complex add with rotate (predicated).
  void fcadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm,
             int rot);

  // Floating-point compare vector with zero.
  void fcmeq(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vectors.
  void fcmeq(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point compare vector with zero.
  void fcmge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vectors.
  void fcmge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point compare vector with zero.
  void fcmgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vectors.
  void fcmgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point complex multiply-add with rotate (predicated).
  void fcmla(const ZRegister& zda,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm,
             int rot);

  // Floating-point complex multiply-add by indexed values with rotate.
  void fcmla(const ZRegister& zda,
             const ZRegister& zn,
             const ZRegister& zm,
             int index,
             int rot);

  // Floating-point compare vector with zero.
  void fcmle(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vector with zero.
  void fcmlt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vector with zero.
  void fcmne(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vectors.
  void fcmne(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point compare vectors.
  void fcmuo(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Copy floating-point immediate to vector elements (predicated).
  void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);

  // Copy half-precision floating-point immediate to vector elements
  // (predicated).
  void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
    fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
  }

  // Floating-point convert precision (predicated).
  void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point convert to signed integer, rounding toward zero
  // (predicated).
  void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point convert to unsigned integer, rounding toward zero
  // (predicated).
  void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point divide by vector (predicated).
  void fdiv(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point reversed divide by vector (predicated).
  void fdivr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Broadcast floating-point immediate to vector elements.
  void fdup(const ZRegister& zd, double imm);

  // Broadcast half-precision floating-point immediate to vector elements.
  void fdup(const ZRegister& zd, Float16 imm) {
    fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
  }
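
  // A minimal half-precision broadcast sketch; the Float16 overloads above
  // simply convert the immediate to double and defer to the double forms
  // (assuming `masm`/`__` as in the earlier sketches; illustrative only):
  //
  //   __ fdup(z0.VnH(), Float16(1.0));                // All lanes = 1.0.
  //   __ fcpy(z1.VnH(), p0.Merging(), Float16(0.5));  // Active lanes = 0.5.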

  // Floating-point exponential accelerator.
  void fexpa(const ZRegister& zd, const ZRegister& zn);

  // Floating-point fused multiply-add vectors (predicated), writing
  // multiplicand [Zdn = Za + Zdn * Zm].
  void fmad(const ZRegister& zdn,
            const PRegisterM& pg,
            const ZRegister& zm,
            const ZRegister& za);

  // Floating-point maximum with immediate (predicated).
  void fmax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point maximum (predicated).
  void fmax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point maximum number with immediate (predicated).
  void fmaxnm(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              double imm);

  // Floating-point maximum number (predicated).
  void fmaxnm(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Floating-point maximum number recursive reduction to scalar.
  void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point maximum recursive reduction to scalar.
  void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point minimum with immediate (predicated).
  void fmin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point minimum (predicated).
  void fmin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point minimum number with immediate (predicated).
  void fminnm(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              double imm);

  // Floating-point minimum number (predicated).
  void fminnm(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Floating-point minimum number recursive reduction to scalar.
  void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point minimum recursive reduction to scalar.
  void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point fused multiply-add vectors (predicated), writing addend
  // [Zda = Zda + Zn * Zm].
  void fmla(const ZRegister& zda,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point fused multiply-add by indexed elements
  // (Zda = Zda + Zn * Zm[indexed]).
  void fmla(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);

  // Floating-point fused multiply-subtract vectors (predicated), writing
  // addend [Zda = Zda + -Zn * Zm].
  void fmls(const ZRegister& zda,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point fused multiply-subtract by indexed elements
  // (Zda = Zda + -Zn * Zm[indexed]).
  void fmls(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);

  // Move 8-bit floating-point immediate to vector elements (unpredicated).
  void fmov(const ZRegister& zd, double imm);

  // Move 8-bit floating-point immediate to vector elements (predicated).
  void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);

  // Floating-point fused multiply-subtract vectors (predicated), writing
  // multiplicand [Zdn = Za + -Zdn * Zm].
  void fmsb(const ZRegister& zdn,
            const PRegisterM& pg,
            const ZRegister& zm,
            const ZRegister& za);
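
  // The fused multiply-add forms above differ only in which operand is
  // overwritten: fmla/fmls write the addend, fmad/fmsb write a multiplicand.
  // A minimal sketch (assuming `masm`/`__` as in the earlier sketches;
  // illustrative only):
  //
  //   __ fmla(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());  // z0 += z1 * z2
  //   __ fmad(z3.VnS(), p0.Merging(), z4.VnS(), z5.VnS());  // z3 = z5 + z3 * z4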

  // Floating-point multiply by immediate (predicated).
  void fmul(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point multiply vectors (predicated).
  void fmul(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point multiply by indexed elements.
  void fmul(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            unsigned index);

  // Floating-point multiply vectors (unpredicated).
  void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point multiply-extended vectors (predicated).
  void fmulx(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point negate (predicated).
  void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point negated fused multiply-add vectors (predicated), writing
  // multiplicand [Zdn = -Za + -Zdn * Zm].
  void fnmad(const ZRegister& zdn,
             const PRegisterM& pg,
             const ZRegister& zm,
             const ZRegister& za);

  // Floating-point negated fused multiply-add vectors (predicated), writing
  // addend [Zda = -Zda + -Zn * Zm].
  void fnmla(const ZRegister& zda,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point negated fused multiply-subtract vectors (predicated),
  // writing addend [Zda = -Zda + Zn * Zm].
  void fnmls(const ZRegister& zda,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point negated fused multiply-subtract vectors (predicated),
  // writing multiplicand [Zdn = -Za + Zdn * Zm].
  void fnmsb(const ZRegister& zdn,
             const PRegisterM& pg,
             const ZRegister& zm,
             const ZRegister& za);

  // Floating-point reciprocal estimate (unpredicated).
  void frecpe(const ZRegister& zd, const ZRegister& zn);

  // Floating-point reciprocal step (unpredicated).
  void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point reciprocal exponent (predicated).
  void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, to nearest with ties to away
  // (predicated).
  void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, using the current rounding mode
  // (predicated).
  void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, toward minus infinity
  // (predicated).
  void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, to nearest with ties to even
  // (predicated).
  void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, toward plus infinity
  // (predicated).
  void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, exact, using the current rounding
  // mode (predicated).
  void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, toward zero (predicated).
  void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point reciprocal square root estimate (unpredicated).
  void frsqrte(const ZRegister& zd, const ZRegister& zn);

  // Floating-point reciprocal square root step (unpredicated).
  void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point adjust exponent by vector (predicated).
  void fscale(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Floating-point square root (predicated).
  void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point subtract immediate (predicated).
  void fsub(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point subtract vectors (predicated).
  void fsub(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point subtract vectors (unpredicated).
  void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point reversed subtract from immediate (predicated).
  void fsubr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             double imm);

  // Floating-point reversed subtract vectors (predicated).
  void fsubr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point trigonometric multiply-add coefficient.
  void ftmad(const ZRegister& zd,
             const ZRegister& zn,
             const ZRegister& zm,
             int imm3);

  // Floating-point trigonometric starting value.
  void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point trigonometric select coefficient.
  void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Increment scalar by multiple of predicate constraint element count.
  void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment scalar by multiple of predicate constraint element count.
  void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment vector by multiple of predicate constraint element count.
  void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment scalar by multiple of predicate constraint element count.
  void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment vector by multiple of predicate constraint element count.
  void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment scalar by active predicate element count.
  void incp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Increment vector by active predicate element count.
  void incp(const ZRegister& zdn, const PRegister& pg);

  // Increment scalar by multiple of predicate constraint element count.
  void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment vector by multiple of predicate constraint element count.
  void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Create index starting from and incremented by immediate.
  void index(const ZRegister& zd, int start, int step);

  // Create index starting from and incremented by general-purpose register.
  void index(const ZRegister& zd, const Register& rn, const Register& rm);

  // Create index starting from general-purpose register and incremented by
  // immediate.
  void index(const ZRegister& zd, const Register& rn, int imm5);

  // Create index starting from immediate and incremented by general-purpose
  // register.
  void index(const ZRegister& zd, int imm5, const Register& rm);
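
  // A minimal index sketch (assuming `masm`/`__` as in the earlier sketches;
  // illustrative only):
  //
  //   __ index(z0.VnS(), 0, 1);   // z0.s = {0, 1, 2, 3, ...}
  //   __ index(z1.VnD(), x0, 4);  // z1.d = {x0, x0 + 4, x0 + 8, ...}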

  // Insert general-purpose register in shifted vector.
  void insr(const ZRegister& zdn, const Register& rm);

  // Insert SIMD&FP scalar register in shifted vector.
  void insr(const ZRegister& zdn, const VRegister& vm);

  // Extract element after last to general-purpose register.
  void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);

  // Extract element after last to SIMD&FP scalar register.
  void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Extract last element to general-purpose register.
  void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);

  // Extract last element to SIMD&FP scalar register.
  void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Contiguous/gather load bytes to vector.
  void ld1b(const ZRegister& zt,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous/gather load halfwords to vector.
  void ld1h(const ZRegister& zt,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous/gather load words to vector.
  void ld1w(const ZRegister& zt,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous/gather load doublewords to vector.
  void ld1d(const ZRegister& zt,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);
4544
4545 // TODO: Merge other loads into the SVEMemOperand versions.
4546
4547 // Load and broadcast unsigned byte to vector.
4548 void ld1rb(const ZRegister& zt,
4549 const PRegisterZ& pg,
4550 const SVEMemOperand& addr);
4551
4552 // Load and broadcast unsigned halfword to vector.
4553 void ld1rh(const ZRegister& zt,
4554 const PRegisterZ& pg,
4555 const SVEMemOperand& addr);
4556
4557 // Load and broadcast unsigned word to vector.
4558 void ld1rw(const ZRegister& zt,
4559 const PRegisterZ& pg,
4560 const SVEMemOperand& addr);
4561
4562 // Load and broadcast doubleword to vector.
4563 void ld1rd(const ZRegister& zt,
4564 const PRegisterZ& pg,
4565 const SVEMemOperand& addr);
4566
4567 // Contiguous load and replicate sixteen bytes.
4568 void ld1rqb(const ZRegister& zt,
4569 const PRegisterZ& pg,
4570 const SVEMemOperand& addr);
4571
4572 // Contiguous load and replicate eight halfwords.
4573 void ld1rqh(const ZRegister& zt,
4574 const PRegisterZ& pg,
4575 const SVEMemOperand& addr);
4576
4577 // Contiguous load and replicate four words.
4578 void ld1rqw(const ZRegister& zt,
4579 const PRegisterZ& pg,
4580 const SVEMemOperand& addr);
4581
4582 // Contiguous load and replicate two doublewords.
4583 void ld1rqd(const ZRegister& zt,
4584 const PRegisterZ& pg,
4585 const SVEMemOperand& addr);
4586
4587 // Load and broadcast signed byte to vector.
4588 void ld1rsb(const ZRegister& zt,
4589 const PRegisterZ& pg,
4590 const SVEMemOperand& addr);
4591
4592 // Load and broadcast signed halfword to vector.
4593 void ld1rsh(const ZRegister& zt,
4594 const PRegisterZ& pg,
4595 const SVEMemOperand& addr);
4596
4597 // Load and broadcast signed word to vector.
4598 void ld1rsw(const ZRegister& zt,
4599 const PRegisterZ& pg,
4600 const SVEMemOperand& addr);
4601
4602 // Contiguous/gather load signed bytes to vector.
4603 void ld1sb(const ZRegister& zt,
4604 const PRegisterZ& pg,
4605 const SVEMemOperand& addr);
4606
4607 // Contiguous/gather load signed halfwords to vector.
4608 void ld1sh(const ZRegister& zt,
4609 const PRegisterZ& pg,
4610 const SVEMemOperand& addr);
4611
4612 // Contiguous/gather load signed words to vector.
4613 void ld1sw(const ZRegister& zt,
4614 const PRegisterZ& pg,
4615 const SVEMemOperand& addr);
4616
4617 // TODO: Merge other loads into the SVEMemOperand versions.
4618
4619 // Contiguous load two-byte structures to two vectors.
4620 void ld2b(const ZRegister& zt1,
4621 const ZRegister& zt2,
4622 const PRegisterZ& pg,
4623 const SVEMemOperand& addr);
4624
4625 // Contiguous load two-halfword structures to two vectors.
4626 void ld2h(const ZRegister& zt1,
4627 const ZRegister& zt2,
4628 const PRegisterZ& pg,
4629 const SVEMemOperand& addr);
4630
4631 // Contiguous load two-word structures to two vectors.
4632 void ld2w(const ZRegister& zt1,
4633 const ZRegister& zt2,
4634 const PRegisterZ& pg,
4635 const SVEMemOperand& addr);
4636
4637 // Contiguous load two-doubleword structures to two vectors.
4638 void ld2d(const ZRegister& zt1,
4639 const ZRegister& zt2,
4640 const PRegisterZ& pg,
4641 const SVEMemOperand& addr);
4642
4643 // Contiguous load three-byte structures to three vectors.
4644 void ld3b(const ZRegister& zt1,
4645 const ZRegister& zt2,
4646 const ZRegister& zt3,
4647 const PRegisterZ& pg,
4648 const SVEMemOperand& addr);
4649
4650 // Contiguous load three-halfword structures to three vectors.
4651 void ld3h(const ZRegister& zt1,
4652 const ZRegister& zt2,
4653 const ZRegister& zt3,
4654 const PRegisterZ& pg,
4655 const SVEMemOperand& addr);
4656
4657 // Contiguous load three-word structures to three vectors.
4658 void ld3w(const ZRegister& zt1,
4659 const ZRegister& zt2,
4660 const ZRegister& zt3,
4661 const PRegisterZ& pg,
4662 const SVEMemOperand& addr);
4663
4664 // Contiguous load three-doubleword structures to three vectors.
4665 void ld3d(const ZRegister& zt1,
4666 const ZRegister& zt2,
4667 const ZRegister& zt3,
4668 const PRegisterZ& pg,
4669 const SVEMemOperand& addr);
4670
4671 // Contiguous load four-byte structures to four vectors.
4672 void ld4b(const ZRegister& zt1,
4673 const ZRegister& zt2,
4674 const ZRegister& zt3,
4675 const ZRegister& zt4,
4676 const PRegisterZ& pg,
4677 const SVEMemOperand& addr);
4678
4679 // Contiguous load four-halfword structures to four vectors.
4680 void ld4h(const ZRegister& zt1,
4681 const ZRegister& zt2,
4682 const ZRegister& zt3,
4683 const ZRegister& zt4,
4684 const PRegisterZ& pg,
4685 const SVEMemOperand& addr);
4686
4687 // Contiguous load four-word structures to four vectors.
4688 void ld4w(const ZRegister& zt1,
4689 const ZRegister& zt2,
4690 const ZRegister& zt3,
4691 const ZRegister& zt4,
4692 const PRegisterZ& pg,
4693 const SVEMemOperand& addr);
4694
4695 // Contiguous load four-doubleword structures to four vectors.
4696 void ld4d(const ZRegister& zt1,
4697 const ZRegister& zt2,
4698 const ZRegister& zt3,
4699 const ZRegister& zt4,
4700 const PRegisterZ& pg,
4701 const SVEMemOperand& addr);
4702
4703 // Contiguous load first-fault unsigned bytes to vector.
4704 void ldff1b(const ZRegister& zt,
4705 const PRegisterZ& pg,
4706 const SVEMemOperand& addr);
4707
4708 // Contiguous load first-fault unsigned halfwords to vector.
4709 void ldff1h(const ZRegister& zt,
4710 const PRegisterZ& pg,
4711 const SVEMemOperand& addr);
4712
4713 // Contiguous load first-fault unsigned words to vector.
4714 void ldff1w(const ZRegister& zt,
4715 const PRegisterZ& pg,
4716 const SVEMemOperand& addr);
4717
4718 // Contiguous load first-fault doublewords to vector.
4719 void ldff1d(const ZRegister& zt,
4720 const PRegisterZ& pg,
4721 const SVEMemOperand& addr);
4722
4723 // Contiguous load first-fault signed bytes to vector.
4724 void ldff1sb(const ZRegister& zt,
4725 const PRegisterZ& pg,
4726 const SVEMemOperand& addr);
4727
4728 // Contiguous load first-fault signed halfwords to vector.
4729 void ldff1sh(const ZRegister& zt,
4730 const PRegisterZ& pg,
4731 const SVEMemOperand& addr);
4732
4733 // Contiguous load first-fault signed words to vector.
4734 void ldff1sw(const ZRegister& zt,
4735 const PRegisterZ& pg,
4736 const SVEMemOperand& addr);
4737
4738 // Gather load first-fault unsigned bytes to vector.
4739 void ldff1b(const ZRegister& zt,
4740 const PRegisterZ& pg,
4741 const Register& xn,
4742 const ZRegister& zm);
4743
4744 // Gather load first-fault unsigned bytes to vector (immediate index).
4745 void ldff1b(const ZRegister& zt,
4746 const PRegisterZ& pg,
4747 const ZRegister& zn,
4748 int imm5);
4749
4750 // Gather load first-fault doublewords to vector (vector index).
4751 void ldff1d(const ZRegister& zt,
4752 const PRegisterZ& pg,
4753 const Register& xn,
4754 const ZRegister& zm);
4755
4756 // Gather load first-fault doublewords to vector (immediate index).
4757 void ldff1d(const ZRegister& zt,
4758 const PRegisterZ& pg,
4759 const ZRegister& zn,
4760 int imm5);
4761
4762 // Gather load first-fault unsigned halfwords to vector (vector index).
4763 void ldff1h(const ZRegister& zt,
4764 const PRegisterZ& pg,
4765 const Register& xn,
4766 const ZRegister& zm);
4767
4768 // Gather load first-fault unsigned halfwords to vector (immediate index).
4769 void ldff1h(const ZRegister& zt,
4770 const PRegisterZ& pg,
4771 const ZRegister& zn,
4772 int imm5);
4773
4774 // Gather load first-fault signed bytes to vector (vector index).
4775 void ldff1sb(const ZRegister& zt,
4776 const PRegisterZ& pg,
4777 const Register& xn,
4778 const ZRegister& zm);
4779
4780 // Gather load first-fault signed bytes to vector (immediate index).
4781 void ldff1sb(const ZRegister& zt,
4782 const PRegisterZ& pg,
4783 const ZRegister& zn,
4784 int imm5);
4785
4786 // Gather load first-fault signed halfwords to vector (vector index).
4787 void ldff1sh(const ZRegister& zt,
4788 const PRegisterZ& pg,
4789 const Register& xn,
4790 const ZRegister& zm);
4791
4792 // Gather load first-fault signed halfwords to vector (immediate index).
4793 void ldff1sh(const ZRegister& zt,
4794 const PRegisterZ& pg,
4795 const ZRegister& zn,
4796 int imm5);
4797
4798 // Gather load first-fault signed words to vector (vector index).
4799 void ldff1sw(const ZRegister& zt,
4800 const PRegisterZ& pg,
4801 const Register& xn,
4802 const ZRegister& zm);
4803
4804 // Gather load first-fault signed words to vector (immediate index).
4805 void ldff1sw(const ZRegister& zt,
4806 const PRegisterZ& pg,
4807 const ZRegister& zn,
4808 int imm5);
4809
4810 // Gather load first-fault unsigned words to vector (vector index).
4811 void ldff1w(const ZRegister& zt,
4812 const PRegisterZ& pg,
4813 const Register& xn,
4814 const ZRegister& zm);
4815
4816 // Gather load first-fault unsigned words to vector (immediate index).
4817 void ldff1w(const ZRegister& zt,
4818 const PRegisterZ& pg,
4819 const ZRegister& zn,
4820 int imm5);
4821
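  // The first-fault forms only take a fault on the first active element;
  // later faulting elements simply leave the first-fault register (FFR)
  // clear. A typical speculative loop step looks like this (sketch only;
  // register choices and operand forms are assumptions):
  //
  //   setffr();                                   // FFR = all true.
  //   ldff1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));
  //   rdffr(p1.VnB(), p0.Zeroing());              // p1 = elements loaded.
  //
  // See setffr and rdffr, declared later in this class.
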
  // Contiguous load non-fault unsigned bytes to vector (immediate index).
  void ldnf1b(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-fault doublewords to vector (immediate index).
  void ldnf1d(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-fault unsigned halfwords to vector (immediate
  // index).
  void ldnf1h(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-fault signed bytes to vector (immediate index).
  void ldnf1sb(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Contiguous load non-fault signed halfwords to vector (immediate index).
  void ldnf1sh(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Contiguous load non-fault signed words to vector (immediate index).
  void ldnf1sw(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Contiguous load non-fault unsigned words to vector (immediate index).
  void ldnf1w(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-temporal bytes to vector.
  void ldnt1b(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-temporal halfwords to vector.
  void ldnt1h(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-temporal words to vector.
  void ldnt1w(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-temporal doublewords to vector.
  void ldnt1d(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Load SVE predicate/vector register.
  void ldr(const CPURegister& rt, const SVEMemOperand& addr);

  // Logical shift left by immediate (predicated).
  void lsl(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           int shift);

  // Logical shift left by 64-bit wide elements (predicated).
  void lsl(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Logical shift left by immediate (unpredicated).
  void lsl(const ZRegister& zd, const ZRegister& zn, int shift);

  // Logical shift left by 64-bit wide elements (unpredicated).
  void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Reversed logical shift left by vector (predicated).
  void lslr(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Logical shift right by immediate (predicated).
  void lsr(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           int shift);

  // Logical shift right by 64-bit wide elements (predicated).
  void lsr(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Logical shift right by immediate (unpredicated).
  void lsr(const ZRegister& zd, const ZRegister& zn, int shift);

  // Logical shift right by 64-bit wide elements (unpredicated).
  void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Reversed logical shift right by vector (predicated).
  void lsrr(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Bitwise invert predicate.
  void not_(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn);

  // Bitwise invert predicate, setting the condition flags.
  void nots(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn);

  // Multiply-add vectors (predicated), writing multiplicand
  // [Zdn = Za + Zdn * Zm].
  void mad(const ZRegister& zdn,
           const PRegisterM& pg,
           const ZRegister& zm,
           const ZRegister& za);

  // Multiply-add vectors (predicated), writing addend
  // [Zda = Zda + Zn * Zm].
  void mla(const ZRegister& zda,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Multiply-subtract vectors (predicated), writing addend
  // [Zda = Zda - Zn * Zm].
  void mls(const ZRegister& zda,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

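  // mla and mad form the same product but differ in which operand they
  // overwrite, so one can often be substituted for the other depending on
  // which register is dead. For example (sketch; registers assumed):
  //
  //   mla(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());  // z0 += z1 * z2
  //   mad(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());  // z0 = z2 + z0 * z1
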
  // Move predicates (unpredicated)
  void mov(const PRegister& pd, const PRegister& pn);

  // Move predicates (merging)
  void mov(const PRegisterWithLaneSize& pd,
           const PRegisterM& pg,
           const PRegisterWithLaneSize& pn);

  // Move predicates (zeroing)
  void mov(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn);

  // Move general-purpose register to vector elements (unpredicated)
  void mov(const ZRegister& zd, const Register& xn);

  // Move SIMD&FP scalar register to vector elements (unpredicated)
  void mov(const ZRegister& zd, const VRegister& vn);

  // Move vector register (unpredicated)
  void mov(const ZRegister& zd, const ZRegister& zn);

  // Move indexed element to vector elements (unpredicated)
  void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);

  // Move general-purpose register to vector elements (predicated)
  void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);

  // Move SIMD&FP scalar register to vector elements (predicated)
  void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);

  // Move vector elements (predicated)
  void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Move signed integer immediate to vector elements (predicated)
  void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);

  // Move signed immediate to vector elements (unpredicated).
  void mov(const ZRegister& zd, int imm8, int shift);

  // Move logical bitmask immediate to vector (unpredicated).
  void mov(const ZRegister& zd, uint64_t imm);

  // Move predicate (unpredicated), setting the condition flags
  void movs(const PRegister& pd, const PRegister& pn);

  // Move predicates (zeroing), setting the condition flags
  void movs(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn);

  // Move prefix (predicated).
  void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);

  // Move prefix (unpredicated).
  void movprfx(const ZRegister& zd, const ZRegister& zn);

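  // movprfx gives a destructive instruction a distinct destination: when it
  // immediately precedes a compatible destructive operation, the pair may
  // execute as a single constructive operation. For example (sketch;
  // registers assumed):
  //
  //   movprfx(z0, z1);
  //   mul(z0.VnH(), z0.VnH(), 2);  // Together: z0.h = z1.h * 2.
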
  // Multiply-subtract vectors (predicated), writing multiplicand
  // [Zdn = Za - Zdn * Zm].
  void msb(const ZRegister& zdn,
           const PRegisterM& pg,
           const ZRegister& zm,
           const ZRegister& za);

  // Multiply vectors (predicated).
  void mul(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Multiply by immediate (unpredicated).
  void mul(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Bitwise NAND predicates.
  void nand(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise NAND predicates, setting the condition flags.
  void nands(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);

  // Negate (predicated).
  void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Bitwise NOR predicates.
  void nor(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise NOR predicates, setting the condition flags.
  void nors(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise invert vector (predicated).
  void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Bitwise OR inverted predicate.
  void orn(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise OR inverted predicate, setting the condition flags.
  void orns(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise OR with inverted immediate (unpredicated).
  void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise OR predicate.
  void orr(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise OR vectors (predicated).
  void orr(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Bitwise OR with immediate (unpredicated).
  void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise OR vectors (unpredicated).
  void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise OR predicate, setting the condition flags.
  void orrs(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise OR reduction to scalar.
  void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Set all predicate elements to false.
  void pfalse(const PRegisterWithLaneSize& pd);

  // Set the first active predicate element to true.
  void pfirst(const PRegisterWithLaneSize& pd,
              const PRegister& pg,
              const PRegisterWithLaneSize& pn);

  // Find next active predicate.
  void pnext(const PRegisterWithLaneSize& pd,
             const PRegister& pg,
             const PRegisterWithLaneSize& pn);

  // Prefetch bytes.
  void prfb(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Prefetch halfwords.
  void prfh(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Prefetch words.
  void prfw(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Prefetch doublewords.
  void prfd(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Set condition flags for predicate.
  void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);

  // Initialise predicate from named constraint.
  void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);

  // Initialise predicate from named constraint, setting the condition flags.
  void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);

  // Unpack and widen the high half of a predicate.
  void punpkhi(const PRegisterWithLaneSize& pd,
               const PRegisterWithLaneSize& pn);

  // Unpack and widen the low half of a predicate.
  void punpklo(const PRegisterWithLaneSize& pd,
               const PRegisterWithLaneSize& pn);

  // Reverse bits (predicated).
  void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Read the first-fault register.
  void rdffr(const PRegisterWithLaneSize& pd);

  // Return predicate of successfully loaded elements.
  void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);

  // Return predicate of successfully loaded elements, setting the condition
  // flags.
  void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);

  // Read multiple of vector register size to scalar register.
  void rdvl(const Register& xd, int imm6);

  // Reverse all elements in a predicate.
  void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);

  // Reverse all elements in a vector (unpredicated).
  void rev(const ZRegister& zd, const ZRegister& zn);

  // Reverse bytes / halfwords / words within elements (predicated).
  void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Reverse bytes / halfwords / words within elements (predicated).
  void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Reverse bytes / halfwords / words within elements (predicated).
  void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed absolute difference (predicated).
  void sabd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed add reduction to scalar.
  void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);

  // Signed integer convert to floating-point (predicated).
  void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed divide (predicated).
  void sdiv(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed reversed divide (predicated).
  void sdivr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed dot product by indexed quadtuplet.
  void sdot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);

  // Signed dot product.
  void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Conditionally select elements from two predicates.
  void sel(const PRegisterWithLaneSize& pd,
           const PRegister& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Conditionally select elements from two vectors.
  void sel(const ZRegister& zd,
           const PRegister& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Initialise the first-fault register to all true.
  void setffr();

  // Signed maximum vectors (predicated).
  void smax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed maximum with immediate (unpredicated).
  void smax(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Signed maximum reduction to scalar.
  void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Signed minimum vectors (predicated).
  void smin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed minimum with immediate (unpredicated).
  void smin(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Signed minimum reduction to scalar.
  void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Signed multiply returning high half (predicated).
  void smulh(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Splice two vectors under predicate control.
  void splice(const ZRegister& zd,
              const PRegister& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Signed saturating add vectors (unpredicated).
  void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating add immediate (unpredicated).
  void sqadd(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);

  // Signed saturating decrement scalar by multiple of 8-bit predicate
  // constraint element count.
  void sqdecb(const Register& xd,
              const Register& wn,
              int pattern,
              int multiplier);

  // Signed saturating decrement scalar by multiple of 8-bit predicate
  // constraint element count.
  void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 64-bit predicate
  // constraint element count.
  void sqdecd(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 64-bit predicate
  // constraint element count.
  void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement vector by multiple of 64-bit predicate
  // constraint element count.
  void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqdech(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement vector by multiple of 16-bit predicate
  // constraint element count.
  void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement scalar by active predicate element count.
  void sqdecp(const Register& xd,
              const PRegisterWithLaneSize& pg,
              const Register& wn);

  // Signed saturating decrement scalar by active predicate element count.
  void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);

  // Signed saturating decrement vector by active predicate element count.
  void sqdecp(const ZRegister& zdn, const PRegister& pg);

  // Signed saturating decrement scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqdecw(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement vector by multiple of 32-bit predicate
  // constraint element count.
  void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by multiple of 8-bit predicate
  // constraint element count.
  void sqincb(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating increment scalar by multiple of 8-bit predicate
  // constraint element count.
  void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by multiple of 64-bit predicate
  // constraint element count.
  void sqincd(const Register& xd,
              const Register& wn,
              int pattern,
              int multiplier);

  // Signed saturating increment scalar by multiple of 64-bit predicate
  // constraint element count.
  void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment vector by multiple of 64-bit predicate
  // constraint element count.
  void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqinch(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating increment scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment vector by multiple of 16-bit predicate
  // constraint element count.
  void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by active predicate element count.
  void sqincp(const Register& xd,
              const PRegisterWithLaneSize& pg,
              const Register& wn);

  // Signed saturating increment scalar by active predicate element count.
  void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);

  // Signed saturating increment vector by active predicate element count.
  void sqincp(const ZRegister& zdn, const PRegister& pg);

  // Signed saturating increment scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqincw(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating increment scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment vector by multiple of 32-bit predicate
  // constraint element count.
  void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating subtract vectors (unpredicated).
  void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating subtract immediate (unpredicated).
  void sqsub(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);

  // Contiguous/scatter store bytes from vector.
  void st1b(const ZRegister& zt,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous/scatter store halfwords from vector.
  void st1h(const ZRegister& zt,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous/scatter store words from vector.
  void st1w(const ZRegister& zt,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous/scatter store doublewords from vector.
  void st1d(const ZRegister& zt,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store two-byte structures from two vectors.
  void st2b(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store two-halfword structures from two vectors.
  void st2h(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store two-word structures from two vectors.
  void st2w(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store two-doubleword structures from two vectors.
  void st2d(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store three-byte structures from three vectors.
  void st3b(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store three-halfword structures from three vectors.
  void st3h(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store three-word structures from three vectors.
  void st3w(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store three-doubleword structures from three vectors.
  void st3d(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store four-byte structures from four vectors.
  void st4b(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store four-halfword structures from four vectors.
  void st4h(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store four-word structures from four vectors.
  void st4w(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store four-doubleword structures from four vectors.
  void st4d(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store non-temporal bytes from vector.
  void stnt1b(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Contiguous store non-temporal halfwords from vector.
  void stnt1h(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Contiguous store non-temporal words from vector.
  void stnt1w(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Contiguous store non-temporal doublewords from vector.
  void stnt1d(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Store SVE predicate/vector register.
  void str(const CPURegister& rt, const SVEMemOperand& addr);

  // Subtract vectors (predicated).
  void sub(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Subtract vectors (unpredicated).
  void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Subtract immediate (unpredicated).
  void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);

  // Reversed subtract vectors (predicated).
  void subr(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Reversed subtract from immediate (unpredicated).
  void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);

  // Signed unpack and extend the high half of a vector.
  void sunpkhi(const ZRegister& zd, const ZRegister& zn);

  // Signed unpack and extend the low half of a vector.
  void sunpklo(const ZRegister& zd, const ZRegister& zn);

  // Signed byte extend (predicated).
  void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed halfword extend (predicated).
  void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed word extend (predicated).
  void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Programmable table lookup/permute using vector of indices into a
  // vector.
  void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleave even or odd elements from two predicates.
  void trn1(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave even or odd elements from two vectors.
  void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleave even or odd elements from two predicates.
  void trn2(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave even or odd elements from two vectors.
  void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference (predicated).
  void uabd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned add reduction to scalar.
  void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);

  // Unsigned integer convert to floating-point (predicated).
  void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned divide (predicated).
  void udiv(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned reversed divide (predicated).
  void udivr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned dot product by indexed quadtuplet.
  void udot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);

  // Unsigned dot product.
  void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned maximum vectors (predicated).
  void umax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned maximum with immediate (unpredicated).
  void umax(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Unsigned maximum reduction to scalar.
  void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Unsigned minimum vectors (predicated).
  void umin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned minimum with immediate (unpredicated).
  void umin(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Unsigned minimum reduction to scalar.
  void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Unsigned multiply returning high half (predicated).
  void umulh(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned saturating add vectors (unpredicated).
  void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned saturating add immediate (unpredicated).
  void uqadd(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);

  // Unsigned saturating decrement scalar by multiple of 8-bit predicate
  // constraint element count.
  void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement scalar by multiple of 64-bit predicate
  // constraint element count.
  void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement vector by multiple of 64-bit predicate
  // constraint element count.
  void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement scalar by multiple of 16-bit predicate
  // constraint element count.
  void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement vector by multiple of 16-bit predicate
  // constraint element count.
  void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement scalar by active predicate element count.
  void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Unsigned saturating decrement vector by active predicate element count.
  void uqdecp(const ZRegister& zdn, const PRegister& pg);

  // Unsigned saturating decrement scalar by multiple of 32-bit predicate
  // constraint element count.
  void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement vector by multiple of 32-bit predicate
  // constraint element count.
  void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by multiple of 8-bit predicate
  // constraint element count.
  void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by multiple of 64-bit predicate
  // constraint element count.
  void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment vector by multiple of 64-bit predicate
  // constraint element count.
  void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by multiple of 16-bit predicate
  // constraint element count.
  void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment vector by multiple of 16-bit predicate
  // constraint element count.
  void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by active predicate element count.
  void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Unsigned saturating increment vector by active predicate element count.
  void uqincp(const ZRegister& zdn, const PRegister& pg);

  // Unsigned saturating increment scalar by multiple of 32-bit predicate
  // constraint element count.
  void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment vector by multiple of 32-bit predicate
  // constraint element count.
  void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating subtract vectors (unpredicated).
  void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned saturating subtract immediate (unpredicated).
  void uqsub(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);

  // Unsigned unpack and extend the high half of a vector.
  void uunpkhi(const ZRegister& zd, const ZRegister& zn);

  // Unsigned unpack and extend the low half of a vector.
  void uunpklo(const ZRegister& zd, const ZRegister& zn);

  // Unsigned byte extend (predicated).
  void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned halfword extend (predicated).
  void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned word extend (predicated).
  void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Concatenate even or odd elements from two predicates.
  void uzp1(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Concatenate even or odd elements from two vectors.
  void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Concatenate even or odd elements from two predicates.
  void uzp2(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Concatenate even or odd elements from two vectors.
  void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // While incrementing signed scalar less than or equal to scalar.
  void whilele(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While incrementing unsigned scalar lower than scalar.
  void whilelo(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While incrementing unsigned scalar lower or same as scalar.
  void whilels(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While incrementing signed scalar less than scalar.
  void whilelt(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // Write the first-fault register.
  void wrffr(const PRegisterWithLaneSize& pn);

  // Interleave elements from two half predicates.
  void zip1(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave elements from two half vectors.
  void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleave elements from two half predicates.
  void zip2(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave elements from two half vectors.
  void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Emit generic instructions.

  // Emit raw instructions into the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
  void dc32(uint32_t data) { dc(data); }

  // Emit 64 bits of data into the instruction stream.
  void dc64(uint64_t data) { dc(data); }

  // Emit data in the instruction stream.
  template <typename T>
  void dc(T data) {
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }

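  // For example, to embed a data word directly in the code stream (sketch;
  // `masm` is an assumed Assembler instance):
  //
  //   masm.dc32(0xdeadbeef);                     // Four bytes of data.
  //   masm.dc64(UINT64_C(0x0123456789abcdef));   // Eight bytes of data.
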
  // Copy a string into the instruction stream, including the terminating NULL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
  void EmitString(const char* string) {
    VIXL_ASSERT(string != NULL);
    VIXL_ASSERT(AllowAssembler());

    GetBuffer()->EmitString(string);
    GetBuffer()->Align();
  }

  // Code generation helpers.

  // Register encoding.
  template <int hibit, int lobit>
  static Instr Rx(CPURegister rx) {
    VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
    return ImmUnsignedField<hibit, lobit>(rx.GetCode());
  }

#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
#define REGISTER_ENCODER(N)                                           \
  static Instr R##N(CPURegister r##N) {                               \
    return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
  }
  CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
#undef REGISTER_ENCODER
#undef CPU_REGISTER_FIELD_NAMES

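  // For each field name, the macro above expands to a helper of the form:
  //
  //   static Instr Rd(CPURegister rd) {
  //     return Rx<Rd_offset + Rd_width - 1, Rd_offset>(rd);
  //   }
  //
  // so, for example, Rd(x1) places register code 1 in the Rd field.
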
  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register.
  static Instr RdSP(Register rd) {
    VIXL_ASSERT(!rd.IsZero());
    return (rd.GetCode() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    VIXL_ASSERT(!rn.IsZero());
    return (rn.GetCode() & kRegCodeMask) << Rn_offset;
  }

  static Instr RmSP(Register rm) {
    VIXL_ASSERT(!rm.IsZero());
    return (rm.GetCode() & kRegCodeMask) << Rm_offset;
  }

  static Instr Pd(PRegister pd) {
    return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
  }

  static Instr Pm(PRegister pm) {
    return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
  }

  static Instr Pn(PRegister pn) {
    return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
  }

  static Instr PgLow8(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They
    // should never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
  }

  template <int hibit, int lobit>
  static Instr Pg(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They
    // should never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<hibit, lobit>(pg);
  }

  // Flags encoding.
  static Instr Flags(FlagsUpdate S) {
    if (S == SetFlags) {
      return 1 << FlagsUpdate_offset;
    } else if (S == LeaveFlags) {
      return 0 << FlagsUpdate_offset;
    }
    VIXL_UNREACHABLE();
    return 0;
  }

  static Instr Cond(Condition cond) { return cond << Condition_offset; }

  // Generic immediate encoding.
  template <int hibit, int lobit>
  static Instr ImmField(int64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    int fieldsize = hibit - lobit + 1;
    VIXL_ASSERT(IsIntN(fieldsize, imm));
    return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
  }

  // For unsigned immediate encoding.
  // TODO: Handle signed and unsigned immediates in a satisfactory way.
  template <int hibit, int lobit>
  static Instr ImmUnsignedField(uint64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
    return static_cast<Instr>(imm << lobit);
  }

  // PC-relative address encoding.
  static Instr ImmPCRelAddress(int64_t imm21) {
    VIXL_ASSERT(IsInt21(imm21));
    Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
    Instr immlo = imm << ImmPCRelLo_offset;
    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
  }

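  // Worked example (assuming the usual ADR field layout, where
  // ImmPCRelLo_width is 2): an offset of 21 (0b10101) splits into
  // immlo = 0b01 and immhi = 0b101 before being masked into place.
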
  // Branch encoding.
  static Instr ImmUncondBranch(int64_t imm26) {
    VIXL_ASSERT(IsInt26(imm26));
    return TruncateToUint26(imm26) << ImmUncondBranch_offset;
  }

  static Instr ImmCondBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCondBranch_offset;
  }

  static Instr ImmCmpBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCmpBranch_offset;
  }

  static Instr ImmTestBranch(int64_t imm14) {
    VIXL_ASSERT(IsInt14(imm14));
    return TruncateToUint14(imm14) << ImmTestBranch_offset;
  }

  static Instr ImmTestBranchBit(unsigned bit_pos) {
    VIXL_ASSERT(IsUint6(bit_pos));
    // Subtract five from the shift offset so that bit 5 of bit_pos lands in
    // the ImmTestBranchBit5 field; the low five bits form the b40 field.
    unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
    unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
    b5 &= ImmTestBranchBit5_mask;
    b40 &= ImmTestBranchBit40_mask;
    return b5 | b40;
  }

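  // Worked example: for bit_pos = 33 (0b100001), the top bit lands in the
  // b5 field and the remaining 0b00001 lands in the b40 field, matching the
  // split encoding of the TBZ/TBNZ bit-position operand.
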
  // Data Processing encoding.
  static Instr SF(Register rd) {
    return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
  }

  static Instr ImmAddSub(int imm) {
    VIXL_ASSERT(IsImmAddSub(imm));
    if (IsUint12(imm)) {  // No shift required.
      imm <<= ImmAddSub_offset;
    } else {
      imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
    }
    return imm;
  }

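  // For example, ImmAddSub(0x123) encodes the 12-bit value directly, while
  // ImmAddSub(0x123000) encodes 0x123 together with the shift-by-12 flag;
  // IsImmAddSub() accepts exactly these two shapes.
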
  static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
    USE(lane_size);
    return imms << SVEImmSetBits_offset;
  }

  static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
    VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
    USE(lane_size);
    return immr << SVEImmRotate_offset;
  }

  static Instr SVEBitN(unsigned bitn) {
    VIXL_ASSERT(IsUint1(bitn));
    return bitn << SVEBitN_offset;
  }

  static Instr SVEDtype(unsigned msize_in_bytes_log2,
                        unsigned esize_in_bytes_log2,
                        bool is_signed,
                        int dtype_h_lsb = 23,
                        int dtype_l_lsb = 21) {
    VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    Instr dtype_h = msize_in_bytes_log2;
    Instr dtype_l = esize_in_bytes_log2;
    // Signed forms use the encodings where msize would be greater than esize.
    if (is_signed) {
      dtype_h = dtype_h ^ 0x3;
      dtype_l = dtype_l ^ 0x3;
    }
    VIXL_ASSERT(IsUint2(dtype_h));
    VIXL_ASSERT(IsUint2(dtype_l));
    VIXL_ASSERT((dtype_h > dtype_l) == is_signed);

    return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
  }

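  // Worked example: an unsigned byte load into word elements has
  // msize_in_bytes_log2 = 0 and esize_in_bytes_log2 = 2, giving
  // dtype_h = 0 and dtype_l = 2. The signed form XORs both with 0x3,
  // giving dtype_h = 3 and dtype_l = 1, one of the "msize > esize"
  // encodings reserved for sign-extending loads.
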
SVEDtypeSplit(unsigned msize_in_bytes_log2,unsigned esize_in_bytes_log2,bool is_signed)6047 static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
6048 unsigned esize_in_bytes_log2,
6049 bool is_signed) {
6050 return SVEDtype(msize_in_bytes_log2,
6051 esize_in_bytes_log2,
6052 is_signed,
6053 23,
6054 13);
6055 }
6056
ImmS(unsigned imms,unsigned reg_size)6057 static Instr ImmS(unsigned imms, unsigned reg_size) {
6058 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
6059 ((reg_size == kWRegSize) && IsUint5(imms)));
6060 USE(reg_size);
6061 return imms << ImmS_offset;
6062 }
6063
ImmR(unsigned immr,unsigned reg_size)6064 static Instr ImmR(unsigned immr, unsigned reg_size) {
6065 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
6066 ((reg_size == kWRegSize) && IsUint5(immr)));
6067 USE(reg_size);
6068 VIXL_ASSERT(IsUint6(immr));
6069 return immr << ImmR_offset;
6070 }
6071
ImmSetBits(unsigned imms,unsigned reg_size)6072 static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
6073 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
6074 VIXL_ASSERT(IsUint6(imms));
6075 VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
6076 USE(reg_size);
6077 return imms << ImmSetBits_offset;
6078 }
6079
ImmRotate(unsigned immr,unsigned reg_size)6080 static Instr ImmRotate(unsigned immr, unsigned reg_size) {
6081 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
6082 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
6083 ((reg_size == kWRegSize) && IsUint5(immr)));
6084 USE(reg_size);
6085 return immr << ImmRotate_offset;
6086 }
6087
ImmLLiteral(int64_t imm19)6088 static Instr ImmLLiteral(int64_t imm19) {
6089 VIXL_ASSERT(IsInt19(imm19));
6090 return TruncateToUint19(imm19) << ImmLLiteral_offset;
6091 }
6092
BitN(unsigned bitn,unsigned reg_size)6093 static Instr BitN(unsigned bitn, unsigned reg_size) {
6094 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
6095 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
6096 USE(reg_size);
6097 return bitn << BitN_offset;
6098 }

  static Instr ShiftDP(Shift shift) {
    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
    return shift << ShiftDP_offset;
  }

  static Instr ImmDPShift(unsigned amount) {
    VIXL_ASSERT(IsUint6(amount));
    return amount << ImmDPShift_offset;
  }

  static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }

  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(IsUint5(imm));
    return imm << ImmCondCmp_offset;
  }

  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }
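
  // For example, assuming the usual VIXL flag layout (N, Z, C and V in the
  // top four bits of a StatusFlags value, with Flags_offset = 28),
  // Nzcv(ZFlag) extracts 0b0100 and re-places it in the instruction's nzcv
  // field.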

  // MemOperand offset encoding.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
    VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2));
    int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2);
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }
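
  // For example, for an X-register pair access_size_in_bytes_log2 is 3, so a
  // byte offset of 16 is encoded as scaled_imm7 = 16 / 8 = 2; offsets that
  // are not multiples of the access size fail the IsMultiple assertion.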

  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  static Instr ImmLSPAC(int64_t imm10) {
    VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
    int64_t scaled_imm10 = imm10 / (1 << 3);
    VIXL_ASSERT(IsInt10(scaled_imm10));
    uint32_t s_bit = (scaled_imm10 >> 9) & 1;
    return (s_bit << ImmLSPACHi_offset) |
           (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
  }
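
  // For example, imm10 = -4096 scales to scaled_imm10 = -512 (0b1000000000
  // as a 10-bit value); the sign bit becomes the split S bit and the low
  // nine bits, all zero here, fill the contiguous low field.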

  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }

  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  static Instr ImmUdf(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmUdf_offset;
  }

  static Instr ImmSystemRegister(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmSystemRegister_offset;
  }

  static Instr ImmRMIFRotation(int imm6) {
    VIXL_ASSERT(IsUint6(imm6));
    return imm6 << ImmRMIFRotation_offset;
  }

  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }

  // Move immediates encoding.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }
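
  // The shift argument is the hw field, i.e. the index of the 16-bit chunk,
  // so shift = 1 places the immediate in bits 31:16 of the result, as in
  // `movz x0, #0x1234, lsl #16`.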

  // FP Immediates.
  static Instr ImmFP16(Float16 imm);
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);

  // FP register type.
  static Instr FPType(VRegister fd) {
    VIXL_ASSERT(fd.IsScalar());
    switch (fd.GetSizeInBits()) {
      case 16:
        return FP16;
      case 32:
        return FP32;
      case 64:
        return FP64;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
  }

  static Instr FPScale(unsigned scale) {
    VIXL_ASSERT(IsUint6(scale));
    return scale << FPScale_offset;
  }

  // Immediate field checking helpers.
  static bool IsImmAddSub(int64_t immediate);
  static bool IsImmConditionalCompare(int64_t immediate);
  static bool IsImmFP16(Float16 imm);
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);

  // Instruction bits for vector format in data processing operations.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }
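
  // For example, a 128-bit register with four lanes maps to NEON_4S, while
  // combinations with no architectural format (e.g. a 64-bit register with
  // 16 lanes) fall through to the 0xffffffff "invalid format" marker.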

  // Instruction bits for vector format in floating point data processing
  // operations.
  static Instr FPFormat(VRegister vd) {
    switch (vd.GetLanes()) {
      case 1:
        // Floating point scalar formats.
        switch (vd.GetSizeInBits()) {
          case 16:
            return FP16;
          case 32:
            return FP32;
          case 64:
            return FP64;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 2:
        // Two lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_2S;
          case 128:
            return NEON_FP_2D;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 4:
        // Four lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_4H;
          case 128:
            return NEON_FP_4S;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 8:
        // Eight lane floating point vector format.
        VIXL_ASSERT(vd.Is128Bits());
        return NEON_FP_8H;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    VIXL_UNREACHABLE();
    return 0;
  }

  // Instruction bits for vector format in load and store operations.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for scalar format in data processing operations.
  static Instr SFormat(VRegister vd) {
    VIXL_ASSERT(vd.GetLanes() == 1);
    switch (vd.GetSizeInBytes()) {
      case 1:
        return NEON_B;
      case 2:
        return NEON_H;
      case 4:
        return NEON_S;
      case 8:
        return NEON_D;
      default:
        return 0xffffffff;
    }
  }

  template <typename T>
  static Instr SVESize(const T& rd) {
    VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
    VIXL_ASSERT(rd.HasLaneSize());
    switch (rd.GetLaneSizeInBytes()) {
      case 1:
        return SVE_B;
      case 2:
        return SVE_H;
      case 4:
        return SVE_S;
      case 8:
        return SVE_D;
      default:
        return 0xffffffff;
    }
  }

  static Instr ImmSVEPredicateConstraint(int pattern) {
    VIXL_ASSERT(IsUint5(pattern));
    return (pattern << ImmSVEPredicateConstraint_offset) &
           ImmSVEPredicateConstraint_mask;
  }

  static Instr ImmNEONHLM(int index, int num_bits) {
    int h, l, m;
    if (num_bits == 3) {
      VIXL_ASSERT(IsUint3(index));
      h = (index >> 2) & 1;
      l = (index >> 1) & 1;
      m = (index >> 0) & 1;
    } else if (num_bits == 2) {
      VIXL_ASSERT(IsUint2(index));
      h = (index >> 1) & 1;
      l = (index >> 0) & 1;
      m = 0;
    } else {
      VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
      h = (index >> 0) & 1;
      l = 0;
      m = 0;
    }
    return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
  }
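
  // For example, with num_bits == 3, index 5 (0b101) splits into h = 1,
  // l = 0 and m = 1, which land in the instruction's H, L and M bits
  // respectively.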

  static Instr ImmRotFcadd(int rot) {
    VIXL_ASSERT(rot == 90 || rot == 270);
    return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
  }

  static Instr ImmRotFcmlaSca(int rot) {
    VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
    return (rot / 90) << ImmRotFcmlaSca_offset;
  }

  static Instr ImmRotFcmlaVec(int rot) {
    VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
    return (rot / 90) << ImmRotFcmlaVec_offset;
  }

  static Instr ImmNEONExt(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << ImmNEONExt_offset;
  }

  static Instr ImmNEON5(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }

  static Instr ImmNEON4(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }
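
  // For example, for S lanes (s = 2) ImmNEON5 encodes index 1 as
  // imm5 = (1 << 3) | (1 << 2) = 0b01100: the lowest set bit marks the lane
  // size and the index occupies the bits above it.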

  static Instr ImmNEONabcdefgh(int imm8) {
    VIXL_ASSERT(IsUint8(imm8));
    Instr instr;
    instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
    return instr;
  }
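
  // For example, imm8 = 0xab (0b10101011) splits into abc = 0b101 and
  // defgh = 0b01011 before being placed in the two separate fields.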

  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(IsUint4(cmode));
    return cmode << NEONCmode_offset;
  }

  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(IsUint1(op));
    return op << NEONModImmOp_offset;
  }

  // Size of the code generated from `label` to the current position.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }
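
  // A minimal usage sketch (illustrative only, assuming an Assembler `assm`
  // and the usual bind/add entry points):
  //   Label start;
  //   assm.bind(&start);
  //   assm.add(x0, x1, Operand(x2));
  //   size_t bytes = assm.GetSizeOfCodeGeneratedSince(&start);
  // Here `bytes` would be kInstructionSize, since one instruction was
  // emitted after the label was bound.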

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }

  PositionIndependentCodeOption GetPic() const { return pic_; }
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }

  CPUFeatures* GetCPUFeatures() { return &cpu_features_; }

  void SetCPUFeatures(const CPUFeatures& cpu_features) {
    cpu_features_ = cpu_features;
  }

  bool AllowPageOffsetDependentCode() const {
    return (GetPic() == PageOffsetDependentCode) ||
           (GetPic() == PositionDependentCode);
  }

  static Register AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? Register(xzr) : Register(wzr);
  }

 protected:
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  void LoadStorePAC(const Register& xt,
                    const MemOperand& addr,
                    LoadStorePACOp op);

  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  // `is_load` defaults to false because it is only used by the
  // scalar-plus-vector form.
  Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                            int num_regs,
                            const SVEMemOperand& addr,
                            bool is_load = false);

  // E.g. st1b, st1h, ...
  // This supports both contiguous and scatter stores.
  void SVESt1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegister& pg,
                    const SVEMemOperand& addr);

  // E.g. ld1b, ld1h, ...
  // This supports both contiguous and gather loads.
  void SVELd1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegisterZ& pg,
                    const SVEMemOperand& addr,
                    bool is_signed);

  // E.g. ld1rb, ld1rh, ...
  void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
                             const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr,
                             bool is_signed);

  // E.g. ldff1b, ldff1h, ...
  // This supports both contiguous and gather loads.
  void SVELdff1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr,
                      bool is_signed);

  // Common code for the helpers above.
  void SVELdSt1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegister& pg,
                      const SVEMemOperand& addr,
                      bool is_signed,
                      Instr op);

  // Common code for the helpers above.
  void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
                              const ZRegister& zt,
                              const PRegister& pg,
                              const SVEMemOperand& addr,
                              bool is_load,
                              bool is_signed,
                              bool is_first_fault);

  // E.g. st2b, st3h, ...
  void SVESt234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegister& pg,
                      const SVEMemOperand& addr);

  // E.g. ld2b, ld3h, ...
  void SVELd234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr);

  // Common code for the helpers above.
  void SVELdSt234Helper(int num_regs,
                        const ZRegister& zt1,
                        const PRegister& pg,
                        const SVEMemOperand& addr,
                        Instr op);

  // E.g. ld1qb, ld1qh, ldnt1b, ...
  void SVELd1St1ScaImmHelper(const ZRegister& zt,
                             const PRegister& pg,
                             const SVEMemOperand& addr,
                             Instr regoffset_op,
                             Instr immoffset_op,
                             int imm_divisor = 1);

  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
  void Prefetch(int op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // TODO(all): The third parameter should be passed by reference, but gcc
  // 4.8.2 then reports a bogus uninitialised warning.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);

  void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);

  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      const ZRegister& zm,
                      SVEIntCompareVectorsOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      int imm,
                      SVEIntCompareSignedImmOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      unsigned imm,
                      SVEIntCompareUnsignedImmOp op);

  void SVEIntAddSubtractImmUnpredicatedHelper(
      SVEIntAddSubtractImm_UnpredicatedOp op,
      const ZRegister& zd,
      int imm8,
      int shift);

  void SVEElementCountToRegisterHelper(Instr op,
                                       const Register& rd,
                                       int pattern,
                                       int multiplier);

  Instr EncodeSVEShiftImmediate(Shift shift_op,
                                int shift,
                                int lane_size_in_bits);

  void SVEBitwiseShiftImmediate(const ZRegister& zd,
                                const ZRegister& zn,
                                Instr encoded_imm,
                                SVEBitwiseShiftUnpredicatedOp op);

  void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
                                    const PRegisterM& pg,
                                    Instr encoded_imm,
                                    SVEBitwiseShiftByImm_PredicatedOp op);

  Instr SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2,
                            const ZRegister& zm,
                            int index,
                            Instr op_h,
                            Instr op_s,
                            Instr op_d);


  void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEPrefetchHelper(PrefetchOperation prfop,
                         const PRegister& pg,
                         const SVEMemOperand& addr,
                         int prefetch_size);

  static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
    // SVE only supports PLD and PST, not PLI.
    VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
                ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
    // Check that we can simply map bits.
    VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
    VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
    // Remaining operations map directly.
    return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
  }
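
  // For example, PSTL3STRM is 0b10101, so ((0b10000 >> 1) | 0b101) gives
  // 0b01101: the PLD/PST selector moves from bit 4 down to bit 3 and the
  // level/policy bits pass through unchanged.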

  // Functions for emulating operands not directly supported by the
  // instruction set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension
  // (such as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

  // Convenience pass-through for CPU feature checks.
  bool CPUHas(CPUFeatures::Feature feature0,
              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
    return cpu_features_.Has(feature0, feature1, feature2, feature3);
  }

  // Determine whether the target CPU has the specified registers, based on
  // the currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the
  // same entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  bool CPUHas(SystemRegister sysreg) const;

 private:
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
  // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
  void ResolveSVEImm8Shift(int* imm8, int* shift);

  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size_in_bytes_log2,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label,
  // then return an offset to be encoded in the instruction. If the label is
  // not yet bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);
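
  // element_shift is presumably the log2 of the offset granule: the byte,
  // instruction and page variants above would instantiate it with 0, the
  // log2 of the instruction size, and the log2 of the page size respectively.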

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);

  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }

  PositionIndependentCodeOption pic_;

  CPUFeatures cpu_features_;
};


template <typename T>
void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
  return UpdateValue(new_value,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


template <typename T>
void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
  return UpdateValue(high64,
                     low64,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


}  // namespace aarch64

// Required InvalSet template specialisations.
// TODO: These template specialisations should not live in this file. Move
// Label out of the aarch64 namespace in order to share its implementation
// later.
#define INVAL_SET_TEMPLATE_PARAMETERS                                \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
      aarch64::Label::kReclaimFactor
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
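
// Label links are stored as raw buffer offsets, so each element acts as its
// own key; the specialisations above simply forward the value.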
#undef INVAL_SET_TEMPLATE_PARAMETERS

}  // namespace vixl

#endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_