// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
28 #define VIXL_AARCH64_ASSEMBLER_AARCH64_H_
29
#include <cstddef>
#include <cstdint>
#include <cstring>

#include "../assembler-base-vixl.h"
#include "../code-generation-scopes-vixl.h"
#include "../cpu-features.h"
#include "../globals-vixl.h"
#include "../invalset-vixl.h"
#include "../utils-vixl.h"

#include "operands-aarch64.h"
38
39 namespace vixl {
40 namespace aarch64 {
41
42 class LabelTestHelper; // Forward declaration.
43
44
45 class Label {
46 public:
47 #ifndef PANDA_BUILD
Label()48 Label() : location_(kLocationUnbound) {}
49 #else
50 Label() = delete;
51 Label(AllocatorWrapper allocator) : links_(allocator), location_(kLocationUnbound) {}
52 #endif
~Label()53 virtual ~Label() {
54 // All links to a label must have been resolved before it is destructed.
55 #ifndef PANDA_BUILD
56 VIXL_ASSERT(!IsLinked());
57 #else
58 // Codegen may create unlinked labels
59 #endif
60 }
61
IsBound()62 bool IsBound() const { return location_ >= 0; }
IsLinked()63 bool IsLinked() const { return !links_.empty(); }
64
GetLocation()65 ptrdiff_t GetLocation() const { return location_; }
66 VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
67 return GetLocation();
68 }
69
70 static const int kNPreallocatedLinks = 4;
71 static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
72 static const size_t kReclaimFrom = 512;
73 static const size_t kReclaimFactor = 2;
74
75 typedef InvalSet<ptrdiff_t,
76 kNPreallocatedLinks,
77 ptrdiff_t,
78 kInvalidLinkKey,
79 kReclaimFrom,
80 kReclaimFactor>
81 LinksSetBase;
82 typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;
83
84 private:
85 class LinksSet : public LinksSetBase {
86 public:
87 #ifndef PANDA_BUILD
LinksSet()88 LinksSet() : LinksSetBase() {}
89 #else
90 LinksSet() = delete;
91 LinksSet(AllocatorWrapper allocator) : LinksSetBase(allocator) {}
92 #endif
93 };
94
95 // Allows iterating over the links of a label. The behaviour is undefined if
96 // the list of links is modified in any way while iterating.
97 class LabelLinksIterator : public LabelLinksIteratorBase {
98 public:
LabelLinksIterator(Label * label)99 explicit LabelLinksIterator(Label* label)
100 : LabelLinksIteratorBase(&label->links_) {}
101
102 // TODO: Remove these and use the STL-like interface instead.
103 using LabelLinksIteratorBase::Advance;
104 using LabelLinksIteratorBase::Current;
105 };
106
Bind(ptrdiff_t location)107 void Bind(ptrdiff_t location) {
108 // Labels can only be bound once.
109 #ifndef PANDA_BUILD
110 VIXL_ASSERT(!IsBound());
111 #else
112 // Disabled for unit-tests (it bind non-bound locs)
113 #endif
114 location_ = location;
115 }
116
AddLink(ptrdiff_t instruction)117 void AddLink(ptrdiff_t instruction) {
118 // If a label is bound, the assembler already has the information it needs
119 // to write the instruction, so there is no need to add it to links_.
120 VIXL_ASSERT(!IsBound());
121 links_.insert(instruction);
122 }
123
DeleteLink(ptrdiff_t instruction)124 void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }
125
ClearAllLinks()126 void ClearAllLinks() { links_.clear(); }
127
128 // TODO: The comment below considers average case complexity for our
129 // usual use-cases. The elements of interest are:
130 // - Branches to a label are emitted in order: branch instructions to a label
131 // are generated at an offset in the code generation buffer greater than any
132 // other branch to that same label already generated. As an example, this can
133 // be broken when an instruction is patched to become a branch. Note that the
134 // code will still work, but the complexity considerations below may locally
135 // not apply any more.
136 // - Veneers are generated in order: for multiple branches of the same type
137 // branching to the same unbound label going out of range, veneers are
138 // generated in growing order of the branch instruction offset from the start
139 // of the buffer.
140 //
141 // When creating a veneer for a branch going out of range, the link for this
142 // branch needs to be removed from this `links_`. Since all branches are
143 // tracked in one underlying InvalSet, the complexity for this deletion is the
144 // same as for finding the element, ie. O(n), where n is the number of links
145 // in the set.
146 // This could be reduced to O(1) by using the same trick as used when tracking
147 // branch information for veneers: split the container to use one set per type
148 // of branch. With that setup, when a veneer is created and the link needs to
149 // be deleted, if the two points above hold, it must be the minimum element of
150 // the set for its type of branch, and that minimum element will be accessible
151 // in O(1).
152
153 // The offsets of the instructions that have linked to this label.
154 LinksSet links_;
155 // The label location.
156 ptrdiff_t location_;
157
158 static const ptrdiff_t kLocationUnbound = -1;
159
160 // It is not safe to copy labels, so disable the copy constructor and operator
161 // by declaring them private (without an implementation).
162 #if __cplusplus >= 201103L
163 Label(const Label&) = delete;
164 void operator=(const Label&) = delete;
165 #else
166 Label(const Label&);
167 void operator=(const Label&);
168 #endif
169
170 // The Assembler class is responsible for binding and linking labels, since
171 // the stored offsets need to be consistent with the Assembler's buffer.
172 friend class Assembler;
173 // The MacroAssembler and VeneerPool handle resolution of branches to distant
174 // targets.
175 friend class MacroAssembler;
176 friend class VeneerPool;
177 };
178
179 class Assembler;
180 class LiteralPool;
181
// A literal is a 32-bit or 64-bit piece of data stored in the instruction
// stream and loaded through a pc relative load. The same literal can be
// referred to by multiple instructions but a literal can only reside at one
// place in memory. A literal can be used by a load before or after being
// placed in memory.
//
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
// 1) the label is placed, the offset (stored as offset + 1) is used to
//    resolve any subsequent load using the label.
// 2) the label is not placed and offset is the offset of the last load using
//    the literal (stored as -offset - 1). If multiple loads refer to this
//    literal then the last load holds the offset of the preceding load and
//    all loads form a chain. Once the offset is placed all the loads in the
//    chain are resolved and future loads fall back to possibility 1.
197 class RawLiteral {
198 public:
199 enum DeletionPolicy {
200 kDeletedOnPlacementByPool,
201 kDeletedOnPoolDestruction,
202 kManuallyDeleted
203 };
204
205 RawLiteral(size_t size,
206 LiteralPool* literal_pool,
207 DeletionPolicy deletion_policy = kManuallyDeleted);
208
209 // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
210 // actually pointing to `Literal<T>` objects.
~RawLiteral()211 virtual ~RawLiteral() {}
212
GetSize()213 size_t GetSize() const {
214 VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
215 VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
216 VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
217 (size_ == kQRegSizeInBytes));
218 return size_;
219 }
220 VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }
221
GetRawValue128Low64()222 uint64_t GetRawValue128Low64() const {
223 VIXL_ASSERT(size_ == kQRegSizeInBytes);
224 return low64_;
225 }
226 VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
227 return GetRawValue128Low64();
228 }
229
GetRawValue128High64()230 uint64_t GetRawValue128High64() const {
231 VIXL_ASSERT(size_ == kQRegSizeInBytes);
232 return high64_;
233 }
234 VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
235 return GetRawValue128High64();
236 }
237
GetRawValue64()238 uint64_t GetRawValue64() const {
239 VIXL_ASSERT(size_ == kXRegSizeInBytes);
240 VIXL_ASSERT(high64_ == 0);
241 return low64_;
242 }
243 VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
244 return GetRawValue64();
245 }
246
GetRawValue32()247 uint32_t GetRawValue32() const {
248 VIXL_ASSERT(size_ == kWRegSizeInBytes);
249 VIXL_ASSERT(high64_ == 0);
250 VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
251 return static_cast<uint32_t>(low64_);
252 }
253 VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
254 return GetRawValue32();
255 }
256
IsUsed()257 bool IsUsed() const { return offset_ < 0; }
IsPlaced()258 bool IsPlaced() const { return offset_ > 0; }
259
GetLiteralPool()260 LiteralPool* GetLiteralPool() const { return literal_pool_; }
261
GetOffset()262 ptrdiff_t GetOffset() const {
263 VIXL_ASSERT(IsPlaced());
264 return offset_ - 1;
265 }
266 VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }
267
268 protected:
SetOffset(ptrdiff_t offset)269 void SetOffset(ptrdiff_t offset) {
270 VIXL_ASSERT(offset >= 0);
271 VIXL_ASSERT(IsWordAligned(offset));
272 VIXL_ASSERT(!IsPlaced());
273 offset_ = offset + 1;
274 }
set_offset(ptrdiff_t offset)275 VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
276 SetOffset(offset);
277 }
278
GetLastUse()279 ptrdiff_t GetLastUse() const {
280 VIXL_ASSERT(IsUsed());
281 return -offset_ - 1;
282 }
283 VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }
284
SetLastUse(ptrdiff_t offset)285 void SetLastUse(ptrdiff_t offset) {
286 VIXL_ASSERT(offset >= 0);
287 VIXL_ASSERT(IsWordAligned(offset));
288 VIXL_ASSERT(!IsPlaced());
289 offset_ = -offset - 1;
290 }
set_last_use(ptrdiff_t offset)291 VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
292 SetLastUse(offset);
293 }
294
295 size_t size_;
296 ptrdiff_t offset_;
297 uint64_t low64_;
298 uint64_t high64_;
299
300 private:
301 LiteralPool* literal_pool_;
302 DeletionPolicy deletion_policy_;
303
304 friend class Assembler;
305 friend class LiteralPool;
306 };
307
308
309 template <typename T>
310 class Literal : public RawLiteral {
311 public:
312 explicit Literal(T value,
313 LiteralPool* literal_pool = NULL,
314 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(sizeof (value),literal_pool,ownership)315 : RawLiteral(sizeof(value), literal_pool, ownership) {
316 VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
317 UpdateValue(value);
318 }
319
320 Literal(T high64,
321 T low64,
322 LiteralPool* literal_pool = NULL,
323 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(kQRegSizeInBytes,literal_pool,ownership)324 : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
325 VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
326 UpdateValue(high64, low64);
327 }
328
~Literal()329 virtual ~Literal() {}
330
331 // Update the value of this literal, if necessary by rewriting the value in
332 // the pool.
333 // If the literal has already been placed in a literal pool, the address of
334 // the start of the code buffer must be provided, as the literal only knows it
335 // offset from there. This also allows patching the value after the code has
336 // been moved in memory.
337 void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
338 VIXL_ASSERT(sizeof(new_value) == size_);
339 memcpy(&low64_, &new_value, sizeof(new_value));
340 if (IsPlaced()) {
341 VIXL_ASSERT(code_buffer != NULL);
342 RewriteValueInCode(code_buffer);
343 }
344 }
345
346 void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
347 VIXL_ASSERT(sizeof(low64) == size_ / 2);
348 memcpy(&low64_, &low64, sizeof(low64));
349 memcpy(&high64_, &high64, sizeof(high64));
350 if (IsPlaced()) {
351 VIXL_ASSERT(code_buffer != NULL);
352 RewriteValueInCode(code_buffer);
353 }
354 }
355
356 void UpdateValue(T new_value, const Assembler* assembler);
357 void UpdateValue(T high64, T low64, const Assembler* assembler);
358
359 private:
RewriteValueInCode(uint8_t * code_buffer)360 void RewriteValueInCode(uint8_t* code_buffer) {
361 VIXL_ASSERT(IsPlaced());
362 VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
363 switch (GetSize()) {
364 case kSRegSizeInBytes:
365 *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
366 GetRawValue32();
367 break;
368 case kDRegSizeInBytes:
369 *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
370 GetRawValue64();
371 break;
372 default:
373 VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
374 uint64_t* base_address =
375 reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
376 *base_address = GetRawValue128Low64();
377 *(base_address + 1) = GetRawValue128High64();
378 }
379 }
380 };
381
382
// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
401
402
// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
419
420
421 // Assembler.
422 class Assembler : public vixl::internal::AssemblerBase {
423 public:
424 explicit Assembler(
425 PositionIndependentCodeOption pic = PositionIndependentCode)
pic_(pic)426 : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
427
428 #ifdef PANDA_BUILD
429 explicit Assembler(
430 size_t capacity,
431 PositionIndependentCodeOption pic = PositionIndependentCode) = delete;
432 #else
433 explicit Assembler(
434 size_t capacity,
435 PositionIndependentCodeOption pic = PositionIndependentCode)
AssemblerBase(capacity)436 : AssemblerBase(capacity),
437 pic_(pic),
438 cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
439 #endif
440 Assembler(byte* buffer,
441 size_t capacity,
442 PositionIndependentCodeOption pic = PositionIndependentCode)
AssemblerBase(buffer,capacity)443 : AssemblerBase(buffer, capacity),
444 pic_(pic),
445 cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
446
447 // Upon destruction, the code will assert that one of the following is true:
448 // * The Assembler object has not been used.
449 // * Nothing has been emitted since the last Reset() call.
450 // * Nothing has been emitted since the last FinalizeCode() call.
~Assembler()451 ~Assembler() {}
452
453 // System functions.
454
455 // Start generating code from the beginning of the buffer, discarding any code
456 // and data that has already been emitted into the buffer.
457 void Reset();
458
459 // Bind a label to the current PC.
460 void bind(Label* label);
461
462 // Bind a label to a specified offset from the start of the buffer.
463 void BindToOffset(Label* label, ptrdiff_t offset);
464
465 // Place a literal at the current PC.
466 void place(RawLiteral* literal);
467
468 VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
469 return GetCursorOffset();
470 }
471
472 VIXL_DEPRECATED("GetBuffer().GetCapacity()",
473 ptrdiff_t GetBufferEndOffset() const) {
474 return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
475 }
476 VIXL_DEPRECATED("GetBuffer().GetCapacity()",
477 ptrdiff_t BufferEndOffset() const) {
478 return GetBuffer().GetCapacity();
479 }
480
481 // Return the address of a bound label.
482 template <typename T>
GetLabelAddress(const Label * label)483 T GetLabelAddress(const Label* label) const {
484 VIXL_ASSERT(label->IsBound());
485 VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
486 return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
487 }
488
GetInstructionAt(ptrdiff_t instruction_offset)489 Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
490 return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
491 }
492 VIXL_DEPRECATED("GetInstructionAt",
493 Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
494 return GetInstructionAt(instruction_offset);
495 }
496
GetInstructionOffset(Instruction * instruction)497 ptrdiff_t GetInstructionOffset(Instruction* instruction) {
498 VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
499 ptrdiff_t offset =
500 instruction - GetBuffer()->GetStartAddress<Instruction*>();
501 VIXL_ASSERT((0 <= offset) &&
502 (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
503 return offset;
504 }
505 VIXL_DEPRECATED("GetInstructionOffset",
506 ptrdiff_t InstructionOffset(Instruction* instruction)) {
507 return GetInstructionOffset(instruction);
508 }
509
510 // Instruction set functions.
511
512 // Branch / Jump instructions.
513
514 // Branch to register.
515 void br(const Register& xn);
516
517 // Branch with link to register.
518 void blr(const Register& xn);
519
520 // Branch to register with return hint.
521 void ret(const Register& xn = lr);
522
523 // Branch to register, with pointer authentication. Using key A and a modifier
524 // of zero [Armv8.3].
525 void braaz(const Register& xn);
526
527 // Branch to register, with pointer authentication. Using key B and a modifier
528 // of zero [Armv8.3].
529 void brabz(const Register& xn);
530
531 // Branch with link to register, with pointer authentication. Using key A and
532 // a modifier of zero [Armv8.3].
533 void blraaz(const Register& xn);
534
535 // Branch with link to register, with pointer authentication. Using key B and
536 // a modifier of zero [Armv8.3].
537 void blrabz(const Register& xn);
538
539 // Return from subroutine, with pointer authentication. Using key A [Armv8.3].
540 void retaa();
541
542 // Return from subroutine, with pointer authentication. Using key B [Armv8.3].
543 void retab();
544
545 // Branch to register, with pointer authentication. Using key A [Armv8.3].
546 void braa(const Register& xn, const Register& xm);
547
548 // Branch to register, with pointer authentication. Using key B [Armv8.3].
549 void brab(const Register& xn, const Register& xm);
550
551 // Branch with link to register, with pointer authentication. Using key A
552 // [Armv8.3].
553 void blraa(const Register& xn, const Register& xm);
554
555 // Branch with link to register, with pointer authentication. Using key B
556 // [Armv8.3].
557 void blrab(const Register& xn, const Register& xm);
558
559 // Unconditional branch to label.
560 void b(Label* label);
561
562 // Conditional branch to label.
563 void b(Label* label, Condition cond);
564
565 // Unconditional branch to PC offset.
566 void b(int64_t imm26);
567
568 // Conditional branch to PC offset.
569 void b(int64_t imm19, Condition cond);
570
571 // Branch with link to label.
572 void bl(Label* label);
573
574 // Branch with link to PC offset.
575 void bl(int64_t imm26);
576
577 // Compare and branch to label if zero.
578 void cbz(const Register& rt, Label* label);
579
580 // Compare and branch to PC offset if zero.
581 void cbz(const Register& rt, int64_t imm19);
582
583 // Compare and branch to label if not zero.
584 void cbnz(const Register& rt, Label* label);
585
586 // Compare and branch to PC offset if not zero.
587 void cbnz(const Register& rt, int64_t imm19);
588
589 // Table lookup from one register.
590 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
591
592 // Table lookup from two registers.
593 void tbl(const VRegister& vd,
594 const VRegister& vn,
595 const VRegister& vn2,
596 const VRegister& vm);
597
598 // Table lookup from three registers.
599 void tbl(const VRegister& vd,
600 const VRegister& vn,
601 const VRegister& vn2,
602 const VRegister& vn3,
603 const VRegister& vm);
604
605 // Table lookup from four registers.
606 void tbl(const VRegister& vd,
607 const VRegister& vn,
608 const VRegister& vn2,
609 const VRegister& vn3,
610 const VRegister& vn4,
611 const VRegister& vm);
612
613 // Table lookup extension from one register.
614 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
615
616 // Table lookup extension from two registers.
617 void tbx(const VRegister& vd,
618 const VRegister& vn,
619 const VRegister& vn2,
620 const VRegister& vm);
621
622 // Table lookup extension from three registers.
623 void tbx(const VRegister& vd,
624 const VRegister& vn,
625 const VRegister& vn2,
626 const VRegister& vn3,
627 const VRegister& vm);
628
629 // Table lookup extension from four registers.
630 void tbx(const VRegister& vd,
631 const VRegister& vn,
632 const VRegister& vn2,
633 const VRegister& vn3,
634 const VRegister& vn4,
635 const VRegister& vm);
636
637 // Test bit and branch to label if zero.
638 void tbz(const Register& rt, unsigned bit_pos, Label* label);
639
640 // Test bit and branch to PC offset if zero.
641 void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);
642
643 // Test bit and branch to label if not zero.
644 void tbnz(const Register& rt, unsigned bit_pos, Label* label);
645
646 // Test bit and branch to PC offset if not zero.
647 void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);
648
649 // Address calculation instructions.
650 // Calculate a PC-relative address. Unlike for branches the offset in adr is
651 // unscaled (i.e. the result can be unaligned).
652
653 // Calculate the address of a label.
654 void adr(const Register& xd, Label* label);
655
656 // Calculate the address of a PC offset.
657 void adr(const Register& xd, int64_t imm21);
658
659 // Calculate the page address of a label.
660 void adrp(const Register& xd, Label* label);
661
662 // Calculate the page address of a PC offset.
663 void adrp(const Register& xd, int64_t imm21);
664
665 // Data Processing instructions.
666
667 // Add.
668 void add(const Register& rd, const Register& rn, const Operand& operand);
669
670 // Add and update status flags.
671 void adds(const Register& rd, const Register& rn, const Operand& operand);
672
673 // Compare negative.
674 void cmn(const Register& rn, const Operand& operand);
675
676 // Subtract.
677 void sub(const Register& rd, const Register& rn, const Operand& operand);
678
679 // Subtract and update status flags.
680 void subs(const Register& rd, const Register& rn, const Operand& operand);
681
682 // Compare.
683 void cmp(const Register& rn, const Operand& operand);
684
685 // Negate.
686 void neg(const Register& rd, const Operand& operand);
687
688 // Negate and update status flags.
689 void negs(const Register& rd, const Operand& operand);
690
691 // Add with carry bit.
692 void adc(const Register& rd, const Register& rn, const Operand& operand);
693
694 // Add with carry bit and update status flags.
695 void adcs(const Register& rd, const Register& rn, const Operand& operand);
696
697 // Subtract with carry bit.
698 void sbc(const Register& rd, const Register& rn, const Operand& operand);
699
700 // Subtract with carry bit and update status flags.
701 void sbcs(const Register& rd, const Register& rn, const Operand& operand);
702
703 // Rotate register right and insert into NZCV flags under the control of a
704 // mask [Armv8.4].
705 void rmif(const Register& xn, unsigned rotation, StatusFlags flags);
706
707 // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
708 void setf8(const Register& rn);
709
710 // Set NZCV flags from register, treated as an 16-bit value [Armv8.4].
711 void setf16(const Register& rn);
712
713 // Negate with carry bit.
714 void ngc(const Register& rd, const Operand& operand);
715
716 // Negate with carry bit and update status flags.
717 void ngcs(const Register& rd, const Operand& operand);
718
719 // Logical instructions.
720
721 // Bitwise and (A & B).
722 void and_(const Register& rd, const Register& rn, const Operand& operand);
723
724 // Bitwise and (A & B) and update status flags.
725 void ands(const Register& rd, const Register& rn, const Operand& operand);
726
727 // Bit test and set flags.
728 void tst(const Register& rn, const Operand& operand);
729
730 // Bit clear (A & ~B).
731 void bic(const Register& rd, const Register& rn, const Operand& operand);
732
733 // Bit clear (A & ~B) and update status flags.
734 void bics(const Register& rd, const Register& rn, const Operand& operand);
735
736 // Bitwise or (A | B).
737 void orr(const Register& rd, const Register& rn, const Operand& operand);
738
739 // Bitwise nor (A | ~B).
740 void orn(const Register& rd, const Register& rn, const Operand& operand);
741
742 // Bitwise eor/xor (A ^ B).
743 void eor(const Register& rd, const Register& rn, const Operand& operand);
744
745 // Bitwise enor/xnor (A ^ ~B).
746 void eon(const Register& rd, const Register& rn, const Operand& operand);
747
748 // Logical shift left by variable.
749 void lslv(const Register& rd, const Register& rn, const Register& rm);
750
751 // Logical shift right by variable.
752 void lsrv(const Register& rd, const Register& rn, const Register& rm);
753
754 // Arithmetic shift right by variable.
755 void asrv(const Register& rd, const Register& rn, const Register& rm);
756
757 // Rotate right by variable.
758 void rorv(const Register& rd, const Register& rn, const Register& rm);
759
760 // Bitfield instructions.
761
762 // Bitfield move.
763 void bfm(const Register& rd,
764 const Register& rn,
765 unsigned immr,
766 unsigned imms);
767
768 // Signed bitfield move.
769 void sbfm(const Register& rd,
770 const Register& rn,
771 unsigned immr,
772 unsigned imms);
773
774 // Unsigned bitfield move.
775 void ubfm(const Register& rd,
776 const Register& rn,
777 unsigned immr,
778 unsigned imms);
779
780 // Bfm aliases.
781
782 // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)783 void bfi(const Register& rd,
784 const Register& rn,
785 unsigned lsb,
786 unsigned width) {
787 VIXL_ASSERT(width >= 1);
788 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
789 bfm(rd,
790 rn,
791 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
792 width - 1);
793 }
794
795 // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)796 void bfxil(const Register& rd,
797 const Register& rn,
798 unsigned lsb,
799 unsigned width) {
800 VIXL_ASSERT(width >= 1);
801 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
802 bfm(rd, rn, lsb, lsb + width - 1);
803 }
804
805 // Bitfield clear [Armv8.2].
bfc(const Register & rd,unsigned lsb,unsigned width)806 void bfc(const Register& rd, unsigned lsb, unsigned width) {
807 bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
808 }
809
810 // Sbfm aliases.
811
812 // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)813 void asr(const Register& rd, const Register& rn, unsigned shift) {
814 VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
815 sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
816 }
817
818 // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)819 void sbfiz(const Register& rd,
820 const Register& rn,
821 unsigned lsb,
822 unsigned width) {
823 VIXL_ASSERT(width >= 1);
824 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
825 sbfm(rd,
826 rn,
827 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
828 width - 1);
829 }
830
831 // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)832 void sbfx(const Register& rd,
833 const Register& rn,
834 unsigned lsb,
835 unsigned width) {
836 VIXL_ASSERT(width >= 1);
837 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
838 sbfm(rd, rn, lsb, lsb + width - 1);
839 }
840
841 // Signed extend byte.
sxtb(const Register & rd,const Register & rn)842 void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }
843
844 // Signed extend halfword.
sxth(const Register & rd,const Register & rn)845 void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }
846
847 // Signed extend word.
sxtw(const Register & rd,const Register & rn)848 void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
849
850 // Ubfm aliases.
851
852 // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)853 void lsl(const Register& rd, const Register& rn, unsigned shift) {
854 unsigned reg_size = rd.GetSizeInBits();
855 VIXL_ASSERT(shift < reg_size);
856 // NOLINTNEXTLINE(clang-analyzer-core.DivideZero)
857 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
858 }
859
860 // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)861 void lsr(const Register& rd, const Register& rn, unsigned shift) {
862 VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
863 ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
864 }
865
866 // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)867 void ubfiz(const Register& rd,
868 const Register& rn,
869 unsigned lsb,
870 unsigned width) {
871 VIXL_ASSERT(width >= 1);
872 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
873 ubfm(rd,
874 rn,
875 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
876 width - 1);
877 }
878
879 // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)880 void ubfx(const Register& rd,
881 const Register& rn,
882 unsigned lsb,
883 unsigned width) {
884 VIXL_ASSERT(width >= 1);
885 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
886 ubfm(rd, rn, lsb, lsb + width - 1);
887 }
888
  // Unsigned extend byte: zero-extend bits <7:0> of rn into rd.
  // Alias of UBFM with immr = 0 and imms = 7.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }
891
  // Unsigned extend halfword: zero-extend bits <15:0> of rn into rd.
  // Alias of UBFM with immr = 0 and imms = 15.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }
894
  // Unsigned extend word: zero-extend bits <31:0> of rn into rd.
  // Alias of UBFM with immr = 0 and imms = 31.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
897
898 // Extract.
899 void extr(const Register& rd,
900 const Register& rn,
901 const Register& rm,
902 unsigned lsb);
903
904 // Conditional select: rd = cond ? rn : rm.
905 void csel(const Register& rd,
906 const Register& rn,
907 const Register& rm,
908 Condition cond);
909
910 // Conditional select increment: rd = cond ? rn : rm + 1.
911 void csinc(const Register& rd,
912 const Register& rn,
913 const Register& rm,
914 Condition cond);
915
916 // Conditional select inversion: rd = cond ? rn : ~rm.
917 void csinv(const Register& rd,
918 const Register& rn,
919 const Register& rm,
920 Condition cond);
921
922 // Conditional select negation: rd = cond ? rn : -rm.
923 void csneg(const Register& rd,
924 const Register& rn,
925 const Register& rm,
926 Condition cond);
927
928 // Conditional set: rd = cond ? 1 : 0.
929 void cset(const Register& rd, Condition cond);
930
931 // Conditional set mask: rd = cond ? -1 : 0.
932 void csetm(const Register& rd, Condition cond);
933
934 // Conditional increment: rd = cond ? rn + 1 : rn.
935 void cinc(const Register& rd, const Register& rn, Condition cond);
936
937 // Conditional invert: rd = cond ? ~rn : rn.
938 void cinv(const Register& rd, const Register& rn, Condition cond);
939
940 // Conditional negate: rd = cond ? -rn : rn.
941 void cneg(const Register& rd, const Register& rn, Condition cond);
942
  // Rotate right.
  // Alias of EXTR with both source registers equal: extracting from a
  // doubled copy of rs is a rotation by `shift`.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }
947
948 // Conditional comparison.
949
950 // Conditional compare negative.
951 void ccmn(const Register& rn,
952 const Operand& operand,
953 StatusFlags nzcv,
954 Condition cond);
955
956 // Conditional compare.
957 void ccmp(const Register& rn,
958 const Operand& operand,
959 StatusFlags nzcv,
960 Condition cond);
961
  // CRC-32 checksum from byte.
  void crc32b(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from word.
  void crc32w(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from double word. Note that the data operand (xm) is an
  // X register; the accumulator (wn) and result (wd) are W registers.
  void crc32x(const Register& wd, const Register& wn, const Register& xm);

  // CRC-32C checksum from byte.
  void crc32cb(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from half-word.
  void crc32ch(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from word.
  void crc32cw(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from double word. As for crc32x, the data operand (xm)
  // is an X register.
  void crc32cx(const Register& wd, const Register& wn, const Register& xm);
985
986 // Multiply.
987 void mul(const Register& rd, const Register& rn, const Register& rm);
988
989 // Negated multiply.
990 void mneg(const Register& rd, const Register& rn, const Register& rm);
991
992 // Signed long multiply: 32 x 32 -> 64-bit.
993 void smull(const Register& xd, const Register& wn, const Register& wm);
994
995 // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
996 void smulh(const Register& xd, const Register& xn, const Register& xm);
997
998 // Multiply and accumulate.
999 void madd(const Register& rd,
1000 const Register& rn,
1001 const Register& rm,
1002 const Register& ra);
1003
1004 // Multiply and subtract.
1005 void msub(const Register& rd,
1006 const Register& rn,
1007 const Register& rm,
1008 const Register& ra);
1009
1010 // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1011 void smaddl(const Register& xd,
1012 const Register& wn,
1013 const Register& wm,
1014 const Register& xa);
1015
1016 // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1017 void umaddl(const Register& xd,
1018 const Register& wn,
1019 const Register& wm,
1020 const Register& xa);
1021
  // Unsigned long multiply: 32 x 32 -> 64-bit.
  // Alias of UMADDL with the addend fixed to the zero register (xzr).
  void umull(const Register& xd, const Register& wn, const Register& wm) {
    umaddl(xd, wn, wm, xzr);
  }
1026
1027 // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1028 void umulh(const Register& xd, const Register& xn, const Register& xm);
1029
1030 // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1031 void smsubl(const Register& xd,
1032 const Register& wn,
1033 const Register& wm,
1034 const Register& xa);
1035
1036 // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1037 void umsubl(const Register& xd,
1038 const Register& wn,
1039 const Register& wm,
1040 const Register& xa);
1041
1042 // Signed integer divide.
1043 void sdiv(const Register& rd, const Register& rn, const Register& rm);
1044
1045 // Unsigned integer divide.
1046 void udiv(const Register& rd, const Register& rn, const Register& rm);
1047
1048 // Bit reverse.
1049 void rbit(const Register& rd, const Register& rn);
1050
1051 // Reverse bytes in 16-bit half words.
1052 void rev16(const Register& rd, const Register& rn);
1053
1054 // Reverse bytes in 32-bit words.
1055 void rev32(const Register& xd, const Register& xn);
1056
  // Reverse bytes in 64-bit general purpose register, an alias for rev
  // [Armv8.2]. Both operands must be X registers; the asserts below enforce
  // this before delegating to rev.
  void rev64(const Register& xd, const Register& xn) {
    VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
    rev(xd, xn);
  }
1063
1064 // Reverse bytes.
1065 void rev(const Register& rd, const Register& rn);
1066
1067 // Count leading zeroes.
1068 void clz(const Register& rd, const Register& rn);
1069
1070 // Count leading sign bits.
1071 void cls(const Register& rd, const Register& rn);
1072
1073 // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
1074 void pacia(const Register& xd, const Register& rn);
1075
1076 // Pointer Authentication Code for Instruction address, using key A and a
1077 // modifier of zero [Armv8.3].
1078 void paciza(const Register& xd);
1079
1080 // Pointer Authentication Code for Instruction address, using key A, with
1081 // address in x17 and modifier in x16 [Armv8.3].
1082 void pacia1716();
1083
1084 // Pointer Authentication Code for Instruction address, using key A, with
1085 // address in LR and modifier in SP [Armv8.3].
1086 void paciasp();
1087
1088 // Pointer Authentication Code for Instruction address, using key A, with
1089 // address in LR and a modifier of zero [Armv8.3].
1090 void paciaz();
1091
1092 // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
1093 void pacib(const Register& xd, const Register& xn);
1094
1095 // Pointer Authentication Code for Instruction address, using key B and a
1096 // modifier of zero [Armv8.3].
1097 void pacizb(const Register& xd);
1098
1099 // Pointer Authentication Code for Instruction address, using key B, with
1100 // address in x17 and modifier in x16 [Armv8.3].
1101 void pacib1716();
1102
1103 // Pointer Authentication Code for Instruction address, using key B, with
1104 // address in LR and modifier in SP [Armv8.3].
1105 void pacibsp();
1106
1107 // Pointer Authentication Code for Instruction address, using key B, with
1108 // address in LR and a modifier of zero [Armv8.3].
1109 void pacibz();
1110
1111 // Pointer Authentication Code for Data address, using key A [Armv8.3].
1112 void pacda(const Register& xd, const Register& xn);
1113
1114 // Pointer Authentication Code for Data address, using key A and a modifier of
1115 // zero [Armv8.3].
1116 void pacdza(const Register& xd);
1117
1118 // Pointer Authentication Code for Data address, using key B [Armv8.3].
1119 void pacdb(const Register& xd, const Register& xn);
1120
1121 // Pointer Authentication Code for Data address, using key B and a modifier of
1122 // zero [Armv8.3].
1123 void pacdzb(const Register& xd);
1124
1125 // Pointer Authentication Code, using Generic key [Armv8.3].
1126 void pacga(const Register& xd, const Register& xn, const Register& xm);
1127
1128 // Authenticate Instruction address, using key A [Armv8.3].
1129 void autia(const Register& xd, const Register& xn);
1130
1131 // Authenticate Instruction address, using key A and a modifier of zero
1132 // [Armv8.3].
1133 void autiza(const Register& xd);
1134
1135 // Authenticate Instruction address, using key A, with address in x17 and
1136 // modifier in x16 [Armv8.3].
1137 void autia1716();
1138
1139 // Authenticate Instruction address, using key A, with address in LR and
1140 // modifier in SP [Armv8.3].
1141 void autiasp();
1142
1143 // Authenticate Instruction address, using key A, with address in LR and a
1144 // modifier of zero [Armv8.3].
1145 void autiaz();
1146
1147 // Authenticate Instruction address, using key B [Armv8.3].
1148 void autib(const Register& xd, const Register& xn);
1149
1150 // Authenticate Instruction address, using key B and a modifier of zero
1151 // [Armv8.3].
1152 void autizb(const Register& xd);
1153
1154 // Authenticate Instruction address, using key B, with address in x17 and
1155 // modifier in x16 [Armv8.3].
1156 void autib1716();
1157
1158 // Authenticate Instruction address, using key B, with address in LR and
1159 // modifier in SP [Armv8.3].
1160 void autibsp();
1161
1162 // Authenticate Instruction address, using key B, with address in LR and a
1163 // modifier of zero [Armv8.3].
1164 void autibz();
1165
1166 // Authenticate Data address, using key A [Armv8.3].
1167 void autda(const Register& xd, const Register& xn);
1168
1169 // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
1170 void autdza(const Register& xd);
1171
1172 // Authenticate Data address, using key B [Armv8.3].
1173 void autdb(const Register& xd, const Register& xn);
1174
1175 // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
1176 void autdzb(const Register& xd);
1177
1178 // Strip Pointer Authentication Code of Data address [Armv8.3].
1179 void xpacd(const Register& xd);
1180
1181 // Strip Pointer Authentication Code of Instruction address [Armv8.3].
1182 void xpaci(const Register& xd);
1183
1184 // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
1185 void xpaclri();
1186
1187 // Memory instructions.
1188
1189 // Load integer or FP register.
1190 void ldr(const CPURegister& rt,
1191 const MemOperand& src,
1192 LoadStoreScalingOption option = PreferScaledOffset);
1193
1194 // Store integer or FP register.
1195 void str(const CPURegister& rt,
1196 const MemOperand& dst,
1197 LoadStoreScalingOption option = PreferScaledOffset);
1198
1199 // Load word with sign extension.
1200 void ldrsw(const Register& xt,
1201 const MemOperand& src,
1202 LoadStoreScalingOption option = PreferScaledOffset);
1203
1204 // Load byte.
1205 void ldrb(const Register& rt,
1206 const MemOperand& src,
1207 LoadStoreScalingOption option = PreferScaledOffset);
1208
1209 // Store byte.
1210 void strb(const Register& rt,
1211 const MemOperand& dst,
1212 LoadStoreScalingOption option = PreferScaledOffset);
1213
1214 // Load byte with sign extension.
1215 void ldrsb(const Register& rt,
1216 const MemOperand& src,
1217 LoadStoreScalingOption option = PreferScaledOffset);
1218
1219 // Load half-word.
1220 void ldrh(const Register& rt,
1221 const MemOperand& src,
1222 LoadStoreScalingOption option = PreferScaledOffset);
1223
1224 // Store half-word.
1225 void strh(const Register& rt,
1226 const MemOperand& dst,
1227 LoadStoreScalingOption option = PreferScaledOffset);
1228
1229 // Load half-word with sign extension.
1230 void ldrsh(const Register& rt,
1231 const MemOperand& src,
1232 LoadStoreScalingOption option = PreferScaledOffset);
1233
1234 // Load integer or FP register (with unscaled offset).
1235 void ldur(const CPURegister& rt,
1236 const MemOperand& src,
1237 LoadStoreScalingOption option = PreferUnscaledOffset);
1238
1239 // Store integer or FP register (with unscaled offset).
1240 void stur(const CPURegister& rt,
1241 const MemOperand& src,
1242 LoadStoreScalingOption option = PreferUnscaledOffset);
1243
1244 // Load word with sign extension.
1245 void ldursw(const Register& xt,
1246 const MemOperand& src,
1247 LoadStoreScalingOption option = PreferUnscaledOffset);
1248
1249 // Load byte (with unscaled offset).
1250 void ldurb(const Register& rt,
1251 const MemOperand& src,
1252 LoadStoreScalingOption option = PreferUnscaledOffset);
1253
1254 // Store byte (with unscaled offset).
1255 void sturb(const Register& rt,
1256 const MemOperand& dst,
1257 LoadStoreScalingOption option = PreferUnscaledOffset);
1258
1259 // Load byte with sign extension (and unscaled offset).
1260 void ldursb(const Register& rt,
1261 const MemOperand& src,
1262 LoadStoreScalingOption option = PreferUnscaledOffset);
1263
1264 // Load half-word (with unscaled offset).
1265 void ldurh(const Register& rt,
1266 const MemOperand& src,
1267 LoadStoreScalingOption option = PreferUnscaledOffset);
1268
1269 // Store half-word (with unscaled offset).
1270 void sturh(const Register& rt,
1271 const MemOperand& dst,
1272 LoadStoreScalingOption option = PreferUnscaledOffset);
1273
1274 // Load half-word with sign extension (and unscaled offset).
1275 void ldursh(const Register& rt,
1276 const MemOperand& src,
1277 LoadStoreScalingOption option = PreferUnscaledOffset);
1278
1279 // Load double-word with pointer authentication, using data key A and a
1280 // modifier of zero [Armv8.3].
1281 void ldraa(const Register& xt, const MemOperand& src);
1282
1283 // Load double-word with pointer authentication, using data key B and a
1284 // modifier of zero [Armv8.3].
1285 void ldrab(const Register& xt, const MemOperand& src);
1286
1287 // Load integer or FP register pair.
1288 void ldp(const CPURegister& rt,
1289 const CPURegister& rt2,
1290 const MemOperand& src);
1291
1292 // Store integer or FP register pair.
1293 void stp(const CPURegister& rt,
1294 const CPURegister& rt2,
1295 const MemOperand& dst);
1296
1297 // Load word pair with sign extension.
1298 void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);
1299
1300 // Load integer or FP register pair, non-temporal.
1301 void ldnp(const CPURegister& rt,
1302 const CPURegister& rt2,
1303 const MemOperand& src);
1304
1305 // Store integer or FP register pair, non-temporal.
1306 void stnp(const CPURegister& rt,
1307 const CPURegister& rt2,
1308 const MemOperand& dst);
1309
1310 // Load integer or FP register from literal pool.
1311 void ldr(const CPURegister& rt, RawLiteral* literal);
1312
1313 // Load word with sign extension from literal pool.
1314 void ldrsw(const Register& xt, RawLiteral* literal);
1315
1316 // Load integer or FP register from pc + imm19 << 2.
1317 void ldr(const CPURegister& rt, int64_t imm19);
1318
1319 // Load word with sign extension from pc + imm19 << 2.
1320 void ldrsw(const Register& xt, int64_t imm19);
1321
1322 // Store exclusive byte.
1323 void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1324
1325 // Store exclusive half-word.
1326 void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1327
1328 // Store exclusive register.
1329 void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1330
1331 // Load exclusive byte.
1332 void ldxrb(const Register& rt, const MemOperand& src);
1333
1334 // Load exclusive half-word.
1335 void ldxrh(const Register& rt, const MemOperand& src);
1336
1337 // Load exclusive register.
1338 void ldxr(const Register& rt, const MemOperand& src);
1339
1340 // Store exclusive register pair.
1341 void stxp(const Register& rs,
1342 const Register& rt,
1343 const Register& rt2,
1344 const MemOperand& dst);
1345
1346 // Load exclusive register pair.
1347 void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1348
1349 // Store-release exclusive byte.
1350 void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1351
1352 // Store-release exclusive half-word.
1353 void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1354
1355 // Store-release exclusive register.
1356 void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1357
1358 // Load-acquire exclusive byte.
1359 void ldaxrb(const Register& rt, const MemOperand& src);
1360
1361 // Load-acquire exclusive half-word.
1362 void ldaxrh(const Register& rt, const MemOperand& src);
1363
1364 // Load-acquire exclusive register.
1365 void ldaxr(const Register& rt, const MemOperand& src);
1366
1367 // Store-release exclusive register pair.
1368 void stlxp(const Register& rs,
1369 const Register& rt,
1370 const Register& rt2,
1371 const MemOperand& dst);
1372
1373 // Load-acquire exclusive register pair.
1374 void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1375
1376 // Store-release byte.
1377 void stlrb(const Register& rt, const MemOperand& dst);
1378
1379 // Store-release half-word.
1380 void stlrh(const Register& rt, const MemOperand& dst);
1381
1382 // Store-release register.
1383 void stlr(const Register& rt, const MemOperand& dst);
1384
1385 // Load-acquire byte.
1386 void ldarb(const Register& rt, const MemOperand& src);
1387
1388 // Load-acquire half-word.
1389 void ldarh(const Register& rt, const MemOperand& src);
1390
1391 // Load-acquire register.
1392 void ldar(const Register& rt, const MemOperand& src);
1393
1394 // Store LORelease byte [Armv8.1].
1395 void stllrb(const Register& rt, const MemOperand& dst);
1396
1397 // Store LORelease half-word [Armv8.1].
1398 void stllrh(const Register& rt, const MemOperand& dst);
1399
1400 // Store LORelease register [Armv8.1].
1401 void stllr(const Register& rt, const MemOperand& dst);
1402
1403 // Load LORelease byte [Armv8.1].
1404 void ldlarb(const Register& rt, const MemOperand& src);
1405
1406 // Load LORelease half-word [Armv8.1].
1407 void ldlarh(const Register& rt, const MemOperand& src);
1408
1409 // Load LORelease register [Armv8.1].
1410 void ldlar(const Register& rt, const MemOperand& src);
1411
1412 // Compare and Swap word or doubleword in memory [Armv8.1].
1413 void cas(const Register& rs, const Register& rt, const MemOperand& src);
1414
1415 // Compare and Swap word or doubleword in memory [Armv8.1].
1416 void casa(const Register& rs, const Register& rt, const MemOperand& src);
1417
1418 // Compare and Swap word or doubleword in memory [Armv8.1].
1419 void casl(const Register& rs, const Register& rt, const MemOperand& src);
1420
1421 // Compare and Swap word or doubleword in memory [Armv8.1].
1422 void casal(const Register& rs, const Register& rt, const MemOperand& src);
1423
1424 // Compare and Swap byte in memory [Armv8.1].
1425 void casb(const Register& rs, const Register& rt, const MemOperand& src);
1426
1427 // Compare and Swap byte in memory [Armv8.1].
1428 void casab(const Register& rs, const Register& rt, const MemOperand& src);
1429
1430 // Compare and Swap byte in memory [Armv8.1].
1431 void caslb(const Register& rs, const Register& rt, const MemOperand& src);
1432
1433 // Compare and Swap byte in memory [Armv8.1].
1434 void casalb(const Register& rs, const Register& rt, const MemOperand& src);
1435
1436 // Compare and Swap halfword in memory [Armv8.1].
1437 void cash(const Register& rs, const Register& rt, const MemOperand& src);
1438
1439 // Compare and Swap halfword in memory [Armv8.1].
1440 void casah(const Register& rs, const Register& rt, const MemOperand& src);
1441
1442 // Compare and Swap halfword in memory [Armv8.1].
1443 void caslh(const Register& rs, const Register& rt, const MemOperand& src);
1444
1445 // Compare and Swap halfword in memory [Armv8.1].
1446 void casalh(const Register& rs, const Register& rt, const MemOperand& src);
1447
1448 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1449 void casp(const Register& rs,
1450 const Register& rs2,
1451 const Register& rt,
1452 const Register& rt2,
1453 const MemOperand& src);
1454
1455 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1456 void caspa(const Register& rs,
1457 const Register& rs2,
1458 const Register& rt,
1459 const Register& rt2,
1460 const MemOperand& src);
1461
1462 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1463 void caspl(const Register& rs,
1464 const Register& rs2,
1465 const Register& rt,
1466 const Register& rt2,
1467 const MemOperand& src);
1468
1469 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1470 void caspal(const Register& rs,
1471 const Register& rs2,
1472 const Register& rt,
1473 const Register& rt2,
1474 const MemOperand& src);
1475
1476 // Store-release byte (with unscaled offset) [Armv8.4].
1477 void stlurb(const Register& rt, const MemOperand& dst);
1478
1479 // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
1480 void ldapurb(const Register& rt, const MemOperand& src);
1481
1482 // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
1483 void ldapursb(const Register& rt, const MemOperand& src);
1484
1485 // Store-release half-word (with unscaled offset) [Armv8.4].
1486 void stlurh(const Register& rt, const MemOperand& dst);
1487
1488 // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
1489 void ldapurh(const Register& rt, const MemOperand& src);
1490
1491 // Load-acquire RCpc Register signed half-word (with unscaled offset)
1492 // [Armv8.4].
1493 void ldapursh(const Register& rt, const MemOperand& src);
1494
1495 // Store-release word or double-word (with unscaled offset) [Armv8.4].
1496 void stlur(const Register& rt, const MemOperand& dst);
1497
1498 // Load-acquire RCpc Register word or double-word (with unscaled offset)
1499 // [Armv8.4].
1500 void ldapur(const Register& rt, const MemOperand& src);
1501
1502 // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
1503 void ldapursw(const Register& xt, const MemOperand& src);
1504
1505 // Atomic add on byte in memory [Armv8.1]
1506 void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
1507
1508 // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
1509 void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
1510
1511 // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
1512 void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
1513
1514 // Atomic add on byte in memory, with Load-acquire and Store-release semantics
1515 // [Armv8.1]
1516 void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
1517
1518 // Atomic add on halfword in memory [Armv8.1]
1519 void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
1520
1521 // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
1522 void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
1523
1524 // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
1525 void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
1526
1527 // Atomic add on halfword in memory, with Load-acquire and Store-release
1528 // semantics [Armv8.1]
1529 void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
1530
1531 // Atomic add on word or doubleword in memory [Armv8.1]
1532 void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
1533
1534 // Atomic add on word or doubleword in memory, with Load-acquire semantics
1535 // [Armv8.1]
1536 void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
1537
1538 // Atomic add on word or doubleword in memory, with Store-release semantics
1539 // [Armv8.1]
1540 void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
1541
1542 // Atomic add on word or doubleword in memory, with Load-acquire and
1543 // Store-release semantics [Armv8.1]
1544 void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
1545
1546 // Atomic bit clear on byte in memory [Armv8.1]
1547 void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
1548
1549 // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
1550 void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
1551
1552 // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
1553 void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
1554
1555 // Atomic bit clear on byte in memory, with Load-acquire and Store-release
1556 // semantics [Armv8.1]
1557 void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
1558
1559 // Atomic bit clear on halfword in memory [Armv8.1]
1560 void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
1561
1562 // Atomic bit clear on halfword in memory, with Load-acquire semantics
1563 // [Armv8.1]
1564 void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
1565
1566 // Atomic bit clear on halfword in memory, with Store-release semantics
1567 // [Armv8.1]
1568 void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
1569
1570 // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
1571 // semantics [Armv8.1]
1572 void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
1573
1574 // Atomic bit clear on word or doubleword in memory [Armv8.1]
1575 void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
1576
1577 // Atomic bit clear on word or doubleword in memory, with Load-acquire
1578 // semantics [Armv8.1]
1579 void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
1580
1581 // Atomic bit clear on word or doubleword in memory, with Store-release
1582 // semantics [Armv8.1]
1583 void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
1584
1585 // Atomic bit clear on word or doubleword in memory, with Load-acquire and
1586 // Store-release semantics [Armv8.1]
1587 void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
1588
1589 // Atomic exclusive OR on byte in memory [Armv8.1]
1590 void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
1591
1592 // Atomic exclusive OR on byte in memory, with Load-acquire semantics
1593 // [Armv8.1]
1594 void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
1595
1596 // Atomic exclusive OR on byte in memory, with Store-release semantics
1597 // [Armv8.1]
1598 void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
1599
1600 // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
1601 // semantics [Armv8.1]
1602 void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
1603
1604 // Atomic exclusive OR on halfword in memory [Armv8.1]
1605 void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
1606
1607 // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
1608 // [Armv8.1]
1609 void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
1610
1611 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1612 // [Armv8.1]
1613 void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
1614
1615 // Atomic exclusive OR on halfword in memory, with Load-acquire and
1616 // Store-release semantics [Armv8.1]
1617 void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
1618
1619 // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
1620 void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
1621
1622 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
1623 // semantics [Armv8.1]
1624 void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
1625
1626 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1627 // semantics [Armv8.1]
1628 void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
1629
1630 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
1631 // Store-release semantics [Armv8.1]
1632 void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1633
1634 // Atomic bit set on byte in memory [Armv8.1]
1635 void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
1636
1637 // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
1638 void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
1639
1640 // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
1641 void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
1642
1643 // Atomic bit set on byte in memory, with Load-acquire and Store-release
1644 // semantics [Armv8.1]
1645 void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
1646
1647 // Atomic bit set on halfword in memory [Armv8.1]
1648 void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
1649
1650 // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
1651 void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
1652
1653 // Atomic bit set on halfword in memory, with Store-release semantics
1654 // [Armv8.1]
1655 void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
1656
1657 // Atomic bit set on halfword in memory, with Load-acquire and Store-release
1658 // semantics [Armv8.1]
1659 void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
1660
1661 // Atomic bit set on word or doubleword in memory [Armv8.1]
1662 void ldset(const Register& rs, const Register& rt, const MemOperand& src);
1663
1664 // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
1665 // [Armv8.1]
1666 void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
1667
1668 // Atomic bit set on word or doubleword in memory, with Store-release
1669 // semantics [Armv8.1]
1670 void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
1671
1672 // Atomic bit set on word or doubleword in memory, with Load-acquire and
1673 // Store-release semantics [Armv8.1]
1674 void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
1675
1676 // Atomic signed maximum on byte in memory [Armv8.1]
1677 void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
1678
1679 // Atomic signed maximum on byte in memory, with Load-acquire semantics
1680 // [Armv8.1]
1681 void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
1682
1683 // Atomic signed maximum on byte in memory, with Store-release semantics
1684 // [Armv8.1]
1685 void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1686
1687 // Atomic signed maximum on byte in memory, with Load-acquire and
1688 // Store-release semantics [Armv8.1]
1689 void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1690
1691 // Atomic signed maximum on halfword in memory [Armv8.1]
1692 void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
1693
1694 // Atomic signed maximum on halfword in memory, with Load-acquire semantics
1695 // [Armv8.1]
1696 void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
1697
1698 // Atomic signed maximum on halfword in memory, with Store-release semantics
1699 // [Armv8.1]
1700 void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1701
1702 // Atomic signed maximum on halfword in memory, with Load-acquire and
1703 // Store-release semantics [Armv8.1]
1704 void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1705
1706 // Atomic signed maximum on word or doubleword in memory [Armv8.1]
1707 void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
1708
1709 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1710 // semantics [Armv8.1]
1711 void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
1712
1713 // Atomic signed maximum on word or doubleword in memory, with Store-release
1714 // semantics [Armv8.1]
1715 void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
1716
1717 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1718 // and Store-release semantics [Armv8.1]
1719 void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1720
1721 // Atomic signed minimum on byte in memory [Armv8.1]
1722 void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
1723
1724 // Atomic signed minimum on byte in memory, with Load-acquire semantics
1725 // [Armv8.1]
1726 void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
1727
1728 // Atomic signed minimum on byte in memory, with Store-release semantics
1729 // [Armv8.1]
1730 void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
1731
1732 // Atomic signed minimum on byte in memory, with Load-acquire and
1733 // Store-release semantics [Armv8.1]
1734 void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
1735
1736 // Atomic signed minimum on halfword in memory [Armv8.1]
1737 void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
1738
1739 // Atomic signed minimum on halfword in memory, with Load-acquire semantics
1740 // [Armv8.1]
1741 void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
1742
1743 // Atomic signed minimum on halfword in memory, with Store-release semantics
1744 // [Armv8.1]
1745 void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
1746
1747 // Atomic signed minimum on halfword in memory, with Load-acquire and
1748 // Store-release semantics [Armv8.1]
1749 void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
1750
1751 // Atomic signed minimum on word or doubleword in memory [Armv8.1]
1752 void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
1753
1754 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1755 // semantics [Armv8.1]
1756 void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
1757
1758 // Atomic signed minimum on word or doubleword in memory, with Store-release
1759 // semantics [Armv8.1]
1760 void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
1761
1762 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1763 // and Store-release semantics [Armv8.1]
1764 void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
1765
1766 // Atomic unsigned maximum on byte in memory [Armv8.1]
1767 void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
1768
1769 // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
1770 // [Armv8.1]
1771 void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
1772
1773 // Atomic unsigned maximum on byte in memory, with Store-release semantics
1774 // [Armv8.1]
1775 void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1776
1777 // Atomic unsigned maximum on byte in memory, with Load-acquire and
1778 // Store-release semantics [Armv8.1]
1779 void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1780
1781 // Atomic unsigned maximum on halfword in memory [Armv8.1]
1782 void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
1783
1784 // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
1785 // [Armv8.1]
1786 void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
1787
1788 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1789 // [Armv8.1]
1790 void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1791
1792 // Atomic unsigned maximum on halfword in memory, with Load-acquire and
1793 // Store-release semantics [Armv8.1]
1794 void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1795
1796 // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
1797 void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
1798
1799 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1800 // semantics [Armv8.1]
1801 void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
1802
1803 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1804 // semantics [Armv8.1]
1805 void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
1806
1807 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1808 // and Store-release semantics [Armv8.1]
1809 void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1810
1811 // Atomic unsigned minimum on byte in memory [Armv8.1]
1812 void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
1813
1814 // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
1815 // [Armv8.1]
1816 void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
1817
1818 // Atomic unsigned minimum on byte in memory, with Store-release semantics
1819 // [Armv8.1]
1820 void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
1821
1822 // Atomic unsigned minimum on byte in memory, with Load-acquire and
1823 // Store-release semantics [Armv8.1]
1824 void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
1825
1826 // Atomic unsigned minimum on halfword in memory [Armv8.1]
1827 void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
1828
1829 // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
1830 // [Armv8.1]
1831 void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
1832
1833 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1834 // [Armv8.1]
1835 void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
1836
1837 // Atomic unsigned minimum on halfword in memory, with Load-acquire and
1838 // Store-release semantics [Armv8.1]
1839 void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
1840
1841 // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
1842 void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
1843
1844 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1845 // semantics [Armv8.1]
1846 void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
1847
1848 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
1849 // semantics [Armv8.1]
1850 void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
1851
1852 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1853 // and Store-release semantics [Armv8.1]
1854 void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1855
1856 // Atomic add on byte in memory, without return. [Armv8.1]
1857 void staddb(const Register& rs, const MemOperand& src);
1858
1859 // Atomic add on byte in memory, with Store-release semantics and without
1860 // return. [Armv8.1]
1861 void staddlb(const Register& rs, const MemOperand& src);
1862
1863 // Atomic add on halfword in memory, without return. [Armv8.1]
1864 void staddh(const Register& rs, const MemOperand& src);
1865
1866 // Atomic add on halfword in memory, with Store-release semantics and without
1867 // return. [Armv8.1]
1868 void staddlh(const Register& rs, const MemOperand& src);
1869
1870 // Atomic add on word or doubleword in memory, without return. [Armv8.1]
1871 void stadd(const Register& rs, const MemOperand& src);
1872
1873 // Atomic add on word or doubleword in memory, with Store-release semantics
1874 // and without return. [Armv8.1]
1875 void staddl(const Register& rs, const MemOperand& src);
1876
1877 // Atomic bit clear on byte in memory, without return. [Armv8.1]
1878 void stclrb(const Register& rs, const MemOperand& src);
1879
1880 // Atomic bit clear on byte in memory, with Store-release semantics and
1881 // without return. [Armv8.1]
1882 void stclrlb(const Register& rs, const MemOperand& src);
1883
1884 // Atomic bit clear on halfword in memory, without return. [Armv8.1]
1885 void stclrh(const Register& rs, const MemOperand& src);
1886
1887 // Atomic bit clear on halfword in memory, with Store-release semantics and
1888 // without return. [Armv8.1]
1889 void stclrlh(const Register& rs, const MemOperand& src);
1890
1891 // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
1892 void stclr(const Register& rs, const MemOperand& src);
1893
1894 // Atomic bit clear on word or doubleword in memory, with Store-release
1895 // semantics and without return. [Armv8.1]
1896 void stclrl(const Register& rs, const MemOperand& src);
1897
1898 // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
1899 void steorb(const Register& rs, const MemOperand& src);
1900
1901 // Atomic exclusive OR on byte in memory, with Store-release semantics and
1902 // without return. [Armv8.1]
1903 void steorlb(const Register& rs, const MemOperand& src);
1904
1905 // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
1906 void steorh(const Register& rs, const MemOperand& src);
1907
1908 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1909 // and without return. [Armv8.1]
1910 void steorlh(const Register& rs, const MemOperand& src);
1911
1912 // Atomic exclusive OR on word or doubleword in memory, without return.
1913 // [Armv8.1]
1914 void steor(const Register& rs, const MemOperand& src);
1915
1916 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1917 // semantics and without return. [Armv8.1]
1918 void steorl(const Register& rs, const MemOperand& src);
1919
1920 // Atomic bit set on byte in memory, without return. [Armv8.1]
1921 void stsetb(const Register& rs, const MemOperand& src);
1922
1923 // Atomic bit set on byte in memory, with Store-release semantics and without
1924 // return. [Armv8.1]
1925 void stsetlb(const Register& rs, const MemOperand& src);
1926
1927 // Atomic bit set on halfword in memory, without return. [Armv8.1]
1928 void stseth(const Register& rs, const MemOperand& src);
1929
1930 // Atomic bit set on halfword in memory, with Store-release semantics and
1931 // without return. [Armv8.1]
1932 void stsetlh(const Register& rs, const MemOperand& src);
1933
1934 // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
1935 void stset(const Register& rs, const MemOperand& src);
1936
1937 // Atomic bit set on word or doubleword in memory, with Store-release
1938 // semantics and without return. [Armv8.1]
1939 void stsetl(const Register& rs, const MemOperand& src);
1940
1941 // Atomic signed maximum on byte in memory, without return. [Armv8.1]
1942 void stsmaxb(const Register& rs, const MemOperand& src);
1943
1944 // Atomic signed maximum on byte in memory, with Store-release semantics and
1945 // without return. [Armv8.1]
1946 void stsmaxlb(const Register& rs, const MemOperand& src);
1947
1948 // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
1949 void stsmaxh(const Register& rs, const MemOperand& src);
1950
1951 // Atomic signed maximum on halfword in memory, with Store-release semantics
1952 // and without return. [Armv8.1]
1953 void stsmaxlh(const Register& rs, const MemOperand& src);
1954
1955 // Atomic signed maximum on word or doubleword in memory, without return.
1956 // [Armv8.1]
1957 void stsmax(const Register& rs, const MemOperand& src);
1958
1959 // Atomic signed maximum on word or doubleword in memory, with Store-release
1960 // semantics and without return. [Armv8.1]
1961 void stsmaxl(const Register& rs, const MemOperand& src);
1962
1963 // Atomic signed minimum on byte in memory, without return. [Armv8.1]
1964 void stsminb(const Register& rs, const MemOperand& src);
1965
1966 // Atomic signed minimum on byte in memory, with Store-release semantics and
1967 // without return. [Armv8.1]
1968 void stsminlb(const Register& rs, const MemOperand& src);
1969
1970 // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
1971 void stsminh(const Register& rs, const MemOperand& src);
1972
1973 // Atomic signed minimum on halfword in memory, with Store-release semantics
1974 // and without return. [Armv8.1]
1975 void stsminlh(const Register& rs, const MemOperand& src);
1976
1977 // Atomic signed minimum on word or doubleword in memory, without return.
1978 // [Armv8.1]
1979 void stsmin(const Register& rs, const MemOperand& src);
1980
1981 // Atomic signed minimum on word or doubleword in memory, with Store-release
1982 // semantics and without return. [Armv8.1]
1983 void stsminl(const Register& rs, const MemOperand& src);
1984
1985 // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
1986 void stumaxb(const Register& rs, const MemOperand& src);
1987
1988 // Atomic unsigned maximum on byte in memory, with Store-release semantics and
1989 // without return. [Armv8.1]
1990 void stumaxlb(const Register& rs, const MemOperand& src);
1991
1992 // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
1993 void stumaxh(const Register& rs, const MemOperand& src);
1994
1995 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1996 // and without return. [Armv8.1]
1997 void stumaxlh(const Register& rs, const MemOperand& src);
1998
1999 // Atomic unsigned maximum on word or doubleword in memory, without return.
2000 // [Armv8.1]
2001 void stumax(const Register& rs, const MemOperand& src);
2002
2003 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
2004 // semantics and without return. [Armv8.1]
2005 void stumaxl(const Register& rs, const MemOperand& src);
2006
2007 // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
2008 void stuminb(const Register& rs, const MemOperand& src);
2009
2010 // Atomic unsigned minimum on byte in memory, with Store-release semantics and
2011 // without return. [Armv8.1]
2012 void stuminlb(const Register& rs, const MemOperand& src);
2013
2014 // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
2015 void stuminh(const Register& rs, const MemOperand& src);
2016
2017 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
2018 // and without return. [Armv8.1]
2019 void stuminlh(const Register& rs, const MemOperand& src);
2020
2021 // Atomic unsigned minimum on word or doubleword in memory, without return.
2022 // [Armv8.1]
2023 void stumin(const Register& rs, const MemOperand& src);
2024
2025 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
2026 // semantics and without return. [Armv8.1]
2027 void stuminl(const Register& rs, const MemOperand& src);
2028
2029 // Swap byte in memory [Armv8.1]
2030 void swpb(const Register& rs, const Register& rt, const MemOperand& src);
2031
2032 // Swap byte in memory, with Load-acquire semantics [Armv8.1]
2033 void swpab(const Register& rs, const Register& rt, const MemOperand& src);
2034
2035 // Swap byte in memory, with Store-release semantics [Armv8.1]
2036 void swplb(const Register& rs, const Register& rt, const MemOperand& src);
2037
2038 // Swap byte in memory, with Load-acquire and Store-release semantics
2039 // [Armv8.1]
2040 void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
2041
2042 // Swap halfword in memory [Armv8.1]
2043 void swph(const Register& rs, const Register& rt, const MemOperand& src);
2044
2045 // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
2046 void swpah(const Register& rs, const Register& rt, const MemOperand& src);
2047
2048 // Swap halfword in memory, with Store-release semantics [Armv8.1]
2049 void swplh(const Register& rs, const Register& rt, const MemOperand& src);
2050
2051 // Swap halfword in memory, with Load-acquire and Store-release semantics
2052 // [Armv8.1]
2053 void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
2054
2055 // Swap word or doubleword in memory [Armv8.1]
2056 void swp(const Register& rs, const Register& rt, const MemOperand& src);
2057
2058 // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
2059 void swpa(const Register& rs, const Register& rt, const MemOperand& src);
2060
2061 // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
2062 void swpl(const Register& rs, const Register& rt, const MemOperand& src);
2063
2064 // Swap word or doubleword in memory, with Load-acquire and Store-release
2065 // semantics [Armv8.1]
2066 void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2067
2068 // Load-Acquire RCpc Register byte [Armv8.3]
2069 void ldaprb(const Register& rt, const MemOperand& src);
2070
2071 // Load-Acquire RCpc Register halfword [Armv8.3]
2072 void ldaprh(const Register& rt, const MemOperand& src);
2073
2074 // Load-Acquire RCpc Register word or doubleword [Armv8.3]
2075 void ldapr(const Register& rt, const MemOperand& src);
2076
2077 // Prefetch memory.
2078 void prfm(PrefetchOperation op,
2079 const MemOperand& addr,
2080 LoadStoreScalingOption option = PreferScaledOffset);
2081
2082 // Prefetch memory (with unscaled offset).
2083 void prfum(PrefetchOperation op,
2084 const MemOperand& addr,
2085 LoadStoreScalingOption option = PreferUnscaledOffset);
2086
2087 // Prefetch memory in the literal pool.
2088 void prfm(PrefetchOperation op, RawLiteral* literal);
2089
2090 // Prefetch from pc + imm19 << 2.
2091 void prfm(PrefetchOperation op, int64_t imm19);
2092
2093 // Prefetch memory (allowing unallocated hints).
2094 void prfm(int op,
2095 const MemOperand& addr,
2096 LoadStoreScalingOption option = PreferScaledOffset);
2097
2098 // Prefetch memory (with unscaled offset, allowing unallocated hints).
2099 void prfum(int op,
2100 const MemOperand& addr,
2101 LoadStoreScalingOption option = PreferUnscaledOffset);
2102
2103 // Prefetch memory in the literal pool (allowing unallocated hints).
2104 void prfm(int op, RawLiteral* literal);
2105
2106 // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
2107 void prfm(int op, int64_t imm19);
2108
2109 // Move instructions. The default shift of -1 indicates that the move
2110 // instruction will calculate an appropriate 16-bit immediate and left shift
2111 // that is equal to the 64-bit immediate argument. If an explicit left shift
2112 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
2113 //
2114 // For movk, an explicit shift can be used to indicate which half word should
2115 // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
2116 // half word with zero, whereas movk(x0, 0, 48) will overwrite the
2117 // most-significant.
2118
2119 // Move immediate and keep.
2120 void movk(const Register& rd, uint64_t imm, int shift = -1) {
2121 MoveWide(rd, imm, shift, MOVK);
2122 }
2123
2124 // Move inverted immediate.
2125 void movn(const Register& rd, uint64_t imm, int shift = -1) {
2126 MoveWide(rd, imm, shift, MOVN);
2127 }
2128
2129 // Move immediate.
2130 void movz(const Register& rd, uint64_t imm, int shift = -1) {
2131 MoveWide(rd, imm, shift, MOVZ);
2132 }
2133
2134 // Move immediate, aliases for movz, movn, orr.
mov(const Register & rd,uint64_t imm)2135 void mov(const Register& rd, uint64_t imm) {
2136 if (!OneInstrMoveImmediateHelper(this, rd, imm)) {
2137 VIXL_UNIMPLEMENTED();
2138 }
2139 }
2140
2141 // Misc instructions.
2142
2143 // Monitor debug-mode breakpoint.
2144 void brk(int code);
2145
2146 // Halting debug-mode breakpoint.
2147 void hlt(int code);
2148
2149 // Generate exception targeting EL1.
2150 void svc(int code);
2151
2152 // Generate undefined instruction exception.
2153 void udf(int code);
2154
2155 // Move register to register.
2156 void mov(const Register& rd, const Register& rn);
2157
2158 // Move inverted operand to register.
2159 void mvn(const Register& rd, const Operand& operand);
2160
2161 // System instructions.
2162
2163 // Move to register from system register.
2164 void mrs(const Register& xt, SystemRegister sysreg);
2165
2166 // Move from register to system register.
2167 void msr(SystemRegister sysreg, const Register& xt);
2168
2169 // Invert carry flag [Armv8.4].
2170 void cfinv();
2171
2172 // Convert floating-point condition flags from alternative format to Arm
2173 // format [Armv8.5].
2174 void xaflag();
2175
2176 // Convert floating-point condition flags from Arm format to alternative
2177 // format [Armv8.5].
2178 void axflag();
2179
2180 // System instruction.
2181 void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
2182
2183 // System instruction with pre-encoded op (op1:crn:crm:op2).
2184 void sys(int op, const Register& xt = xzr);
2185
2186 // System data cache operation.
2187 void dc(DataCacheOp op, const Register& rt);
2188
2189 // System instruction cache operation.
2190 void ic(InstructionCacheOp op, const Register& rt);
2191
2192 // System hint (named type).
2193 void hint(SystemHint code);
2194
2195 // System hint (numbered type).
2196 void hint(int imm7);
2197
2198 // Clear exclusive monitor.
2199 void clrex(int imm4 = 0xf);
2200
2201 // Data memory barrier.
2202 void dmb(BarrierDomain domain, BarrierType type);
2203
2204 // Data synchronization barrier.
2205 void dsb(BarrierDomain domain, BarrierType type);
2206
2207 // Instruction synchronization barrier.
2208 void isb();
2209
2210 // Error synchronization barrier.
2211 void esb();
2212
2213 // Conditional speculation dependency barrier.
2214 void csdb();
2215
2216 // No-op.
nop()2217 void nop() { hint(NOP); }
2218
2219 // Branch target identification.
2220 void bti(BranchTargetIdentifier id);
2221
2222 // FP and NEON instructions.
2223
2224 // Move double precision immediate to FP register.
2225 void fmov(const VRegister& vd, double imm);
2226
2227 // Move single precision immediate to FP register.
2228 void fmov(const VRegister& vd, float imm);
2229
2230 // Move half precision immediate to FP register [Armv8.2].
2231 void fmov(const VRegister& vd, Float16 imm);
2232
2233 // Move FP register to register.
2234 void fmov(const Register& rd, const VRegister& fn);
2235
2236 // Move register to FP register.
2237 void fmov(const VRegister& vd, const Register& rn);
2238
2239 // Move FP register to FP register.
2240 void fmov(const VRegister& vd, const VRegister& fn);
2241
2242 // Move 64-bit register to top half of 128-bit FP register.
2243 void fmov(const VRegister& vd, int index, const Register& rn);
2244
2245 // Move top half of 128-bit FP register to 64-bit register.
2246 void fmov(const Register& rd, const VRegister& vn, int index);
2247
2248 // FP add.
2249 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2250
2251 // FP subtract.
2252 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2253
2254 // FP multiply.
2255 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2256
2257 // FP fused multiply-add.
2258 void fmadd(const VRegister& vd,
2259 const VRegister& vn,
2260 const VRegister& vm,
2261 const VRegister& va);
2262
2263 // FP fused multiply-subtract.
2264 void fmsub(const VRegister& vd,
2265 const VRegister& vn,
2266 const VRegister& vm,
2267 const VRegister& va);
2268
2269 // FP fused multiply-add and negate.
2270 void fnmadd(const VRegister& vd,
2271 const VRegister& vn,
2272 const VRegister& vm,
2273 const VRegister& va);
2274
2275 // FP fused multiply-subtract and negate.
2276 void fnmsub(const VRegister& vd,
2277 const VRegister& vn,
2278 const VRegister& vm,
2279 const VRegister& va);
2280
2281 // FP multiply-negate scalar.
2282 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2283
2284 // FP reciprocal exponent scalar.
2285 void frecpx(const VRegister& vd, const VRegister& vn);
2286
2287 // FP divide.
2288 void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2289
2290 // FP maximum.
2291 void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2292
2293 // FP minimum.
2294 void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2295
2296 // FP maximum number.
2297 void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2298
2299 // FP minimum number.
2300 void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2301
2302 // FP absolute.
2303 void fabs(const VRegister& vd, const VRegister& vn);
2304
2305 // FP negate.
2306 void fneg(const VRegister& vd, const VRegister& vn);
2307
2308 // FP square root.
2309 void fsqrt(const VRegister& vd, const VRegister& vn);
2310
2311 // FP round to integer, nearest with ties to away.
2312 void frinta(const VRegister& vd, const VRegister& vn);
2313
2314 // FP round to integer, implicit rounding.
2315 void frinti(const VRegister& vd, const VRegister& vn);
2316
2317 // FP round to integer, toward minus infinity.
2318 void frintm(const VRegister& vd, const VRegister& vn);
2319
2320 // FP round to integer, nearest with ties to even.
2321 void frintn(const VRegister& vd, const VRegister& vn);
2322
2323 // FP round to integer, toward plus infinity.
2324 void frintp(const VRegister& vd, const VRegister& vn);
2325
2326 // FP round to integer, exact, implicit rounding.
2327 void frintx(const VRegister& vd, const VRegister& vn);
2328
2329 // FP round to integer, towards zero.
2330 void frintz(const VRegister& vd, const VRegister& vn);
2331
2332 // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
2333 void frint32x(const VRegister& vd, const VRegister& vn);
2334
2335 // FP round to 32-bit integer, towards zero [Armv8.5].
2336 void frint32z(const VRegister& vd, const VRegister& vn);
2337
2338 // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
2339 void frint64x(const VRegister& vd, const VRegister& vn);
2340
2341 // FP round to 64-bit integer, towards zero [Armv8.5].
2342 void frint64z(const VRegister& vd, const VRegister& vn);
2343
2344 void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);
2345
2346 void FPCompareMacro(const VRegister& vn,
2347 const VRegister& vm,
2348 FPTrapFlags trap);
2349
2350 // FP compare registers.
2351 void fcmp(const VRegister& vn, const VRegister& vm);
2352
2353 // FP compare immediate.
2354 void fcmp(const VRegister& vn, double value);
2355
2356 void FPCCompareMacro(const VRegister& vn,
2357 const VRegister& vm,
2358 StatusFlags nzcv,
2359 Condition cond,
2360 FPTrapFlags trap);
2361
2362 // FP conditional compare.
2363 void fccmp(const VRegister& vn,
2364 const VRegister& vm,
2365 StatusFlags nzcv,
2366 Condition cond);
2367
2368 // FP signaling compare registers.
2369 void fcmpe(const VRegister& vn, const VRegister& vm);
2370
2371 // FP signaling compare immediate.
2372 void fcmpe(const VRegister& vn, double value);
2373
2374 // FP conditional signaling compare.
2375 void fccmpe(const VRegister& vn,
2376 const VRegister& vm,
2377 StatusFlags nzcv,
2378 Condition cond);
2379
2380 // FP conditional select.
2381 void fcsel(const VRegister& vd,
2382 const VRegister& vn,
2383 const VRegister& vm,
2384 Condition cond);
2385
2386 // Common FP Convert functions.
2387 void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
2388 void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2389 void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2390
2391 // FP convert between precisions.
2392 void fcvt(const VRegister& vd, const VRegister& vn);
2393
2394 // FP convert to higher precision.
2395 void fcvtl(const VRegister& vd, const VRegister& vn);
2396
2397 // FP convert to higher precision (second part).
2398 void fcvtl2(const VRegister& vd, const VRegister& vn);
2399
2400 // FP convert to lower precision.
2401 void fcvtn(const VRegister& vd, const VRegister& vn);
2402
2403 // FP convert to lower precision (second part).
2404 void fcvtn2(const VRegister& vd, const VRegister& vn);
2405
2406 // FP convert to lower precision, rounding to odd.
2407 void fcvtxn(const VRegister& vd, const VRegister& vn);
2408
2409 // FP convert to lower precision, rounding to odd (second part).
2410 void fcvtxn2(const VRegister& vd, const VRegister& vn);
2411
2412 // FP convert to signed integer, nearest with ties to away.
2413 void fcvtas(const Register& rd, const VRegister& vn);
2414
2415 // FP convert to unsigned integer, nearest with ties to away.
2416 void fcvtau(const Register& rd, const VRegister& vn);
2417
2418 // FP convert to signed integer, nearest with ties to away.
2419 void fcvtas(const VRegister& vd, const VRegister& vn);
2420
2421 // FP convert to unsigned integer, nearest with ties to away.
2422 void fcvtau(const VRegister& vd, const VRegister& vn);
2423
2424 // FP convert to signed integer, round towards -infinity.
2425 void fcvtms(const Register& rd, const VRegister& vn);
2426
2427 // FP convert to unsigned integer, round towards -infinity.
2428 void fcvtmu(const Register& rd, const VRegister& vn);
2429
// FP convert to signed integer, round towards -infinity.
void fcvtms(const VRegister& vd, const VRegister& vn);

// FP convert to unsigned integer, round towards -infinity.
void fcvtmu(const VRegister& vd, const VRegister& vn);

// FP convert to signed integer, nearest with ties to even, writing to a
// general-purpose register.
void fcvtns(const Register& rd, const VRegister& vn);

// FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
void fjcvtzs(const Register& rd, const VRegister& vn);

// FP convert to unsigned integer, nearest with ties to even, writing to a
// general-purpose register.
void fcvtnu(const Register& rd, const VRegister& vn);

// FP convert to signed integer, nearest with ties to even (vector/scalar
// FP destination; note the destination parameter is named 'rd').
void fcvtns(const VRegister& rd, const VRegister& vn);

// FP convert to unsigned integer, nearest with ties to even (vector/scalar
// FP destination).
void fcvtnu(const VRegister& rd, const VRegister& vn);

// FP convert to signed integer or fixed-point, round towards zero.
// fbits is the number of fixed-point fraction bits; 0 converts to integer.
void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);

// FP convert to unsigned integer or fixed-point, round towards zero.
void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);

// FP convert to signed integer or fixed-point, round towards zero.
void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);

// FP convert to unsigned integer or fixed-point, round towards zero.
void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);

// FP convert to signed integer, round towards +infinity.
void fcvtps(const Register& rd, const VRegister& vn);

// FP convert to unsigned integer, round towards +infinity.
void fcvtpu(const Register& rd, const VRegister& vn);

// FP convert to signed integer, round towards +infinity.
void fcvtps(const VRegister& vd, const VRegister& vn);

// FP convert to unsigned integer, round towards +infinity.
void fcvtpu(const VRegister& vd, const VRegister& vn);

// Convert signed integer or fixed point to FP.
// fbits is the number of fixed-point fraction bits; 0 converts from integer.
void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);

// Convert unsigned integer or fixed point to FP.
void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);

// Convert signed integer or fixed-point to FP.
void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);

// Convert unsigned integer or fixed-point to FP.
void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2486
// Unsigned absolute difference.
void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed absolute difference.
void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned absolute difference and accumulate.
void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed absolute difference and accumulate.
void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Add.
void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Subtract.
void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned halving add.
void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed halving add.
void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned rounding halving add.
void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed rounding halving add.
void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned halving subtract.
void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed halving subtract.
void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned saturating add.
void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating add.
void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned saturating subtract.
void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating subtract.
void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Add pairwise.
void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Add pair of elements (scalar form).
void addp(const VRegister& vd, const VRegister& vn);

// Multiply-add to accumulator.
void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Multiply-subtract to accumulator.
void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Multiply.
void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2549
// Multiply by scalar element. In this and the other by-element forms below,
// vm_index selects which element of vm is used. (The '2' suffixed variants
// presumably operate on the upper half of the source vectors — confirm
// against the implementation.)
void mul(const VRegister& vd,
         const VRegister& vn,
         const VRegister& vm,
         int vm_index);

// Multiply-add by scalar element.
void mla(const VRegister& vd,
         const VRegister& vn,
         const VRegister& vm,
         int vm_index);

// Multiply-subtract by scalar element.
void mls(const VRegister& vd,
         const VRegister& vn,
         const VRegister& vm,
         int vm_index);

// Signed long multiply-add by scalar element.
void smlal(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

// Signed long multiply-add by scalar element (second part).
void smlal2(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

// Unsigned long multiply-add by scalar element.
void umlal(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

// Unsigned long multiply-add by scalar element (second part).
void umlal2(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

// Signed long multiply-subtract by scalar element.
void smlsl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

// Signed long multiply-subtract by scalar element (second part).
void smlsl2(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

// Unsigned long multiply-subtract by scalar element.
void umlsl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

// Unsigned long multiply-subtract by scalar element (second part).
void umlsl2(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

// Signed long multiply by scalar element.
void smull(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

// Signed long multiply by scalar element (second part).
void smull2(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

// Unsigned long multiply by scalar element.
void umull(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

// Unsigned long multiply by scalar element (second part).
void umull2(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

// Signed saturating doubling long multiply by element.
void sqdmull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

// Signed saturating doubling long multiply by element (second part).
void sqdmull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

// Signed saturating doubling long multiply-add by element.
void sqdmlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

// Signed saturating doubling long multiply-add by element (second part).
void sqdmlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

// Signed saturating doubling long multiply-subtract by element.
void sqdmlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

// Signed saturating doubling long multiply-subtract by element (second part).
void sqdmlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);
2675
// Compare equal.
void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Compare signed greater than or equal.
void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Compare signed greater than.
void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Compare unsigned higher.
void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Compare unsigned higher or same.
void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Compare bitwise test bits nonzero.
void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Compare bitwise equal to zero.
// NOTE(review): for this and the compare-with-immediate forms below, the
// architecture only encodes comparison against zero, so 'value' is
// presumably required to be 0 — confirm against the implementation.
void cmeq(const VRegister& vd, const VRegister& vn, int value);

// Compare signed greater than or equal to zero.
void cmge(const VRegister& vd, const VRegister& vn, int value);

// Compare signed greater than zero.
void cmgt(const VRegister& vd, const VRegister& vn, int value);

// Compare signed less than or equal to zero.
void cmle(const VRegister& vd, const VRegister& vn, int value);

// Compare signed less than zero.
void cmlt(const VRegister& vd, const VRegister& vn, int value);

// Signed shift left by register.
void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned shift left by register.
void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating shift left by register.
void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned saturating shift left by register.
void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed rounding shift left by register.
void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned rounding shift left by register.
void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating rounding shift left by register.
void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned saturating rounding shift left by register.
void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2732
// Bitwise and.
void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Bitwise or.
void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Bitwise or immediate. imm8 is an 8-bit immediate, optionally shifted left
// by left_shift bits before being or-ed into each element.
void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

// Move register to register.
// NOTE(review): presumably implemented as an alias of orr with identical
// source registers — confirm against the implementation.
void mov(const VRegister& vd, const VRegister& vn);

// Bitwise orn.
void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Bitwise eor.
void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Bit clear immediate. imm8 is an 8-bit immediate, optionally shifted left
// by left_shift bits before the cleared bits are selected.
void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

// Bit clear.
void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Bitwise insert if false.
void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Bitwise insert if true.
void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Bitwise select.
void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Polynomial multiply.
void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Vector move immediate.
void movi(const VRegister& vd,
          const uint64_t imm,
          Shift shift = LSL,
          const int shift_amount = 0);

// Bitwise not.
void mvn(const VRegister& vd, const VRegister& vn);

// Vector move inverted immediate.
void mvni(const VRegister& vd,
          const int imm8,
          Shift shift = LSL,
          const int shift_amount = 0);
2783
// Signed saturating accumulate of unsigned value.
void suqadd(const VRegister& vd, const VRegister& vn);

// Unsigned saturating accumulate of signed value.
void usqadd(const VRegister& vd, const VRegister& vn);

// Absolute value.
void abs(const VRegister& vd, const VRegister& vn);

// Signed saturating absolute value.
void sqabs(const VRegister& vd, const VRegister& vn);

// Negate.
void neg(const VRegister& vd, const VRegister& vn);

// Signed saturating negate.
void sqneg(const VRegister& vd, const VRegister& vn);

// Bitwise not.
void not_(const VRegister& vd, const VRegister& vn);

// Extract narrow.
void xtn(const VRegister& vd, const VRegister& vn);

// Extract narrow (second part).
void xtn2(const VRegister& vd, const VRegister& vn);

// Signed saturating extract narrow.
void sqxtn(const VRegister& vd, const VRegister& vn);

// Signed saturating extract narrow (second part).
void sqxtn2(const VRegister& vd, const VRegister& vn);

// Unsigned saturating extract narrow.
void uqxtn(const VRegister& vd, const VRegister& vn);

// Unsigned saturating extract narrow (second part).
void uqxtn2(const VRegister& vd, const VRegister& vn);

// Signed saturating extract unsigned narrow.
void sqxtun(const VRegister& vd, const VRegister& vn);

// Signed saturating extract unsigned narrow (second part).
void sqxtun2(const VRegister& vd, const VRegister& vn);

// Extract vector from pair of vectors. index is the byte position at which
// the extracted vector starts within the vn:vm pair.
void ext(const VRegister& vd,
         const VRegister& vn,
         const VRegister& vm,
         int index);

// Duplicate vector element to vector or scalar.
void dup(const VRegister& vd, const VRegister& vn, int vn_index);

// Move vector element to scalar.
void mov(const VRegister& vd, const VRegister& vn, int vn_index);

// Duplicate general-purpose register to vector.
void dup(const VRegister& vd, const Register& rn);

// Insert vector element from another vector element.
void ins(const VRegister& vd,
         int vd_index,
         const VRegister& vn,
         int vn_index);

// Move vector element to another vector element.
void mov(const VRegister& vd,
         int vd_index,
         const VRegister& vn,
         int vn_index);

// Insert vector element from general-purpose register.
void ins(const VRegister& vd, int vd_index, const Register& rn);

// Move general-purpose register to a vector element.
void mov(const VRegister& vd, int vd_index, const Register& rn);

// Unsigned move vector element to general-purpose register.
void umov(const Register& rd, const VRegister& vn, int vn_index);

// Move vector element to general-purpose register (presumably equivalent to
// umov — confirm against the implementation).
void mov(const Register& rd, const VRegister& vn, int vn_index);

// Signed move vector element to general-purpose register.
void smov(const Register& rd, const VRegister& vn, int vn_index);
2870
// One-element structure load to one register.
void ld1(const VRegister& vt, const MemOperand& src);

// One-element structure load to two registers.
void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

// One-element structure load to three registers.
void ld1(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const MemOperand& src);

// One-element structure load to four registers.
void ld1(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const VRegister& vt4,
         const MemOperand& src);

// One-element single structure load to one lane. For this and the other
// single-structure loads below, 'lane' selects the destination element
// within the target register(s).
void ld1(const VRegister& vt, int lane, const MemOperand& src);

// One-element single structure load to all lanes.
void ld1r(const VRegister& vt, const MemOperand& src);

// Two-element structure load.
void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

// Two-element single structure load to one lane.
void ld2(const VRegister& vt,
         const VRegister& vt2,
         int lane,
         const MemOperand& src);

// Two-element single structure load to all lanes.
void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

// Three-element structure load.
void ld3(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const MemOperand& src);

// Three-element single structure load to one lane.
void ld3(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         int lane,
         const MemOperand& src);

// Three-element single structure load to all lanes.
void ld3r(const VRegister& vt,
          const VRegister& vt2,
          const VRegister& vt3,
          const MemOperand& src);

// Four-element structure load.
void ld4(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const VRegister& vt4,
         const MemOperand& src);

// Four-element single structure load to one lane.
void ld4(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const VRegister& vt4,
         int lane,
         const MemOperand& src);

// Four-element single structure load to all lanes.
void ld4r(const VRegister& vt,
          const VRegister& vt2,
          const VRegister& vt3,
          const VRegister& vt4,
          const MemOperand& src);
2948
// Count leading sign bits.
void cls(const VRegister& vd, const VRegister& vn);

// Count leading zero bits (vector).
void clz(const VRegister& vd, const VRegister& vn);

// Population count per byte.
void cnt(const VRegister& vd, const VRegister& vn);

// Reverse bit order.
void rbit(const VRegister& vd, const VRegister& vn);

// Reverse elements in 16-bit halfwords.
void rev16(const VRegister& vd, const VRegister& vn);

// Reverse elements in 32-bit words.
void rev32(const VRegister& vd, const VRegister& vn);

// Reverse elements in 64-bit doublewords.
void rev64(const VRegister& vd, const VRegister& vn);

// Unsigned reciprocal square root estimate.
void ursqrte(const VRegister& vd, const VRegister& vn);

// Unsigned reciprocal estimate.
void urecpe(const VRegister& vd, const VRegister& vn);

// Signed pairwise long add.
void saddlp(const VRegister& vd, const VRegister& vn);

// Unsigned pairwise long add.
void uaddlp(const VRegister& vd, const VRegister& vn);

// Signed pairwise long add and accumulate.
void sadalp(const VRegister& vd, const VRegister& vn);

// Unsigned pairwise long add and accumulate.
void uadalp(const VRegister& vd, const VRegister& vn);

// Shift left by immediate.
void shl(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating shift left by immediate.
void sqshl(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating shift left unsigned by immediate.
void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned saturating shift left by immediate.
void uqshl(const VRegister& vd, const VRegister& vn, int shift);

// Signed shift left long by immediate.
void sshll(const VRegister& vd, const VRegister& vn, int shift);

// Signed shift left long by immediate (second part).
void sshll2(const VRegister& vd, const VRegister& vn, int shift);

// Signed extend long.
void sxtl(const VRegister& vd, const VRegister& vn);

// Signed extend long (second part).
void sxtl2(const VRegister& vd, const VRegister& vn);

// Unsigned shift left long by immediate.
void ushll(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned shift left long by immediate (second part).
void ushll2(const VRegister& vd, const VRegister& vn, int shift);

// Shift left long by element size. (Per the description, 'shift' is
// presumably required to equal the source element size — confirm against
// the implementation.)
void shll(const VRegister& vd, const VRegister& vn, int shift);

// Shift left long by element size (second part).
void shll2(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned extend long.
void uxtl(const VRegister& vd, const VRegister& vn);

// Unsigned extend long (second part).
void uxtl2(const VRegister& vd, const VRegister& vn);

// Shift left by immediate and insert.
void sli(const VRegister& vd, const VRegister& vn, int shift);

// Shift right by immediate and insert.
void sri(const VRegister& vd, const VRegister& vn, int shift);
3035
// Signed maximum.
void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed pairwise maximum.
void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Add across vector.
void addv(const VRegister& vd, const VRegister& vn);

// Signed add long across vector.
void saddlv(const VRegister& vd, const VRegister& vn);

// Unsigned add long across vector.
void uaddlv(const VRegister& vd, const VRegister& vn);

// FP maximum number across vector.
void fmaxnmv(const VRegister& vd, const VRegister& vn);

// FP maximum across vector.
void fmaxv(const VRegister& vd, const VRegister& vn);

// FP minimum number across vector.
void fminnmv(const VRegister& vd, const VRegister& vn);

// FP minimum across vector.
void fminv(const VRegister& vd, const VRegister& vn);

// Signed maximum across vector.
void smaxv(const VRegister& vd, const VRegister& vn);

// Signed minimum.
void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed pairwise minimum.
void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed minimum across vector.
void sminv(const VRegister& vd, const VRegister& vn);
3074
// One-element structure store from one register.
// NOTE(review): for the store instructions below, the MemOperand parameter
// is the destination address, despite being named 'src'.
void st1(const VRegister& vt, const MemOperand& src);

// One-element structure store from two registers.
void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

// One-element structure store from three registers.
void st1(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const MemOperand& src);

// One-element structure store from four registers.
void st1(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const VRegister& vt4,
         const MemOperand& src);

// One-element single structure store from one lane.
void st1(const VRegister& vt, int lane, const MemOperand& src);

// Two-element structure store from two registers.
void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

// Two-element single structure store from two lanes.
void st2(const VRegister& vt,
         const VRegister& vt2,
         int lane,
         const MemOperand& src);

// Three-element structure store from three registers.
void st3(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const MemOperand& src);

// Three-element single structure store from three lanes.
void st3(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         int lane,
         const MemOperand& src);

// Four-element structure store from four registers.
void st4(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const VRegister& vt4,
         const MemOperand& src);

// Four-element single structure store from four lanes.
void st4(const VRegister& vt,
         const VRegister& vt2,
         const VRegister& vt3,
         const VRegister& vt4,
         int lane,
         const MemOperand& src);
3133
// Unsigned add long.
void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned add long (second part).
void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned add wide.
void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned add wide (second part).
void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed add long.
void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed add long (second part).
void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed add wide.
void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed add wide (second part).
void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned subtract long.
void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned subtract long (second part).
void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned subtract wide.
void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned subtract wide (second part).
void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed subtract long.
void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed subtract long (second part).
void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed subtract wide.
void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed subtract wide (second part).
void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned maximum.
void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned pairwise maximum.
void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned maximum across vector.
void umaxv(const VRegister& vd, const VRegister& vn);

// Unsigned minimum.
void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned pairwise minimum.
void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned minimum across vector.
void uminv(const VRegister& vd, const VRegister& vn);

// Transpose vectors (primary).
void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Transpose vectors (secondary).
void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unzip vectors (primary).
void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unzip vectors (secondary).
void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Zip vectors (primary).
void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Zip vectors (secondary).
void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3217
// Signed shift right by immediate.
void sshr(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned shift right by immediate.
void ushr(const VRegister& vd, const VRegister& vn, int shift);

// Signed rounding shift right by immediate.
void srshr(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned rounding shift right by immediate.
void urshr(const VRegister& vd, const VRegister& vn, int shift);

// Signed shift right by immediate and accumulate.
void ssra(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned shift right by immediate and accumulate.
void usra(const VRegister& vd, const VRegister& vn, int shift);

// Signed rounding shift right by immediate and accumulate.
void srsra(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned rounding shift right by immediate and accumulate.
void ursra(const VRegister& vd, const VRegister& vn, int shift);

// Shift right narrow by immediate.
void shrn(const VRegister& vd, const VRegister& vn, int shift);

// Shift right narrow by immediate (second part).
void shrn2(const VRegister& vd, const VRegister& vn, int shift);

// Rounding shift right narrow by immediate.
void rshrn(const VRegister& vd, const VRegister& vn, int shift);

// Rounding shift right narrow by immediate (second part).
void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned saturating shift right narrow by immediate.
void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned saturating shift right narrow by immediate (second part).
void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned saturating rounding shift right narrow by immediate.
void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

// Unsigned saturating rounding shift right narrow by immediate (second part).
void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating shift right narrow by immediate.
void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating shift right narrow by immediate (second part).
void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating rounded shift right narrow by immediate.
void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating rounded shift right narrow by immediate (second part).
void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating shift right unsigned narrow by immediate.
void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating shift right unsigned narrow by immediate (second part).
void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating rounded shift right unsigned narrow by immediate.
void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

// Signed saturating rounded shift right unsigned narrow by immediate
// (second part).
void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
3289
// FP reciprocal step.
void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// FP reciprocal estimate.
void frecpe(const VRegister& vd, const VRegister& vn);

// FP reciprocal square root estimate.
void frsqrte(const VRegister& vd, const VRegister& vn);

// FP reciprocal square root step.
void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed absolute difference and accumulate long.
void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed absolute difference and accumulate long (second part).
void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned absolute difference and accumulate long.
void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned absolute difference and accumulate long (second part).
void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed absolute difference long.
void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed absolute difference long (second part).
void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned absolute difference long.
void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned absolute difference long (second part).
void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Polynomial multiply long.
void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Polynomial multiply long (second part).
void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed long multiply-add.
void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed long multiply-add (second part).
void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned long multiply-add.
void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned long multiply-add (second part).
void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed long multiply-subtract.
void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed long multiply-subtract (second part).
void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned long multiply-subtract.
void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Unsigned long multiply-subtract (second part).
void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed long multiply.
void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed long multiply (second part).
void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating doubling long multiply-add.
void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating doubling long multiply-add (second part).
void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating doubling long multiply-subtract.
void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating doubling long multiply-subtract (second part).
void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating doubling long multiply.
void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating doubling long multiply (second part).
void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating doubling multiply returning high half.
void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed saturating rounding doubling multiply returning high half.
void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

// Signed dot product [Armv8.2].
void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3389 // Signed saturating rounding doubling multiply accumulate returning high
3390 // half [Armv8.1].
3391 void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3392
3393 // Unsigned dot product [Armv8.2].
3394 void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3395
3396 // Dot Product with unsigned and signed integers (vector).
3397 void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3398
3399 // Dot product with signed and unsigned integers (vector, by element).
3400 void sudot(const VRegister& vd,
3401 const VRegister& vn,
3402 const VRegister& vm,
3403 int vm_index);
3404
3405 // Dot product with unsigned and signed integers (vector, by element).
3406 void usdot(const VRegister& vd,
3407 const VRegister& vn,
3408 const VRegister& vm,
3409 int vm_index);
3410
3411 // Signed saturating rounding doubling multiply subtract returning high half
3412 // [Armv8.1].
3413 void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3414
3415 // Signed saturating doubling multiply element returning high half.
3416 void sqdmulh(const VRegister& vd,
3417 const VRegister& vn,
3418 const VRegister& vm,
3419 int vm_index);
3420
3421 // Signed saturating rounding doubling multiply element returning high half.
3422 void sqrdmulh(const VRegister& vd,
3423 const VRegister& vn,
3424 const VRegister& vm,
3425 int vm_index);
3426
3427 // Signed dot product by element [Armv8.2].
3428 void sdot(const VRegister& vd,
3429 const VRegister& vn,
3430 const VRegister& vm,
3431 int vm_index);
3432
3433 // Signed saturating rounding doubling multiply accumulate element returning
3434 // high half [Armv8.1].
3435 void sqrdmlah(const VRegister& vd,
3436 const VRegister& vn,
3437 const VRegister& vm,
3438 int vm_index);
3439
3440 // Unsigned dot product by element [Armv8.2].
3441 void udot(const VRegister& vd,
3442 const VRegister& vn,
3443 const VRegister& vm,
3444 int vm_index);
3445
3446 // Signed saturating rounding doubling multiply subtract element returning
3447 // high half [Armv8.1].
3448 void sqrdmlsh(const VRegister& vd,
3449 const VRegister& vn,
3450 const VRegister& vm,
3451 int vm_index);
3452
  // Unsigned long multiply.
3454 void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3455
3456 // Unsigned long multiply (second part).
3457 void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3458
3459 // Add narrow returning high half.
3460 void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3461
3462 // Add narrow returning high half (second part).
3463 void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3464
3465 // Rounding add narrow returning high half.
3466 void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3467
3468 // Rounding add narrow returning high half (second part).
3469 void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3470
3471 // Subtract narrow returning high half.
3472 void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3473
3474 // Subtract narrow returning high half (second part).
3475 void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3476
3477 // Rounding subtract narrow returning high half.
3478 void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3479
3480 // Rounding subtract narrow returning high half (second part).
3481 void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3482
3483 // FP vector multiply accumulate.
3484 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3485
3486 // FP fused multiply-add long to accumulator.
3487 void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3488
3489 // FP fused multiply-add long to accumulator (second part).
3490 void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3491
3492 // FP fused multiply-add long to accumulator by element.
3493 void fmlal(const VRegister& vd,
3494 const VRegister& vn,
3495 const VRegister& vm,
3496 int vm_index);
3497
3498 // FP fused multiply-add long to accumulator by element (second part).
3499 void fmlal2(const VRegister& vd,
3500 const VRegister& vn,
3501 const VRegister& vm,
3502 int vm_index);
3503
3504 // FP vector multiply subtract.
3505 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3506
3507 // FP fused multiply-subtract long to accumulator.
3508 void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3509
3510 // FP fused multiply-subtract long to accumulator (second part).
3511 void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3512
3513 // FP fused multiply-subtract long to accumulator by element.
3514 void fmlsl(const VRegister& vd,
3515 const VRegister& vn,
3516 const VRegister& vm,
3517 int vm_index);
3518
3519 // FP fused multiply-subtract long to accumulator by element (second part).
3520 void fmlsl2(const VRegister& vd,
3521 const VRegister& vn,
3522 const VRegister& vm,
3523 int vm_index);
3524
3525 // FP vector multiply extended.
3526 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3527
3528 // FP absolute greater than or equal.
3529 void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3530
3531 // FP absolute greater than.
3532 void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3533
3534 // FP multiply by element.
3535 void fmul(const VRegister& vd,
3536 const VRegister& vn,
3537 const VRegister& vm,
3538 int vm_index);
3539
3540 // FP fused multiply-add to accumulator by element.
3541 void fmla(const VRegister& vd,
3542 const VRegister& vn,
3543 const VRegister& vm,
3544 int vm_index);
3545
3546 // FP fused multiply-sub from accumulator by element.
3547 void fmls(const VRegister& vd,
3548 const VRegister& vn,
3549 const VRegister& vm,
3550 int vm_index);
3551
3552 // FP multiply extended by element.
3553 void fmulx(const VRegister& vd,
3554 const VRegister& vn,
3555 const VRegister& vm,
3556 int vm_index);
3557
3558 // FP compare equal.
3559 void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3560
3561 // FP greater than.
3562 void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3563
3564 // FP greater than or equal.
3565 void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3566
3567 // FP compare equal to zero.
3568 void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
3569
3570 // FP greater than zero.
3571 void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
3572
3573 // FP greater than or equal to zero.
3574 void fcmge(const VRegister& vd, const VRegister& vn, double imm);
3575
3576 // FP less than or equal to zero.
3577 void fcmle(const VRegister& vd, const VRegister& vn, double imm);
3578
  // FP less than zero.
3580 void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
3581
3582 // FP absolute difference.
3583 void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3584
3585 // FP pairwise add vector.
3586 void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3587
3588 // FP pairwise add scalar.
3589 void faddp(const VRegister& vd, const VRegister& vn);
3590
3591 // FP pairwise maximum vector.
3592 void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3593
3594 // FP pairwise maximum scalar.
3595 void fmaxp(const VRegister& vd, const VRegister& vn);
3596
3597 // FP pairwise minimum vector.
3598 void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3599
3600 // FP pairwise minimum scalar.
3601 void fminp(const VRegister& vd, const VRegister& vn);
3602
3603 // FP pairwise maximum number vector.
3604 void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3605
3606 // FP pairwise maximum number scalar.
3607 void fmaxnmp(const VRegister& vd, const VRegister& vn);
3608
3609 // FP pairwise minimum number vector.
3610 void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3611
3612 // FP pairwise minimum number scalar.
3613 void fminnmp(const VRegister& vd, const VRegister& vn);
3614
3615 // v8.3 complex numbers - note that these are only partial/helper functions
3616 // and must be used in series in order to perform full CN operations.
3617
3618 // FP complex multiply accumulate (by element) [Armv8.3].
3619 void fcmla(const VRegister& vd,
3620 const VRegister& vn,
3621 const VRegister& vm,
3622 int vm_index,
3623 int rot);
3624
3625 // FP complex multiply accumulate [Armv8.3].
3626 void fcmla(const VRegister& vd,
3627 const VRegister& vn,
3628 const VRegister& vm,
3629 int rot);
3630
3631 // FP complex add [Armv8.3].
3632 void fcadd(const VRegister& vd,
3633 const VRegister& vn,
3634 const VRegister& vm,
3635 int rot);
3636
3637 // Signed 8-bit integer matrix multiply-accumulate (vector).
3638 void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3639
3640 // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
3641 void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3642
3643 // Unsigned 8-bit integer matrix multiply-accumulate (vector).
3644 void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3645
3646 // Scalable Vector Extensions.
3647
3648 // Absolute value (predicated).
3649 void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3650
3651 // Add vectors (predicated).
3652 void add(const ZRegister& zd,
3653 const PRegisterM& pg,
3654 const ZRegister& zn,
3655 const ZRegister& zm);
3656
3657 // Add vectors (unpredicated).
3658 void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3659
3660 // Add immediate (unpredicated).
3661 void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
3662
3663 // Add multiple of predicate register size to scalar register.
3664 void addpl(const Register& xd, const Register& xn, int imm6);
3665
3666 // Add multiple of vector register size to scalar register.
3667 void addvl(const Register& xd, const Register& xn, int imm6);
3668
3669 // Compute vector address.
3670 void adr(const ZRegister& zd, const SVEMemOperand& addr);
3671
3672 // Bitwise AND predicates.
3673 void and_(const PRegisterWithLaneSize& pd,
3674 const PRegisterZ& pg,
3675 const PRegisterWithLaneSize& pn,
3676 const PRegisterWithLaneSize& pm);
3677
3678 // Bitwise AND vectors (predicated).
3679 void and_(const ZRegister& zd,
3680 const PRegisterM& pg,
3681 const ZRegister& zn,
3682 const ZRegister& zm);
3683
3684 // Bitwise AND with immediate (unpredicated).
3685 void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3686
3687 // Bitwise AND vectors (unpredicated).
3688 void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3689
  // Bitwise AND predicates, setting the condition flags.
3691 void ands(const PRegisterWithLaneSize& pd,
3692 const PRegisterZ& pg,
3693 const PRegisterWithLaneSize& pn,
3694 const PRegisterWithLaneSize& pm);
3695
3696 // Bitwise AND reduction to scalar.
3697 void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
3698
3699 // Arithmetic shift right by immediate (predicated).
3700 void asr(const ZRegister& zd,
3701 const PRegisterM& pg,
3702 const ZRegister& zn,
3703 int shift);
3704
3705 // Arithmetic shift right by 64-bit wide elements (predicated).
3706 void asr(const ZRegister& zd,
3707 const PRegisterM& pg,
3708 const ZRegister& zn,
3709 const ZRegister& zm);
3710
3711 // Arithmetic shift right by immediate (unpredicated).
3712 void asr(const ZRegister& zd, const ZRegister& zn, int shift);
3713
3714 // Arithmetic shift right by 64-bit wide elements (unpredicated).
3715 void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3716
3717 // Arithmetic shift right for divide by immediate (predicated).
3718 void asrd(const ZRegister& zd,
3719 const PRegisterM& pg,
3720 const ZRegister& zn,
3721 int shift);
3722
3723 // Reversed arithmetic shift right by vector (predicated).
3724 void asrr(const ZRegister& zd,
3725 const PRegisterM& pg,
3726 const ZRegister& zn,
3727 const ZRegister& zm);
3728
3729 // Bitwise clear predicates.
3730 void bic(const PRegisterWithLaneSize& pd,
3731 const PRegisterZ& pg,
3732 const PRegisterWithLaneSize& pn,
3733 const PRegisterWithLaneSize& pm);
3734
3735 // Bitwise clear vectors (predicated).
3736 void bic(const ZRegister& zd,
3737 const PRegisterM& pg,
3738 const ZRegister& zn,
3739 const ZRegister& zm);
3740
3741 // Bitwise clear bits using immediate (unpredicated).
3742 void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3743
3744 // Bitwise clear vectors (unpredicated).
3745 void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3746
  // Bitwise clear predicates, setting the condition flags.
3748 void bics(const PRegisterWithLaneSize& pd,
3749 const PRegisterZ& pg,
3750 const PRegisterWithLaneSize& pn,
3751 const PRegisterWithLaneSize& pm);
3752
3753 // Break after first true condition.
3754 void brka(const PRegisterWithLaneSize& pd,
3755 const PRegister& pg,
3756 const PRegisterWithLaneSize& pn);
3757
3758 // Break after first true condition.
3759 void brkas(const PRegisterWithLaneSize& pd,
3760 const PRegisterZ& pg,
3761 const PRegisterWithLaneSize& pn);
3762
3763 // Break before first true condition.
3764 void brkb(const PRegisterWithLaneSize& pd,
3765 const PRegister& pg,
3766 const PRegisterWithLaneSize& pn);
3767
3768 // Break before first true condition.
3769 void brkbs(const PRegisterWithLaneSize& pd,
3770 const PRegisterZ& pg,
3771 const PRegisterWithLaneSize& pn);
3772
3773 // Propagate break to next partition.
3774 void brkn(const PRegisterWithLaneSize& pd,
3775 const PRegisterZ& pg,
3776 const PRegisterWithLaneSize& pn,
3777 const PRegisterWithLaneSize& pm);
3778
3779 // Propagate break to next partition.
3780 void brkns(const PRegisterWithLaneSize& pd,
3781 const PRegisterZ& pg,
3782 const PRegisterWithLaneSize& pn,
3783 const PRegisterWithLaneSize& pm);
3784
3785 // Break after first true condition, propagating from previous partition.
3786 void brkpa(const PRegisterWithLaneSize& pd,
3787 const PRegisterZ& pg,
3788 const PRegisterWithLaneSize& pn,
3789 const PRegisterWithLaneSize& pm);
3790
3791 // Break after first true condition, propagating from previous partition.
3792 void brkpas(const PRegisterWithLaneSize& pd,
3793 const PRegisterZ& pg,
3794 const PRegisterWithLaneSize& pn,
3795 const PRegisterWithLaneSize& pm);
3796
3797 // Break before first true condition, propagating from previous partition.
3798 void brkpb(const PRegisterWithLaneSize& pd,
3799 const PRegisterZ& pg,
3800 const PRegisterWithLaneSize& pn,
3801 const PRegisterWithLaneSize& pm);
3802
3803 // Break before first true condition, propagating from previous partition.
3804 void brkpbs(const PRegisterWithLaneSize& pd,
3805 const PRegisterZ& pg,
3806 const PRegisterWithLaneSize& pn,
3807 const PRegisterWithLaneSize& pm);
3808
3809 // Conditionally extract element after last to general-purpose register.
3810 void clasta(const Register& rd,
3811 const PRegister& pg,
3812 const Register& rn,
3813 const ZRegister& zm);
3814
3815 // Conditionally extract element after last to SIMD&FP scalar register.
3816 void clasta(const VRegister& vd,
3817 const PRegister& pg,
3818 const VRegister& vn,
3819 const ZRegister& zm);
3820
3821 // Conditionally extract element after last to vector register.
3822 void clasta(const ZRegister& zd,
3823 const PRegister& pg,
3824 const ZRegister& zn,
3825 const ZRegister& zm);
3826
3827 // Conditionally extract last element to general-purpose register.
3828 void clastb(const Register& rd,
3829 const PRegister& pg,
3830 const Register& rn,
3831 const ZRegister& zm);
3832
3833 // Conditionally extract last element to SIMD&FP scalar register.
3834 void clastb(const VRegister& vd,
3835 const PRegister& pg,
3836 const VRegister& vn,
3837 const ZRegister& zm);
3838
3839 // Conditionally extract last element to vector register.
3840 void clastb(const ZRegister& zd,
3841 const PRegister& pg,
3842 const ZRegister& zn,
3843 const ZRegister& zm);
3844
3845 // Count leading sign bits (predicated).
3846 void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3847
3848 // Count leading zero bits (predicated).
3849 void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3850
3851 void cmp(Condition cond,
3852 const PRegisterWithLaneSize& pd,
3853 const PRegisterZ& pg,
3854 const ZRegister& zn,
3855 const ZRegister& zm);
3856
3857 // Compare vector to 64-bit wide elements.
3858 void cmpeq(const PRegisterWithLaneSize& pd,
3859 const PRegisterZ& pg,
3860 const ZRegister& zn,
3861 const ZRegister& zm);
3862
3863 // Compare vector to immediate.
3864 void cmpeq(const PRegisterWithLaneSize& pd,
3865 const PRegisterZ& pg,
3866 const ZRegister& zn,
3867 int imm5);
3868
3869 // Compare vector to 64-bit wide elements.
3870 void cmpge(const PRegisterWithLaneSize& pd,
3871 const PRegisterZ& pg,
3872 const ZRegister& zn,
3873 const ZRegister& zm);
3874
3875 // Compare vector to immediate.
3876 void cmpge(const PRegisterWithLaneSize& pd,
3877 const PRegisterZ& pg,
3878 const ZRegister& zn,
3879 int imm5);
3880
3881 // Compare vector to 64-bit wide elements.
3882 void cmpgt(const PRegisterWithLaneSize& pd,
3883 const PRegisterZ& pg,
3884 const ZRegister& zn,
3885 const ZRegister& zm);
3886
3887 // Compare vector to immediate.
3888 void cmpgt(const PRegisterWithLaneSize& pd,
3889 const PRegisterZ& pg,
3890 const ZRegister& zn,
3891 int imm5);
3892
3893 // Compare vector to 64-bit wide elements.
3894 void cmphi(const PRegisterWithLaneSize& pd,
3895 const PRegisterZ& pg,
3896 const ZRegister& zn,
3897 const ZRegister& zm);
3898
3899 // Compare vector to immediate.
3900 void cmphi(const PRegisterWithLaneSize& pd,
3901 const PRegisterZ& pg,
3902 const ZRegister& zn,
3903 unsigned imm7);
3904
3905 // Compare vector to 64-bit wide elements.
3906 void cmphs(const PRegisterWithLaneSize& pd,
3907 const PRegisterZ& pg,
3908 const ZRegister& zn,
3909 const ZRegister& zm);
3910
3911 // Compare vector to immediate.
3912 void cmphs(const PRegisterWithLaneSize& pd,
3913 const PRegisterZ& pg,
3914 const ZRegister& zn,
3915 unsigned imm7);
3916
3917 // Compare vector to 64-bit wide elements.
3918 void cmple(const PRegisterWithLaneSize& pd,
3919 const PRegisterZ& pg,
3920 const ZRegister& zn,
3921 const ZRegister& zm);
3922
3923 // Compare vector to immediate.
3924 void cmple(const PRegisterWithLaneSize& pd,
3925 const PRegisterZ& pg,
3926 const ZRegister& zn,
3927 int imm5);
3928
3929 // Compare vector to 64-bit wide elements.
3930 void cmplo(const PRegisterWithLaneSize& pd,
3931 const PRegisterZ& pg,
3932 const ZRegister& zn,
3933 const ZRegister& zm);
3934
3935 // Compare vector to immediate.
3936 void cmplo(const PRegisterWithLaneSize& pd,
3937 const PRegisterZ& pg,
3938 const ZRegister& zn,
3939 unsigned imm7);
3940
3941 // Compare vector to 64-bit wide elements.
3942 void cmpls(const PRegisterWithLaneSize& pd,
3943 const PRegisterZ& pg,
3944 const ZRegister& zn,
3945 const ZRegister& zm);
3946
3947 // Compare vector to immediate.
3948 void cmpls(const PRegisterWithLaneSize& pd,
3949 const PRegisterZ& pg,
3950 const ZRegister& zn,
3951 unsigned imm7);
3952
3953 // Compare vector to 64-bit wide elements.
3954 void cmplt(const PRegisterWithLaneSize& pd,
3955 const PRegisterZ& pg,
3956 const ZRegister& zn,
3957 const ZRegister& zm);
3958
3959 // Compare vector to immediate.
3960 void cmplt(const PRegisterWithLaneSize& pd,
3961 const PRegisterZ& pg,
3962 const ZRegister& zn,
3963 int imm5);
3964
3965 // Compare vector to 64-bit wide elements.
3966 void cmpne(const PRegisterWithLaneSize& pd,
3967 const PRegisterZ& pg,
3968 const ZRegister& zn,
3969 const ZRegister& zm);
3970
3971 // Compare vector to immediate.
3972 void cmpne(const PRegisterWithLaneSize& pd,
3973 const PRegisterZ& pg,
3974 const ZRegister& zn,
3975 int imm5);
3976
3977 // Logically invert boolean condition in vector (predicated).
3978 void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3979
3980 // Count non-zero bits (predicated).
3981 void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3982
3983 // Set scalar to multiple of predicate constraint element count.
3984 void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3985
3986 // Set scalar to multiple of predicate constraint element count.
3987 void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3988
3989 // Set scalar to multiple of predicate constraint element count.
3990 void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3991
3992 // Set scalar to active predicate element count.
3993 void cntp(const Register& xd,
3994 const PRegister& pg,
3995 const PRegisterWithLaneSize& pn);
3996
3997 // Set scalar to multiple of predicate constraint element count.
3998 void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3999
4000 // Shuffle active elements of vector to the right and fill with zero.
4001 void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
4002
4003 // Copy signed integer immediate to vector elements (predicated).
4004 void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
4005
4006 // Copy general-purpose register to vector elements (predicated).
4007 void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
4008
4009 // Copy SIMD&FP scalar register to vector elements (predicated).
4010 void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
4011
4012 // Compare and terminate loop.
4013 void ctermeq(const Register& rn, const Register& rm);
4014
4015 // Compare and terminate loop.
4016 void ctermne(const Register& rn, const Register& rm);
4017
4018 // Decrement scalar by multiple of predicate constraint element count.
4019 void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4020
4021 // Decrement scalar by multiple of predicate constraint element count.
4022 void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4023
4024 // Decrement vector by multiple of predicate constraint element count.
4025 void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4026
4027 // Decrement scalar by multiple of predicate constraint element count.
4028 void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4029
4030 // Decrement vector by multiple of predicate constraint element count.
4031 void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4032
4033 // Decrement scalar by active predicate element count.
4034 void decp(const Register& rdn, const PRegisterWithLaneSize& pg);
4035
4036 // Decrement vector by active predicate element count.
4037 void decp(const ZRegister& zdn, const PRegister& pg);
4038
4039 // Decrement scalar by multiple of predicate constraint element count.
4040 void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4041
4042 // Decrement vector by multiple of predicate constraint element count.
4043 void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4044
4045 // Broadcast general-purpose register to vector elements (unpredicated).
4046 void dup(const ZRegister& zd, const Register& xn);
4047
4048 // Broadcast indexed element to vector (unpredicated).
4049 void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);
4050
4051 // As for movz/movk/movn, if the default shift of -1 is specified to dup, the
4052 // assembler will pick an appropriate immediate and left shift that is
4053 // equivalent to the immediate argument. If an explicit left shift is
4054 // specified (0 or 8), the immediate must be a signed 8-bit integer.
4055
4056 // Broadcast signed immediate to vector elements (unpredicated).
4057 void dup(const ZRegister& zd, int imm8, int shift = -1);
4058
4059 // Broadcast logical bitmask immediate to vector (unpredicated).
4060 void dupm(const ZRegister& zd, uint64_t imm);
4061
4062 // Bitwise exclusive OR with inverted immediate (unpredicated).
4063 void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4064
4065 // Bitwise exclusive OR predicates.
4066 void eor(const PRegisterWithLaneSize& pd,
4067 const PRegisterZ& pg,
4068 const PRegisterWithLaneSize& pn,
4069 const PRegisterWithLaneSize& pm);
4070
4071 // Bitwise exclusive OR vectors (predicated).
4072 void eor(const ZRegister& zd,
4073 const PRegisterM& pg,
4074 const ZRegister& zn,
4075 const ZRegister& zm);
4076
4077 // Bitwise exclusive OR with immediate (unpredicated).
4078 void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4079
4080 // Bitwise exclusive OR vectors (unpredicated).
4081 void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4082
  // Bitwise exclusive OR predicates, setting the condition flags.
4084 void eors(const PRegisterWithLaneSize& pd,
4085 const PRegisterZ& pg,
4086 const PRegisterWithLaneSize& pn,
4087 const PRegisterWithLaneSize& pm);
4088
4089 // Bitwise XOR reduction to scalar.
4090 void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4091
4092 // Extract vector from pair of vectors.
4093 void ext(const ZRegister& zd,
4094 const ZRegister& zn,
4095 const ZRegister& zm,
4096 unsigned offset);
4097
4098 // Floating-point absolute difference (predicated).
4099 void fabd(const ZRegister& zd,
4100 const PRegisterM& pg,
4101 const ZRegister& zn,
4102 const ZRegister& zm);
4103
4104 // Floating-point absolute value (predicated).
4105 void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4106
4107 // Floating-point absolute compare vectors.
4108 void facge(const PRegisterWithLaneSize& pd,
4109 const PRegisterZ& pg,
4110 const ZRegister& zn,
4111 const ZRegister& zm);
4112
4113 // Floating-point absolute compare vectors.
4114 void facgt(const PRegisterWithLaneSize& pd,
4115 const PRegisterZ& pg,
4116 const ZRegister& zn,
4117 const ZRegister& zm);
4118
4119 // Floating-point add immediate (predicated).
4120 void fadd(const ZRegister& zd,
4121 const PRegisterM& pg,
4122 const ZRegister& zn,
4123 double imm);
4124
4125 // Floating-point add vector (predicated).
4126 void fadd(const ZRegister& zd,
4127 const PRegisterM& pg,
4128 const ZRegister& zn,
4129 const ZRegister& zm);
4130
4131 // Floating-point add vector (unpredicated).
4132 void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4133
4134 // Floating-point add strictly-ordered reduction, accumulating in scalar.
4135 void fadda(const VRegister& vd,
4136 const PRegister& pg,
4137 const VRegister& vn,
4138 const ZRegister& zm);
4139
4140 // Floating-point add recursive reduction to scalar.
4141 void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4142
4143 // Floating-point complex add with rotate (predicated).
4144 void fcadd(const ZRegister& zd,
4145 const PRegisterM& pg,
4146 const ZRegister& zn,
4147 const ZRegister& zm,
4148 int rot);
4149
4150 // Floating-point compare vector with zero.
4151 void fcmeq(const PRegisterWithLaneSize& pd,
4152 const PRegisterZ& pg,
4153 const ZRegister& zn,
4154 double zero);
4155
4156 // Floating-point compare vectors.
4157 void fcmeq(const PRegisterWithLaneSize& pd,
4158 const PRegisterZ& pg,
4159 const ZRegister& zn,
4160 const ZRegister& zm);
4161
4162 // Floating-point compare vector with zero.
4163 void fcmge(const PRegisterWithLaneSize& pd,
4164 const PRegisterZ& pg,
4165 const ZRegister& zn,
4166 double zero);
4167
4168 // Floating-point compare vectors.
4169 void fcmge(const PRegisterWithLaneSize& pd,
4170 const PRegisterZ& pg,
4171 const ZRegister& zn,
4172 const ZRegister& zm);
4173
4174 // Floating-point compare vector with zero.
4175 void fcmgt(const PRegisterWithLaneSize& pd,
4176 const PRegisterZ& pg,
4177 const ZRegister& zn,
4178 double zero);
4179
4180 // Floating-point compare vectors.
4181 void fcmgt(const PRegisterWithLaneSize& pd,
4182 const PRegisterZ& pg,
4183 const ZRegister& zn,
4184 const ZRegister& zm);
4185
4186 // Floating-point complex multiply-add with rotate (predicated).
4187 void fcmla(const ZRegister& zda,
4188 const PRegisterM& pg,
4189 const ZRegister& zn,
4190 const ZRegister& zm,
4191 int rot);
4192
4193 // Floating-point complex multiply-add by indexed values with rotate.
4194 void fcmla(const ZRegister& zda,
4195 const ZRegister& zn,
4196 const ZRegister& zm,
4197 int index,
4198 int rot);
4199
4200 // Floating-point compare vector with zero.
4201 void fcmle(const PRegisterWithLaneSize& pd,
4202 const PRegisterZ& pg,
4203 const ZRegister& zn,
4204 double zero);
4205
4206 // Floating-point compare vector with zero.
4207 void fcmlt(const PRegisterWithLaneSize& pd,
4208 const PRegisterZ& pg,
4209 const ZRegister& zn,
4210 double zero);
4211
4212 // Floating-point compare vector with zero.
4213 void fcmne(const PRegisterWithLaneSize& pd,
4214 const PRegisterZ& pg,
4215 const ZRegister& zn,
4216 double zero);
4217
4218 // Floating-point compare vectors.
4219 void fcmne(const PRegisterWithLaneSize& pd,
4220 const PRegisterZ& pg,
4221 const ZRegister& zn,
4222 const ZRegister& zm);
4223
4224 // Floating-point compare vectors.
4225 void fcmuo(const PRegisterWithLaneSize& pd,
4226 const PRegisterZ& pg,
4227 const ZRegister& zn,
4228 const ZRegister& zm);
4229
4230 // Copy floating-point immediate to vector elements (predicated).
4231 void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
4232
4233 // Copy half-precision floating-point immediate to vector elements
4234 // (predicated).
fcpy(const ZRegister & zd,const PRegisterM & pg,Float16 imm)4235 void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
4236 fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
4237 }
4238
4239 // Floating-point convert precision (predicated).
4240 void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4241
4242 // Floating-point convert to signed integer, rounding toward zero
4243 // (predicated).
4244 void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4245
4246 // Floating-point convert to unsigned integer, rounding toward zero
4247 // (predicated).
4248 void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4249
4250 // Floating-point divide by vector (predicated).
4251 void fdiv(const ZRegister& zd,
4252 const PRegisterM& pg,
4253 const ZRegister& zn,
4254 const ZRegister& zm);
4255
4256 // Floating-point reversed divide by vector (predicated).
4257 void fdivr(const ZRegister& zd,
4258 const PRegisterM& pg,
4259 const ZRegister& zn,
4260 const ZRegister& zm);
4261
4262 // Broadcast floating-point immediate to vector elements.
4263 void fdup(const ZRegister& zd, double imm);
4264
4265 // Broadcast half-precision floating-point immediate to vector elements.
fdup(const ZRegister & zd,Float16 imm)4266 void fdup(const ZRegister& zd, Float16 imm) {
4267 fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
4268 }
4269
4270 // Floating-point exponential accelerator.
4271 void fexpa(const ZRegister& zd, const ZRegister& zn);
4272
4273 // Floating-point fused multiply-add vectors (predicated), writing
4274 // multiplicand [Zdn = Za + Zdn * Zm].
4275 void fmad(const ZRegister& zdn,
4276 const PRegisterM& pg,
4277 const ZRegister& zm,
4278 const ZRegister& za);
4279
4280 // Floating-point maximum with immediate (predicated).
4281 void fmax(const ZRegister& zd,
4282 const PRegisterM& pg,
4283 const ZRegister& zn,
4284 double imm);
4285
4286 // Floating-point maximum (predicated).
4287 void fmax(const ZRegister& zd,
4288 const PRegisterM& pg,
4289 const ZRegister& zn,
4290 const ZRegister& zm);
4291
4292 // Floating-point maximum number with immediate (predicated).
4293 void fmaxnm(const ZRegister& zd,
4294 const PRegisterM& pg,
4295 const ZRegister& zn,
4296 double imm);
4297
4298 // Floating-point maximum number (predicated).
4299 void fmaxnm(const ZRegister& zd,
4300 const PRegisterM& pg,
4301 const ZRegister& zn,
4302 const ZRegister& zm);
4303
4304 // Floating-point maximum number recursive reduction to scalar.
4305 void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4306
4307 // Floating-point maximum recursive reduction to scalar.
4308 void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4309
4310 // Floating-point minimum with immediate (predicated).
4311 void fmin(const ZRegister& zd,
4312 const PRegisterM& pg,
4313 const ZRegister& zn,
4314 double imm);
4315
4316 // Floating-point minimum (predicated).
4317 void fmin(const ZRegister& zd,
4318 const PRegisterM& pg,
4319 const ZRegister& zn,
4320 const ZRegister& zm);
4321
4322 // Floating-point minimum number with immediate (predicated).
4323 void fminnm(const ZRegister& zd,
4324 const PRegisterM& pg,
4325 const ZRegister& zn,
4326 double imm);
4327
4328 // Floating-point minimum number (predicated).
4329 void fminnm(const ZRegister& zd,
4330 const PRegisterM& pg,
4331 const ZRegister& zn,
4332 const ZRegister& zm);
4333
4334 // Floating-point minimum number recursive reduction to scalar.
4335 void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4336
4337 // Floating-point minimum recursive reduction to scalar.
4338 void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4339
4340 // Floating-point fused multiply-add vectors (predicated), writing addend
4341 // [Zda = Zda + Zn * Zm].
4342 void fmla(const ZRegister& zda,
4343 const PRegisterM& pg,
4344 const ZRegister& zn,
4345 const ZRegister& zm);
4346
4347 // Floating-point fused multiply-add by indexed elements
4348 // (Zda = Zda + Zn * Zm[indexed]).
4349 void fmla(const ZRegister& zda,
4350 const ZRegister& zn,
4351 const ZRegister& zm,
4352 int index);
4353
4354 // Floating-point fused multiply-subtract vectors (predicated), writing
4355 // addend [Zda = Zda + -Zn * Zm].
4356 void fmls(const ZRegister& zda,
4357 const PRegisterM& pg,
4358 const ZRegister& zn,
4359 const ZRegister& zm);
4360
4361 // Floating-point fused multiply-subtract by indexed elements
4362 // (Zda = Zda + -Zn * Zm[indexed]).
4363 void fmls(const ZRegister& zda,
4364 const ZRegister& zn,
4365 const ZRegister& zm,
4366 int index);
4367
4368 // Move 8-bit floating-point immediate to vector elements (unpredicated).
4369 void fmov(const ZRegister& zd, double imm);
4370
4371 // Move 8-bit floating-point immediate to vector elements (predicated).
4372 void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);
4373
4374 // Floating-point fused multiply-subtract vectors (predicated), writing
4375 // multiplicand [Zdn = Za + -Zdn * Zm].
4376 void fmsb(const ZRegister& zdn,
4377 const PRegisterM& pg,
4378 const ZRegister& zm,
4379 const ZRegister& za);
4380
4381 // Floating-point multiply by immediate (predicated).
4382 void fmul(const ZRegister& zd,
4383 const PRegisterM& pg,
4384 const ZRegister& zn,
4385 double imm);
4386
4387 // Floating-point multiply vectors (predicated).
4388 void fmul(const ZRegister& zd,
4389 const PRegisterM& pg,
4390 const ZRegister& zn,
4391 const ZRegister& zm);
4392
4393 // Floating-point multiply by indexed elements.
4394 void fmul(const ZRegister& zd,
4395 const ZRegister& zn,
4396 const ZRegister& zm,
4397 unsigned index);
4398
4399 // Floating-point multiply vectors (unpredicated).
4400 void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4401
4402 // Floating-point multiply-extended vectors (predicated).
4403 void fmulx(const ZRegister& zd,
4404 const PRegisterM& pg,
4405 const ZRegister& zn,
4406 const ZRegister& zm);
4407
4408 // Floating-point negate (predicated).
4409 void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4410
4411 // Floating-point negated fused multiply-add vectors (predicated), writing
4412 // multiplicand [Zdn = -Za + -Zdn * Zm].
4413 void fnmad(const ZRegister& zdn,
4414 const PRegisterM& pg,
4415 const ZRegister& zm,
4416 const ZRegister& za);
4417
4418 // Floating-point negated fused multiply-add vectors (predicated), writing
4419 // addend [Zda = -Zda + -Zn * Zm].
4420 void fnmla(const ZRegister& zda,
4421 const PRegisterM& pg,
4422 const ZRegister& zn,
4423 const ZRegister& zm);
4424
4425 // Floating-point negated fused multiply-subtract vectors (predicated),
4426 // writing addend [Zda = -Zda + Zn * Zm].
4427 void fnmls(const ZRegister& zda,
4428 const PRegisterM& pg,
4429 const ZRegister& zn,
4430 const ZRegister& zm);
4431
4432 // Floating-point negated fused multiply-subtract vectors (predicated),
4433 // writing multiplicand [Zdn = -Za + Zdn * Zm].
4434 void fnmsb(const ZRegister& zdn,
4435 const PRegisterM& pg,
4436 const ZRegister& zm,
4437 const ZRegister& za);
4438
4439 // Floating-point reciprocal estimate (unpredicated).
4440 void frecpe(const ZRegister& zd, const ZRegister& zn);
4441
4442 // Floating-point reciprocal step (unpredicated).
4443 void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4444
4445 // Floating-point reciprocal exponent (predicated).
4446 void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4447
4448 // Floating-point round to integral value (predicated).
4449 void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4450
4451 // Floating-point round to integral value (predicated).
4452 void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4453
4454 // Floating-point round to integral value (predicated).
4455 void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4456
4457 // Floating-point round to integral value (predicated).
4458 void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4459
4460 // Floating-point round to integral value (predicated).
4461 void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4462
4463 // Floating-point round to integral value (predicated).
4464 void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4465
4466 // Floating-point round to integral value (predicated).
4467 void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4468
4469 // Floating-point reciprocal square root estimate (unpredicated).
4470 void frsqrte(const ZRegister& zd, const ZRegister& zn);
4471
4472 // Floating-point reciprocal square root step (unpredicated).
4473 void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4474
4475 // Floating-point adjust exponent by vector (predicated).
4476 void fscale(const ZRegister& zd,
4477 const PRegisterM& pg,
4478 const ZRegister& zn,
4479 const ZRegister& zm);
4480
4481 // Floating-point square root (predicated).
4482 void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4483
4484 // Floating-point subtract immediate (predicated).
4485 void fsub(const ZRegister& zd,
4486 const PRegisterM& pg,
4487 const ZRegister& zn,
4488 double imm);
4489
4490 // Floating-point subtract vectors (predicated).
4491 void fsub(const ZRegister& zd,
4492 const PRegisterM& pg,
4493 const ZRegister& zn,
4494 const ZRegister& zm);
4495
4496 // Floating-point subtract vectors (unpredicated).
4497 void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4498
4499 // Floating-point reversed subtract from immediate (predicated).
4500 void fsubr(const ZRegister& zd,
4501 const PRegisterM& pg,
4502 const ZRegister& zn,
4503 double imm);
4504
4505 // Floating-point reversed subtract vectors (predicated).
4506 void fsubr(const ZRegister& zd,
4507 const PRegisterM& pg,
4508 const ZRegister& zn,
4509 const ZRegister& zm);
4510
4511 // Floating-point trigonometric multiply-add coefficient.
4512 void ftmad(const ZRegister& zd,
4513 const ZRegister& zn,
4514 const ZRegister& zm,
4515 int imm3);
4516
4517 // Floating-point trigonometric starting value.
4518 void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4519
4520 // Floating-point trigonometric select coefficient.
4521 void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4522
4523 // Increment scalar by multiple of predicate constraint element count.
4524 void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4525
4526 // Increment scalar by multiple of predicate constraint element count.
4527 void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4528
4529 // Increment vector by multiple of predicate constraint element count.
4530 void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4531
4532 // Increment scalar by multiple of predicate constraint element count.
4533 void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4534
4535 // Increment vector by multiple of predicate constraint element count.
4536 void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4537
4538 // Increment scalar by active predicate element count.
4539 void incp(const Register& rdn, const PRegisterWithLaneSize& pg);
4540
4541 // Increment vector by active predicate element count.
4542 void incp(const ZRegister& zdn, const PRegister& pg);
4543
4544 // Increment scalar by multiple of predicate constraint element count.
4545 void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4546
4547 // Increment vector by multiple of predicate constraint element count.
4548 void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4549
4550 // Create index starting from and incremented by immediate.
4551 void index(const ZRegister& zd, int start, int step);
4552
4553 // Create index starting from and incremented by general-purpose register.
4554 void index(const ZRegister& zd, const Register& rn, const Register& rm);
4555
4556 // Create index starting from general-purpose register and incremented by
4557 // immediate.
4558 void index(const ZRegister& zd, const Register& rn, int imm5);
4559
4560 // Create index starting from immediate and incremented by general-purpose
4561 // register.
4562 void index(const ZRegister& zd, int imm5, const Register& rm);
4563
4564 // Insert general-purpose register in shifted vector.
4565 void insr(const ZRegister& zdn, const Register& rm);
4566
4567 // Insert SIMD&FP scalar register in shifted vector.
4568 void insr(const ZRegister& zdn, const VRegister& vm);
4569
4570 // Extract element after last to general-purpose register.
4571 void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);
4572
4573 // Extract element after last to SIMD&FP scalar register.
4574 void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4575
4576 // Extract last element to general-purpose register.
4577 void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);
4578
4579 // Extract last element to SIMD&FP scalar register.
4580 void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4581
4582 // Contiguous/gather load bytes to vector.
4583 void ld1b(const ZRegister& zt,
4584 const PRegisterZ& pg,
4585 const SVEMemOperand& addr);
4586
4587 // Contiguous/gather load halfwords to vector.
4588 void ld1h(const ZRegister& zt,
4589 const PRegisterZ& pg,
4590 const SVEMemOperand& addr);
4591
4592 // Contiguous/gather load words to vector.
4593 void ld1w(const ZRegister& zt,
4594 const PRegisterZ& pg,
4595 const SVEMemOperand& addr);
4596
4597 // Contiguous/gather load doublewords to vector.
4598 void ld1d(const ZRegister& zt,
4599 const PRegisterZ& pg,
4600 const SVEMemOperand& addr);
4601
4602 // TODO: Merge other loads into the SVEMemOperand versions.
4603
4604 // Load and broadcast unsigned byte to vector.
4605 void ld1rb(const ZRegister& zt,
4606 const PRegisterZ& pg,
4607 const SVEMemOperand& addr);
4608
4609 // Load and broadcast unsigned halfword to vector.
4610 void ld1rh(const ZRegister& zt,
4611 const PRegisterZ& pg,
4612 const SVEMemOperand& addr);
4613
4614 // Load and broadcast unsigned word to vector.
4615 void ld1rw(const ZRegister& zt,
4616 const PRegisterZ& pg,
4617 const SVEMemOperand& addr);
4618
4619 // Load and broadcast doubleword to vector.
4620 void ld1rd(const ZRegister& zt,
4621 const PRegisterZ& pg,
4622 const SVEMemOperand& addr);
4623
4624 // Contiguous load and replicate sixteen bytes.
4625 void ld1rqb(const ZRegister& zt,
4626 const PRegisterZ& pg,
4627 const SVEMemOperand& addr);
4628
4629 // Contiguous load and replicate eight halfwords.
4630 void ld1rqh(const ZRegister& zt,
4631 const PRegisterZ& pg,
4632 const SVEMemOperand& addr);
4633
4634 // Contiguous load and replicate four words.
4635 void ld1rqw(const ZRegister& zt,
4636 const PRegisterZ& pg,
4637 const SVEMemOperand& addr);
4638
4639 // Contiguous load and replicate two doublewords.
4640 void ld1rqd(const ZRegister& zt,
4641 const PRegisterZ& pg,
4642 const SVEMemOperand& addr);
4643
4644 // Contiguous load and replicate thirty-two bytes.
4645 void ld1rob(const ZRegister& zt,
4646 const PRegisterZ& pg,
4647 const SVEMemOperand& addr);
4648
4649 // Contiguous load and replicate sixteen halfwords.
4650 void ld1roh(const ZRegister& zt,
4651 const PRegisterZ& pg,
4652 const SVEMemOperand& addr);
4653
4654 // Contiguous load and replicate eight words.
4655 void ld1row(const ZRegister& zt,
4656 const PRegisterZ& pg,
4657 const SVEMemOperand& addr);
4658
4659 // Contiguous load and replicate four doublewords.
4660 void ld1rod(const ZRegister& zt,
4661 const PRegisterZ& pg,
4662 const SVEMemOperand& addr);
4663
4664 // Load and broadcast signed byte to vector.
4665 void ld1rsb(const ZRegister& zt,
4666 const PRegisterZ& pg,
4667 const SVEMemOperand& addr);
4668
4669 // Load and broadcast signed halfword to vector.
4670 void ld1rsh(const ZRegister& zt,
4671 const PRegisterZ& pg,
4672 const SVEMemOperand& addr);
4673
4674 // Load and broadcast signed word to vector.
4675 void ld1rsw(const ZRegister& zt,
4676 const PRegisterZ& pg,
4677 const SVEMemOperand& addr);
4678
4679 // Contiguous/gather load signed bytes to vector.
4680 void ld1sb(const ZRegister& zt,
4681 const PRegisterZ& pg,
4682 const SVEMemOperand& addr);
4683
4684 // Contiguous/gather load signed halfwords to vector.
4685 void ld1sh(const ZRegister& zt,
4686 const PRegisterZ& pg,
4687 const SVEMemOperand& addr);
4688
4689 // Contiguous/gather load signed words to vector.
4690 void ld1sw(const ZRegister& zt,
4691 const PRegisterZ& pg,
4692 const SVEMemOperand& addr);
4693
4694 // TODO: Merge other loads into the SVEMemOperand versions.
4695
4696 // Contiguous load two-byte structures to two vectors.
4697 void ld2b(const ZRegister& zt1,
4698 const ZRegister& zt2,
4699 const PRegisterZ& pg,
4700 const SVEMemOperand& addr);
4701
4702 // Contiguous load two-halfword structures to two vectors.
4703 void ld2h(const ZRegister& zt1,
4704 const ZRegister& zt2,
4705 const PRegisterZ& pg,
4706 const SVEMemOperand& addr);
4707
4708 // Contiguous load two-word structures to two vectors.
4709 void ld2w(const ZRegister& zt1,
4710 const ZRegister& zt2,
4711 const PRegisterZ& pg,
4712 const SVEMemOperand& addr);
4713
4714 // Contiguous load two-doubleword structures to two vectors.
4715 void ld2d(const ZRegister& zt1,
4716 const ZRegister& zt2,
4717 const PRegisterZ& pg,
4718 const SVEMemOperand& addr);
4719
4720 // Contiguous load three-byte structures to three vectors.
4721 void ld3b(const ZRegister& zt1,
4722 const ZRegister& zt2,
4723 const ZRegister& zt3,
4724 const PRegisterZ& pg,
4725 const SVEMemOperand& addr);
4726
4727 // Contiguous load three-halfword structures to three vectors.
4728 void ld3h(const ZRegister& zt1,
4729 const ZRegister& zt2,
4730 const ZRegister& zt3,
4731 const PRegisterZ& pg,
4732 const SVEMemOperand& addr);
4733
4734 // Contiguous load three-word structures to three vectors.
4735 void ld3w(const ZRegister& zt1,
4736 const ZRegister& zt2,
4737 const ZRegister& zt3,
4738 const PRegisterZ& pg,
4739 const SVEMemOperand& addr);
4740
4741 // Contiguous load three-doubleword structures to three vectors.
4742 void ld3d(const ZRegister& zt1,
4743 const ZRegister& zt2,
4744 const ZRegister& zt3,
4745 const PRegisterZ& pg,
4746 const SVEMemOperand& addr);
4747
4748 // Contiguous load four-byte structures to four vectors.
4749 void ld4b(const ZRegister& zt1,
4750 const ZRegister& zt2,
4751 const ZRegister& zt3,
4752 const ZRegister& zt4,
4753 const PRegisterZ& pg,
4754 const SVEMemOperand& addr);
4755
4756 // Contiguous load four-halfword structures to four vectors.
4757 void ld4h(const ZRegister& zt1,
4758 const ZRegister& zt2,
4759 const ZRegister& zt3,
4760 const ZRegister& zt4,
4761 const PRegisterZ& pg,
4762 const SVEMemOperand& addr);
4763
4764 // Contiguous load four-word structures to four vectors.
4765 void ld4w(const ZRegister& zt1,
4766 const ZRegister& zt2,
4767 const ZRegister& zt3,
4768 const ZRegister& zt4,
4769 const PRegisterZ& pg,
4770 const SVEMemOperand& addr);
4771
4772 // Contiguous load four-doubleword structures to four vectors.
4773 void ld4d(const ZRegister& zt1,
4774 const ZRegister& zt2,
4775 const ZRegister& zt3,
4776 const ZRegister& zt4,
4777 const PRegisterZ& pg,
4778 const SVEMemOperand& addr);
4779
4780 // Contiguous load first-fault unsigned bytes to vector.
4781 void ldff1b(const ZRegister& zt,
4782 const PRegisterZ& pg,
4783 const SVEMemOperand& addr);
4784
4785 // Contiguous load first-fault unsigned halfwords to vector.
4786 void ldff1h(const ZRegister& zt,
4787 const PRegisterZ& pg,
4788 const SVEMemOperand& addr);
4789
4790 // Contiguous load first-fault unsigned words to vector.
4791 void ldff1w(const ZRegister& zt,
4792 const PRegisterZ& pg,
4793 const SVEMemOperand& addr);
4794
4795 // Contiguous load first-fault doublewords to vector.
4796 void ldff1d(const ZRegister& zt,
4797 const PRegisterZ& pg,
4798 const SVEMemOperand& addr);
4799
4800 // Contiguous load first-fault signed bytes to vector.
4801 void ldff1sb(const ZRegister& zt,
4802 const PRegisterZ& pg,
4803 const SVEMemOperand& addr);
4804
4805 // Contiguous load first-fault signed halfwords to vector.
4806 void ldff1sh(const ZRegister& zt,
4807 const PRegisterZ& pg,
4808 const SVEMemOperand& addr);
4809
4810 // Contiguous load first-fault signed words to vector.
4811 void ldff1sw(const ZRegister& zt,
4812 const PRegisterZ& pg,
4813 const SVEMemOperand& addr);
4814
4815 // Gather load first-fault unsigned bytes to vector.
4816 void ldff1b(const ZRegister& zt,
4817 const PRegisterZ& pg,
4818 const Register& xn,
4819 const ZRegister& zm);
4820
4821 // Gather load first-fault unsigned bytes to vector (immediate index).
4822 void ldff1b(const ZRegister& zt,
4823 const PRegisterZ& pg,
4824 const ZRegister& zn,
4825 int imm5);
4826
4827 // Gather load first-fault doublewords to vector (vector index).
4828 void ldff1d(const ZRegister& zt,
4829 const PRegisterZ& pg,
4830 const Register& xn,
4831 const ZRegister& zm);
4832
4833 // Gather load first-fault doublewords to vector (immediate index).
4834 void ldff1d(const ZRegister& zt,
4835 const PRegisterZ& pg,
4836 const ZRegister& zn,
4837 int imm5);
4838
4839 // Gather load first-fault unsigned halfwords to vector (vector index).
4840 void ldff1h(const ZRegister& zt,
4841 const PRegisterZ& pg,
4842 const Register& xn,
4843 const ZRegister& zm);
4844
4845 // Gather load first-fault unsigned halfwords to vector (immediate index).
4846 void ldff1h(const ZRegister& zt,
4847 const PRegisterZ& pg,
4848 const ZRegister& zn,
4849 int imm5);
4850
4851 // Gather load first-fault signed bytes to vector (vector index).
4852 void ldff1sb(const ZRegister& zt,
4853 const PRegisterZ& pg,
4854 const Register& xn,
4855 const ZRegister& zm);
4856
4857 // Gather load first-fault signed bytes to vector (immediate index).
4858 void ldff1sb(const ZRegister& zt,
4859 const PRegisterZ& pg,
4860 const ZRegister& zn,
4861 int imm5);
4862
4863 // Gather load first-fault signed halfwords to vector (vector index).
4864 void ldff1sh(const ZRegister& zt,
4865 const PRegisterZ& pg,
4866 const Register& xn,
4867 const ZRegister& zm);
4868
4869 // Gather load first-fault signed halfwords to vector (immediate index).
4870 void ldff1sh(const ZRegister& zt,
4871 const PRegisterZ& pg,
4872 const ZRegister& zn,
4873 int imm5);
4874
4875 // Gather load first-fault signed words to vector (vector index).
4876 void ldff1sw(const ZRegister& zt,
4877 const PRegisterZ& pg,
4878 const Register& xn,
4879 const ZRegister& zm);
4880
4881 // Gather load first-fault signed words to vector (immediate index).
4882 void ldff1sw(const ZRegister& zt,
4883 const PRegisterZ& pg,
4884 const ZRegister& zn,
4885 int imm5);
4886
4887 // Gather load first-fault unsigned words to vector (vector index).
4888 void ldff1w(const ZRegister& zt,
4889 const PRegisterZ& pg,
4890 const Register& xn,
4891 const ZRegister& zm);
4892
4893 // Gather load first-fault unsigned words to vector (immediate index).
4894 void ldff1w(const ZRegister& zt,
4895 const PRegisterZ& pg,
4896 const ZRegister& zn,
4897 int imm5);
4898
4899 // Contiguous load non-fault unsigned bytes to vector (immediate index).
4900 void ldnf1b(const ZRegister& zt,
4901 const PRegisterZ& pg,
4902 const SVEMemOperand& addr);
4903
4904 // Contiguous load non-fault doublewords to vector (immediate index).
4905 void ldnf1d(const ZRegister& zt,
4906 const PRegisterZ& pg,
4907 const SVEMemOperand& addr);
4908
4909 // Contiguous load non-fault unsigned halfwords to vector (immediate
4910 // index).
4911 void ldnf1h(const ZRegister& zt,
4912 const PRegisterZ& pg,
4913 const SVEMemOperand& addr);
4914
4915 // Contiguous load non-fault signed bytes to vector (immediate index).
4916 void ldnf1sb(const ZRegister& zt,
4917 const PRegisterZ& pg,
4918 const SVEMemOperand& addr);
4919
4920 // Contiguous load non-fault signed halfwords to vector (immediate index).
4921 void ldnf1sh(const ZRegister& zt,
4922 const PRegisterZ& pg,
4923 const SVEMemOperand& addr);
4924
4925 // Contiguous load non-fault signed words to vector (immediate index).
4926 void ldnf1sw(const ZRegister& zt,
4927 const PRegisterZ& pg,
4928 const SVEMemOperand& addr);
4929
4930 // Contiguous load non-fault unsigned words to vector (immediate index).
4931 void ldnf1w(const ZRegister& zt,
4932 const PRegisterZ& pg,
4933 const SVEMemOperand& addr);
4934
4935 // Contiguous load non-temporal bytes to vector.
4936 void ldnt1b(const ZRegister& zt,
4937 const PRegisterZ& pg,
4938 const SVEMemOperand& addr);
4939
4940 // Contiguous load non-temporal halfwords to vector.
4941 void ldnt1h(const ZRegister& zt,
4942 const PRegisterZ& pg,
4943 const SVEMemOperand& addr);
4944
4945 // Contiguous load non-temporal words to vector.
4946 void ldnt1w(const ZRegister& zt,
4947 const PRegisterZ& pg,
4948 const SVEMemOperand& addr);
4949
4950 // Contiguous load non-temporal doublewords to vector.
4951 void ldnt1d(const ZRegister& zt,
4952 const PRegisterZ& pg,
4953 const SVEMemOperand& addr);
4954
4955 // Load SVE predicate/vector register.
4956 void ldr(const CPURegister& rt, const SVEMemOperand& addr);
4957
4958 // Logical shift left by immediate (predicated).
4959 void lsl(const ZRegister& zd,
4960 const PRegisterM& pg,
4961 const ZRegister& zn,
4962 int shift);
4963
4964 // Logical shift left by 64-bit wide elements (predicated).
4965 void lsl(const ZRegister& zd,
4966 const PRegisterM& pg,
4967 const ZRegister& zn,
4968 const ZRegister& zm);
4969
4970 // Logical shift left by immediate (unpredicated).
4971 void lsl(const ZRegister& zd, const ZRegister& zn, int shift);
4972
4973 // Logical shift left by 64-bit wide elements (unpredicated).
4974 void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4975
4976 // Reversed logical shift left by vector (predicated).
4977 void lslr(const ZRegister& zd,
4978 const PRegisterM& pg,
4979 const ZRegister& zn,
4980 const ZRegister& zm);
4981
4982 // Logical shift right by immediate (predicated).
4983 void lsr(const ZRegister& zd,
4984 const PRegisterM& pg,
4985 const ZRegister& zn,
4986 int shift);
4987
4988 // Logical shift right by 64-bit wide elements (predicated).
4989 void lsr(const ZRegister& zd,
4990 const PRegisterM& pg,
4991 const ZRegister& zn,
4992 const ZRegister& zm);
4993
4994 // Logical shift right by immediate (unpredicated).
4995 void lsr(const ZRegister& zd, const ZRegister& zn, int shift);
4996
4997 // Logical shift right by 64-bit wide elements (unpredicated).
4998 void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4999
5000 // Reversed logical shift right by vector (predicated).
5001 void lsrr(const ZRegister& zd,
5002 const PRegisterM& pg,
5003 const ZRegister& zn,
5004 const ZRegister& zm);
5005
5006 // Bitwise invert predicate.
5007 void not_(const PRegisterWithLaneSize& pd,
5008 const PRegisterZ& pg,
5009 const PRegisterWithLaneSize& pn);
5010
5011 // Bitwise invert predicate, setting the condition flags.
5012 void nots(const PRegisterWithLaneSize& pd,
5013 const PRegisterZ& pg,
5014 const PRegisterWithLaneSize& pn);
5015
5016 // Multiply-add vectors (predicated), writing multiplicand
5017 // [Zdn = Za + Zdn * Zm].
5018 void mad(const ZRegister& zdn,
5019 const PRegisterM& pg,
5020 const ZRegister& zm,
5021 const ZRegister& za);
5022
5023 // Multiply-add vectors (predicated), writing addend
5024 // [Zda = Zda + Zn * Zm].
5025 void mla(const ZRegister& zda,
5026 const PRegisterM& pg,
5027 const ZRegister& zn,
5028 const ZRegister& zm);
5029
5030 // Multiply-subtract vectors (predicated), writing addend
5031 // [Zda = Zda - Zn * Zm].
5032 void mls(const ZRegister& zda,
5033 const PRegisterM& pg,
5034 const ZRegister& zn,
5035 const ZRegister& zm);
5036
5037 // Move predicates (unpredicated)
5038 void mov(const PRegister& pd, const PRegister& pn);
5039
5040 // Move predicates (merging)
5041 void mov(const PRegisterWithLaneSize& pd,
5042 const PRegisterM& pg,
5043 const PRegisterWithLaneSize& pn);
5044
5045 // Move predicates (zeroing)
5046 void mov(const PRegisterWithLaneSize& pd,
5047 const PRegisterZ& pg,
5048 const PRegisterWithLaneSize& pn);
5049
5050 // Move general-purpose register to vector elements (unpredicated)
5051 void mov(const ZRegister& zd, const Register& xn);
5052
5053 // Move SIMD&FP scalar register to vector elements (unpredicated)
5054 void mov(const ZRegister& zd, const VRegister& vn);
5055
5056 // Move vector register (unpredicated)
5057 void mov(const ZRegister& zd, const ZRegister& zn);
5058
5059 // Move indexed element to vector elements (unpredicated)
5060 void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);
5061
5062 // Move general-purpose register to vector elements (predicated)
5063 void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
5064
5065 // Move SIMD&FP scalar register to vector elements (predicated)
5066 void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
5067
5068 // Move vector elements (predicated)
5069 void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5070
5071 // Move signed integer immediate to vector elements (predicated)
5072 void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
5073
5074 // Move signed immediate to vector elements (unpredicated).
5075 void mov(const ZRegister& zd, int imm8, int shift);
5076
5077 // Move logical bitmask immediate to vector (unpredicated).
5078 void mov(const ZRegister& zd, uint64_t imm);
5079
5080 // Move predicate (unpredicated), setting the condition flags
5081 void movs(const PRegister& pd, const PRegister& pn);
5082
5083 // Move predicates (zeroing), setting the condition flags
5084 void movs(const PRegisterWithLaneSize& pd,
5085 const PRegisterZ& pg,
5086 const PRegisterWithLaneSize& pn);
5087
5088 // Move prefix (predicated).
5089 void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
5090
5091 // Move prefix (unpredicated).
5092 void movprfx(const ZRegister& zd, const ZRegister& zn);
5093
5094 // Multiply-subtract vectors (predicated), writing multiplicand
5095 // [Zdn = Za - Zdn * Zm].
5096 void msb(const ZRegister& zdn,
5097 const PRegisterM& pg,
5098 const ZRegister& zm,
5099 const ZRegister& za);
5100
5101 // Multiply vectors (predicated).
5102 void mul(const ZRegister& zd,
5103 const PRegisterM& pg,
5104 const ZRegister& zn,
5105 const ZRegister& zm);
5106
5107 // Multiply by immediate (unpredicated).
5108 void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
5109
5110 // Bitwise NAND predicates.
5111 void nand(const PRegisterWithLaneSize& pd,
5112 const PRegisterZ& pg,
5113 const PRegisterWithLaneSize& pn,
5114 const PRegisterWithLaneSize& pm);
5115
  // Bitwise NAND predicates, setting the condition flags.
5117 void nands(const PRegisterWithLaneSize& pd,
5118 const PRegisterZ& pg,
5119 const PRegisterWithLaneSize& pn,
5120 const PRegisterWithLaneSize& pm);
5121
5122 // Negate (predicated).
5123 void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5124
5125 // Bitwise NOR predicates.
5126 void nor(const PRegisterWithLaneSize& pd,
5127 const PRegisterZ& pg,
5128 const PRegisterWithLaneSize& pn,
5129 const PRegisterWithLaneSize& pm);
5130
  // Bitwise NOR predicates, setting the condition flags.
5132 void nors(const PRegisterWithLaneSize& pd,
5133 const PRegisterZ& pg,
5134 const PRegisterWithLaneSize& pn,
5135 const PRegisterWithLaneSize& pm);
5136
5137 // Bitwise invert vector (predicated).
5138 void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5139
5140 // Bitwise OR inverted predicate.
5141 void orn(const PRegisterWithLaneSize& pd,
5142 const PRegisterZ& pg,
5143 const PRegisterWithLaneSize& pn,
5144 const PRegisterWithLaneSize& pm);
5145
  // Bitwise OR inverted predicate, setting the condition flags.
5147 void orns(const PRegisterWithLaneSize& pd,
5148 const PRegisterZ& pg,
5149 const PRegisterWithLaneSize& pn,
5150 const PRegisterWithLaneSize& pm);
5151
5152 // Bitwise OR with inverted immediate (unpredicated).
5153 void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5154
5155 // Bitwise OR predicate.
5156 void orr(const PRegisterWithLaneSize& pd,
5157 const PRegisterZ& pg,
5158 const PRegisterWithLaneSize& pn,
5159 const PRegisterWithLaneSize& pm);
5160
5161 // Bitwise OR vectors (predicated).
5162 void orr(const ZRegister& zd,
5163 const PRegisterM& pg,
5164 const ZRegister& zn,
5165 const ZRegister& zm);
5166
5167 // Bitwise OR with immediate (unpredicated).
5168 void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5169
5170 // Bitwise OR vectors (unpredicated).
5171 void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5172
5173 // Bitwise OR predicate.
5174 void orrs(const PRegisterWithLaneSize& pd,
5175 const PRegisterZ& pg,
5176 const PRegisterWithLaneSize& pn,
5177 const PRegisterWithLaneSize& pm);
5178
5179 // Bitwise OR reduction to scalar.
5180 void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5181
5182 // Set all predicate elements to false.
5183 void pfalse(const PRegisterWithLaneSize& pd);
5184
5185 // Set the first active predicate element to true.
5186 void pfirst(const PRegisterWithLaneSize& pd,
5187 const PRegister& pg,
5188 const PRegisterWithLaneSize& pn);
5189
5190 // Find next active predicate.
5191 void pnext(const PRegisterWithLaneSize& pd,
5192 const PRegister& pg,
5193 const PRegisterWithLaneSize& pn);
5194
5195 // Prefetch bytes.
5196 void prfb(PrefetchOperation prfop,
5197 const PRegister& pg,
5198 const SVEMemOperand& addr);
5199
5200 // Prefetch halfwords.
5201 void prfh(PrefetchOperation prfop,
5202 const PRegister& pg,
5203 const SVEMemOperand& addr);
5204
5205 // Prefetch words.
5206 void prfw(PrefetchOperation prfop,
5207 const PRegister& pg,
5208 const SVEMemOperand& addr);
5209
5210 // Prefetch doublewords.
5211 void prfd(PrefetchOperation prfop,
5212 const PRegister& pg,
5213 const SVEMemOperand& addr);
5214
5215 // Set condition flags for predicate.
5216 void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);
5217
5218 // Initialise predicate from named constraint.
5219 void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5220
5221 // Initialise predicate from named constraint.
5222 void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5223
5224 // Unpack and widen half of predicate.
5225 void punpkhi(const PRegisterWithLaneSize& pd,
5226 const PRegisterWithLaneSize& pn);
5227
5228 // Unpack and widen half of predicate.
5229 void punpklo(const PRegisterWithLaneSize& pd,
5230 const PRegisterWithLaneSize& pn);
5231
5232 // Reverse bits (predicated).
5233 void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5234
5235 // Read the first-fault register.
5236 void rdffr(const PRegisterWithLaneSize& pd);
5237
  // Return predicate of successfully loaded elements.
5239 void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5240
  // Return predicate of successfully loaded elements.
5242 void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5243
5244 // Read multiple of vector register size to scalar register.
5245 void rdvl(const Register& xd, int imm6);
5246
5247 // Reverse all elements in a predicate.
5248 void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);
5249
5250 // Reverse all elements in a vector (unpredicated).
5251 void rev(const ZRegister& zd, const ZRegister& zn);
5252
5253 // Reverse bytes / halfwords / words within elements (predicated).
5254 void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5255
5256 // Reverse bytes / halfwords / words within elements (predicated).
5257 void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5258
5259 // Reverse bytes / halfwords / words within elements (predicated).
5260 void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5261
5262 // Signed absolute difference (predicated).
5263 void sabd(const ZRegister& zd,
5264 const PRegisterM& pg,
5265 const ZRegister& zn,
5266 const ZRegister& zm);
5267
5268 // Signed add reduction to scalar.
5269 void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5270
5271 // Signed integer convert to floating-point (predicated).
5272 void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5273
5274 // Signed divide (predicated).
5275 void sdiv(const ZRegister& zd,
5276 const PRegisterM& pg,
5277 const ZRegister& zn,
5278 const ZRegister& zm);
5279
5280 // Signed reversed divide (predicated).
5281 void sdivr(const ZRegister& zd,
5282 const PRegisterM& pg,
5283 const ZRegister& zn,
5284 const ZRegister& zm);
5285
5286 // Signed dot product by indexed quadtuplet.
5287 void sdot(const ZRegister& zda,
5288 const ZRegister& zn,
5289 const ZRegister& zm,
5290 int index);
5291
5292 // Signed dot product.
5293 void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5294
5295 // Conditionally select elements from two predicates.
5296 void sel(const PRegisterWithLaneSize& pd,
5297 const PRegister& pg,
5298 const PRegisterWithLaneSize& pn,
5299 const PRegisterWithLaneSize& pm);
5300
5301 // Conditionally select elements from two vectors.
5302 void sel(const ZRegister& zd,
5303 const PRegister& pg,
5304 const ZRegister& zn,
5305 const ZRegister& zm);
5306
5307 // Initialise the first-fault register to all true.
5308 void setffr();
5309
5310 // Signed maximum vectors (predicated).
5311 void smax(const ZRegister& zd,
5312 const PRegisterM& pg,
5313 const ZRegister& zn,
5314 const ZRegister& zm);
5315
5316 // Signed maximum with immediate (unpredicated).
5317 void smax(const ZRegister& zd, const ZRegister& zn, int imm8);
5318
5319 // Signed maximum reduction to scalar.
5320 void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5321
5322 // Signed minimum vectors (predicated).
5323 void smin(const ZRegister& zd,
5324 const PRegisterM& pg,
5325 const ZRegister& zn,
5326 const ZRegister& zm);
5327
5328 // Signed minimum with immediate (unpredicated).
5329 void smin(const ZRegister& zd, const ZRegister& zn, int imm8);
5330
5331 // Signed minimum reduction to scalar.
5332 void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5333
5334 // Signed multiply returning high half (predicated).
5335 void smulh(const ZRegister& zd,
5336 const PRegisterM& pg,
5337 const ZRegister& zn,
5338 const ZRegister& zm);
5339
5340 // Splice two vectors under predicate control.
5341 void splice(const ZRegister& zd,
5342 const PRegister& pg,
5343 const ZRegister& zn,
5344 const ZRegister& zm);
5345
5346 // Splice two vectors under predicate control (constructive).
5347 void splice_con(const ZRegister& zd,
5348 const PRegister& pg,
5349 const ZRegister& zn,
5350 const ZRegister& zm);
5351
5352 // Signed saturating add vectors (unpredicated).
5353 void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5354
5355 // Signed saturating add immediate (unpredicated).
5356 void sqadd(const ZRegister& zd,
5357 const ZRegister& zn,
5358 int imm8,
5359 int shift = -1);
5360
5361 // Signed saturating decrement scalar by multiple of 8-bit predicate
5362 // constraint element count.
5363 void sqdecb(const Register& xd,
5364 const Register& wn,
5365 int pattern,
5366 int multiplier);
5367
5368 // Signed saturating decrement scalar by multiple of 8-bit predicate
5369 // constraint element count.
5370 void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5371
5372 // Signed saturating decrement scalar by multiple of 64-bit predicate
5373 // constraint element count.
5374 void sqdecd(const Register& xd,
5375 const Register& wn,
5376 int pattern = SVE_ALL,
5377 int multiplier = 1);
5378
5379 // Signed saturating decrement scalar by multiple of 64-bit predicate
5380 // constraint element count.
5381 void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5382
5383 // Signed saturating decrement vector by multiple of 64-bit predicate
5384 // constraint element count.
5385 void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5386
5387 // Signed saturating decrement scalar by multiple of 16-bit predicate
5388 // constraint element count.
5389 void sqdech(const Register& xd,
5390 const Register& wn,
5391 int pattern = SVE_ALL,
5392 int multiplier = 1);
5393
5394 // Signed saturating decrement scalar by multiple of 16-bit predicate
5395 // constraint element count.
5396 void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5397
5398 // Signed saturating decrement vector by multiple of 16-bit predicate
5399 // constraint element count.
5400 void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5401
5402 // Signed saturating decrement scalar by active predicate element count.
5403 void sqdecp(const Register& xd,
5404 const PRegisterWithLaneSize& pg,
5405 const Register& wn);
5406
5407 // Signed saturating decrement scalar by active predicate element count.
5408 void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);
5409
5410 // Signed saturating decrement vector by active predicate element count.
5411 void sqdecp(const ZRegister& zdn, const PRegister& pg);
5412
5413 // Signed saturating decrement scalar by multiple of 32-bit predicate
5414 // constraint element count.
5415 void sqdecw(const Register& xd,
5416 const Register& wn,
5417 int pattern = SVE_ALL,
5418 int multiplier = 1);
5419
5420 // Signed saturating decrement scalar by multiple of 32-bit predicate
5421 // constraint element count.
5422 void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5423
5424 // Signed saturating decrement vector by multiple of 32-bit predicate
5425 // constraint element count.
5426 void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5427
5428 // Signed saturating increment scalar by multiple of 8-bit predicate
5429 // constraint element count.
5430 void sqincb(const Register& xd,
5431 const Register& wn,
5432 int pattern = SVE_ALL,
5433 int multiplier = 1);
5434
5435 // Signed saturating increment scalar by multiple of 8-bit predicate
5436 // constraint element count.
5437 void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5438
5439 // Signed saturating increment scalar by multiple of 64-bit predicate
5440 // constraint element count.
5441 void sqincd(const Register& xd,
5442 const Register& wn,
5443 int pattern,
5444 int multiplier);
5445
5446 // Signed saturating increment scalar by multiple of 64-bit predicate
5447 // constraint element count.
5448 void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5449
5450 // Signed saturating increment vector by multiple of 64-bit predicate
5451 // constraint element count.
5452 void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5453
5454 // Signed saturating increment scalar by multiple of 16-bit predicate
5455 // constraint element count.
5456 void sqinch(const Register& xd,
5457 const Register& wn,
5458 int pattern = SVE_ALL,
5459 int multiplier = 1);
5460
5461 // Signed saturating increment scalar by multiple of 16-bit predicate
5462 // constraint element count.
5463 void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5464
5465 // Signed saturating increment vector by multiple of 16-bit predicate
5466 // constraint element count.
5467 void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5468
5469 // Signed saturating increment scalar by active predicate element count.
5470 void sqincp(const Register& xd,
5471 const PRegisterWithLaneSize& pg,
5472 const Register& wn);
5473
5474 // Signed saturating increment scalar by active predicate element count.
5475 void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);
5476
5477 // Signed saturating increment vector by active predicate element count.
5478 void sqincp(const ZRegister& zdn, const PRegister& pg);
5479
5480 // Signed saturating increment scalar by multiple of 32-bit predicate
5481 // constraint element count.
5482 void sqincw(const Register& xd,
5483 const Register& wn,
5484 int pattern = SVE_ALL,
5485 int multiplier = 1);
5486
5487 // Signed saturating increment scalar by multiple of 32-bit predicate
5488 // constraint element count.
5489 void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5490
5491 // Signed saturating increment vector by multiple of 32-bit predicate
5492 // constraint element count.
5493 void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5494
5495 // Signed saturating subtract vectors (unpredicated).
5496 void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5497
5498 // Signed saturating subtract immediate (unpredicated).
5499 void sqsub(const ZRegister& zd,
5500 const ZRegister& zn,
5501 int imm8,
5502 int shift = -1);
5503
5504 // Contiguous/scatter store bytes from vector.
5505 void st1b(const ZRegister& zt,
5506 const PRegister& pg,
5507 const SVEMemOperand& addr);
5508
5509 // Contiguous/scatter store halfwords from vector.
5510 void st1h(const ZRegister& zt,
5511 const PRegister& pg,
5512 const SVEMemOperand& addr);
5513
5514 // Contiguous/scatter store words from vector.
5515 void st1w(const ZRegister& zt,
5516 const PRegister& pg,
5517 const SVEMemOperand& addr);
5518
5519 // Contiguous/scatter store doublewords from vector.
5520 void st1d(const ZRegister& zt,
5521 const PRegister& pg,
5522 const SVEMemOperand& addr);
5523
5524 // Contiguous store two-byte structures from two vectors.
5525 void st2b(const ZRegister& zt1,
5526 const ZRegister& zt2,
5527 const PRegister& pg,
5528 const SVEMemOperand& addr);
5529
5530 // Contiguous store two-halfword structures from two vectors.
5531 void st2h(const ZRegister& zt1,
5532 const ZRegister& zt2,
5533 const PRegister& pg,
5534 const SVEMemOperand& addr);
5535
5536 // Contiguous store two-word structures from two vectors.
5537 void st2w(const ZRegister& zt1,
5538 const ZRegister& zt2,
5539 const PRegister& pg,
5540 const SVEMemOperand& addr);
5541
  // Contiguous store two-doubleword structures from two vectors.
5543 void st2d(const ZRegister& zt1,
5544 const ZRegister& zt2,
5545 const PRegister& pg,
5546 const SVEMemOperand& addr);
5547
5548 // Contiguous store three-byte structures from three vectors.
5549 void st3b(const ZRegister& zt1,
5550 const ZRegister& zt2,
5551 const ZRegister& zt3,
5552 const PRegister& pg,
5553 const SVEMemOperand& addr);
5554
5555 // Contiguous store three-halfword structures from three vectors.
5556 void st3h(const ZRegister& zt1,
5557 const ZRegister& zt2,
5558 const ZRegister& zt3,
5559 const PRegister& pg,
5560 const SVEMemOperand& addr);
5561
5562 // Contiguous store three-word structures from three vectors.
5563 void st3w(const ZRegister& zt1,
5564 const ZRegister& zt2,
5565 const ZRegister& zt3,
5566 const PRegister& pg,
5567 const SVEMemOperand& addr);
5568
5569 // Contiguous store three-doubleword structures from three vectors.
5570 void st3d(const ZRegister& zt1,
5571 const ZRegister& zt2,
5572 const ZRegister& zt3,
5573 const PRegister& pg,
5574 const SVEMemOperand& addr);
5575
5576 // Contiguous store four-byte structures from four vectors.
5577 void st4b(const ZRegister& zt1,
5578 const ZRegister& zt2,
5579 const ZRegister& zt3,
5580 const ZRegister& zt4,
5581 const PRegister& pg,
5582 const SVEMemOperand& addr);
5583
5584 // Contiguous store four-halfword structures from four vectors.
5585 void st4h(const ZRegister& zt1,
5586 const ZRegister& zt2,
5587 const ZRegister& zt3,
5588 const ZRegister& zt4,
5589 const PRegister& pg,
5590 const SVEMemOperand& addr);
5591
5592 // Contiguous store four-word structures from four vectors.
5593 void st4w(const ZRegister& zt1,
5594 const ZRegister& zt2,
5595 const ZRegister& zt3,
5596 const ZRegister& zt4,
5597 const PRegister& pg,
5598 const SVEMemOperand& addr);
5599
5600 // Contiguous store four-doubleword structures from four vectors.
5601 void st4d(const ZRegister& zt1,
5602 const ZRegister& zt2,
5603 const ZRegister& zt3,
5604 const ZRegister& zt4,
5605 const PRegister& pg,
5606 const SVEMemOperand& addr);
5607
5608 // Contiguous store non-temporal bytes from vector.
5609 void stnt1b(const ZRegister& zt,
5610 const PRegister& pg,
5611 const SVEMemOperand& addr);
5612
5613 // Contiguous store non-temporal halfwords from vector.
5614 void stnt1h(const ZRegister& zt,
5615 const PRegister& pg,
5616 const SVEMemOperand& addr);
5617
5618 // Contiguous store non-temporal words from vector.
5619 void stnt1w(const ZRegister& zt,
5620 const PRegister& pg,
5621 const SVEMemOperand& addr);
5622
5623 // Contiguous store non-temporal doublewords from vector.
5624 void stnt1d(const ZRegister& zt,
5625 const PRegister& pg,
5626 const SVEMemOperand& addr);
5627
5628 // Store SVE predicate/vector register.
5629 void str(const CPURegister& rt, const SVEMemOperand& addr);
5630
5631 // Subtract vectors (predicated).
5632 void sub(const ZRegister& zd,
5633 const PRegisterM& pg,
5634 const ZRegister& zn,
5635 const ZRegister& zm);
5636
5637 // Subtract vectors (unpredicated).
5638 void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5639
5640 // Subtract immediate (unpredicated).
5641 void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5642
5643 // Reversed subtract vectors (predicated).
5644 void subr(const ZRegister& zd,
5645 const PRegisterM& pg,
5646 const ZRegister& zn,
5647 const ZRegister& zm);
5648
5649 // Reversed subtract from immediate (unpredicated).
5650 void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5651
5652 // Signed unpack and extend half of vector.
5653 void sunpkhi(const ZRegister& zd, const ZRegister& zn);
5654
5655 // Signed unpack and extend half of vector.
5656 void sunpklo(const ZRegister& zd, const ZRegister& zn);
5657
5658 // Signed byte extend (predicated).
5659 void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5660
5661 // Signed halfword extend (predicated).
5662 void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5663
5664 // Signed word extend (predicated).
5665 void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5666
5667 // Programmable table lookup/permute using vector of indices into a
5668 // vector.
5669 void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5670
5671 // Interleave even or odd elements from two predicates.
5672 void trn1(const PRegisterWithLaneSize& pd,
5673 const PRegisterWithLaneSize& pn,
5674 const PRegisterWithLaneSize& pm);
5675
5676 // Interleave even or odd elements from two vectors.
5677 void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5678
5679 // Interleave even or odd elements from two predicates.
5680 void trn2(const PRegisterWithLaneSize& pd,
5681 const PRegisterWithLaneSize& pn,
5682 const PRegisterWithLaneSize& pm);
5683
5684 // Interleave even or odd elements from two vectors.
5685 void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5686
5687 // Unsigned absolute difference (predicated).
5688 void uabd(const ZRegister& zd,
5689 const PRegisterM& pg,
5690 const ZRegister& zn,
5691 const ZRegister& zm);
5692
5693 // Unsigned add reduction to scalar.
5694 void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5695
5696 // Unsigned integer convert to floating-point (predicated).
5697 void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5698
5699 // Unsigned divide (predicated).
5700 void udiv(const ZRegister& zd,
5701 const PRegisterM& pg,
5702 const ZRegister& zn,
5703 const ZRegister& zm);
5704
5705 // Unsigned reversed divide (predicated).
5706 void udivr(const ZRegister& zd,
5707 const PRegisterM& pg,
5708 const ZRegister& zn,
5709 const ZRegister& zm);
5710
5711 // Unsigned dot product by indexed quadtuplet.
5712 void udot(const ZRegister& zda,
5713 const ZRegister& zn,
5714 const ZRegister& zm,
5715 int index);
5716
5717 // Unsigned dot product.
5718 void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5719
5720 // Unsigned maximum vectors (predicated).
5721 void umax(const ZRegister& zd,
5722 const PRegisterM& pg,
5723 const ZRegister& zn,
5724 const ZRegister& zm);
5725
5726 // Unsigned maximum with immediate (unpredicated).
5727 void umax(const ZRegister& zd, const ZRegister& zn, int imm8);
5728
5729 // Unsigned maximum reduction to scalar.
5730 void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5731
5732 // Unsigned minimum vectors (predicated).
5733 void umin(const ZRegister& zd,
5734 const PRegisterM& pg,
5735 const ZRegister& zn,
5736 const ZRegister& zm);
5737
5738 // Unsigned minimum with immediate (unpredicated).
5739 void umin(const ZRegister& zd, const ZRegister& zn, int imm8);
5740
5741 // Unsigned minimum reduction to scalar.
5742 void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5743
5744 // Unsigned multiply returning high half (predicated).
5745 void umulh(const ZRegister& zd,
5746 const PRegisterM& pg,
5747 const ZRegister& zn,
5748 const ZRegister& zm);
5749
5750 // Unsigned saturating add vectors (unpredicated).
5751 void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5752
5753 // Unsigned saturating add immediate (unpredicated).
5754 void uqadd(const ZRegister& zd,
5755 const ZRegister& zn,
5756 int imm8,
5757 int shift = -1);
5758
5759 // Unsigned saturating decrement scalar by multiple of 8-bit predicate
5760 // constraint element count.
5761 void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5762
5763 // Unsigned saturating decrement scalar by multiple of 64-bit predicate
5764 // constraint element count.
5765 void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5766
5767 // Unsigned saturating decrement vector by multiple of 64-bit predicate
5768 // constraint element count.
5769 void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5770
5771 // Unsigned saturating decrement scalar by multiple of 16-bit predicate
5772 // constraint element count.
5773 void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5774
5775 // Unsigned saturating decrement vector by multiple of 16-bit predicate
5776 // constraint element count.
5777 void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5778
5779 // Unsigned saturating decrement scalar by active predicate element count.
5780 void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);
5781
5782 // Unsigned saturating decrement vector by active predicate element count.
5783 void uqdecp(const ZRegister& zdn, const PRegister& pg);
5784
5785 // Unsigned saturating decrement scalar by multiple of 32-bit predicate
5786 // constraint element count.
5787 void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5788
5789 // Unsigned saturating decrement vector by multiple of 32-bit predicate
5790 // constraint element count.
5791 void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5792
5793 // Unsigned saturating increment scalar by multiple of 8-bit predicate
5794 // constraint element count.
5795 void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5796
5797 // Unsigned saturating increment scalar by multiple of 64-bit predicate
5798 // constraint element count.
5799 void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5800
5801 // Unsigned saturating increment vector by multiple of 64-bit predicate
5802 // constraint element count.
5803 void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5804
5805 // Unsigned saturating increment scalar by multiple of 16-bit predicate
5806 // constraint element count.
5807 void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5808
5809 // Unsigned saturating increment vector by multiple of 16-bit predicate
5810 // constraint element count.
5811 void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5812
5813 // Unsigned saturating increment scalar by active predicate element count.
5814 void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);
5815
5816 // Unsigned saturating increment vector by active predicate element count.
5817 void uqincp(const ZRegister& zdn, const PRegister& pg);
5818
5819 // Unsigned saturating increment scalar by multiple of 32-bit predicate
5820 // constraint element count.
5821 void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5822
5823 // Unsigned saturating increment vector by multiple of 32-bit predicate
5824 // constraint element count.
5825 void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5826
5827 // Unsigned saturating subtract vectors (unpredicated).
5828 void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5829
5830 // Unsigned saturating subtract immediate (unpredicated).
5831 void uqsub(const ZRegister& zd,
5832 const ZRegister& zn,
5833 int imm8,
5834 int shift = -1);
5835
5836 // Unsigned unpack and extend half of vector.
5837 void uunpkhi(const ZRegister& zd, const ZRegister& zn);
5838
5839 // Unsigned unpack and extend half of vector.
5840 void uunpklo(const ZRegister& zd, const ZRegister& zn);
5841
5842 // Unsigned byte extend (predicated).
5843 void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5844
5845 // Unsigned halfword extend (predicated).
5846 void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5847
5848 // Unsigned word extend (predicated).
5849 void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5850
5851 // Concatenate even or odd elements from two predicates.
5852 void uzp1(const PRegisterWithLaneSize& pd,
5853 const PRegisterWithLaneSize& pn,
5854 const PRegisterWithLaneSize& pm);
5855
5856 // Concatenate even or odd elements from two vectors.
5857 void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5858
5859 // Concatenate even or odd elements from two predicates.
5860 void uzp2(const PRegisterWithLaneSize& pd,
5861 const PRegisterWithLaneSize& pn,
5862 const PRegisterWithLaneSize& pm);
5863
5864 // Concatenate even or odd elements from two vectors.
5865 void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5866
5867 // While incrementing signed scalar less than or equal to scalar.
5868 void whilele(const PRegisterWithLaneSize& pd,
5869 const Register& rn,
5870 const Register& rm);
5871
5872 // While incrementing unsigned scalar lower than scalar.
5873 void whilelo(const PRegisterWithLaneSize& pd,
5874 const Register& rn,
5875 const Register& rm);
5876
5877 // While incrementing unsigned scalar lower or same as scalar.
5878 void whilels(const PRegisterWithLaneSize& pd,
5879 const Register& rn,
5880 const Register& rm);
5881
5882 // While incrementing signed scalar less than scalar.
5883 void whilelt(const PRegisterWithLaneSize& pd,
5884 const Register& rn,
5885 const Register& rm);
5886
5887 // Write the first-fault register.
5888 void wrffr(const PRegisterWithLaneSize& pn);
5889
5890 // Interleave elements from two half predicates.
5891 void zip1(const PRegisterWithLaneSize& pd,
5892 const PRegisterWithLaneSize& pn,
5893 const PRegisterWithLaneSize& pm);
5894
5895 // Interleave elements from two half vectors.
5896 void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5897
5898 // Interleave elements from two half predicates.
5899 void zip2(const PRegisterWithLaneSize& pd,
5900 const PRegisterWithLaneSize& pn,
5901 const PRegisterWithLaneSize& pm);
5902
5903 // Interleave elements from two half vectors.
5904 void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5905
5906 // Add with carry long (bottom).
5907 void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5908
5909 // Add with carry long (top).
5910 void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5911
5912 // Add narrow high part (bottom).
5913 void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5914
5915 // Add narrow high part (top).
5916 void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5917
5918 // Add pairwise.
5919 void addp(const ZRegister& zd,
5920 const PRegisterM& pg,
5921 const ZRegister& zn,
5922 const ZRegister& zm);
5923
5924 // Bitwise clear and exclusive OR.
5925 void bcax(const ZRegister& zd,
5926 const ZRegister& zn,
5927 const ZRegister& zm,
5928 const ZRegister& zk);
5929
5930 // Scatter lower bits into positions selected by bitmask.
5931 void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5932
5933 // Gather lower bits from positions selected by bitmask.
5934 void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5935
5936 // Group bits to right or left as selected by bitmask.
5937 void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5938
5939 // Bitwise select.
5940 void bsl(const ZRegister& zd,
5941 const ZRegister& zn,
5942 const ZRegister& zm,
5943 const ZRegister& zk);
5944
5945 // Bitwise select with first input inverted.
5946 void bsl1n(const ZRegister& zd,
5947 const ZRegister& zn,
5948 const ZRegister& zm,
5949 const ZRegister& zk);
5950
5951 // Bitwise select with second input inverted.
5952 void bsl2n(const ZRegister& zd,
5953 const ZRegister& zn,
5954 const ZRegister& zm,
5955 const ZRegister& zk);
5956
5957 // Complex integer add with rotate.
5958 void cadd(const ZRegister& zd,
5959 const ZRegister& zn,
5960 const ZRegister& zm,
5961 int rot);
5962
5963 // Complex integer dot product (indexed).
5964 void cdot(const ZRegister& zda,
5965 const ZRegister& zn,
5966 const ZRegister& zm,
5967 int index,
5968 int rot);
5969
5970 // Complex integer dot product.
5971 void cdot(const ZRegister& zda,
5972 const ZRegister& zn,
5973 const ZRegister& zm,
5974 int rot);
5975
5976 // Complex integer multiply-add with rotate (indexed).
5977 void cmla(const ZRegister& zda,
5978 const ZRegister& zn,
5979 const ZRegister& zm,
5980 int index,
5981 int rot);
5982
5983 // Complex integer multiply-add with rotate.
5984 void cmla(const ZRegister& zda,
5985 const ZRegister& zn,
5986 const ZRegister& zm,
5987 int rot);
5988
5989 // Bitwise exclusive OR of three vectors.
5990 void eor3(const ZRegister& zd,
5991 const ZRegister& zn,
5992 const ZRegister& zm,
5993 const ZRegister& zk);
5994
5995 // Interleaving exclusive OR (bottom, top).
5996 void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5997
5998 // Interleaving exclusive OR (top, bottom).
5999 void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6000
  // Floating-point add pairwise.
  // The predicated forms below take a merging governing predicate
  // (`PRegisterM`); inactive lanes keep their previous value.
  void faddp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point up convert long (top, predicated).
  void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point down convert and narrow (top, predicated).
  void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point down convert, rounding to odd (predicated).
  void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point down convert, rounding to odd (top, predicated).
  void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point base 2 logarithm as integer.
  void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point maximum number pairwise.
  void fmaxnmp(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Floating-point maximum pairwise.
  void fmaxp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point minimum number pairwise.
  void fminnmp(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Floating-point minimum pairwise.
  void fminp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Half-precision floating-point multiply-add long to single-precision
  // (bottom).
  void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Half-precision floating-point multiply-add long to single-precision
  // (top).
  void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (bottom).
  void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (top, indexed).
  void fmlslt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-add long to single-precision
  // (bottom, indexed).
  void fmlalb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-add long to single-precision
  // (top, indexed).
  void fmlalt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (bottom, indexed).
  void fmlslb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (top).
  void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Count matching elements in vector.
  void histcnt(const ZRegister& zd,
               const PRegisterZ& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Count matching elements in vector segments.
  void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6098
  // Gather load non-temporal signed bytes.
  // For the loads below, zt is the transfer (destination) register and the
  // zeroing predicate (`PRegisterZ`) governs which lanes are loaded;
  // inactive lanes are zeroed.
  void ldnt1sb(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Gather load non-temporal signed halfwords.
  void ldnt1sh(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Gather load non-temporal signed words.
  void ldnt1sw(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Detect any matching elements, setting the condition flags.
  void match(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Multiply-add to accumulator (indexed).
  void mla(const ZRegister& zda,
           const ZRegister& zn,
           const ZRegister& zm,
           int index);

  // Multiply-subtract from accumulator (indexed).
  void mls(const ZRegister& zda,
           const ZRegister& zn,
           const ZRegister& zm,
           int index);

  // Multiply (indexed).
  void mul(const ZRegister& zd,
           const ZRegister& zn,
           const ZRegister& zm,
           int index);

  // Multiply vectors (unpredicated).
  void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise inverted select.
  void nbsl(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            const ZRegister& zk);

  // Detect no matching elements, setting the condition flags.
  void nmatch(const PRegisterWithLaneSize& pd,
              const PRegisterZ& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Polynomial multiply vectors (unpredicated).
  void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Polynomial multiply long (bottom).
  void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Polynomial multiply long (top).
  void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6161
  // Rounding add narrow high part (bottom).
  void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Rounding add narrow high part (top).
  void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Rounding shift right narrow by immediate (bottom).
  // `shift` is the immediate shift amount.
  void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Rounding shift right narrow by immediate (top).
  void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Rounding subtract narrow high part (bottom).
  void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Rounding subtract narrow high part (top).
  void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference and accumulate.
  void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference and accumulate long (bottom).
  void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference and accumulate long (top).
  void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference long (bottom).
  void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference long (top).
  void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add and accumulate long pairwise.
  void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);

  // Signed add long (bottom).
  void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add long (bottom + top).
  void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add long (top).
  void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add wide (bottom).
  void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add wide (top).
  void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Subtract with carry long (bottom).
  void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Subtract with carry long (top).
  void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed halving addition.
  void shadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Shift right narrow by immediate (bottom).
  void shrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Shift right narrow by immediate (top).
  void shrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed halving subtract.
  void shsub(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed halving subtract reversed vectors.
  void shsubr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Shift left and insert (immediate).
  void sli(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed maximum pairwise.
  void smaxp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed minimum pairwise.
  void sminp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed multiply-add long to accumulator (bottom, indexed).
  void smlalb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply-add long to accumulator (bottom).
  void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply-add long to accumulator (top, indexed).
  void smlalt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply-add long to accumulator (top).
  void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply-subtract long from accumulator (bottom, indexed).
  void smlslb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply-subtract long from accumulator (bottom).
  void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply-subtract long from accumulator (top, indexed).
  void smlslt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply-subtract long from accumulator (top).
  void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply returning high half (unpredicated).
  void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply long (bottom, indexed).
  void smullb(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply long (bottom).
  void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply long (top, indexed).
  void smullt(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply long (top).
  void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating absolute value.
  void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed saturating addition (predicated).
  void sqadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Saturating complex integer add with rotate.
  void sqcadd(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int rot);
6329
  // Signed saturating doubling multiply-add long to accumulator (bottom,
  // indexed).
  // In the indexed forms below, `index` presumably selects the element of
  // zm to multiply by — confirm against the implementation.
  void sqdmlalb(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply-add long to accumulator (bottom).
  void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply-add long to accumulator (bottom x
  // top).
  void sqdmlalbt(const ZRegister& zda,
                 const ZRegister& zn,
                 const ZRegister& zm);

  // Signed saturating doubling multiply-add long to accumulator (top,
  // indexed).
  void sqdmlalt(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply-add long to accumulator (top).
  void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (bottom, indexed).
  void sqdmlslb(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (bottom).
  void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (bottom x top).
  void sqdmlslbt(const ZRegister& zda,
                 const ZRegister& zn,
                 const ZRegister& zm);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (top, indexed).
  void sqdmlslt(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (top).
  void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply high (indexed).
  void sqdmulh(const ZRegister& zd,
               const ZRegister& zn,
               const ZRegister& zm,
               int index);

  // Signed saturating doubling multiply high (unpredicated).
  void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply long (bottom, indexed).
  void sqdmullb(const ZRegister& zd,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply long (bottom).
  void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply long (top, indexed).
  void sqdmullt(const ZRegister& zd,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply long (top).
  void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating negate.
  void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Saturating rounding doubling complex integer multiply-add high with
  // rotate (indexed).
  void sqrdcmlah(const ZRegister& zda,
                 const ZRegister& zn,
                 const ZRegister& zm,
                 int index,
                 int rot);

  // Saturating rounding doubling complex integer multiply-add high with
  // rotate.
  void sqrdcmlah(const ZRegister& zda,
                 const ZRegister& zn,
                 const ZRegister& zm,
                 int rot);

  // Signed saturating rounding doubling multiply-add high to accumulator
  // (indexed).
  void sqrdmlah(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating rounding doubling multiply-add high to accumulator
  // (unpredicated).
  void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating rounding doubling multiply-subtract high from
  // accumulator (indexed).
  void sqrdmlsh(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating rounding doubling multiply-subtract high from
  // accumulator (unpredicated).
  void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating rounding doubling multiply high (indexed).
  void sqrdmulh(const ZRegister& zd,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating rounding doubling multiply high (unpredicated).
  void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6459
  // Signed saturating rounding shift left by vector (predicated).
  void sqrshl(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Signed saturating rounding shift left reversed vectors (predicated).
  void sqrshlr(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Signed saturating rounding shift right narrow by immediate (bottom).
  void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating rounding shift right narrow by immediate (top).
  void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating rounding shift right unsigned narrow by immediate
  // (bottom).
  void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating rounding shift right unsigned narrow by immediate
  // (top).
  void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             int shift);

  // Signed saturating shift left by vector (predicated).
  void sqshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed saturating shift left reversed vectors (predicated).
  void sqshlr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              int shift);

  // Signed saturating shift right narrow by immediate (bottom).
  void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating shift right narrow by immediate (top).
  void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (bottom).
  void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (top).
  void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating subtraction (predicated).
  void sqsub(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed saturating subtraction reversed vectors (predicated).
  void sqsubr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Signed saturating extract narrow (bottom).
  void sqxtnb(const ZRegister& zd, const ZRegister& zn);

  // Signed saturating extract narrow (top).
  void sqxtnt(const ZRegister& zd, const ZRegister& zn);

  // Signed saturating unsigned extract narrow (bottom).
  void sqxtunb(const ZRegister& zd, const ZRegister& zn);

  // Signed saturating unsigned extract narrow (top).
  void sqxtunt(const ZRegister& zd, const ZRegister& zn);

  // Signed rounding halving addition.
  void srhadd(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Shift right and insert (immediate).
  void sri(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed rounding shift left by vector (predicated).
  void srshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed rounding shift left reversed vectors (predicated).
  void srshlr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Signed rounding shift right by immediate.
  void srshr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             int shift);

  // Signed rounding shift right and accumulate (immediate).
  void srsra(const ZRegister& zda, const ZRegister& zn, int shift);

  // Signed shift left long by immediate (bottom).
  void sshllb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed shift left long by immediate (top).
  void sshllt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed shift right and accumulate (immediate).
  void ssra(const ZRegister& zda, const ZRegister& zn, int shift);

  // Signed subtract long (bottom).
  void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract long (bottom - top).
  void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract long (top).
  void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract long (top - bottom).
  void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract wide (bottom).
  void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract wide (top).
  void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Subtract narrow high part (bottom).
  void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Subtract narrow high part (top).
  void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating addition of unsigned value.
  void suqadd(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);
6614
  // Programmable table lookup in one or two vector tables (zeroing).
  void tbl(const ZRegister& zd,
           const ZRegister& zn1,
           const ZRegister& zn2,
           const ZRegister& zm);

  // Programmable table lookup in single vector table (merging).
  void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference and accumulate.
  void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference and accumulate long (bottom).
  void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference and accumulate long (top).
  void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference long (bottom).
  void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference long (top).
  void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned add and accumulate long pairwise.
  void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned add long (bottom).
  void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned add long (top).
  void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned add wide (bottom).
  void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned add wide (top).
  void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned halving addition.
  void uhadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned halving subtract.
  void uhsub(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned halving subtract reversed vectors.
  void uhsubr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned maximum pairwise.
  void umaxp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned minimum pairwise.
  void uminp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned multiply-add long to accumulator (bottom, indexed).
  void umlalb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply-add long to accumulator (bottom).
  void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply-add long to accumulator (top, indexed).
  void umlalt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply-add long to accumulator (top).
  void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply-subtract long from accumulator (bottom, indexed).
  void umlslb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply-subtract long from accumulator (bottom).
  void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply-subtract long from accumulator (top, indexed).
  void umlslt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply-subtract long from accumulator (top).
  void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply returning high half (unpredicated).
  void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply long (bottom, indexed).
  void umullb(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply long (bottom).
  void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply long (top, indexed).
  void umullt(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply long (top).
  void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6740
  // Unsigned saturating addition (predicated).
  void uqadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned saturating rounding shift left by vector (predicated).
  void uqrshl(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned saturating rounding shift left reversed vectors (predicated).
  void uqrshlr(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Unsigned saturating rounding shift right narrow by immediate (bottom).
  void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (top).
  void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             int shift);

  // Unsigned saturating shift left by vector (predicated).
  void uqshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned saturating shift left reversed vectors (predicated).
  void uqshlr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned saturating shift right narrow by immediate (bottom).
  void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating shift right narrow by immediate (top).
  void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating subtraction (predicated).
  void uqsub(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned saturating subtraction reversed vectors (predicated).
  void uqsubr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned saturating extract narrow (bottom).
  void uqxtnb(const ZRegister& zd, const ZRegister& zn);

  // Unsigned saturating extract narrow (top).
  void uqxtnt(const ZRegister& zd, const ZRegister& zn);

  // Unsigned reciprocal estimate (predicated).
  void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned rounding halving addition.
  void urhadd(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned rounding shift left by vector (predicated).
  void urshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned rounding shift left reversed vectors (predicated).
  void urshlr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned rounding shift right by immediate.
  void urshr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             int shift);

  // Unsigned reciprocal square root estimate (predicated).
  void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned rounding shift right and accumulate (immediate).
  void ursra(const ZRegister& zda, const ZRegister& zn, int shift);

  // Unsigned shift left long by immediate (bottom).
  void ushllb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned shift left long by immediate (top).
  void ushllt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating addition of signed value.
  void usqadd(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned shift right and accumulate (immediate).
  void usra(const ZRegister& zda, const ZRegister& zn, int shift);

  // Unsigned subtract long (bottom).
  void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned subtract long (top).
  void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned subtract wide (bottom).
  void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned subtract wide (top).
  void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6866
  // While decrementing signed scalar greater than or equal to scalar.
  // The while* forms below produce a predicate (pd) from a pair of scalar
  // registers (rn, rm).
  void whilege(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While decrementing signed scalar greater than scalar.
  void whilegt(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While decrementing unsigned scalar higher than scalar.
  void whilehi(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While decrementing unsigned scalar higher or same as scalar.
  void whilehs(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While free of read-after-write conflicts.
  void whilerw(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While free of write-after-read/write conflicts.
  void whilewr(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // Bitwise exclusive OR and rotate right by immediate.
  void xar(const ZRegister& zd,
           const ZRegister& zn,
           const ZRegister& zm,
           int shift);

  // Floating-point matrix multiply-accumulate.
  void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed integer matrix multiply-accumulate.
  void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned by signed integer matrix multiply-accumulate.
  void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned integer matrix multiply-accumulate.
  void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned by signed integer dot product.
  void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned by signed integer indexed dot product.
  void usdot(const ZRegister& zda,
             const ZRegister& zn,
             const ZRegister& zm,
             int index);

  // Signed by unsigned integer indexed dot product.
  void sudot(const ZRegister& zda,
             const ZRegister& zn,
             const ZRegister& zm,
             int index);
6929
  // Memory ("Allocation") tag manipulation instructions.

  // Add with Tag.
  void addg(const Register& xd, const Register& xn, int offset, int tag_offset);

  // Tag Mask Insert.
  void gmi(const Register& xd, const Register& xn, const Register& xm);

  // Insert Random Tag.
  void irg(const Register& xd, const Register& xn, const Register& xm = xzr);

  // Load Allocation Tag.
  void ldg(const Register& xt, const MemOperand& addr);

  // Shared emission helper for the tag-store forms below; `op` selects the
  // specific encoding. NOTE(review): presumably not intended for direct use.
  void StoreTagHelper(const Register& xt, const MemOperand& addr, Instr op);

  // Store Allocation Tags.
  void st2g(const Register& xt, const MemOperand& addr);

  // Store Allocation Tag.
  void stg(const Register& xt, const MemOperand& addr);

  // Store Allocation Tag and Pair of registers.
  void stgp(const Register& xt1, const Register& xt2, const MemOperand& addr);

  // Store Allocation Tags, Zeroing.
  void stz2g(const Register& xt, const MemOperand& addr);

  // Store Allocation Tag, Zeroing.
  void stzg(const Register& xt, const MemOperand& addr);

  // Subtract with Tag.
  void subg(const Register& xd, const Register& xn, int offset, int tag_offset);

  // Subtract Pointer.
  void subp(const Register& xd, const Register& xn, const Register& xm);

  // Subtract Pointer, setting Flags.
  void subps(const Register& xd, const Register& xn, const Register& xm);
6967
6968 // Compare with Tag.
cmpp(const Register & xn,const Register & xm)6969 void cmpp(const Register& xn, const Register& xm) { subps(xzr, xn, xm); }
6970
  // Memory copy instructions. NOTE(review): the p/m/e suffixes appear to
  // correspond to the prologue/main/epilogue steps of the architectural
  // three-instruction copy sequence — confirm against the Arm ARM.

  // Memory Copy.
  void cpye(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads and writes non-temporal.
  void cpyen(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads non-temporal.
  void cpyern(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, writes non-temporal.
  void cpyewn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only.
  void cpyfe(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads and writes non-temporal.
  void cpyfen(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads non-temporal.
  void cpyfern(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, writes non-temporal.
  void cpyfewn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only.
  void cpyfm(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads and writes non-temporal.
  void cpyfmn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads non-temporal.
  void cpyfmrn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, writes non-temporal.
  void cpyfmwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only.
  void cpyfp(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads and writes non-temporal.
  void cpyfpn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads non-temporal.
  void cpyfprn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, writes non-temporal.
  void cpyfpwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy.
  void cpym(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads and writes non-temporal.
  void cpymn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads non-temporal.
  void cpymrn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, writes non-temporal.
  void cpymwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy.
  void cpyp(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads and writes non-temporal.
  void cpypn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads non-temporal.
  void cpyprn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, writes non-temporal.
  void cpypwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory set instructions. Note the (rd, rn, rs) operand order differs
  // from the (rd, rs, rn) order of the copy forms above.

  // Memory Set.
  void sete(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, non-temporal.
  void seten(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting.
  void setge(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, non-temporal.
  void setgen(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting.
  void setgm(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, non-temporal.
  void setgmn(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting.
  void setgp(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, non-temporal.
  void setgpn(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set.
  void setm(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, non-temporal.
  void setmn(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set.
  void setp(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, non-temporal.
  void setpn(const Register& rd, const Register& rn, const Register& rs);

  // Scalar bit-manipulation and min/max instructions.

  // Absolute value.
  void abs(const Register& rd, const Register& rn);

  // Count bits.
  void cnt(const Register& rd, const Register& rn);

  // Count Trailing Zeros.
  void ctz(const Register& rd, const Register& rn);

  // Signed Maximum.
  void smax(const Register& rd, const Register& rn, const Operand& op);

  // Signed Minimum.
  void smin(const Register& rd, const Register& rn, const Operand& op);

  // Unsigned Maximum.
  void umax(const Register& rd, const Register& rn, const Operand& op);

  // Unsigned Minimum.
  void umin(const Register& rd, const Register& rn, const Operand& op);
7099
  // Emit generic instructions.

  // Emit raw instructions into the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
  void dc32(uint32_t data) { dc(data); }

  // Emit 64 bits of data into the instruction stream.
  void dc64(uint64_t data) { dc(data); }

  // Emit data in the instruction stream. The width of the emitted data is
  // determined by sizeof(T).
  template <typename T>
  void dc(T data) {
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }

  // Copy a string into the instruction stream, including the terminating NULL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
  void EmitString(const char* string) {
    VIXL_ASSERT(string != NULL);
    VIXL_ASSERT(AllowAssembler());

    GetBuffer()->EmitString(string);
    GetBuffer()->Align();
  }
7128
  // Code generation helpers.
  static bool OneInstrMoveImmediateHelper(Assembler* assm,
                                          const Register& dst,
                                          uint64_t imm);

  // Register encoding.
  // Place a register code in the [hibit:lobit] field. The stack pointer's
  // internal code is explicitly rejected.
  template <int hibit, int lobit>
  static Instr Rx(CPURegister rx) {
    VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
    return ImmUnsignedField<hibit, lobit>(rx.GetCode());
  }

// The macros below declare Rd(), Rn(), Rm(), Ra(), Rt(), Rt2() and Rs():
// one field encoder per register operand position.
#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
#define REGISTER_ENCODER(N)                                           \
  static Instr R##N(CPURegister r##N) {                               \
    return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
  }
  CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
#undef REGISTER_ENCODER
#undef CPU_REGISTER_FIELD_NAMES
7149
  // As Rm(), but additionally disallow the zero register.
  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register.
  static Instr RdSP(Register rd) {
    VIXL_ASSERT(!rd.IsZero());
    return (rd.GetCode() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    VIXL_ASSERT(!rn.IsZero());
    return (rn.GetCode() & kRegCodeMask) << Rn_offset;
  }

  static Instr RmSP(Register rm) {
    VIXL_ASSERT(!rm.IsZero());
    return (rm.GetCode() & kRegCodeMask) << Rm_offset;
  }

  // Predicate register field encoders.
  static Instr Pd(PRegister pd) {
    return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
  }

  static Instr Pm(PRegister pm) {
    return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
  }

  static Instr Pn(PRegister pn) {
    return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
  }

  // Governing predicate, encoded in the fixed low-bit field.
  static Instr PgLow8(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
  }

  // Governing predicate, encoded in an arbitrary [hibit:lobit] field.
  template <int hibit, int lobit>
  static Instr Pg(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<hibit, lobit>(pg);
  }
7199
7200 // Flags encoding.
Flags(FlagsUpdate S)7201 static Instr Flags(FlagsUpdate S) {
7202 if (S == SetFlags) {
7203 return 1 << FlagsUpdate_offset;
7204 } else if (S == LeaveFlags) {
7205 return 0 << FlagsUpdate_offset;
7206 }
7207 VIXL_UNREACHABLE();
7208 return 0;
7209 }
7210
Cond(Condition cond)7211 static Instr Cond(Condition cond) { return cond << Condition_offset; }
7212
7213 // Generic immediate encoding.
7214 template <int hibit, int lobit>
ImmField(int64_t imm)7215 static Instr ImmField(int64_t imm) {
7216 VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
7217 VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
7218 int fieldsize = hibit - lobit + 1;
7219 VIXL_ASSERT(IsIntN(fieldsize, imm));
7220 return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
7221 }
7222
7223 // For unsigned immediate encoding.
7224 // TODO: Handle signed and unsigned immediate in satisfactory way.
7225 template <int hibit, int lobit>
ImmUnsignedField(uint64_t imm)7226 static Instr ImmUnsignedField(uint64_t imm) {
7227 VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
7228 VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
7229 VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
7230 return static_cast<Instr>(imm << lobit);
7231 }
7232
7233 // PC-relative address encoding.
ImmPCRelAddress(int64_t imm21)7234 static Instr ImmPCRelAddress(int64_t imm21) {
7235 VIXL_ASSERT(IsInt21(imm21));
7236 Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
7237 Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
7238 Instr immlo = imm << ImmPCRelLo_offset;
7239 return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
7240 }
7241
  // Branch encoding. Each helper range-checks a signed offset and shifts it
  // into the relevant immediate field.
  static Instr ImmUncondBranch(int64_t imm26) {
    VIXL_ASSERT(IsInt26(imm26));
    return TruncateToUint26(imm26) << ImmUncondBranch_offset;
  }

  static Instr ImmCondBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCondBranch_offset;
  }

  static Instr ImmCmpBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCmpBranch_offset;
  }

  static Instr ImmTestBranch(int64_t imm14) {
    VIXL_ASSERT(IsInt14(imm14));
    return TruncateToUint14(imm14) << ImmTestBranch_offset;
  }

  // Bit-position operand of a test-and-branch instruction: bit 5 of the
  // position and bits [4:0] are stored in separate fields.
  static Instr ImmTestBranchBit(unsigned bit_pos) {
    VIXL_ASSERT(IsUint6(bit_pos));
    // Subtract five from the shift offset, as we need bit 5 from bit_pos.
    unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5);
    unsigned bit40 = bit_pos << ImmTestBranchBit40_offset;
    bit5 &= ImmTestBranchBit5_mask;
    bit40 &= ImmTestBranchBit40_mask;
    return bit5 | bit40;
  }
7272
  // Data Processing encoding.
  // Operand-size bit: set for 64-bit (X) registers, clear for 32-bit (W).
  static Instr SF(Register rd) {
    return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
  }

  // Add/sub immediate: a 12-bit value, optionally shifted left by 12. The
  // shift bit is set when the value does not fit unshifted.
  static Instr ImmAddSub(int imm) {
    VIXL_ASSERT(IsImmAddSub(imm));
    if (IsUint12(imm)) {  // No shift required.
      imm <<= ImmAddSub_offset;
    } else {
      imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
    }
    return imm;
  }

  static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
    USE(lane_size);
    return imms << SVEImmSetBits_offset;
  }

  static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
    VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
    USE(lane_size);
    return immr << SVEImmRotate_offset;
  }

  static Instr SVEBitN(unsigned bitn) {
    VIXL_ASSERT(IsUint1(bitn));
    return bitn << SVEBitN_offset;
  }
7305
  // Encode the SVE load/store dtype field from the memory access size (msize)
  // and the element size (esize), both given as log2 in bytes. The two halves
  // of the field can be placed at caller-specified bit positions.
  static Instr SVEDtype(unsigned msize_in_bytes_log2,
                        unsigned esize_in_bytes_log2,
                        bool is_signed,
                        int dtype_h_lsb = 23,
                        int dtype_l_lsb = 21) {
    VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    Instr dtype_h = msize_in_bytes_log2;
    Instr dtype_l = esize_in_bytes_log2;
    // Signed forms use the encodings where msize would be greater than esize.
    if (is_signed) {
      dtype_h = dtype_h ^ 0x3;
      dtype_l = dtype_l ^ 0x3;
    }
    VIXL_ASSERT(IsUint2(dtype_h));
    VIXL_ASSERT(IsUint2(dtype_l));
    VIXL_ASSERT((dtype_h > dtype_l) == is_signed);

    return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
  }

  // As SVEDtype(), but with the low half of the field placed at bit 13.
  static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
                             unsigned esize_in_bytes_log2,
                             bool is_signed) {
    return SVEDtype(msize_in_bytes_log2,
                    esize_in_bytes_log2,
                    is_signed,
                    23,
                    13);
  }
7336
ImmS(unsigned imms,unsigned reg_size)7337 static Instr ImmS(unsigned imms, unsigned reg_size) {
7338 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
7339 ((reg_size == kWRegSize) && IsUint5(imms)));
7340 USE(reg_size);
7341 return imms << ImmS_offset;
7342 }
7343
ImmR(unsigned immr,unsigned reg_size)7344 static Instr ImmR(unsigned immr, unsigned reg_size) {
7345 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7346 ((reg_size == kWRegSize) && IsUint5(immr)));
7347 USE(reg_size);
7348 VIXL_ASSERT(IsUint6(immr));
7349 return immr << ImmR_offset;
7350 }
7351
ImmSetBits(unsigned imms,unsigned reg_size)7352 static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
7353 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7354 VIXL_ASSERT(IsUint6(imms));
7355 VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
7356 USE(reg_size);
7357 return imms << ImmSetBits_offset;
7358 }
7359
ImmRotate(unsigned immr,unsigned reg_size)7360 static Instr ImmRotate(unsigned immr, unsigned reg_size) {
7361 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7362 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7363 ((reg_size == kWRegSize) && IsUint5(immr)));
7364 USE(reg_size);
7365 return immr << ImmRotate_offset;
7366 }
7367
ImmLLiteral(int64_t imm19)7368 static Instr ImmLLiteral(int64_t imm19) {
7369 VIXL_ASSERT(IsInt19(imm19));
7370 return TruncateToUint19(imm19) << ImmLLiteral_offset;
7371 }
7372
BitN(unsigned bitn,unsigned reg_size)7373 static Instr BitN(unsigned bitn, unsigned reg_size) {
7374 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7375 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
7376 USE(reg_size);
7377 return bitn << BitN_offset;
7378 }
7379
ShiftDP(Shift shift)7380 static Instr ShiftDP(Shift shift) {
7381 VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
7382 return shift << ShiftDP_offset;
7383 }
7384
ImmDPShift(unsigned amount)7385 static Instr ImmDPShift(unsigned amount) {
7386 VIXL_ASSERT(IsUint6(amount));
7387 return amount << ImmDPShift_offset;
7388 }
7389
ExtendMode(Extend extend)7390 static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }
7391
ImmExtendShift(unsigned left_shift)7392 static Instr ImmExtendShift(unsigned left_shift) {
7393 VIXL_ASSERT(left_shift <= 4);
7394 return left_shift << ImmExtendShift_offset;
7395 }
7396
ImmCondCmp(unsigned imm)7397 static Instr ImmCondCmp(unsigned imm) {
7398 VIXL_ASSERT(IsUint5(imm));
7399 return imm << ImmCondCmp_offset;
7400 }
7401
Nzcv(StatusFlags nzcv)7402 static Instr Nzcv(StatusFlags nzcv) {
7403 return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
7404 }
7405
  // MemOperand offset encoding.
  // Unsigned, scaled 12-bit offset.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  // Signed, unscaled 9-bit offset.
  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  // Signed 7-bit load/store-pair offset, scaled by the access size; the raw
  // offset must be a multiple of the access size.
  static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
    const auto access_size_in_bytes = 1U << access_size_in_bytes_log2;
    VIXL_ASSERT(IsMultiple(imm7, access_size_in_bytes));
    int64_t scaled_imm7 = imm7 / access_size_in_bytes;
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }

  // Single-bit shift selector for register-offset loads/stores.
  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  // 10-bit offset for PAC loads, scaled by 8; the sign bit is stored apart
  // from the low nine bits.
  static Instr ImmLSPAC(int64_t imm10) {
    VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
    int64_t scaled_imm10 = imm10 / (1 << 3);
    VIXL_ASSERT(IsInt10(scaled_imm10));
    uint32_t s_bit = (scaled_imm10 >> 9) & 1;
    return (s_bit << ImmLSPACHi_offset) |
           (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
  }

  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }
7443
ImmException(int imm16)7444 static Instr ImmException(int imm16) {
7445 VIXL_ASSERT(IsUint16(imm16));
7446 return imm16 << ImmException_offset;
7447 }
7448
ImmUdf(int imm16)7449 static Instr ImmUdf(int imm16) {
7450 VIXL_ASSERT(IsUint16(imm16));
7451 return imm16 << ImmUdf_offset;
7452 }
7453
ImmSystemRegister(int imm16)7454 static Instr ImmSystemRegister(int imm16) {
7455 VIXL_ASSERT(IsUint16(imm16));
7456 return imm16 << ImmSystemRegister_offset;
7457 }
7458
ImmRMIFRotation(int imm6)7459 static Instr ImmRMIFRotation(int imm6) {
7460 VIXL_ASSERT(IsUint6(imm6));
7461 return imm6 << ImmRMIFRotation_offset;
7462 }
7463
ImmHint(int imm7)7464 static Instr ImmHint(int imm7) {
7465 VIXL_ASSERT(IsUint7(imm7));
7466 return imm7 << ImmHint_offset;
7467 }
7468
CRm(int imm4)7469 static Instr CRm(int imm4) {
7470 VIXL_ASSERT(IsUint4(imm4));
7471 return imm4 << CRm_offset;
7472 }
7473
CRn(int imm4)7474 static Instr CRn(int imm4) {
7475 VIXL_ASSERT(IsUint4(imm4));
7476 return imm4 << CRn_offset;
7477 }
7478
SysOp(int imm14)7479 static Instr SysOp(int imm14) {
7480 VIXL_ASSERT(IsUint14(imm14));
7481 return imm14 << SysOp_offset;
7482 }
7483
ImmSysOp1(int imm3)7484 static Instr ImmSysOp1(int imm3) {
7485 VIXL_ASSERT(IsUint3(imm3));
7486 return imm3 << SysOp1_offset;
7487 }
7488
ImmSysOp2(int imm3)7489 static Instr ImmSysOp2(int imm3) {
7490 VIXL_ASSERT(IsUint3(imm3));
7491 return imm3 << SysOp2_offset;
7492 }
7493
ImmBarrierDomain(int imm2)7494 static Instr ImmBarrierDomain(int imm2) {
7495 VIXL_ASSERT(IsUint2(imm2));
7496 return imm2 << ImmBarrierDomain_offset;
7497 }
7498
ImmBarrierType(int imm2)7499 static Instr ImmBarrierType(int imm2) {
7500 VIXL_ASSERT(IsUint2(imm2));
7501 return imm2 << ImmBarrierType_offset;
7502 }
7503
  // Move immediates encoding.
  // 16-bit payload of a move-wide immediate.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  // Half-word shift selector (0-3) of a move-wide immediate.
  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }

  // FP Immediates.
  static Instr ImmFP16(Float16 imm);
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);
7519
  // FP register type.
  // Maps a scalar FP register's width (16/32/64 bits) to its type encoding.
  static Instr FPType(VRegister fd) {
    VIXL_ASSERT(fd.IsScalar());
    switch (fd.GetSizeInBits()) {
      case 16:
        return FP16;
      case 32:
        return FP32;
      case 64:
        return FP64;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
  }

  static Instr FPScale(unsigned scale) {
    VIXL_ASSERT(IsUint6(scale));
    return scale << FPScale_offset;
  }

  // Immediate field checking helpers.
  static bool IsImmAddSub(int64_t immediate);
  static bool IsImmConditionalCompare(int64_t immediate);
  static bool IsImmFP16(Float16 imm);

  // Convenience overload: forwards the raw bit pattern to the uint32_t form.
  static bool IsImmFP32(float imm) { return IsImmFP32(FloatToRawbits(imm)); }

  static bool IsImmFP32(uint32_t bits);

  // Convenience overload: forwards the raw bit pattern to the uint64_t form.
  static bool IsImmFP64(double imm) { return IsImmFP64(DoubleToRawbits(imm)); }

  static bool IsImmFP64(uint64_t bits);
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);
7563
  // Instruction bits for vector format in data processing operations.
  // Returns 0xffffffff for unsupported lane arrangements.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for vector format in floating point data processing
  // operations. Handles scalar (one lane) as well as 2/4/8-lane vectors.
  static Instr FPFormat(VRegister vd) {
    switch (vd.GetLanes()) {
      case 1:
        // Floating point scalar formats.
        switch (vd.GetSizeInBits()) {
          case 16:
            return FP16;
          case 32:
            return FP32;
          case 64:
            return FP64;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 2:
        // Two lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_2S;
          case 128:
            return NEON_FP_2D;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 4:
        // Four lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_4H;
          case 128:
            return NEON_FP_4S;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 8:
        // Eight lane floating point vector format.
        VIXL_ASSERT(vd.Is128Bits());
        return NEON_FP_8H;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    VIXL_UNREACHABLE();
    return 0;
  }
7644
  // Instruction bits for vector format in load and store operations.
  // Returns 0xffffffff for unsupported lane arrangements.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for scalar format in data processing operations.
  // Selected by element size in bytes; returns 0xffffffff when unsupported.
  static Instr SFormat(VRegister vd) {
    VIXL_ASSERT(vd.GetLanes() == 1);
    switch (vd.GetSizeInBytes()) {
      case 1:
        return NEON_B;
      case 2:
        return NEON_H;
      case 4:
        return NEON_S;
      case 8:
        return NEON_D;
      default:
        return 0xffffffff;
    }
  }

  // SVE element-size field, from the lane size of a Z or P register.
  template <typename T>
  static Instr SVESize(const T& rd) {
    VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
    VIXL_ASSERT(rd.HasLaneSize());
    switch (rd.GetLaneSizeInBytes()) {
      case 1:
        return SVE_B;
      case 2:
        return SVE_H;
      case 4:
        return SVE_S;
      case 8:
        return SVE_D;
      default:
        return 0xffffffff;
    }
  }
7711
ImmSVEPredicateConstraint(int pattern)7712 static Instr ImmSVEPredicateConstraint(int pattern) {
7713 VIXL_ASSERT(IsUint5(pattern));
7714 return (pattern << ImmSVEPredicateConstraint_offset) &
7715 ImmSVEPredicateConstraint_mask;
7716 }
7717
ImmNEONHLM(int index,int num_bits)7718 static Instr ImmNEONHLM(int index, int num_bits) {
7719 int h, l, m;
7720 if (num_bits == 3) {
7721 VIXL_ASSERT(IsUint3(index));
7722 h = (index >> 2) & 1;
7723 l = (index >> 1) & 1;
7724 m = (index >> 0) & 1;
7725 } else if (num_bits == 2) {
7726 VIXL_ASSERT(IsUint2(index));
7727 h = (index >> 1) & 1;
7728 l = (index >> 0) & 1;
7729 m = 0;
7730 } else {
7731 VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
7732 h = (index >> 0) & 1;
7733 l = 0;
7734 m = 0;
7735 }
7736 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
7737 }
7738
ImmRotFcadd(int rot)7739 static Instr ImmRotFcadd(int rot) {
7740 VIXL_ASSERT(rot == 90 || rot == 270);
7741 return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
7742 }
7743
ImmRotFcmlaSca(int rot)7744 static Instr ImmRotFcmlaSca(int rot) {
7745 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7746 return (rot / 90) << ImmRotFcmlaSca_offset;
7747 }
7748
ImmRotFcmlaVec(int rot)7749 static Instr ImmRotFcmlaVec(int rot) {
7750 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7751 return (rot / 90) << ImmRotFcmlaVec_offset;
7752 }
7753
ImmNEONExt(int imm4)7754 static Instr ImmNEONExt(int imm4) {
7755 VIXL_ASSERT(IsUint4(imm4));
7756 return imm4 << ImmNEONExt_offset;
7757 }
7758
ImmNEON5(Instr format,int index)7759 static Instr ImmNEON5(Instr format, int index) {
7760 VIXL_ASSERT(IsUint4(index));
7761 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7762 int imm5 = (index << (s + 1)) | (1 << s);
7763 return imm5 << ImmNEON5_offset;
7764 }
7765
ImmNEON4(Instr format,int index)7766 static Instr ImmNEON4(Instr format, int index) {
7767 VIXL_ASSERT(IsUint4(index));
7768 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7769 int imm4 = index << s;
7770 return imm4 << ImmNEON4_offset;
7771 }
7772
ImmNEONabcdefgh(int imm8)7773 static Instr ImmNEONabcdefgh(int imm8) {
7774 VIXL_ASSERT(IsUint8(imm8));
7775 Instr instr;
7776 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
7777 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
7778 return instr;
7779 }
7780
NEONCmode(int cmode)7781 static Instr NEONCmode(int cmode) {
7782 VIXL_ASSERT(IsUint4(cmode));
7783 return cmode << NEONCmode_offset;
7784 }
7785
NEONModImmOp(int op)7786 static Instr NEONModImmOp(int op) {
7787 VIXL_ASSERT(IsUint1(op));
7788 return op << NEONModImmOp_offset;
7789 }
7790
  // Size of the code generated since label to the current position.
  // The label must already be bound.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }

  // Position-independence mode this assembler was configured with.
  PositionIndependentCodeOption GetPic() const { return pic_; }
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }

  // CPU feature set associated with this assembler (mutable via the pointer;
  // see also SetCPUFeatures).
  CPUFeatures* GetCPUFeatures() { return &cpu_features_; }

  void SetCPUFeatures(const CPUFeatures& cpu_features) {
    cpu_features_ = cpu_features;
  }

  bool AllowPageOffsetDependentCode() const {
    return (GetPic() == PageOffsetDependentCode) ||
           (GetPic() == PositionDependentCode);
  }

  // xzr or wzr, matching the width of `reg`.
  static Register AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? Register(xzr) : Register(wzr);
  }
7837
 protected:
  // Generic scalar/FP/NEON load-store emission helper.
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  // Load-store with pointer authentication (PAC).
  void LoadStorePAC(const Register& xt,
                    const MemOperand& addr,
                    LoadStorePACOp op);

  // Load-store register pair.
  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  // NEON multi-structure load-store helpers.
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  // NEON single-structure (one lane, or all-lanes replicate) load-store
  // helpers.
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  // Debug-build sanity checks for structure load-store operands.
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);
7868
  // Set `is_load` to false in default as it's only used in the
  // scalar-plus-vector form.
  Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                            int num_regs,
                            const SVEMemOperand& addr,
                            bool is_load = false);

  // E.g. st1b, st1h, ...
  // This supports both contiguous and scatter stores.
  void SVESt1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegister& pg,
                    const SVEMemOperand& addr);

  // E.g. ld1b, ld1h, ...
  // This supports both contiguous and gather loads.
  void SVELd1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegisterZ& pg,
                    const SVEMemOperand& addr,
                    bool is_signed);

  // E.g. ld1rb, ld1rh, ...
  void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
                             const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr,
                             bool is_signed);

  // E.g. ldff1b, ldff1h, ...
  // This supports both contiguous and gather loads.
  void SVELdff1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr,
                      bool is_signed);

  // Common code for the helpers above.
  void SVELdSt1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegister& pg,
                      const SVEMemOperand& addr,
                      bool is_signed,
                      Instr op);

  // Common code for the helpers above.
  void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
                              const ZRegister& zt,
                              const PRegister& pg,
                              const SVEMemOperand& addr,
                              bool is_load,
                              bool is_signed,
                              bool is_first_fault);

  // E.g. st2b, st3h, ...
  void SVESt234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegister& pg,
                      const SVEMemOperand& addr);

  // E.g. ld2b, ld3h, ...
  void SVELd234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr);

  // Common code for the helpers above.
  void SVELdSt234Helper(int num_regs,
                        const ZRegister& zt1,
                        const PRegister& pg,
                        const SVEMemOperand& addr,
                        Instr op);

  // E.g. ld1qb, ld1qh, ldnt1b, ...
  // Scalar-plus-immediate/scalar-plus-scalar forms; `imm_divisor` scales the
  // encoded immediate offset.
  void SVELd1St1ScaImmHelper(const ZRegister& zt,
                             const PRegister& pg,
                             const SVEMemOperand& addr,
                             Instr regoffset_op,
                             Instr immoffset_op,
                             int imm_divisor = 1);

  // Vector-plus-scalar addressing-form load/store helpers.
  void SVELd1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize,
                          bool is_signed);
  void SVESt1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize);

  // Scalar prefetch. The second overload takes the raw prefetch-operation
  // field as an integer.
  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
  void Prefetch(int op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
7966
  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
  // reports a bogus uninitialised warning then.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);

  void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);

  // Emit a logical instruction with a pre-encoded bitmask immediate
  // (the n:imm_s:imm_r fields).
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);

  // SVE integer compares: vector-vs-vector, vector-vs-signed-immediate and
  // vector-vs-unsigned-immediate forms.
  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      const ZRegister& zm,
                      SVEIntCompareVectorsOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      int imm,
                      SVEIntCompareSignedImmOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      unsigned imm,
                      SVEIntCompareUnsignedImmOp op);

  void SVEIntAddSubtractImmUnpredicatedHelper(
      SVEIntAddSubtractImm_UnpredicatedOp op,
      const ZRegister& zd,
      int imm8,
      int shift);

  void SVEElementCountToRegisterHelper(Instr op,
                                       const Register& rd,
                                       int pattern,
                                       int multiplier);

  // Encode SVE shift-immediate fields for left and right shifts.
  Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits);

  Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits);

  // Emit an SVE shift-by-immediate; `encoded_imm` comes from the encoders
  // above.
  void SVEBitwiseShiftImmediate(const ZRegister& zd,
                                const ZRegister& zn,
                                Instr encoded_imm,
                                Instr op);

  void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
                                    const PRegisterM& pg,
                                    Instr encoded_imm,
                                    Instr op);

  // Select between the H/S/D opcodes and encode the indexed element for
  // SVE multiply-by-indexed-element forms.
  Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
                          const ZRegister& zm,
                          int index,
                          Instr op_h,
                          Instr op_s,
                          Instr op_d);

  Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);

  Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index);

  // SVE prefetch helpers, one per addressing mode.
  void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  // Common entry point for the SVE prefetch helpers above.
  void SVEPrefetchHelper(PrefetchOperation prfop,
                         const PRegister& pg,
                         const SVEMemOperand& addr,
                         int prefetch_size);
SVEImmPrefetchOperation(PrefetchOperation prfop)8074 static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
8075 // SVE only supports PLD and PST, not PLI.
8076 VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
8077 ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
8078 // Check that we can simply map bits.
8079 VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
8080 VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
8081 // Remaining operations map directly.
8082 return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
8083 }
8084
  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  // Common add/subtract emission path, with optional flag update.
  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // NEON table lookup (tbl/tbx family).
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

  // Convenience pass-through for CPU feature checks. Unused feature slots
  // default to kNone.
  bool CPUHas(CPUFeatures::Feature feature0,
              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
    return cpu_features_.Has(feature0, feature1, feature2, feature3);
  }

  // Determine whether the target CPU has the specified registers, based on the
  // currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the same
  // entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  bool CPUHas(SystemRegister sysreg) const;
 private:
  // Convert an FP constant to its 8-bit immediate encoding.
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  // PC-relative literal load; `imm` is the raw offset to encode.
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON across-lanes reductions ("L" variant produces a widened result).
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  // NEON modified-immediate forms with LSL / MSL shift variants.
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  // NEON instruction-group emission helpers. Naming follows the encoding
  // groups in the op-code enums (2Same, 3Same, 3Different, 2RegMisc, ...);
  // see the implementations for the encoding details.
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  // By-indexed-element forms; `vm_index` selects the element of vm.
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  // Shift-by-immediate forms; the first overload takes the raw immh:immb
  // field, the others compute it from a shift amount.
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
8284
  // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
  // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
  void ResolveSVEImm8Shift(int* imm8, int* shift);

  // Compute the addressing-mode field for NEON structure load-stores.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size_in_bytes_log2,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offset are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    // `Instr` must exactly match the width emitted below.
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }

  // Position-independence requirement for the generated code.
  PositionIndependentCodeOption pic_;

  // CPU features the generated code may rely on.
  CPUFeatures cpu_features_;
8321 };
8322
8323
8324 template <typename T>
UpdateValue(T new_value,const Assembler * assembler)8325 void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
8326 return UpdateValue(new_value,
8327 assembler->GetBuffer().GetStartAddress<uint8_t*>());
8328 }
8329
8330
8331 template <typename T>
UpdateValue(T high64,T low64,const Assembler * assembler)8332 void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
8333 return UpdateValue(high64,
8334 low64,
8335 assembler->GetBuffer().GetStartAddress<uint8_t*>());
8336 }
8337
8338
8339 } // namespace aarch64
8340
// Required InvalSet template specialisations.
// TODO: These template specialisations should not live in this file. Move
// Label out of the aarch64 namespace in order to share its implementation
// later.
#define INVAL_SET_TEMPLATE_PARAMETERS                                \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
      aarch64::Label::kReclaimFactor
// Label link elements are raw offsets, so each element is its own key.
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS
8360
8361 } // namespace vixl
8362
8363 #endif // VIXL_AARCH64_ASSEMBLER_AARCH64_H_
8364