1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
28 #define VIXL_AARCH64_ASSEMBLER_AARCH64_H_
29
30 #include "../assembler-base-vixl.h"
31 #include "../code-generation-scopes-vixl.h"
32 #include "../cpu-features.h"
33 #include "../globals-vixl.h"
34 #include "../invalset-vixl.h"
35 #include "../utils-vixl.h"
36
37 #include "operands-aarch64.h"
38
39 namespace vixl {
40 namespace aarch64 {
41
42 class LabelTestHelper; // Forward declaration.
43
44
// A position in the generated code, used as the target of branches and
// PC-relative address computations. A label is "linked" while unresolved
// instructions refer to it, and "bound" once it has been given a location.
class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  // A label is bound once it has been assigned a location in the buffer.
  bool IsBound() const { return location_ >= 0; }
  // A label is linked while instructions referring to it await resolution.
  bool IsLinked() const { return !links_.empty(); }

  // Return the offset of the (bound) label from the start of the buffer.
  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  // Parameters of the InvalSet used to store the links.
  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor>
      LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, ie. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location, or kLocationUnbound while the label is unbound.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

  // It is not safe to copy labels, so disable the copy constructor and operator
  // by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
160
161
162 class Assembler;
163 class LiteralPool;
164
165 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
166 // stream and loaded through a pc relative load. The same literal can be
167 // referred to by multiple instructions but a literal can only reside at one
168 // place in memory. A literal can be used by a load before or after being
169 // placed in memory.
170 //
171 // Internally an offset of 0 is associated with a literal which has been
172 // neither used nor placed. Then two possibilities arise:
// 1) the literal is placed, the offset (stored as offset + 1) is used to
//    resolve any subsequent load using the literal.
// 2) the literal is not placed and offset is the offset of the last load using
//    the literal (stored as -offset - 1). If multiple loads refer to this
//    literal then the last load holds the offset of the preceding load and
//    all loads form a chain. Once the literal is placed all the loads in the
//    chain are resolved and future loads fall back to possibility 1.
class RawLiteral {
 public:
  // Controls who is responsible for deleting the literal object.
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects, so the destructor is virtual.
  virtual ~RawLiteral() {}

  // Return the size of the literal in bytes: W (4), X/D (8) or Q (16).
  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  // Raw 128-bit payload accessors; only valid for Q-sized literals.
  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  // Raw 64-bit payload accessor; only valid for X-sized literals.
  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  // Raw 32-bit payload accessor; only valid for W-sized literals.
  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  // Encoding of offset_: 0 means neither used nor placed, a negative value
  // means used but not yet placed, a positive value means placed.
  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  // Return the buffer offset at which the literal was placed.
  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    // Placed offsets are stored biased by +1 so that 0 remains the "neither
    // used nor placed" state.
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  // Record the buffer offset at which the literal has been placed.
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  // Return the offset of the most recent load of this (unplaced) literal.
  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  // Record the offset of the latest load of this (unplaced) literal; stored
  // negated (biased by -1) to encode the "used" state.
  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  size_t size_;
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};
290
291
292 template <typename T>
293 class Literal : public RawLiteral {
294 public:
295 explicit Literal(T value,
296 LiteralPool* literal_pool = NULL,
297 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(sizeof (value),literal_pool,ownership)298 : RawLiteral(sizeof(value), literal_pool, ownership) {
299 VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
300 UpdateValue(value);
301 }
302
303 Literal(T high64,
304 T low64,
305 LiteralPool* literal_pool = NULL,
306 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(kQRegSizeInBytes,literal_pool,ownership)307 : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
308 VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
309 UpdateValue(high64, low64);
310 }
311
~Literal()312 virtual ~Literal() {}
313
314 // Update the value of this literal, if necessary by rewriting the value in
315 // the pool.
316 // If the literal has already been placed in a literal pool, the address of
317 // the start of the code buffer must be provided, as the literal only knows it
318 // offset from there. This also allows patching the value after the code has
319 // been moved in memory.
320 void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
321 VIXL_ASSERT(sizeof(new_value) == size_);
322 memcpy(&low64_, &new_value, sizeof(new_value));
323 if (IsPlaced()) {
324 VIXL_ASSERT(code_buffer != NULL);
325 RewriteValueInCode(code_buffer);
326 }
327 }
328
329 void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
330 VIXL_ASSERT(sizeof(low64) == size_ / 2);
331 memcpy(&low64_, &low64, sizeof(low64));
332 memcpy(&high64_, &high64, sizeof(high64));
333 if (IsPlaced()) {
334 VIXL_ASSERT(code_buffer != NULL);
335 RewriteValueInCode(code_buffer);
336 }
337 }
338
339 void UpdateValue(T new_value, const Assembler* assembler);
340 void UpdateValue(T high64, T low64, const Assembler* assembler);
341
342 private:
RewriteValueInCode(uint8_t * code_buffer)343 void RewriteValueInCode(uint8_t* code_buffer) {
344 VIXL_ASSERT(IsPlaced());
345 VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
346 switch (GetSize()) {
347 case kSRegSizeInBytes:
348 *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
349 GetRawValue32();
350 break;
351 case kDRegSizeInBytes:
352 *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
353 GetRawValue64();
354 break;
355 default:
356 VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
357 uint64_t* base_address =
358 reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
359 *base_address = GetRawValue128Low64();
360 *(base_address + 1) = GetRawValue128High64();
361 }
362 }
363 };
364
365
366 // Control whether or not position-independent code should be emitted.
// Control whether or not position-independent code should be emitted. Passed
// to the Assembler constructors.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
384
385
386 // Control how scaled- and unscaled-offset loads and stores are generated.
// Control how scaled- and unscaled-offset loads and stores are generated.
// The "Prefer" options allow falling back to another addressing form when the
// preferred one cannot encode the operand; the "Require" options do not.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
402
403
404 // Assembler.
405 class Assembler : public vixl::internal::AssemblerBase {
406 public:
  // Construct an Assembler; `pic` controls how position-dependent the
  // generated code may be.
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  // As above, but specify an initial capacity for the code buffer.
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  // Construct an Assembler that emits code into an existing `buffer` of the
  // given `capacity`.
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
422
  // Upon destruction, the code will assert that one of the following is true:
  // * The Assembler object has not been used.
  // * Nothing has been emitted since the last Reset() call.
  // * Nothing has been emitted since the last FinalizeCode() call.
  // NOTE(review): those checks are presumably performed by the AssemblerBase
  // destructor — confirm, as this destructor body is empty.
  ~Assembler() {}
428
429 // System functions.
430
431 // Start generating code from the beginning of the buffer, discarding any code
432 // and data that has already been emitted into the buffer.
433 void Reset();
434
435 // Bind a label to the current PC.
436 void bind(Label* label);
437
438 // Bind a label to a specified offset from the start of the buffer.
439 void BindToOffset(Label* label, ptrdiff_t offset);
440
441 // Place a literal at the current PC.
442 void place(RawLiteral* literal);
443
  // Deprecated: use GetCursorOffset() instead.
  VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
    return GetCursorOffset();
  }
447
  // Deprecated: query the buffer capacity directly instead.
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t GetBufferEndOffset() const) {
    return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t BufferEndOffset() const) {
    return GetBuffer().GetCapacity();
  }
456
  // Return the address of a bound label.
  // T must be wide enough to hold a pointer (enforced by the static assert).
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }
464
  // Return a pointer to the instruction located `instruction_offset` bytes
  // from the start of the buffer.
  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
  // Deprecated: use GetInstructionAt() instead.
  VIXL_DEPRECATED("GetInstructionAt",
                  Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
    return GetInstructionAt(instruction_offset);
  }
472
  // Return the byte offset of `instruction` from the start of the buffer.
  ptrdiff_t GetInstructionOffset(Instruction* instruction) {
    // Pointer arithmetic below relies on Instruction being one byte wide.
    VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
    ptrdiff_t offset =
        instruction - GetBuffer()->GetStartAddress<Instruction*>();
    // The instruction must lie within the buffer.
    VIXL_ASSERT((0 <= offset) &&
                (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
    return offset;
  }
  // Deprecated: use GetInstructionOffset() instead.
  VIXL_DEPRECATED("GetInstructionOffset",
                  ptrdiff_t InstructionOffset(Instruction* instruction)) {
    return GetInstructionOffset(instruction);
  }
485
486 // Instruction set functions.
487
488 // Branch / Jump instructions.
489
490 // Branch to register.
491 void br(const Register& xn);
492
493 // Branch with link to register.
494 void blr(const Register& xn);
495
496 // Branch to register with return hint.
497 void ret(const Register& xn = lr);
498
499 // Branch to register, with pointer authentication. Using key A and a modifier
500 // of zero [Armv8.3].
501 void braaz(const Register& xn);
502
503 // Branch to register, with pointer authentication. Using key B and a modifier
504 // of zero [Armv8.3].
505 void brabz(const Register& xn);
506
507 // Branch with link to register, with pointer authentication. Using key A and
508 // a modifier of zero [Armv8.3].
509 void blraaz(const Register& xn);
510
511 // Branch with link to register, with pointer authentication. Using key B and
512 // a modifier of zero [Armv8.3].
513 void blrabz(const Register& xn);
514
515 // Return from subroutine, with pointer authentication. Using key A [Armv8.3].
516 void retaa();
517
518 // Return from subroutine, with pointer authentication. Using key B [Armv8.3].
519 void retab();
520
521 // Branch to register, with pointer authentication. Using key A [Armv8.3].
522 void braa(const Register& xn, const Register& xm);
523
524 // Branch to register, with pointer authentication. Using key B [Armv8.3].
525 void brab(const Register& xn, const Register& xm);
526
527 // Branch with link to register, with pointer authentication. Using key A
528 // [Armv8.3].
529 void blraa(const Register& xn, const Register& xm);
530
531 // Branch with link to register, with pointer authentication. Using key B
532 // [Armv8.3].
533 void blrab(const Register& xn, const Register& xm);
534
535 // Unconditional branch to label.
536 void b(Label* label);
537
538 // Conditional branch to label.
539 void b(Label* label, Condition cond);
540
541 // Unconditional branch to PC offset.
542 void b(int64_t imm26);
543
544 // Conditional branch to PC offset.
545 void b(int64_t imm19, Condition cond);
546
547 // Branch with link to label.
548 void bl(Label* label);
549
550 // Branch with link to PC offset.
551 void bl(int64_t imm26);
552
553 // Compare and branch to label if zero.
554 void cbz(const Register& rt, Label* label);
555
556 // Compare and branch to PC offset if zero.
557 void cbz(const Register& rt, int64_t imm19);
558
559 // Compare and branch to label if not zero.
560 void cbnz(const Register& rt, Label* label);
561
562 // Compare and branch to PC offset if not zero.
563 void cbnz(const Register& rt, int64_t imm19);
564
565 // Table lookup from one register.
566 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
567
568 // Table lookup from two registers.
569 void tbl(const VRegister& vd,
570 const VRegister& vn,
571 const VRegister& vn2,
572 const VRegister& vm);
573
574 // Table lookup from three registers.
575 void tbl(const VRegister& vd,
576 const VRegister& vn,
577 const VRegister& vn2,
578 const VRegister& vn3,
579 const VRegister& vm);
580
581 // Table lookup from four registers.
582 void tbl(const VRegister& vd,
583 const VRegister& vn,
584 const VRegister& vn2,
585 const VRegister& vn3,
586 const VRegister& vn4,
587 const VRegister& vm);
588
589 // Table lookup extension from one register.
590 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
591
592 // Table lookup extension from two registers.
593 void tbx(const VRegister& vd,
594 const VRegister& vn,
595 const VRegister& vn2,
596 const VRegister& vm);
597
598 // Table lookup extension from three registers.
599 void tbx(const VRegister& vd,
600 const VRegister& vn,
601 const VRegister& vn2,
602 const VRegister& vn3,
603 const VRegister& vm);
604
605 // Table lookup extension from four registers.
606 void tbx(const VRegister& vd,
607 const VRegister& vn,
608 const VRegister& vn2,
609 const VRegister& vn3,
610 const VRegister& vn4,
611 const VRegister& vm);
612
613 // Test bit and branch to label if zero.
614 void tbz(const Register& rt, unsigned bit_pos, Label* label);
615
616 // Test bit and branch to PC offset if zero.
617 void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);
618
619 // Test bit and branch to label if not zero.
620 void tbnz(const Register& rt, unsigned bit_pos, Label* label);
621
622 // Test bit and branch to PC offset if not zero.
623 void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);
624
625 // Address calculation instructions.
626 // Calculate a PC-relative address. Unlike for branches the offset in adr is
627 // unscaled (i.e. the result can be unaligned).
628
629 // Calculate the address of a label.
630 void adr(const Register& xd, Label* label);
631
632 // Calculate the address of a PC offset.
633 void adr(const Register& xd, int64_t imm21);
634
635 // Calculate the page address of a label.
636 void adrp(const Register& xd, Label* label);
637
638 // Calculate the page address of a PC offset.
639 void adrp(const Register& xd, int64_t imm21);
640
641 // Data Processing instructions.
642
643 // Add.
644 void add(const Register& rd, const Register& rn, const Operand& operand);
645
646 // Add and update status flags.
647 void adds(const Register& rd, const Register& rn, const Operand& operand);
648
649 // Compare negative.
650 void cmn(const Register& rn, const Operand& operand);
651
652 // Subtract.
653 void sub(const Register& rd, const Register& rn, const Operand& operand);
654
655 // Subtract and update status flags.
656 void subs(const Register& rd, const Register& rn, const Operand& operand);
657
658 // Compare.
659 void cmp(const Register& rn, const Operand& operand);
660
661 // Negate.
662 void neg(const Register& rd, const Operand& operand);
663
664 // Negate and update status flags.
665 void negs(const Register& rd, const Operand& operand);
666
667 // Add with carry bit.
668 void adc(const Register& rd, const Register& rn, const Operand& operand);
669
670 // Add with carry bit and update status flags.
671 void adcs(const Register& rd, const Register& rn, const Operand& operand);
672
673 // Subtract with carry bit.
674 void sbc(const Register& rd, const Register& rn, const Operand& operand);
675
676 // Subtract with carry bit and update status flags.
677 void sbcs(const Register& rd, const Register& rn, const Operand& operand);
678
679 // Rotate register right and insert into NZCV flags under the control of a
680 // mask [Armv8.4].
681 void rmif(const Register& xn, unsigned rotation, StatusFlags flags);
682
683 // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
684 void setf8(const Register& rn);
685
  // Set NZCV flags from register, treated as a 16-bit value [Armv8.4].
687 void setf16(const Register& rn);
688
689 // Negate with carry bit.
690 void ngc(const Register& rd, const Operand& operand);
691
692 // Negate with carry bit and update status flags.
693 void ngcs(const Register& rd, const Operand& operand);
694
695 // Logical instructions.
696
697 // Bitwise and (A & B).
698 void and_(const Register& rd, const Register& rn, const Operand& operand);
699
700 // Bitwise and (A & B) and update status flags.
701 void ands(const Register& rd, const Register& rn, const Operand& operand);
702
703 // Bit test and set flags.
704 void tst(const Register& rn, const Operand& operand);
705
706 // Bit clear (A & ~B).
707 void bic(const Register& rd, const Register& rn, const Operand& operand);
708
709 // Bit clear (A & ~B) and update status flags.
710 void bics(const Register& rd, const Register& rn, const Operand& operand);
711
712 // Bitwise or (A | B).
713 void orr(const Register& rd, const Register& rn, const Operand& operand);
714
715 // Bitwise nor (A | ~B).
716 void orn(const Register& rd, const Register& rn, const Operand& operand);
717
718 // Bitwise eor/xor (A ^ B).
719 void eor(const Register& rd, const Register& rn, const Operand& operand);
720
721 // Bitwise enor/xnor (A ^ ~B).
722 void eon(const Register& rd, const Register& rn, const Operand& operand);
723
724 // Logical shift left by variable.
725 void lslv(const Register& rd, const Register& rn, const Register& rm);
726
727 // Logical shift right by variable.
728 void lsrv(const Register& rd, const Register& rn, const Register& rm);
729
730 // Arithmetic shift right by variable.
731 void asrv(const Register& rd, const Register& rn, const Register& rm);
732
733 // Rotate right by variable.
734 void rorv(const Register& rd, const Register& rn, const Register& rm);
735
736 // Bitfield instructions.
737
738 // Bitfield move.
739 void bfm(const Register& rd,
740 const Register& rn,
741 unsigned immr,
742 unsigned imms);
743
744 // Signed bitfield move.
745 void sbfm(const Register& rd,
746 const Register& rn,
747 unsigned immr,
748 unsigned imms);
749
750 // Unsigned bitfield move.
751 void ubfm(const Register& rd,
752 const Register& rn,
753 unsigned immr,
754 unsigned imms);
755
756 // Bfm aliases.
757
758 // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)759 void bfi(const Register& rd,
760 const Register& rn,
761 unsigned lsb,
762 unsigned width) {
763 VIXL_ASSERT(width >= 1);
764 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
765 bfm(rd,
766 rn,
767 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
768 width - 1);
769 }
770
771 // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)772 void bfxil(const Register& rd,
773 const Register& rn,
774 unsigned lsb,
775 unsigned width) {
776 VIXL_ASSERT(width >= 1);
777 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
778 bfm(rd, rn, lsb, lsb + width - 1);
779 }
780
  // Bitfield clear [Armv8.2]: clear `width` bits of rd starting at `lsb`,
  // implemented as a bitfield insert from the zero register.
  void bfc(const Register& rd, unsigned lsb, unsigned width) {
    bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
  }
785
786 // Sbfm aliases.
787
  // Arithmetic shift right. Alias of sbfm.
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }
793
794 // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)795 void sbfiz(const Register& rd,
796 const Register& rn,
797 unsigned lsb,
798 unsigned width) {
799 VIXL_ASSERT(width >= 1);
800 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
801 sbfm(rd,
802 rn,
803 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
804 width - 1);
805 }
806
807 // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)808 void sbfx(const Register& rd,
809 const Register& rn,
810 unsigned lsb,
811 unsigned width) {
812 VIXL_ASSERT(width >= 1);
813 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
814 sbfm(rd, rn, lsb, lsb + width - 1);
815 }
816
  // Signed extend byte: sign-extend the low 8 bits of rn into rd.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }
819
  // Signed extend halfword: sign-extend the low 16 bits of rn into rd.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }
822
  // Signed extend word: sign-extend the low 32 bits of rn into rd.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
825
826 // Ubfm aliases.
827
828 // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)829 void lsl(const Register& rd, const Register& rn, unsigned shift) {
830 unsigned reg_size = rd.GetSizeInBits();
831 VIXL_ASSERT(shift < reg_size);
832 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
833 }
834
  // Logical shift right. Alias of ubfm.
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }
840
841 // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)842 void ubfiz(const Register& rd,
843 const Register& rn,
844 unsigned lsb,
845 unsigned width) {
846 VIXL_ASSERT(width >= 1);
847 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
848 ubfm(rd,
849 rn,
850 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
851 width - 1);
852 }
853
854 // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)855 void ubfx(const Register& rd,
856 const Register& rn,
857 unsigned lsb,
858 unsigned width) {
859 VIXL_ASSERT(width >= 1);
860 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
861 ubfm(rd, rn, lsb, lsb + width - 1);
862 }
863
  // Unsigned extend byte: zero-extend the low 8 bits of rn into rd.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }
866
  // Unsigned extend halfword: zero-extend the low 16 bits of rn into rd.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }
869
  // Unsigned extend word: zero-extend the low 32 bits of rn into rd.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
872
873 // Extract.
874 void extr(const Register& rd,
875 const Register& rn,
876 const Register& rm,
877 unsigned lsb);
878
879 // Conditional select: rd = cond ? rn : rm.
880 void csel(const Register& rd,
881 const Register& rn,
882 const Register& rm,
883 Condition cond);
884
885 // Conditional select increment: rd = cond ? rn : rm + 1.
886 void csinc(const Register& rd,
887 const Register& rn,
888 const Register& rm,
889 Condition cond);
890
891 // Conditional select inversion: rd = cond ? rn : ~rm.
892 void csinv(const Register& rd,
893 const Register& rn,
894 const Register& rm,
895 Condition cond);
896
897 // Conditional select negation: rd = cond ? rn : -rm.
898 void csneg(const Register& rd,
899 const Register& rn,
900 const Register& rm,
901 Condition cond);
902
903 // Conditional set: rd = cond ? 1 : 0.
904 void cset(const Register& rd, Condition cond);
905
906 // Conditional set mask: rd = cond ? -1 : 0.
907 void csetm(const Register& rd, Condition cond);
908
909 // Conditional increment: rd = cond ? rn + 1 : rn.
910 void cinc(const Register& rd, const Register& rn, Condition cond);
911
912 // Conditional invert: rd = cond ? ~rn : rn.
913 void cinv(const Register& rd, const Register& rn, Condition cond);
914
915 // Conditional negate: rd = cond ? -rn : rn.
916 void cneg(const Register& rd, const Register& rn, Condition cond);
917
918 // Rotate right.
ror(const Register & rd,const Register & rs,unsigned shift)919 void ror(const Register& rd, const Register& rs, unsigned shift) {
920 extr(rd, rs, rs, shift);
921 }
922
923 // Conditional comparison.
924
925 // Conditional compare negative.
926 void ccmn(const Register& rn,
927 const Operand& operand,
928 StatusFlags nzcv,
929 Condition cond);
930
931 // Conditional compare.
932 void ccmp(const Register& rn,
933 const Operand& operand,
934 StatusFlags nzcv,
935 Condition cond);
936
937 // CRC-32 checksum from byte.
938 void crc32b(const Register& wd, const Register& wn, const Register& wm);
939
940 // CRC-32 checksum from half-word.
941 void crc32h(const Register& wd, const Register& wn, const Register& wm);
942
943 // CRC-32 checksum from word.
944 void crc32w(const Register& wd, const Register& wn, const Register& wm);
945
946 // CRC-32 checksum from double word.
947 void crc32x(const Register& wd, const Register& wn, const Register& xm);
948
949 // CRC-32 C checksum from byte.
950 void crc32cb(const Register& wd, const Register& wn, const Register& wm);
951
952 // CRC-32 C checksum from half-word.
953 void crc32ch(const Register& wd, const Register& wn, const Register& wm);
954
955 // CRC-32 C checksum from word.
956 void crc32cw(const Register& wd, const Register& wn, const Register& wm);
957
958 // CRC-32C checksum from double word.
959 void crc32cx(const Register& wd, const Register& wn, const Register& xm);
960
961 // Multiply.
962 void mul(const Register& rd, const Register& rn, const Register& rm);
963
964 // Negated multiply.
965 void mneg(const Register& rd, const Register& rn, const Register& rm);
966
967 // Signed long multiply: 32 x 32 -> 64-bit.
968 void smull(const Register& xd, const Register& wn, const Register& wm);
969
970 // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
971 void smulh(const Register& xd, const Register& xn, const Register& xm);
972
973 // Multiply and accumulate.
974 void madd(const Register& rd,
975 const Register& rn,
976 const Register& rm,
977 const Register& ra);
978
979 // Multiply and subtract.
980 void msub(const Register& rd,
981 const Register& rn,
982 const Register& rm,
983 const Register& ra);
984
985 // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
986 void smaddl(const Register& xd,
987 const Register& wn,
988 const Register& wm,
989 const Register& xa);
990
991 // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
992 void umaddl(const Register& xd,
993 const Register& wn,
994 const Register& wm,
995 const Register& xa);
996
997 // Unsigned long multiply: 32 x 32 -> 64-bit.
umull(const Register & xd,const Register & wn,const Register & wm)998 void umull(const Register& xd, const Register& wn, const Register& wm) {
999 umaddl(xd, wn, wm, xzr);
1000 }
1001
1002 // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1003 void umulh(const Register& xd, const Register& xn, const Register& xm);
1004
1005 // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1006 void smsubl(const Register& xd,
1007 const Register& wn,
1008 const Register& wm,
1009 const Register& xa);
1010
1011 // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1012 void umsubl(const Register& xd,
1013 const Register& wn,
1014 const Register& wm,
1015 const Register& xa);
1016
1017 // Signed integer divide.
1018 void sdiv(const Register& rd, const Register& rn, const Register& rm);
1019
1020 // Unsigned integer divide.
1021 void udiv(const Register& rd, const Register& rn, const Register& rm);
1022
1023 // Bit reverse.
1024 void rbit(const Register& rd, const Register& rn);
1025
1026 // Reverse bytes in 16-bit half words.
1027 void rev16(const Register& rd, const Register& rn);
1028
1029 // Reverse bytes in 32-bit words.
1030 void rev32(const Register& xd, const Register& xn);
1031
1032 // Reverse bytes in 64-bit general purpose register, an alias for rev
1033 // [Armv8.2].
rev64(const Register & xd,const Register & xn)1034 void rev64(const Register& xd, const Register& xn) {
1035 VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
1036 rev(xd, xn);
1037 }
1038
1039 // Reverse bytes.
1040 void rev(const Register& rd, const Register& rn);
1041
1042 // Count leading zeroes.
1043 void clz(const Register& rd, const Register& rn);
1044
1045 // Count leading sign bits.
1046 void cls(const Register& rd, const Register& rn);
1047
1048 // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
1049 void pacia(const Register& xd, const Register& rn);
1050
1051 // Pointer Authentication Code for Instruction address, using key A and a
1052 // modifier of zero [Armv8.3].
1053 void paciza(const Register& xd);
1054
1055 // Pointer Authentication Code for Instruction address, using key A, with
1056 // address in x17 and modifier in x16 [Armv8.3].
1057 void pacia1716();
1058
1059 // Pointer Authentication Code for Instruction address, using key A, with
1060 // address in LR and modifier in SP [Armv8.3].
1061 void paciasp();
1062
1063 // Pointer Authentication Code for Instruction address, using key A, with
1064 // address in LR and a modifier of zero [Armv8.3].
1065 void paciaz();
1066
1067 // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
1068 void pacib(const Register& xd, const Register& xn);
1069
1070 // Pointer Authentication Code for Instruction address, using key B and a
1071 // modifier of zero [Armv8.3].
1072 void pacizb(const Register& xd);
1073
1074 // Pointer Authentication Code for Instruction address, using key B, with
1075 // address in x17 and modifier in x16 [Armv8.3].
1076 void pacib1716();
1077
1078 // Pointer Authentication Code for Instruction address, using key B, with
1079 // address in LR and modifier in SP [Armv8.3].
1080 void pacibsp();
1081
1082 // Pointer Authentication Code for Instruction address, using key B, with
1083 // address in LR and a modifier of zero [Armv8.3].
1084 void pacibz();
1085
1086 // Pointer Authentication Code for Data address, using key A [Armv8.3].
1087 void pacda(const Register& xd, const Register& xn);
1088
1089 // Pointer Authentication Code for Data address, using key A and a modifier of
1090 // zero [Armv8.3].
1091 void pacdza(const Register& xd);
1092
1093 // Pointer Authentication Code for Data address, using key B [Armv8.3].
1094 void pacdb(const Register& xd, const Register& xn);
1095
1096 // Pointer Authentication Code for Data address, using key B and a modifier of
1097 // zero [Armv8.3].
1098 void pacdzb(const Register& xd);
1099
1100 // Pointer Authentication Code, using Generic key [Armv8.3].
1101 void pacga(const Register& xd, const Register& xn, const Register& xm);
1102
1103 // Authenticate Instruction address, using key A [Armv8.3].
1104 void autia(const Register& xd, const Register& xn);
1105
1106 // Authenticate Instruction address, using key A and a modifier of zero
1107 // [Armv8.3].
1108 void autiza(const Register& xd);
1109
1110 // Authenticate Instruction address, using key A, with address in x17 and
1111 // modifier in x16 [Armv8.3].
1112 void autia1716();
1113
1114 // Authenticate Instruction address, using key A, with address in LR and
1115 // modifier in SP [Armv8.3].
1116 void autiasp();
1117
1118 // Authenticate Instruction address, using key A, with address in LR and a
1119 // modifier of zero [Armv8.3].
1120 void autiaz();
1121
1122 // Authenticate Instruction address, using key B [Armv8.3].
1123 void autib(const Register& xd, const Register& xn);
1124
1125 // Authenticate Instruction address, using key B and a modifier of zero
1126 // [Armv8.3].
1127 void autizb(const Register& xd);
1128
1129 // Authenticate Instruction address, using key B, with address in x17 and
1130 // modifier in x16 [Armv8.3].
1131 void autib1716();
1132
1133 // Authenticate Instruction address, using key B, with address in LR and
1134 // modifier in SP [Armv8.3].
1135 void autibsp();
1136
1137 // Authenticate Instruction address, using key B, with address in LR and a
1138 // modifier of zero [Armv8.3].
1139 void autibz();
1140
1141 // Authenticate Data address, using key A [Armv8.3].
1142 void autda(const Register& xd, const Register& xn);
1143
1144 // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
1145 void autdza(const Register& xd);
1146
1147 // Authenticate Data address, using key B [Armv8.3].
1148 void autdb(const Register& xd, const Register& xn);
1149
1150 // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
1151 void autdzb(const Register& xd);
1152
1153 // Strip Pointer Authentication Code of Data address [Armv8.3].
1154 void xpacd(const Register& xd);
1155
1156 // Strip Pointer Authentication Code of Instruction address [Armv8.3].
1157 void xpaci(const Register& xd);
1158
1159 // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
1160 void xpaclri();
1161
1162 // Memory instructions.
1163
1164 // Load integer or FP register.
1165 void ldr(const CPURegister& rt,
1166 const MemOperand& src,
1167 LoadStoreScalingOption option = PreferScaledOffset);
1168
1169 // Store integer or FP register.
1170 void str(const CPURegister& rt,
1171 const MemOperand& dst,
1172 LoadStoreScalingOption option = PreferScaledOffset);
1173
1174 // Load word with sign extension.
1175 void ldrsw(const Register& xt,
1176 const MemOperand& src,
1177 LoadStoreScalingOption option = PreferScaledOffset);
1178
1179 // Load byte.
1180 void ldrb(const Register& rt,
1181 const MemOperand& src,
1182 LoadStoreScalingOption option = PreferScaledOffset);
1183
1184 // Store byte.
1185 void strb(const Register& rt,
1186 const MemOperand& dst,
1187 LoadStoreScalingOption option = PreferScaledOffset);
1188
1189 // Load byte with sign extension.
1190 void ldrsb(const Register& rt,
1191 const MemOperand& src,
1192 LoadStoreScalingOption option = PreferScaledOffset);
1193
1194 // Load half-word.
1195 void ldrh(const Register& rt,
1196 const MemOperand& src,
1197 LoadStoreScalingOption option = PreferScaledOffset);
1198
1199 // Store half-word.
1200 void strh(const Register& rt,
1201 const MemOperand& dst,
1202 LoadStoreScalingOption option = PreferScaledOffset);
1203
1204 // Load half-word with sign extension.
1205 void ldrsh(const Register& rt,
1206 const MemOperand& src,
1207 LoadStoreScalingOption option = PreferScaledOffset);
1208
1209 // Load integer or FP register (with unscaled offset).
1210 void ldur(const CPURegister& rt,
1211 const MemOperand& src,
1212 LoadStoreScalingOption option = PreferUnscaledOffset);
1213
1214 // Store integer or FP register (with unscaled offset).
1215 void stur(const CPURegister& rt,
1216 const MemOperand& src,
1217 LoadStoreScalingOption option = PreferUnscaledOffset);
1218
1219 // Load word with sign extension.
1220 void ldursw(const Register& xt,
1221 const MemOperand& src,
1222 LoadStoreScalingOption option = PreferUnscaledOffset);
1223
1224 // Load byte (with unscaled offset).
1225 void ldurb(const Register& rt,
1226 const MemOperand& src,
1227 LoadStoreScalingOption option = PreferUnscaledOffset);
1228
1229 // Store byte (with unscaled offset).
1230 void sturb(const Register& rt,
1231 const MemOperand& dst,
1232 LoadStoreScalingOption option = PreferUnscaledOffset);
1233
1234 // Load byte with sign extension (and unscaled offset).
1235 void ldursb(const Register& rt,
1236 const MemOperand& src,
1237 LoadStoreScalingOption option = PreferUnscaledOffset);
1238
1239 // Load half-word (with unscaled offset).
1240 void ldurh(const Register& rt,
1241 const MemOperand& src,
1242 LoadStoreScalingOption option = PreferUnscaledOffset);
1243
1244 // Store half-word (with unscaled offset).
1245 void sturh(const Register& rt,
1246 const MemOperand& dst,
1247 LoadStoreScalingOption option = PreferUnscaledOffset);
1248
1249 // Load half-word with sign extension (and unscaled offset).
1250 void ldursh(const Register& rt,
1251 const MemOperand& src,
1252 LoadStoreScalingOption option = PreferUnscaledOffset);
1253
1254 // Load double-word with pointer authentication, using data key A and a
1255 // modifier of zero [Armv8.3].
1256 void ldraa(const Register& xt, const MemOperand& src);
1257
1258 // Load double-word with pointer authentication, using data key B and a
1259 // modifier of zero [Armv8.3].
1260 void ldrab(const Register& xt, const MemOperand& src);
1261
1262 // Load integer or FP register pair.
1263 void ldp(const CPURegister& rt,
1264 const CPURegister& rt2,
1265 const MemOperand& src);
1266
1267 // Store integer or FP register pair.
1268 void stp(const CPURegister& rt,
1269 const CPURegister& rt2,
1270 const MemOperand& dst);
1271
1272 // Load word pair with sign extension.
1273 void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);
1274
1275 // Load integer or FP register pair, non-temporal.
1276 void ldnp(const CPURegister& rt,
1277 const CPURegister& rt2,
1278 const MemOperand& src);
1279
1280 // Store integer or FP register pair, non-temporal.
1281 void stnp(const CPURegister& rt,
1282 const CPURegister& rt2,
1283 const MemOperand& dst);
1284
1285 // Load integer or FP register from literal pool.
1286 void ldr(const CPURegister& rt, RawLiteral* literal);
1287
1288 // Load word with sign extension from literal pool.
1289 void ldrsw(const Register& xt, RawLiteral* literal);
1290
1291 // Load integer or FP register from pc + imm19 << 2.
1292 void ldr(const CPURegister& rt, int64_t imm19);
1293
1294 // Load word with sign extension from pc + imm19 << 2.
1295 void ldrsw(const Register& xt, int64_t imm19);
1296
1297 // Store exclusive byte.
1298 void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1299
1300 // Store exclusive half-word.
1301 void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1302
1303 // Store exclusive register.
1304 void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1305
1306 // Load exclusive byte.
1307 void ldxrb(const Register& rt, const MemOperand& src);
1308
1309 // Load exclusive half-word.
1310 void ldxrh(const Register& rt, const MemOperand& src);
1311
1312 // Load exclusive register.
1313 void ldxr(const Register& rt, const MemOperand& src);
1314
1315 // Store exclusive register pair.
1316 void stxp(const Register& rs,
1317 const Register& rt,
1318 const Register& rt2,
1319 const MemOperand& dst);
1320
1321 // Load exclusive register pair.
1322 void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1323
1324 // Store-release exclusive byte.
1325 void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1326
1327 // Store-release exclusive half-word.
1328 void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1329
1330 // Store-release exclusive register.
1331 void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1332
1333 // Load-acquire exclusive byte.
1334 void ldaxrb(const Register& rt, const MemOperand& src);
1335
1336 // Load-acquire exclusive half-word.
1337 void ldaxrh(const Register& rt, const MemOperand& src);
1338
1339 // Load-acquire exclusive register.
1340 void ldaxr(const Register& rt, const MemOperand& src);
1341
1342 // Store-release exclusive register pair.
1343 void stlxp(const Register& rs,
1344 const Register& rt,
1345 const Register& rt2,
1346 const MemOperand& dst);
1347
1348 // Load-acquire exclusive register pair.
1349 void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1350
1351 // Store-release byte.
1352 void stlrb(const Register& rt, const MemOperand& dst);
1353
1354 // Store-release half-word.
1355 void stlrh(const Register& rt, const MemOperand& dst);
1356
1357 // Store-release register.
1358 void stlr(const Register& rt, const MemOperand& dst);
1359
1360 // Load-acquire byte.
1361 void ldarb(const Register& rt, const MemOperand& src);
1362
1363 // Load-acquire half-word.
1364 void ldarh(const Register& rt, const MemOperand& src);
1365
1366 // Load-acquire register.
1367 void ldar(const Register& rt, const MemOperand& src);
1368
1369 // Store LORelease byte [Armv8.1].
1370 void stllrb(const Register& rt, const MemOperand& dst);
1371
1372 // Store LORelease half-word [Armv8.1].
1373 void stllrh(const Register& rt, const MemOperand& dst);
1374
1375 // Store LORelease register [Armv8.1].
1376 void stllr(const Register& rt, const MemOperand& dst);
1377
1378 // Load LORelease byte [Armv8.1].
1379 void ldlarb(const Register& rt, const MemOperand& src);
1380
1381 // Load LORelease half-word [Armv8.1].
1382 void ldlarh(const Register& rt, const MemOperand& src);
1383
1384 // Load LORelease register [Armv8.1].
1385 void ldlar(const Register& rt, const MemOperand& src);
1386
1387 // Compare and Swap word or doubleword in memory [Armv8.1].
1388 void cas(const Register& rs, const Register& rt, const MemOperand& src);
1389
1390 // Compare and Swap word or doubleword in memory [Armv8.1].
1391 void casa(const Register& rs, const Register& rt, const MemOperand& src);
1392
1393 // Compare and Swap word or doubleword in memory [Armv8.1].
1394 void casl(const Register& rs, const Register& rt, const MemOperand& src);
1395
1396 // Compare and Swap word or doubleword in memory [Armv8.1].
1397 void casal(const Register& rs, const Register& rt, const MemOperand& src);
1398
1399 // Compare and Swap byte in memory [Armv8.1].
1400 void casb(const Register& rs, const Register& rt, const MemOperand& src);
1401
1402 // Compare and Swap byte in memory [Armv8.1].
1403 void casab(const Register& rs, const Register& rt, const MemOperand& src);
1404
1405 // Compare and Swap byte in memory [Armv8.1].
1406 void caslb(const Register& rs, const Register& rt, const MemOperand& src);
1407
1408 // Compare and Swap byte in memory [Armv8.1].
1409 void casalb(const Register& rs, const Register& rt, const MemOperand& src);
1410
1411 // Compare and Swap halfword in memory [Armv8.1].
1412 void cash(const Register& rs, const Register& rt, const MemOperand& src);
1413
1414 // Compare and Swap halfword in memory [Armv8.1].
1415 void casah(const Register& rs, const Register& rt, const MemOperand& src);
1416
1417 // Compare and Swap halfword in memory [Armv8.1].
1418 void caslh(const Register& rs, const Register& rt, const MemOperand& src);
1419
1420 // Compare and Swap halfword in memory [Armv8.1].
1421 void casalh(const Register& rs, const Register& rt, const MemOperand& src);
1422
1423 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1424 void casp(const Register& rs,
1425 const Register& rs2,
1426 const Register& rt,
1427 const Register& rt2,
1428 const MemOperand& src);
1429
1430 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1431 void caspa(const Register& rs,
1432 const Register& rs2,
1433 const Register& rt,
1434 const Register& rt2,
1435 const MemOperand& src);
1436
1437 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1438 void caspl(const Register& rs,
1439 const Register& rs2,
1440 const Register& rt,
1441 const Register& rt2,
1442 const MemOperand& src);
1443
1444 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1445 void caspal(const Register& rs,
1446 const Register& rs2,
1447 const Register& rt,
1448 const Register& rt2,
1449 const MemOperand& src);
1450
1451 // Store-release byte (with unscaled offset) [Armv8.4].
1452 void stlurb(const Register& rt, const MemOperand& dst);
1453
1454 // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
1455 void ldapurb(const Register& rt, const MemOperand& src);
1456
1457 // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
1458 void ldapursb(const Register& rt, const MemOperand& src);
1459
1460 // Store-release half-word (with unscaled offset) [Armv8.4].
1461 void stlurh(const Register& rt, const MemOperand& dst);
1462
1463 // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
1464 void ldapurh(const Register& rt, const MemOperand& src);
1465
1466 // Load-acquire RCpc Register signed half-word (with unscaled offset)
1467 // [Armv8.4].
1468 void ldapursh(const Register& rt, const MemOperand& src);
1469
1470 // Store-release word or double-word (with unscaled offset) [Armv8.4].
1471 void stlur(const Register& rt, const MemOperand& dst);
1472
1473 // Load-acquire RCpc Register word or double-word (with unscaled offset)
1474 // [Armv8.4].
1475 void ldapur(const Register& rt, const MemOperand& src);
1476
1477 // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
1478 void ldapursw(const Register& xt, const MemOperand& src);
1479
1480 // Atomic add on byte in memory [Armv8.1]
1481 void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
1482
1483 // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
1484 void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
1485
1486 // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
1487 void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
1488
1489 // Atomic add on byte in memory, with Load-acquire and Store-release semantics
1490 // [Armv8.1]
1491 void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
1492
1493 // Atomic add on halfword in memory [Armv8.1]
1494 void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
1495
1496 // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
1497 void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
1498
1499 // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
1500 void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
1501
1502 // Atomic add on halfword in memory, with Load-acquire and Store-release
1503 // semantics [Armv8.1]
1504 void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
1505
1506 // Atomic add on word or doubleword in memory [Armv8.1]
1507 void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
1508
1509 // Atomic add on word or doubleword in memory, with Load-acquire semantics
1510 // [Armv8.1]
1511 void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
1512
1513 // Atomic add on word or doubleword in memory, with Store-release semantics
1514 // [Armv8.1]
1515 void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
1516
1517 // Atomic add on word or doubleword in memory, with Load-acquire and
1518 // Store-release semantics [Armv8.1]
1519 void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
1520
1521 // Atomic bit clear on byte in memory [Armv8.1]
1522 void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
1523
1524 // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
1525 void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
1526
1527 // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
1528 void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
1529
1530 // Atomic bit clear on byte in memory, with Load-acquire and Store-release
1531 // semantics [Armv8.1]
1532 void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
1533
1534 // Atomic bit clear on halfword in memory [Armv8.1]
1535 void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
1536
1537 // Atomic bit clear on halfword in memory, with Load-acquire semantics
1538 // [Armv8.1]
1539 void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
1540
1541 // Atomic bit clear on halfword in memory, with Store-release semantics
1542 // [Armv8.1]
1543 void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
1544
1545 // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
1546 // semantics [Armv8.1]
1547 void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
1548
1549 // Atomic bit clear on word or doubleword in memory [Armv8.1]
1550 void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
1551
1552 // Atomic bit clear on word or doubleword in memory, with Load-acquire
1553 // semantics [Armv8.1]
1554 void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
1555
1556 // Atomic bit clear on word or doubleword in memory, with Store-release
1557 // semantics [Armv8.1]
1558 void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
1559
1560 // Atomic bit clear on word or doubleword in memory, with Load-acquire and
1561 // Store-release semantics [Armv8.1]
1562 void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
1563
1564 // Atomic exclusive OR on byte in memory [Armv8.1]
1565 void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
1566
1567 // Atomic exclusive OR on byte in memory, with Load-acquire semantics
1568 // [Armv8.1]
1569 void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
1570
1571 // Atomic exclusive OR on byte in memory, with Store-release semantics
1572 // [Armv8.1]
1573 void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
1574
1575 // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
1576 // semantics [Armv8.1]
1577 void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
1578
1579 // Atomic exclusive OR on halfword in memory [Armv8.1]
1580 void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
1581
1582 // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
1583 // [Armv8.1]
1584 void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
1585
1586 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1587 // [Armv8.1]
1588 void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
1589
1590 // Atomic exclusive OR on halfword in memory, with Load-acquire and
1591 // Store-release semantics [Armv8.1]
1592 void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
1593
1594 // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
1595 void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
1596
1597 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
1598 // semantics [Armv8.1]
1599 void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
1600
1601 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1602 // semantics [Armv8.1]
1603 void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
1604
1605 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
1606 // Store-release semantics [Armv8.1]
1607 void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1608
1609 // Atomic bit set on byte in memory [Armv8.1]
1610 void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
1611
1612 // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
1613 void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
1614
1615 // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
1616 void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
1617
1618 // Atomic bit set on byte in memory, with Load-acquire and Store-release
1619 // semantics [Armv8.1]
1620 void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
1621
1622 // Atomic bit set on halfword in memory [Armv8.1]
1623 void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
1624
1625 // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
1626 void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
1627
1628 // Atomic bit set on halfword in memory, with Store-release semantics
1629 // [Armv8.1]
1630 void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
1631
1632 // Atomic bit set on halfword in memory, with Load-acquire and Store-release
1633 // semantics [Armv8.1]
1634 void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
1635
1636 // Atomic bit set on word or doubleword in memory [Armv8.1]
1637 void ldset(const Register& rs, const Register& rt, const MemOperand& src);
1638
1639 // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
1640 // [Armv8.1]
1641 void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
1642
1643 // Atomic bit set on word or doubleword in memory, with Store-release
1644 // semantics [Armv8.1]
1645 void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
1646
1647 // Atomic bit set on word or doubleword in memory, with Load-acquire and
1648 // Store-release semantics [Armv8.1]
1649 void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
1650
1651 // Atomic signed maximum on byte in memory [Armv8.1]
1652 void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
1653
1654 // Atomic signed maximum on byte in memory, with Load-acquire semantics
1655 // [Armv8.1]
1656 void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
1657
1658 // Atomic signed maximum on byte in memory, with Store-release semantics
1659 // [Armv8.1]
1660 void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1661
1662 // Atomic signed maximum on byte in memory, with Load-acquire and
1663 // Store-release semantics [Armv8.1]
1664 void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1665
1666 // Atomic signed maximum on halfword in memory [Armv8.1]
1667 void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
1668
1669 // Atomic signed maximum on halfword in memory, with Load-acquire semantics
1670 // [Armv8.1]
1671 void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
1672
1673 // Atomic signed maximum on halfword in memory, with Store-release semantics
1674 // [Armv8.1]
1675 void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1676
1677 // Atomic signed maximum on halfword in memory, with Load-acquire and
1678 // Store-release semantics [Armv8.1]
1679 void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1680
1681 // Atomic signed maximum on word or doubleword in memory [Armv8.1]
1682 void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
1683
1684 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1685 // semantics [Armv8.1]
1686 void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
1687
1688 // Atomic signed maximum on word or doubleword in memory, with Store-release
1689 // semantics [Armv8.1]
1690 void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
1691
1692 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1693 // and Store-release semantics [Armv8.1]
1694 void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1695
1696 // Atomic signed minimum on byte in memory [Armv8.1]
1697 void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
1698
1699 // Atomic signed minimum on byte in memory, with Load-acquire semantics
1700 // [Armv8.1]
1701 void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
1702
1703 // Atomic signed minimum on byte in memory, with Store-release semantics
1704 // [Armv8.1]
1705 void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
1706
1707 // Atomic signed minimum on byte in memory, with Load-acquire and
1708 // Store-release semantics [Armv8.1]
1709 void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
1710
1711 // Atomic signed minimum on halfword in memory [Armv8.1]
1712 void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
1713
1714 // Atomic signed minimum on halfword in memory, with Load-acquire semantics
1715 // [Armv8.1]
1716 void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
1717
1718 // Atomic signed minimum on halfword in memory, with Store-release semantics
1719 // [Armv8.1]
1720 void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
1721
1722 // Atomic signed minimum on halfword in memory, with Load-acquire and
1723 // Store-release semantics [Armv8.1]
1724 void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
1725
1726 // Atomic signed minimum on word or doubleword in memory [Armv8.1]
1727 void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
1728
1729 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1730 // semantics [Armv8.1]
1731 void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
1732
1733 // Atomic signed minimum on word or doubleword in memory, with Store-release
1734 // semantics [Armv8.1]
1735 void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
1736
1737 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1738 // and Store-release semantics [Armv8.1]
1739 void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
1740
1741 // Atomic unsigned maximum on byte in memory [Armv8.1]
1742 void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
1743
1744 // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
1745 // [Armv8.1]
1746 void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
1747
1748 // Atomic unsigned maximum on byte in memory, with Store-release semantics
1749 // [Armv8.1]
1750 void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1751
1752 // Atomic unsigned maximum on byte in memory, with Load-acquire and
1753 // Store-release semantics [Armv8.1]
1754 void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1755
1756 // Atomic unsigned maximum on halfword in memory [Armv8.1]
1757 void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
1758
1759 // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
1760 // [Armv8.1]
1761 void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
1762
1763 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1764 // [Armv8.1]
1765 void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1766
1767 // Atomic unsigned maximum on halfword in memory, with Load-acquire and
1768 // Store-release semantics [Armv8.1]
1769 void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1770
1771 // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
1772 void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
1773
1774 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1775 // semantics [Armv8.1]
1776 void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
1777
1778 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1779 // semantics [Armv8.1]
1780 void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
1781
1782 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1783 // and Store-release semantics [Armv8.1]
1784 void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1785
1786 // Atomic unsigned minimum on byte in memory [Armv8.1]
1787 void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
1788
1789 // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
1790 // [Armv8.1]
1791 void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
1792
1793 // Atomic unsigned minimum on byte in memory, with Store-release semantics
1794 // [Armv8.1]
1795 void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
1796
1797 // Atomic unsigned minimum on byte in memory, with Load-acquire and
1798 // Store-release semantics [Armv8.1]
1799 void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
1800
1801 // Atomic unsigned minimum on halfword in memory [Armv8.1]
1802 void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
1803
1804 // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
1805 // [Armv8.1]
1806 void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
1807
1808 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1809 // [Armv8.1]
1810 void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
1811
1812 // Atomic unsigned minimum on halfword in memory, with Load-acquire and
1813 // Store-release semantics [Armv8.1]
1814 void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
1815
1816 // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
1817 void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
1818
1819 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1820 // semantics [Armv8.1]
1821 void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
1822
1823 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
1824 // semantics [Armv8.1]
1825 void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
1826
1827 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1828 // and Store-release semantics [Armv8.1]
1829 void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1830
1831 // Atomic add on byte in memory, without return. [Armv8.1]
1832 void staddb(const Register& rs, const MemOperand& src);
1833
1834 // Atomic add on byte in memory, with Store-release semantics and without
1835 // return. [Armv8.1]
1836 void staddlb(const Register& rs, const MemOperand& src);
1837
1838 // Atomic add on halfword in memory, without return. [Armv8.1]
1839 void staddh(const Register& rs, const MemOperand& src);
1840
1841 // Atomic add on halfword in memory, with Store-release semantics and without
1842 // return. [Armv8.1]
1843 void staddlh(const Register& rs, const MemOperand& src);
1844
1845 // Atomic add on word or doubleword in memory, without return. [Armv8.1]
1846 void stadd(const Register& rs, const MemOperand& src);
1847
1848 // Atomic add on word or doubleword in memory, with Store-release semantics
1849 // and without return. [Armv8.1]
1850 void staddl(const Register& rs, const MemOperand& src);
1851
1852 // Atomic bit clear on byte in memory, without return. [Armv8.1]
1853 void stclrb(const Register& rs, const MemOperand& src);
1854
1855 // Atomic bit clear on byte in memory, with Store-release semantics and
1856 // without return. [Armv8.1]
1857 void stclrlb(const Register& rs, const MemOperand& src);
1858
1859 // Atomic bit clear on halfword in memory, without return. [Armv8.1]
1860 void stclrh(const Register& rs, const MemOperand& src);
1861
1862 // Atomic bit clear on halfword in memory, with Store-release semantics and
1863 // without return. [Armv8.1]
1864 void stclrlh(const Register& rs, const MemOperand& src);
1865
1866 // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
1867 void stclr(const Register& rs, const MemOperand& src);
1868
1869 // Atomic bit clear on word or doubleword in memory, with Store-release
1870 // semantics and without return. [Armv8.1]
1871 void stclrl(const Register& rs, const MemOperand& src);
1872
1873 // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
1874 void steorb(const Register& rs, const MemOperand& src);
1875
1876 // Atomic exclusive OR on byte in memory, with Store-release semantics and
1877 // without return. [Armv8.1]
1878 void steorlb(const Register& rs, const MemOperand& src);
1879
1880 // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
1881 void steorh(const Register& rs, const MemOperand& src);
1882
1883 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1884 // and without return. [Armv8.1]
1885 void steorlh(const Register& rs, const MemOperand& src);
1886
1887 // Atomic exclusive OR on word or doubleword in memory, without return.
1888 // [Armv8.1]
1889 void steor(const Register& rs, const MemOperand& src);
1890
1891 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1892 // semantics and without return. [Armv8.1]
1893 void steorl(const Register& rs, const MemOperand& src);
1894
1895 // Atomic bit set on byte in memory, without return. [Armv8.1]
1896 void stsetb(const Register& rs, const MemOperand& src);
1897
1898 // Atomic bit set on byte in memory, with Store-release semantics and without
1899 // return. [Armv8.1]
1900 void stsetlb(const Register& rs, const MemOperand& src);
1901
1902 // Atomic bit set on halfword in memory, without return. [Armv8.1]
1903 void stseth(const Register& rs, const MemOperand& src);
1904
1905 // Atomic bit set on halfword in memory, with Store-release semantics and
1906 // without return. [Armv8.1]
1907 void stsetlh(const Register& rs, const MemOperand& src);
1908
1909 // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
1910 void stset(const Register& rs, const MemOperand& src);
1911
1912 // Atomic bit set on word or doubleword in memory, with Store-release
1913 // semantics and without return. [Armv8.1]
1914 void stsetl(const Register& rs, const MemOperand& src);
1915
1916 // Atomic signed maximum on byte in memory, without return. [Armv8.1]
1917 void stsmaxb(const Register& rs, const MemOperand& src);
1918
1919 // Atomic signed maximum on byte in memory, with Store-release semantics and
1920 // without return. [Armv8.1]
1921 void stsmaxlb(const Register& rs, const MemOperand& src);
1922
1923 // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
1924 void stsmaxh(const Register& rs, const MemOperand& src);
1925
1926 // Atomic signed maximum on halfword in memory, with Store-release semantics
1927 // and without return. [Armv8.1]
1928 void stsmaxlh(const Register& rs, const MemOperand& src);
1929
1930 // Atomic signed maximum on word or doubleword in memory, without return.
1931 // [Armv8.1]
1932 void stsmax(const Register& rs, const MemOperand& src);
1933
1934 // Atomic signed maximum on word or doubleword in memory, with Store-release
1935 // semantics and without return. [Armv8.1]
1936 void stsmaxl(const Register& rs, const MemOperand& src);
1937
1938 // Atomic signed minimum on byte in memory, without return. [Armv8.1]
1939 void stsminb(const Register& rs, const MemOperand& src);
1940
1941 // Atomic signed minimum on byte in memory, with Store-release semantics and
1942 // without return. [Armv8.1]
1943 void stsminlb(const Register& rs, const MemOperand& src);
1944
1945 // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
1946 void stsminh(const Register& rs, const MemOperand& src);
1947
1948 // Atomic signed minimum on halfword in memory, with Store-release semantics
1949 // and without return. [Armv8.1]
1950 void stsminlh(const Register& rs, const MemOperand& src);
1951
1952 // Atomic signed minimum on word or doubleword in memory, without return.
1953 // [Armv8.1]
1954 void stsmin(const Register& rs, const MemOperand& src);
1955
  // Atomic signed minimum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
1958 void stsminl(const Register& rs, const MemOperand& src);
1959
1960 // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
1961 void stumaxb(const Register& rs, const MemOperand& src);
1962
1963 // Atomic unsigned maximum on byte in memory, with Store-release semantics and
1964 // without return. [Armv8.1]
1965 void stumaxlb(const Register& rs, const MemOperand& src);
1966
1967 // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
1968 void stumaxh(const Register& rs, const MemOperand& src);
1969
1970 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1971 // and without return. [Armv8.1]
1972 void stumaxlh(const Register& rs, const MemOperand& src);
1973
1974 // Atomic unsigned maximum on word or doubleword in memory, without return.
1975 // [Armv8.1]
1976 void stumax(const Register& rs, const MemOperand& src);
1977
1978 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1979 // semantics and without return. [Armv8.1]
1980 void stumaxl(const Register& rs, const MemOperand& src);
1981
1982 // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
1983 void stuminb(const Register& rs, const MemOperand& src);
1984
1985 // Atomic unsigned minimum on byte in memory, with Store-release semantics and
1986 // without return. [Armv8.1]
1987 void stuminlb(const Register& rs, const MemOperand& src);
1988
1989 // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
1990 void stuminh(const Register& rs, const MemOperand& src);
1991
1992 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1993 // and without return. [Armv8.1]
1994 void stuminlh(const Register& rs, const MemOperand& src);
1995
1996 // Atomic unsigned minimum on word or doubleword in memory, without return.
1997 // [Armv8.1]
1998 void stumin(const Register& rs, const MemOperand& src);
1999
2000 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
2001 // semantics and without return. [Armv8.1]
2002 void stuminl(const Register& rs, const MemOperand& src);
2003
2004 // Swap byte in memory [Armv8.1]
2005 void swpb(const Register& rs, const Register& rt, const MemOperand& src);
2006
2007 // Swap byte in memory, with Load-acquire semantics [Armv8.1]
2008 void swpab(const Register& rs, const Register& rt, const MemOperand& src);
2009
2010 // Swap byte in memory, with Store-release semantics [Armv8.1]
2011 void swplb(const Register& rs, const Register& rt, const MemOperand& src);
2012
2013 // Swap byte in memory, with Load-acquire and Store-release semantics
2014 // [Armv8.1]
2015 void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
2016
2017 // Swap halfword in memory [Armv8.1]
2018 void swph(const Register& rs, const Register& rt, const MemOperand& src);
2019
2020 // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
2021 void swpah(const Register& rs, const Register& rt, const MemOperand& src);
2022
2023 // Swap halfword in memory, with Store-release semantics [Armv8.1]
2024 void swplh(const Register& rs, const Register& rt, const MemOperand& src);
2025
2026 // Swap halfword in memory, with Load-acquire and Store-release semantics
2027 // [Armv8.1]
2028 void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
2029
2030 // Swap word or doubleword in memory [Armv8.1]
2031 void swp(const Register& rs, const Register& rt, const MemOperand& src);
2032
2033 // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
2034 void swpa(const Register& rs, const Register& rt, const MemOperand& src);
2035
2036 // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
2037 void swpl(const Register& rs, const Register& rt, const MemOperand& src);
2038
2039 // Swap word or doubleword in memory, with Load-acquire and Store-release
2040 // semantics [Armv8.1]
2041 void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2042
2043 // Load-Acquire RCpc Register byte [Armv8.3]
2044 void ldaprb(const Register& rt, const MemOperand& src);
2045
2046 // Load-Acquire RCpc Register halfword [Armv8.3]
2047 void ldaprh(const Register& rt, const MemOperand& src);
2048
2049 // Load-Acquire RCpc Register word or doubleword [Armv8.3]
2050 void ldapr(const Register& rt, const MemOperand& src);
2051
2052 // Prefetch memory.
2053 void prfm(PrefetchOperation op,
2054 const MemOperand& addr,
2055 LoadStoreScalingOption option = PreferScaledOffset);
2056
2057 // Prefetch memory (with unscaled offset).
2058 void prfum(PrefetchOperation op,
2059 const MemOperand& addr,
2060 LoadStoreScalingOption option = PreferUnscaledOffset);
2061
2062 // Prefetch memory in the literal pool.
2063 void prfm(PrefetchOperation op, RawLiteral* literal);
2064
2065 // Prefetch from pc + imm19 << 2.
2066 void prfm(PrefetchOperation op, int64_t imm19);
2067
2068 // Prefetch memory (allowing unallocated hints).
2069 void prfm(int op,
2070 const MemOperand& addr,
2071 LoadStoreScalingOption option = PreferScaledOffset);
2072
2073 // Prefetch memory (with unscaled offset, allowing unallocated hints).
2074 void prfum(int op,
2075 const MemOperand& addr,
2076 LoadStoreScalingOption option = PreferUnscaledOffset);
2077
2078 // Prefetch memory in the literal pool (allowing unallocated hints).
2079 void prfm(int op, RawLiteral* literal);
2080
2081 // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
2082 void prfm(int op, int64_t imm19);
2083
2084 // Move instructions. The default shift of -1 indicates that the move
2085 // instruction will calculate an appropriate 16-bit immediate and left shift
2086 // that is equal to the 64-bit immediate argument. If an explicit left shift
2087 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
2088 //
  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant.
2093
  // Move immediate and keep.
  //
  // Writes the 16-bit immediate into the half word of `rd` selected by
  // `shift` (0, 16, 32 or 48), leaving the remaining bits of `rd` unchanged.
  // With the default shift of -1, MoveWide derives a suitable 16-bit
  // immediate and left shift from the 64-bit `imm` argument.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }
2098
  // Move inverted immediate.
  //
  // Emits MOVN: `rd` receives the bitwise inverse of the (optionally shifted)
  // 16-bit immediate. With the default shift of -1, MoveWide derives an
  // appropriate 16-bit immediate and left shift from the 64-bit `imm`.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }
2103
  // Move immediate.
  //
  // Emits MOVZ: `rd` receives the (optionally shifted) 16-bit immediate with
  // all other bits cleared. With the default shift of -1, MoveWide derives an
  // appropriate 16-bit immediate and left shift from the 64-bit `imm`.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }
2108
2109 // Move immediate, aliases for movz, movn, orr.
mov(const Register & rd,uint64_t imm)2110 void mov(const Register& rd, uint64_t imm) {
2111 if (!OneInstrMoveImmediateHelper(this, rd, imm)) {
2112 VIXL_UNIMPLEMENTED();
2113 }
2114 }
2115
2116 // Misc instructions.
2117
2118 // Monitor debug-mode breakpoint.
2119 void brk(int code);
2120
2121 // Halting debug-mode breakpoint.
2122 void hlt(int code);
2123
2124 // Generate exception targeting EL1.
2125 void svc(int code);
2126
2127 // Generate undefined instruction exception.
2128 void udf(int code);
2129
2130 // Move register to register.
2131 void mov(const Register& rd, const Register& rn);
2132
2133 // Move inverted operand to register.
2134 void mvn(const Register& rd, const Operand& operand);
2135
2136 // System instructions.
2137
2138 // Move to register from system register.
2139 void mrs(const Register& xt, SystemRegister sysreg);
2140
2141 // Move from register to system register.
2142 void msr(SystemRegister sysreg, const Register& xt);
2143
2144 // Invert carry flag [Armv8.4].
2145 void cfinv();
2146
2147 // Convert floating-point condition flags from alternative format to Arm
2148 // format [Armv8.5].
2149 void xaflag();
2150
2151 // Convert floating-point condition flags from Arm format to alternative
2152 // format [Armv8.5].
2153 void axflag();
2154
2155 // System instruction.
2156 void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
2157
2158 // System instruction with pre-encoded op (op1:crn:crm:op2).
2159 void sys(int op, const Register& xt = xzr);
2160
2161 // System instruction with result.
2162 void sysl(int op, const Register& xt = xzr);
2163
2164 // System data cache operation.
2165 void dc(DataCacheOp op, const Register& rt);
2166
2167 // System instruction cache operation.
2168 void ic(InstructionCacheOp op, const Register& rt);
2169
2170 // System hint (named type).
2171 void hint(SystemHint code);
2172
2173 // System hint (numbered type).
2174 void hint(int imm7);
2175
2176 // Clear exclusive monitor.
2177 void clrex(int imm4 = 0xf);
2178
2179 // Data memory barrier.
2180 void dmb(BarrierDomain domain, BarrierType type);
2181
2182 // Data synchronization barrier.
2183 void dsb(BarrierDomain domain, BarrierType type);
2184
2185 // Instruction synchronization barrier.
2186 void isb();
2187
2188 // Error synchronization barrier.
2189 void esb();
2190
2191 // Conditional speculation dependency barrier.
2192 void csdb();
2193
2194 // No-op.
nop()2195 void nop() { hint(NOP); }
2196
2197 // Branch target identification.
2198 void bti(BranchTargetIdentifier id);
2199
2200 // FP and NEON instructions.
2201
2202 // Move double precision immediate to FP register.
2203 void fmov(const VRegister& vd, double imm);
2204
2205 // Move single precision immediate to FP register.
2206 void fmov(const VRegister& vd, float imm);
2207
2208 // Move half precision immediate to FP register [Armv8.2].
2209 void fmov(const VRegister& vd, Float16 imm);
2210
2211 // Move FP register to register.
2212 void fmov(const Register& rd, const VRegister& fn);
2213
2214 // Move register to FP register.
2215 void fmov(const VRegister& vd, const Register& rn);
2216
2217 // Move FP register to FP register.
2218 void fmov(const VRegister& vd, const VRegister& fn);
2219
2220 // Move 64-bit register to top half of 128-bit FP register.
2221 void fmov(const VRegister& vd, int index, const Register& rn);
2222
2223 // Move top half of 128-bit FP register to 64-bit register.
2224 void fmov(const Register& rd, const VRegister& vn, int index);
2225
2226 // FP add.
2227 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2228
2229 // FP subtract.
2230 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2231
2232 // FP multiply.
2233 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2234
2235 // FP fused multiply-add.
2236 void fmadd(const VRegister& vd,
2237 const VRegister& vn,
2238 const VRegister& vm,
2239 const VRegister& va);
2240
2241 // FP fused multiply-subtract.
2242 void fmsub(const VRegister& vd,
2243 const VRegister& vn,
2244 const VRegister& vm,
2245 const VRegister& va);
2246
2247 // FP fused multiply-add and negate.
2248 void fnmadd(const VRegister& vd,
2249 const VRegister& vn,
2250 const VRegister& vm,
2251 const VRegister& va);
2252
2253 // FP fused multiply-subtract and negate.
2254 void fnmsub(const VRegister& vd,
2255 const VRegister& vn,
2256 const VRegister& vm,
2257 const VRegister& va);
2258
2259 // FP multiply-negate scalar.
2260 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2261
2262 // FP reciprocal exponent scalar.
2263 void frecpx(const VRegister& vd, const VRegister& vn);
2264
2265 // FP divide.
2266 void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2267
2268 // FP maximum.
2269 void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2270
2271 // FP minimum.
2272 void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2273
2274 // FP maximum number.
2275 void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2276
2277 // FP minimum number.
2278 void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2279
2280 // FP absolute.
2281 void fabs(const VRegister& vd, const VRegister& vn);
2282
2283 // FP negate.
2284 void fneg(const VRegister& vd, const VRegister& vn);
2285
2286 // FP square root.
2287 void fsqrt(const VRegister& vd, const VRegister& vn);
2288
2289 // FP round to integer, nearest with ties to away.
2290 void frinta(const VRegister& vd, const VRegister& vn);
2291
2292 // FP round to integer, implicit rounding.
2293 void frinti(const VRegister& vd, const VRegister& vn);
2294
2295 // FP round to integer, toward minus infinity.
2296 void frintm(const VRegister& vd, const VRegister& vn);
2297
2298 // FP round to integer, nearest with ties to even.
2299 void frintn(const VRegister& vd, const VRegister& vn);
2300
2301 // FP round to integer, toward plus infinity.
2302 void frintp(const VRegister& vd, const VRegister& vn);
2303
2304 // FP round to integer, exact, implicit rounding.
2305 void frintx(const VRegister& vd, const VRegister& vn);
2306
2307 // FP round to integer, towards zero.
2308 void frintz(const VRegister& vd, const VRegister& vn);
2309
2310 // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
2311 void frint32x(const VRegister& vd, const VRegister& vn);
2312
2313 // FP round to 32-bit integer, towards zero [Armv8.5].
2314 void frint32z(const VRegister& vd, const VRegister& vn);
2315
2316 // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
2317 void frint64x(const VRegister& vd, const VRegister& vn);
2318
2319 // FP round to 64-bit integer, towards zero [Armv8.5].
2320 void frint64z(const VRegister& vd, const VRegister& vn);
2321
2322 void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);
2323
2324 void FPCompareMacro(const VRegister& vn,
2325 const VRegister& vm,
2326 FPTrapFlags trap);
2327
2328 // FP compare registers.
2329 void fcmp(const VRegister& vn, const VRegister& vm);
2330
2331 // FP compare immediate.
2332 void fcmp(const VRegister& vn, double value);
2333
2334 void FPCCompareMacro(const VRegister& vn,
2335 const VRegister& vm,
2336 StatusFlags nzcv,
2337 Condition cond,
2338 FPTrapFlags trap);
2339
2340 // FP conditional compare.
2341 void fccmp(const VRegister& vn,
2342 const VRegister& vm,
2343 StatusFlags nzcv,
2344 Condition cond);
2345
2346 // FP signaling compare registers.
2347 void fcmpe(const VRegister& vn, const VRegister& vm);
2348
2349 // FP signaling compare immediate.
2350 void fcmpe(const VRegister& vn, double value);
2351
2352 // FP conditional signaling compare.
2353 void fccmpe(const VRegister& vn,
2354 const VRegister& vm,
2355 StatusFlags nzcv,
2356 Condition cond);
2357
2358 // FP conditional select.
2359 void fcsel(const VRegister& vd,
2360 const VRegister& vn,
2361 const VRegister& vm,
2362 Condition cond);
2363
2364 // Common FP Convert functions.
2365 void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
2366 void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2367 void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2368
2369 // FP convert between precisions.
2370 void fcvt(const VRegister& vd, const VRegister& vn);
2371
2372 // FP convert to higher precision.
2373 void fcvtl(const VRegister& vd, const VRegister& vn);
2374
2375 // FP convert to higher precision (second part).
2376 void fcvtl2(const VRegister& vd, const VRegister& vn);
2377
2378 // FP convert to lower precision.
2379 void fcvtn(const VRegister& vd, const VRegister& vn);
2380
  // FP convert to lower precision (second part).
2382 void fcvtn2(const VRegister& vd, const VRegister& vn);
2383
2384 // FP convert to lower precision, rounding to odd.
2385 void fcvtxn(const VRegister& vd, const VRegister& vn);
2386
2387 // FP convert to lower precision, rounding to odd (second part).
2388 void fcvtxn2(const VRegister& vd, const VRegister& vn);
2389
2390 // FP convert to signed integer, nearest with ties to away.
2391 void fcvtas(const Register& rd, const VRegister& vn);
2392
2393 // FP convert to unsigned integer, nearest with ties to away.
2394 void fcvtau(const Register& rd, const VRegister& vn);
2395
2396 // FP convert to signed integer, nearest with ties to away.
2397 void fcvtas(const VRegister& vd, const VRegister& vn);
2398
2399 // FP convert to unsigned integer, nearest with ties to away.
2400 void fcvtau(const VRegister& vd, const VRegister& vn);
2401
2402 // FP convert to signed integer, round towards -infinity.
2403 void fcvtms(const Register& rd, const VRegister& vn);
2404
2405 // FP convert to unsigned integer, round towards -infinity.
2406 void fcvtmu(const Register& rd, const VRegister& vn);
2407
2408 // FP convert to signed integer, round towards -infinity.
2409 void fcvtms(const VRegister& vd, const VRegister& vn);
2410
2411 // FP convert to unsigned integer, round towards -infinity.
2412 void fcvtmu(const VRegister& vd, const VRegister& vn);
2413
2414 // FP convert to signed integer, nearest with ties to even.
2415 void fcvtns(const Register& rd, const VRegister& vn);
2416
2417 // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
2418 void fjcvtzs(const Register& rd, const VRegister& vn);
2419
2420 // FP convert to unsigned integer, nearest with ties to even.
2421 void fcvtnu(const Register& rd, const VRegister& vn);
2422
2423 // FP convert to signed integer, nearest with ties to even.
2424 void fcvtns(const VRegister& rd, const VRegister& vn);
2425
2426 // FP convert to unsigned integer, nearest with ties to even.
2427 void fcvtnu(const VRegister& rd, const VRegister& vn);
2428
2429 // FP convert to signed integer or fixed-point, round towards zero.
2430 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2431
2432 // FP convert to unsigned integer or fixed-point, round towards zero.
2433 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2434
2435 // FP convert to signed integer or fixed-point, round towards zero.
2436 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2437
2438 // FP convert to unsigned integer or fixed-point, round towards zero.
2439 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2440
2441 // FP convert to signed integer, round towards +infinity.
2442 void fcvtps(const Register& rd, const VRegister& vn);
2443
2444 // FP convert to unsigned integer, round towards +infinity.
2445 void fcvtpu(const Register& rd, const VRegister& vn);
2446
2447 // FP convert to signed integer, round towards +infinity.
2448 void fcvtps(const VRegister& vd, const VRegister& vn);
2449
2450 // FP convert to unsigned integer, round towards +infinity.
2451 void fcvtpu(const VRegister& vd, const VRegister& vn);
2452
2453 // Convert signed integer or fixed point to FP.
2454 void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2455
2456 // Convert unsigned integer or fixed point to FP.
2457 void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2458
2459 // Convert signed integer or fixed-point to FP.
2460 void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2461
2462 // Convert unsigned integer or fixed-point to FP.
2463 void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2464
  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving subtract.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving subtract.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pairwise.
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements (scalar form; the pair is read from vn).
  void addp(const VRegister& vd, const VRegister& vn);
2518
  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // For the by-element forms below, vm_index selects the lane of vm that is
  // used as the multiplier for every element of vn.

  // Multiply by scalar element.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply-subtract by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-subtract by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-subtract by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-subtract by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating double long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating double long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-subtract by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-subtract by element
  // (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);
2653
  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // For the compare-with-immediate forms below, only comparison against
  // zero is encodable, so value must be 0.

  // Compare bitwise to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);

  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2710
  // Bitwise and.
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or immediate: or vd with imm8, optionally left-shifted.
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register.
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise orn (or with inverted second operand).
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise eor (exclusive or).
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate: clear in vd the bits set in imm8, optionally
  // left-shifted.
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Vector move immediate, with an optional shift applied to imm.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate, with an optional shift applied to imm8.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);
2761
  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not.
  void not_(const VRegister& vd, const VRegister& vn);

  // The "(second part)" narrowing variants below write the upper half of
  // the destination vector; the base forms write the lower half.

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);
2806
  // Extract vector from pair of vectors, starting at byte position 'index'.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // Duplicate vector element vn[vn_index] to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar.
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element vn[vn_index] into vd[vd_index].
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element.
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);
2848
  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load, replicated to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load, replicated to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load, replicated to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load, replicated to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);
2926
  // Count leading sign bits.
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part: upper half of vn).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
  void uxtl(const VRegister& vd, const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);
3013
  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);
3052
  // NOTE(review): in these store declarations the MemOperand parameter is
  // the store *destination*, despite being named 'src'.

  // One-element structure store from one register.
  void st1(const VRegister& vt, const MemOperand& src);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure store from three registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& src);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);
3111
  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum.
  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd, const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd, const VRegister& vn);

  // Transpose vectors (primary).
  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Transpose vectors (secondary).
  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (primary).
  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (secondary).
  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (primary).
  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (secondary).
  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3195
  // Signed shift right by immediate.
  void sshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (second part).
  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
3267
  // FP reciprocal step.
  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root step.
  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long.
  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long.
  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-subtract.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-subtract (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-subtract.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-subtract (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed dot product [Armv8.2].
  void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply accumulate returning high
  // half [Armv8.1].
  void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned dot product [Armv8.2].
  void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Dot product with unsigned and signed integers (vector).
  void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Dot product with signed and unsigned integers (vector, by element).
  // vm_index selects the lane of vm.
  void sudot(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);
3382
3383 // Dot product with unsigned and signed integers (vector, by element).
3384 void usdot(const VRegister& vd,
3385 const VRegister& vn,
3386 const VRegister& vm,
3387 int vm_index);
3388
3389 // Signed saturating rounding doubling multiply subtract returning high half
3390 // [Armv8.1].
3391 void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3392
3393 // Signed saturating doubling multiply element returning high half.
3394 void sqdmulh(const VRegister& vd,
3395 const VRegister& vn,
3396 const VRegister& vm,
3397 int vm_index);
3398
3399 // Signed saturating rounding doubling multiply element returning high half.
3400 void sqrdmulh(const VRegister& vd,
3401 const VRegister& vn,
3402 const VRegister& vm,
3403 int vm_index);
3404
3405 // Signed dot product by element [Armv8.2].
3406 void sdot(const VRegister& vd,
3407 const VRegister& vn,
3408 const VRegister& vm,
3409 int vm_index);
3410
3411 // Signed saturating rounding doubling multiply accumulate element returning
3412 // high half [Armv8.1].
3413 void sqrdmlah(const VRegister& vd,
3414 const VRegister& vn,
3415 const VRegister& vm,
3416 int vm_index);
3417
3418 // Unsigned dot product by element [Armv8.2].
3419 void udot(const VRegister& vd,
3420 const VRegister& vn,
3421 const VRegister& vm,
3422 int vm_index);
3423
3424 // Signed saturating rounding doubling multiply subtract element returning
3425 // high half [Armv8.1].
3426 void sqrdmlsh(const VRegister& vd,
3427 const VRegister& vn,
3428 const VRegister& vm,
3429 int vm_index);
3430
  // Unsigned long multiply.
3432 void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3433
3434 // Unsigned long multiply (second part).
3435 void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3436
3437 // Add narrow returning high half.
3438 void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3439
3440 // Add narrow returning high half (second part).
3441 void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3442
3443 // Rounding add narrow returning high half.
3444 void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3445
3446 // Rounding add narrow returning high half (second part).
3447 void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3448
3449 // Subtract narrow returning high half.
3450 void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3451
3452 // Subtract narrow returning high half (second part).
3453 void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3454
3455 // Rounding subtract narrow returning high half.
3456 void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3457
3458 // Rounding subtract narrow returning high half (second part).
3459 void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3460
3461 // FP vector multiply accumulate.
3462 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3463
3464 // FP fused multiply-add long to accumulator.
3465 void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3466
3467 // FP fused multiply-add long to accumulator (second part).
3468 void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3469
3470 // FP fused multiply-add long to accumulator by element.
3471 void fmlal(const VRegister& vd,
3472 const VRegister& vn,
3473 const VRegister& vm,
3474 int vm_index);
3475
3476 // FP fused multiply-add long to accumulator by element (second part).
3477 void fmlal2(const VRegister& vd,
3478 const VRegister& vn,
3479 const VRegister& vm,
3480 int vm_index);
3481
3482 // FP vector multiply subtract.
3483 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3484
3485 // FP fused multiply-subtract long to accumulator.
3486 void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3487
3488 // FP fused multiply-subtract long to accumulator (second part).
3489 void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3490
3491 // FP fused multiply-subtract long to accumulator by element.
3492 void fmlsl(const VRegister& vd,
3493 const VRegister& vn,
3494 const VRegister& vm,
3495 int vm_index);
3496
3497 // FP fused multiply-subtract long to accumulator by element (second part).
3498 void fmlsl2(const VRegister& vd,
3499 const VRegister& vn,
3500 const VRegister& vm,
3501 int vm_index);
3502
3503 // FP vector multiply extended.
3504 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3505
3506 // FP absolute greater than or equal.
3507 void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3508
3509 // FP absolute greater than.
3510 void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3511
3512 // FP multiply by element.
3513 void fmul(const VRegister& vd,
3514 const VRegister& vn,
3515 const VRegister& vm,
3516 int vm_index);
3517
3518 // FP fused multiply-add to accumulator by element.
3519 void fmla(const VRegister& vd,
3520 const VRegister& vn,
3521 const VRegister& vm,
3522 int vm_index);
3523
3524 // FP fused multiply-sub from accumulator by element.
3525 void fmls(const VRegister& vd,
3526 const VRegister& vn,
3527 const VRegister& vm,
3528 int vm_index);
3529
3530 // FP multiply extended by element.
3531 void fmulx(const VRegister& vd,
3532 const VRegister& vn,
3533 const VRegister& vm,
3534 int vm_index);
3535
3536 // FP compare equal.
3537 void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3538
3539 // FP greater than.
3540 void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3541
3542 // FP greater than or equal.
3543 void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3544
3545 // FP compare equal to zero.
3546 void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
3547
3548 // FP greater than zero.
3549 void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
3550
3551 // FP greater than or equal to zero.
3552 void fcmge(const VRegister& vd, const VRegister& vn, double imm);
3553
3554 // FP less than or equal to zero.
3555 void fcmle(const VRegister& vd, const VRegister& vn, double imm);
3556
  // FP less than zero.
3558 void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
3559
3560 // FP absolute difference.
3561 void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3562
3563 // FP pairwise add vector.
3564 void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3565
3566 // FP pairwise add scalar.
3567 void faddp(const VRegister& vd, const VRegister& vn);
3568
3569 // FP pairwise maximum vector.
3570 void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3571
3572 // FP pairwise maximum scalar.
3573 void fmaxp(const VRegister& vd, const VRegister& vn);
3574
3575 // FP pairwise minimum vector.
3576 void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3577
3578 // FP pairwise minimum scalar.
3579 void fminp(const VRegister& vd, const VRegister& vn);
3580
3581 // FP pairwise maximum number vector.
3582 void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3583
3584 // FP pairwise maximum number scalar.
3585 void fmaxnmp(const VRegister& vd, const VRegister& vn);
3586
3587 // FP pairwise minimum number vector.
3588 void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3589
3590 // FP pairwise minimum number scalar.
3591 void fminnmp(const VRegister& vd, const VRegister& vn);
3592
3593 // v8.3 complex numbers - note that these are only partial/helper functions
3594 // and must be used in series in order to perform full CN operations.
3595
3596 // FP complex multiply accumulate (by element) [Armv8.3].
3597 void fcmla(const VRegister& vd,
3598 const VRegister& vn,
3599 const VRegister& vm,
3600 int vm_index,
3601 int rot);
3602
3603 // FP complex multiply accumulate [Armv8.3].
3604 void fcmla(const VRegister& vd,
3605 const VRegister& vn,
3606 const VRegister& vm,
3607 int rot);
3608
3609 // FP complex add [Armv8.3].
3610 void fcadd(const VRegister& vd,
3611 const VRegister& vn,
3612 const VRegister& vm,
3613 int rot);
3614
3615 // Signed 8-bit integer matrix multiply-accumulate (vector).
3616 void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3617
3618 // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
3619 void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3620
3621 // Unsigned 8-bit integer matrix multiply-accumulate (vector).
3622 void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3623
3624 // Bit Clear and exclusive-OR.
3625 void bcax(const VRegister& vd,
3626 const VRegister& vn,
3627 const VRegister& vm,
3628 const VRegister& va);
3629
3630 // Three-way Exclusive-OR.
3631 void eor3(const VRegister& vd,
3632 const VRegister& vn,
3633 const VRegister& vm,
3634 const VRegister& va);
3635
3636 // Exclusive-OR and Rotate.
3637 void xar(const VRegister& vd,
3638 const VRegister& vn,
3639 const VRegister& vm,
3640 int rotate);
3641
  // Rotate and Exclusive-OR.
3643 void rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3644
3645 // SHA1 hash update (choose).
3646 void sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3647
3648 // SHA1 fixed rotate.
3649 void sha1h(const VRegister& sd, const VRegister& sn);
3650
3651 // SHA1 hash update (majority).
3652 void sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3653
3654 // SHA1 hash update (parity).
3655 void sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3656
3657 // SHA1 schedule update 0.
3658 void sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3659
3660 // SHA1 schedule update 1.
3661 void sha1su1(const VRegister& vd, const VRegister& vn);
3662
3663 // SHA256 hash update (part 1).
3664 void sha256h(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3665
3666 // SHA256 hash update (part 2).
3667 void sha256h2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3668
3669 // SHA256 schedule update 0.
3670 void sha256su0(const VRegister& vd, const VRegister& vn);
3671
3672 // SHA256 schedule update 1.
3673 void sha256su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3674
3675 // SHA512 hash update part 1.
3676 void sha512h(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3677
3678 // SHA512 hash update part 2.
3679 void sha512h2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3680
  // SHA512 schedule update 0.
3682 void sha512su0(const VRegister& vd, const VRegister& vn);
3683
  // SHA512 schedule update 1.
3685 void sha512su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3686
3687 // AES single round decryption.
3688 void aesd(const VRegister& vd, const VRegister& vn);
3689
3690 // AES single round encryption.
3691 void aese(const VRegister& vd, const VRegister& vn);
3692
3693 // AES inverse mix columns.
3694 void aesimc(const VRegister& vd, const VRegister& vn);
3695
3696 // AES mix columns.
3697 void aesmc(const VRegister& vd, const VRegister& vn);
3698
3699 // SM3PARTW1.
3700 void sm3partw1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3701
3702 // SM3PARTW2.
3703 void sm3partw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3704
3705 // SM3SS1.
3706 void sm3ss1(const VRegister& vd,
3707 const VRegister& vn,
3708 const VRegister& vm,
3709 const VRegister& va);
3710
3711 // SM3TT1A.
3712 void sm3tt1a(const VRegister& vd,
3713 const VRegister& vn,
3714 const VRegister& vm,
3715 int index);
3716
3717 // SM3TT1B.
3718 void sm3tt1b(const VRegister& vd,
3719 const VRegister& vn,
3720 const VRegister& vm,
3721 int index);
3722
3723 // SM3TT2A.
3724 void sm3tt2a(const VRegister& vd,
3725 const VRegister& vn,
3726 const VRegister& vm,
3727 int index);
3728
3729 // SM3TT2B.
3730 void sm3tt2b(const VRegister& vd,
3731 const VRegister& vn,
3732 const VRegister& vm,
3733 int index);
3734
3735 // SM4 Encode.
3736 void sm4e(const VRegister& vd, const VRegister& vn);
3737
3738 // SM4 Key.
3739 void sm4ekey(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3740
3741 // Scalable Vector Extensions.
3742
3743 // Absolute value (predicated).
3744 void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3745
3746 // Add vectors (predicated).
3747 void add(const ZRegister& zd,
3748 const PRegisterM& pg,
3749 const ZRegister& zn,
3750 const ZRegister& zm);
3751
3752 // Add vectors (unpredicated).
3753 void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3754
3755 // Add immediate (unpredicated).
3756 void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
3757
3758 // Add multiple of predicate register size to scalar register.
3759 void addpl(const Register& xd, const Register& xn, int imm6);
3760
3761 // Add multiple of vector register size to scalar register.
3762 void addvl(const Register& xd, const Register& xn, int imm6);
3763
3764 // Compute vector address.
3765 void adr(const ZRegister& zd, const SVEMemOperand& addr);
3766
3767 // Bitwise AND predicates.
3768 void and_(const PRegisterWithLaneSize& pd,
3769 const PRegisterZ& pg,
3770 const PRegisterWithLaneSize& pn,
3771 const PRegisterWithLaneSize& pm);
3772
3773 // Bitwise AND vectors (predicated).
3774 void and_(const ZRegister& zd,
3775 const PRegisterM& pg,
3776 const ZRegister& zn,
3777 const ZRegister& zm);
3778
3779 // Bitwise AND with immediate (unpredicated).
3780 void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3781
3782 // Bitwise AND vectors (unpredicated).
3783 void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3784
3785 // Bitwise AND predicates.
3786 void ands(const PRegisterWithLaneSize& pd,
3787 const PRegisterZ& pg,
3788 const PRegisterWithLaneSize& pn,
3789 const PRegisterWithLaneSize& pm);
3790
3791 // Bitwise AND reduction to scalar.
3792 void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
3793
3794 // Arithmetic shift right by immediate (predicated).
3795 void asr(const ZRegister& zd,
3796 const PRegisterM& pg,
3797 const ZRegister& zn,
3798 int shift);
3799
3800 // Arithmetic shift right by 64-bit wide elements (predicated).
3801 void asr(const ZRegister& zd,
3802 const PRegisterM& pg,
3803 const ZRegister& zn,
3804 const ZRegister& zm);
3805
3806 // Arithmetic shift right by immediate (unpredicated).
3807 void asr(const ZRegister& zd, const ZRegister& zn, int shift);
3808
3809 // Arithmetic shift right by 64-bit wide elements (unpredicated).
3810 void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3811
3812 // Arithmetic shift right for divide by immediate (predicated).
3813 void asrd(const ZRegister& zd,
3814 const PRegisterM& pg,
3815 const ZRegister& zn,
3816 int shift);
3817
3818 // Reversed arithmetic shift right by vector (predicated).
3819 void asrr(const ZRegister& zd,
3820 const PRegisterM& pg,
3821 const ZRegister& zn,
3822 const ZRegister& zm);
3823
3824 // Bitwise clear predicates.
3825 void bic(const PRegisterWithLaneSize& pd,
3826 const PRegisterZ& pg,
3827 const PRegisterWithLaneSize& pn,
3828 const PRegisterWithLaneSize& pm);
3829
3830 // Bitwise clear vectors (predicated).
3831 void bic(const ZRegister& zd,
3832 const PRegisterM& pg,
3833 const ZRegister& zn,
3834 const ZRegister& zm);
3835
3836 // Bitwise clear bits using immediate (unpredicated).
3837 void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3838
3839 // Bitwise clear vectors (unpredicated).
3840 void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3841
3842 // Bitwise clear predicates.
3843 void bics(const PRegisterWithLaneSize& pd,
3844 const PRegisterZ& pg,
3845 const PRegisterWithLaneSize& pn,
3846 const PRegisterWithLaneSize& pm);
3847
3848 // Break after first true condition.
3849 void brka(const PRegisterWithLaneSize& pd,
3850 const PRegister& pg,
3851 const PRegisterWithLaneSize& pn);
3852
3853 // Break after first true condition.
3854 void brkas(const PRegisterWithLaneSize& pd,
3855 const PRegisterZ& pg,
3856 const PRegisterWithLaneSize& pn);
3857
3858 // Break before first true condition.
3859 void brkb(const PRegisterWithLaneSize& pd,
3860 const PRegister& pg,
3861 const PRegisterWithLaneSize& pn);
3862
3863 // Break before first true condition.
3864 void brkbs(const PRegisterWithLaneSize& pd,
3865 const PRegisterZ& pg,
3866 const PRegisterWithLaneSize& pn);
3867
3868 // Propagate break to next partition.
3869 void brkn(const PRegisterWithLaneSize& pd,
3870 const PRegisterZ& pg,
3871 const PRegisterWithLaneSize& pn,
3872 const PRegisterWithLaneSize& pm);
3873
3874 // Propagate break to next partition.
3875 void brkns(const PRegisterWithLaneSize& pd,
3876 const PRegisterZ& pg,
3877 const PRegisterWithLaneSize& pn,
3878 const PRegisterWithLaneSize& pm);
3879
3880 // Break after first true condition, propagating from previous partition.
3881 void brkpa(const PRegisterWithLaneSize& pd,
3882 const PRegisterZ& pg,
3883 const PRegisterWithLaneSize& pn,
3884 const PRegisterWithLaneSize& pm);
3885
3886 // Break after first true condition, propagating from previous partition.
3887 void brkpas(const PRegisterWithLaneSize& pd,
3888 const PRegisterZ& pg,
3889 const PRegisterWithLaneSize& pn,
3890 const PRegisterWithLaneSize& pm);
3891
3892 // Break before first true condition, propagating from previous partition.
3893 void brkpb(const PRegisterWithLaneSize& pd,
3894 const PRegisterZ& pg,
3895 const PRegisterWithLaneSize& pn,
3896 const PRegisterWithLaneSize& pm);
3897
3898 // Break before first true condition, propagating from previous partition.
3899 void brkpbs(const PRegisterWithLaneSize& pd,
3900 const PRegisterZ& pg,
3901 const PRegisterWithLaneSize& pn,
3902 const PRegisterWithLaneSize& pm);
3903
3904 // Conditionally extract element after last to general-purpose register.
3905 void clasta(const Register& rd,
3906 const PRegister& pg,
3907 const Register& rn,
3908 const ZRegister& zm);
3909
3910 // Conditionally extract element after last to SIMD&FP scalar register.
3911 void clasta(const VRegister& vd,
3912 const PRegister& pg,
3913 const VRegister& vn,
3914 const ZRegister& zm);
3915
3916 // Conditionally extract element after last to vector register.
3917 void clasta(const ZRegister& zd,
3918 const PRegister& pg,
3919 const ZRegister& zn,
3920 const ZRegister& zm);
3921
3922 // Conditionally extract last element to general-purpose register.
3923 void clastb(const Register& rd,
3924 const PRegister& pg,
3925 const Register& rn,
3926 const ZRegister& zm);
3927
3928 // Conditionally extract last element to SIMD&FP scalar register.
3929 void clastb(const VRegister& vd,
3930 const PRegister& pg,
3931 const VRegister& vn,
3932 const ZRegister& zm);
3933
3934 // Conditionally extract last element to vector register.
3935 void clastb(const ZRegister& zd,
3936 const PRegister& pg,
3937 const ZRegister& zn,
3938 const ZRegister& zm);
3939
3940 // Count leading sign bits (predicated).
3941 void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3942
3943 // Count leading zero bits (predicated).
3944 void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3945
  // Compare vectors, with the comparison operation selected by `cond`.
  void cmp(Condition cond,
           const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const ZRegister& zn,
           const ZRegister& zm);
3951
3952 // Compare vector to 64-bit wide elements.
3953 void cmpeq(const PRegisterWithLaneSize& pd,
3954 const PRegisterZ& pg,
3955 const ZRegister& zn,
3956 const ZRegister& zm);
3957
3958 // Compare vector to immediate.
3959 void cmpeq(const PRegisterWithLaneSize& pd,
3960 const PRegisterZ& pg,
3961 const ZRegister& zn,
3962 int imm5);
3963
3964 // Compare vector to 64-bit wide elements.
3965 void cmpge(const PRegisterWithLaneSize& pd,
3966 const PRegisterZ& pg,
3967 const ZRegister& zn,
3968 const ZRegister& zm);
3969
3970 // Compare vector to immediate.
3971 void cmpge(const PRegisterWithLaneSize& pd,
3972 const PRegisterZ& pg,
3973 const ZRegister& zn,
3974 int imm5);
3975
3976 // Compare vector to 64-bit wide elements.
3977 void cmpgt(const PRegisterWithLaneSize& pd,
3978 const PRegisterZ& pg,
3979 const ZRegister& zn,
3980 const ZRegister& zm);
3981
3982 // Compare vector to immediate.
3983 void cmpgt(const PRegisterWithLaneSize& pd,
3984 const PRegisterZ& pg,
3985 const ZRegister& zn,
3986 int imm5);
3987
3988 // Compare vector to 64-bit wide elements.
3989 void cmphi(const PRegisterWithLaneSize& pd,
3990 const PRegisterZ& pg,
3991 const ZRegister& zn,
3992 const ZRegister& zm);
3993
3994 // Compare vector to immediate.
3995 void cmphi(const PRegisterWithLaneSize& pd,
3996 const PRegisterZ& pg,
3997 const ZRegister& zn,
3998 unsigned imm7);
3999
4000 // Compare vector to 64-bit wide elements.
4001 void cmphs(const PRegisterWithLaneSize& pd,
4002 const PRegisterZ& pg,
4003 const ZRegister& zn,
4004 const ZRegister& zm);
4005
4006 // Compare vector to immediate.
4007 void cmphs(const PRegisterWithLaneSize& pd,
4008 const PRegisterZ& pg,
4009 const ZRegister& zn,
4010 unsigned imm7);
4011
4012 // Compare vector to 64-bit wide elements.
4013 void cmple(const PRegisterWithLaneSize& pd,
4014 const PRegisterZ& pg,
4015 const ZRegister& zn,
4016 const ZRegister& zm);
4017
4018 // Compare vector to immediate.
4019 void cmple(const PRegisterWithLaneSize& pd,
4020 const PRegisterZ& pg,
4021 const ZRegister& zn,
4022 int imm5);
4023
4024 // Compare vector to 64-bit wide elements.
4025 void cmplo(const PRegisterWithLaneSize& pd,
4026 const PRegisterZ& pg,
4027 const ZRegister& zn,
4028 const ZRegister& zm);
4029
4030 // Compare vector to immediate.
4031 void cmplo(const PRegisterWithLaneSize& pd,
4032 const PRegisterZ& pg,
4033 const ZRegister& zn,
4034 unsigned imm7);
4035
4036 // Compare vector to 64-bit wide elements.
4037 void cmpls(const PRegisterWithLaneSize& pd,
4038 const PRegisterZ& pg,
4039 const ZRegister& zn,
4040 const ZRegister& zm);
4041
4042 // Compare vector to immediate.
4043 void cmpls(const PRegisterWithLaneSize& pd,
4044 const PRegisterZ& pg,
4045 const ZRegister& zn,
4046 unsigned imm7);
4047
4048 // Compare vector to 64-bit wide elements.
4049 void cmplt(const PRegisterWithLaneSize& pd,
4050 const PRegisterZ& pg,
4051 const ZRegister& zn,
4052 const ZRegister& zm);
4053
4054 // Compare vector to immediate.
4055 void cmplt(const PRegisterWithLaneSize& pd,
4056 const PRegisterZ& pg,
4057 const ZRegister& zn,
4058 int imm5);
4059
4060 // Compare vector to 64-bit wide elements.
4061 void cmpne(const PRegisterWithLaneSize& pd,
4062 const PRegisterZ& pg,
4063 const ZRegister& zn,
4064 const ZRegister& zm);
4065
4066 // Compare vector to immediate.
4067 void cmpne(const PRegisterWithLaneSize& pd,
4068 const PRegisterZ& pg,
4069 const ZRegister& zn,
4070 int imm5);
4071
4072 // Logically invert boolean condition in vector (predicated).
4073 void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4074
4075 // Count non-zero bits (predicated).
4076 void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4077
4078 // Set scalar to multiple of predicate constraint element count.
4079 void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
4080
4081 // Set scalar to multiple of predicate constraint element count.
4082 void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
4083
4084 // Set scalar to multiple of predicate constraint element count.
4085 void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
4086
4087 // Set scalar to active predicate element count.
4088 void cntp(const Register& xd,
4089 const PRegister& pg,
4090 const PRegisterWithLaneSize& pn);
4091
4092 // Set scalar to multiple of predicate constraint element count.
4093 void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
4094
4095 // Shuffle active elements of vector to the right and fill with zero.
4096 void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
4097
4098 // Copy signed integer immediate to vector elements (predicated).
4099 void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
4100
4101 // Copy general-purpose register to vector elements (predicated).
4102 void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
4103
4104 // Copy SIMD&FP scalar register to vector elements (predicated).
4105 void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
4106
4107 // Compare and terminate loop.
4108 void ctermeq(const Register& rn, const Register& rm);
4109
4110 // Compare and terminate loop.
4111 void ctermne(const Register& rn, const Register& rm);
4112
4113 // Decrement scalar by multiple of predicate constraint element count.
4114 void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4115
4116 // Decrement scalar by multiple of predicate constraint element count.
4117 void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4118
4119 // Decrement vector by multiple of predicate constraint element count.
4120 void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4121
4122 // Decrement scalar by multiple of predicate constraint element count.
4123 void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4124
4125 // Decrement vector by multiple of predicate constraint element count.
4126 void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4127
4128 // Decrement scalar by active predicate element count.
4129 void decp(const Register& rdn, const PRegisterWithLaneSize& pg);
4130
4131 // Decrement vector by active predicate element count.
4132 void decp(const ZRegister& zdn, const PRegister& pg);
4133
4134 // Decrement scalar by multiple of predicate constraint element count.
4135 void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4136
4137 // Decrement vector by multiple of predicate constraint element count.
4138 void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4139
4140 // Broadcast general-purpose register to vector elements (unpredicated).
4141 void dup(const ZRegister& zd, const Register& xn);
4142
4143 // Broadcast indexed element to vector (unpredicated).
4144 void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);
4145
4146 // As for movz/movk/movn, if the default shift of -1 is specified to dup, the
4147 // assembler will pick an appropriate immediate and left shift that is
4148 // equivalent to the immediate argument. If an explicit left shift is
4149 // specified (0 or 8), the immediate must be a signed 8-bit integer.
4150
4151 // Broadcast signed immediate to vector elements (unpredicated).
4152 void dup(const ZRegister& zd, int imm8, int shift = -1);
4153
4154 // Broadcast logical bitmask immediate to vector (unpredicated).
4155 void dupm(const ZRegister& zd, uint64_t imm);
4156
4157 // Bitwise exclusive OR with inverted immediate (unpredicated).
4158 void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4159
4160 // Bitwise exclusive OR predicates.
4161 void eor(const PRegisterWithLaneSize& pd,
4162 const PRegisterZ& pg,
4163 const PRegisterWithLaneSize& pn,
4164 const PRegisterWithLaneSize& pm);
4165
4166 // Bitwise exclusive OR vectors (predicated).
4167 void eor(const ZRegister& zd,
4168 const PRegisterM& pg,
4169 const ZRegister& zn,
4170 const ZRegister& zm);
4171
4172 // Bitwise exclusive OR with immediate (unpredicated).
4173 void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4174
4175 // Bitwise exclusive OR vectors (unpredicated).
4176 void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4177
4178 // Bitwise exclusive OR predicates.
4179 void eors(const PRegisterWithLaneSize& pd,
4180 const PRegisterZ& pg,
4181 const PRegisterWithLaneSize& pn,
4182 const PRegisterWithLaneSize& pm);
4183
4184 // Bitwise XOR reduction to scalar.
4185 void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4186
4187 // Extract vector from pair of vectors.
4188 void ext(const ZRegister& zd,
4189 const ZRegister& zn,
4190 const ZRegister& zm,
4191 unsigned offset);
4192
4193 // Floating-point absolute difference (predicated).
4194 void fabd(const ZRegister& zd,
4195 const PRegisterM& pg,
4196 const ZRegister& zn,
4197 const ZRegister& zm);
4198
4199 // Floating-point absolute value (predicated).
4200 void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4201
4202 // Floating-point absolute compare vectors.
4203 void facge(const PRegisterWithLaneSize& pd,
4204 const PRegisterZ& pg,
4205 const ZRegister& zn,
4206 const ZRegister& zm);
4207
4208 // Floating-point absolute compare vectors.
4209 void facgt(const PRegisterWithLaneSize& pd,
4210 const PRegisterZ& pg,
4211 const ZRegister& zn,
4212 const ZRegister& zm);
4213
4214 // Floating-point add immediate (predicated).
4215 void fadd(const ZRegister& zd,
4216 const PRegisterM& pg,
4217 const ZRegister& zn,
4218 double imm);
4219
4220 // Floating-point add vector (predicated).
4221 void fadd(const ZRegister& zd,
4222 const PRegisterM& pg,
4223 const ZRegister& zn,
4224 const ZRegister& zm);
4225
4226 // Floating-point add vector (unpredicated).
4227 void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4228
4229 // Floating-point add strictly-ordered reduction, accumulating in scalar.
4230 void fadda(const VRegister& vd,
4231 const PRegister& pg,
4232 const VRegister& vn,
4233 const ZRegister& zm);
4234
4235 // Floating-point add recursive reduction to scalar.
4236 void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4237
4238 // Floating-point complex add with rotate (predicated).
4239 void fcadd(const ZRegister& zd,
4240 const PRegisterM& pg,
4241 const ZRegister& zn,
4242 const ZRegister& zm,
4243 int rot);
4244
4245 // Floating-point compare vector with zero.
4246 void fcmeq(const PRegisterWithLaneSize& pd,
4247 const PRegisterZ& pg,
4248 const ZRegister& zn,
4249 double zero);
4250
4251 // Floating-point compare vectors.
4252 void fcmeq(const PRegisterWithLaneSize& pd,
4253 const PRegisterZ& pg,
4254 const ZRegister& zn,
4255 const ZRegister& zm);
4256
4257 // Floating-point compare vector with zero.
4258 void fcmge(const PRegisterWithLaneSize& pd,
4259 const PRegisterZ& pg,
4260 const ZRegister& zn,
4261 double zero);
4262
4263 // Floating-point compare vectors.
4264 void fcmge(const PRegisterWithLaneSize& pd,
4265 const PRegisterZ& pg,
4266 const ZRegister& zn,
4267 const ZRegister& zm);
4268
4269 // Floating-point compare vector with zero.
4270 void fcmgt(const PRegisterWithLaneSize& pd,
4271 const PRegisterZ& pg,
4272 const ZRegister& zn,
4273 double zero);
4274
4275 // Floating-point compare vectors.
4276 void fcmgt(const PRegisterWithLaneSize& pd,
4277 const PRegisterZ& pg,
4278 const ZRegister& zn,
4279 const ZRegister& zm);
4280
4281 // Floating-point complex multiply-add with rotate (predicated).
4282 void fcmla(const ZRegister& zda,
4283 const PRegisterM& pg,
4284 const ZRegister& zn,
4285 const ZRegister& zm,
4286 int rot);
4287
4288 // Floating-point complex multiply-add by indexed values with rotate.
4289 void fcmla(const ZRegister& zda,
4290 const ZRegister& zn,
4291 const ZRegister& zm,
4292 int index,
4293 int rot);
4294
4295 // Floating-point compare vector with zero.
4296 void fcmle(const PRegisterWithLaneSize& pd,
4297 const PRegisterZ& pg,
4298 const ZRegister& zn,
4299 double zero);
4300
4301 // Floating-point compare vector with zero.
4302 void fcmlt(const PRegisterWithLaneSize& pd,
4303 const PRegisterZ& pg,
4304 const ZRegister& zn,
4305 double zero);
4306
4307 // Floating-point compare vector with zero.
4308 void fcmne(const PRegisterWithLaneSize& pd,
4309 const PRegisterZ& pg,
4310 const ZRegister& zn,
4311 double zero);
4312
4313 // Floating-point compare vectors.
4314 void fcmne(const PRegisterWithLaneSize& pd,
4315 const PRegisterZ& pg,
4316 const ZRegister& zn,
4317 const ZRegister& zm);
4318
4319 // Floating-point compare vectors.
4320 void fcmuo(const PRegisterWithLaneSize& pd,
4321 const PRegisterZ& pg,
4322 const ZRegister& zn,
4323 const ZRegister& zm);
4324
4325 // Copy floating-point immediate to vector elements (predicated).
4326 void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
4327
4328 // Copy half-precision floating-point immediate to vector elements
4329 // (predicated).
fcpy(const ZRegister & zd,const PRegisterM & pg,Float16 imm)4330 void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
4331 fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
4332 }
4333
4334 // Floating-point convert precision (predicated).
4335 void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4336
4337 // Floating-point convert to signed integer, rounding toward zero
4338 // (predicated).
4339 void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4340
4341 // Floating-point convert to unsigned integer, rounding toward zero
4342 // (predicated).
4343 void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4344
4345 // Floating-point divide by vector (predicated).
4346 void fdiv(const ZRegister& zd,
4347 const PRegisterM& pg,
4348 const ZRegister& zn,
4349 const ZRegister& zm);
4350
4351 // Floating-point reversed divide by vector (predicated).
4352 void fdivr(const ZRegister& zd,
4353 const PRegisterM& pg,
4354 const ZRegister& zn,
4355 const ZRegister& zm);
4356
4357 // Broadcast floating-point immediate to vector elements.
4358 void fdup(const ZRegister& zd, double imm);
4359
4360 // Broadcast half-precision floating-point immediate to vector elements.
fdup(const ZRegister & zd,Float16 imm)4361 void fdup(const ZRegister& zd, Float16 imm) {
4362 fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
4363 }
4364
4365 // Floating-point exponential accelerator.
4366 void fexpa(const ZRegister& zd, const ZRegister& zn);
4367
4368 // Floating-point fused multiply-add vectors (predicated), writing
4369 // multiplicand [Zdn = Za + Zdn * Zm].
4370 void fmad(const ZRegister& zdn,
4371 const PRegisterM& pg,
4372 const ZRegister& zm,
4373 const ZRegister& za);
4374
4375 // Floating-point maximum with immediate (predicated).
4376 void fmax(const ZRegister& zd,
4377 const PRegisterM& pg,
4378 const ZRegister& zn,
4379 double imm);
4380
4381 // Floating-point maximum (predicated).
4382 void fmax(const ZRegister& zd,
4383 const PRegisterM& pg,
4384 const ZRegister& zn,
4385 const ZRegister& zm);
4386
4387 // Floating-point maximum number with immediate (predicated).
4388 void fmaxnm(const ZRegister& zd,
4389 const PRegisterM& pg,
4390 const ZRegister& zn,
4391 double imm);
4392
4393 // Floating-point maximum number (predicated).
4394 void fmaxnm(const ZRegister& zd,
4395 const PRegisterM& pg,
4396 const ZRegister& zn,
4397 const ZRegister& zm);
4398
4399 // Floating-point maximum number recursive reduction to scalar.
4400 void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4401
4402 // Floating-point maximum recursive reduction to scalar.
4403 void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4404
4405 // Floating-point minimum with immediate (predicated).
4406 void fmin(const ZRegister& zd,
4407 const PRegisterM& pg,
4408 const ZRegister& zn,
4409 double imm);
4410
4411 // Floating-point minimum (predicated).
4412 void fmin(const ZRegister& zd,
4413 const PRegisterM& pg,
4414 const ZRegister& zn,
4415 const ZRegister& zm);
4416
4417 // Floating-point minimum number with immediate (predicated).
4418 void fminnm(const ZRegister& zd,
4419 const PRegisterM& pg,
4420 const ZRegister& zn,
4421 double imm);
4422
4423 // Floating-point minimum number (predicated).
4424 void fminnm(const ZRegister& zd,
4425 const PRegisterM& pg,
4426 const ZRegister& zn,
4427 const ZRegister& zm);
4428
4429 // Floating-point minimum number recursive reduction to scalar.
4430 void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4431
4432 // Floating-point minimum recursive reduction to scalar.
4433 void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4434
4435 // Floating-point fused multiply-add vectors (predicated), writing addend
4436 // [Zda = Zda + Zn * Zm].
4437 void fmla(const ZRegister& zda,
4438 const PRegisterM& pg,
4439 const ZRegister& zn,
4440 const ZRegister& zm);
4441
4442 // Floating-point fused multiply-add by indexed elements
4443 // (Zda = Zda + Zn * Zm[indexed]).
4444 void fmla(const ZRegister& zda,
4445 const ZRegister& zn,
4446 const ZRegister& zm,
4447 int index);
4448
4449 // Floating-point fused multiply-subtract vectors (predicated), writing
4450 // addend [Zda = Zda + -Zn * Zm].
4451 void fmls(const ZRegister& zda,
4452 const PRegisterM& pg,
4453 const ZRegister& zn,
4454 const ZRegister& zm);
4455
4456 // Floating-point fused multiply-subtract by indexed elements
4457 // (Zda = Zda + -Zn * Zm[indexed]).
4458 void fmls(const ZRegister& zda,
4459 const ZRegister& zn,
4460 const ZRegister& zm,
4461 int index);
4462
4463 // Move 8-bit floating-point immediate to vector elements (unpredicated).
4464 void fmov(const ZRegister& zd, double imm);
4465
4466 // Move 8-bit floating-point immediate to vector elements (predicated).
4467 void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);
4468
4469 // Floating-point fused multiply-subtract vectors (predicated), writing
4470 // multiplicand [Zdn = Za + -Zdn * Zm].
4471 void fmsb(const ZRegister& zdn,
4472 const PRegisterM& pg,
4473 const ZRegister& zm,
4474 const ZRegister& za);
4475
4476 // Floating-point multiply by immediate (predicated).
4477 void fmul(const ZRegister& zd,
4478 const PRegisterM& pg,
4479 const ZRegister& zn,
4480 double imm);
4481
4482 // Floating-point multiply vectors (predicated).
4483 void fmul(const ZRegister& zd,
4484 const PRegisterM& pg,
4485 const ZRegister& zn,
4486 const ZRegister& zm);
4487
4488 // Floating-point multiply by indexed elements.
4489 void fmul(const ZRegister& zd,
4490 const ZRegister& zn,
4491 const ZRegister& zm,
4492 unsigned index);
4493
4494 // Floating-point multiply vectors (unpredicated).
4495 void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4496
4497 // Floating-point multiply-extended vectors (predicated).
4498 void fmulx(const ZRegister& zd,
4499 const PRegisterM& pg,
4500 const ZRegister& zn,
4501 const ZRegister& zm);
4502
4503 // Floating-point negate (predicated).
4504 void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4505
4506 // Floating-point negated fused multiply-add vectors (predicated), writing
4507 // multiplicand [Zdn = -Za + -Zdn * Zm].
4508 void fnmad(const ZRegister& zdn,
4509 const PRegisterM& pg,
4510 const ZRegister& zm,
4511 const ZRegister& za);
4512
4513 // Floating-point negated fused multiply-add vectors (predicated), writing
4514 // addend [Zda = -Zda + -Zn * Zm].
4515 void fnmla(const ZRegister& zda,
4516 const PRegisterM& pg,
4517 const ZRegister& zn,
4518 const ZRegister& zm);
4519
4520 // Floating-point negated fused multiply-subtract vectors (predicated),
4521 // writing addend [Zda = -Zda + Zn * Zm].
4522 void fnmls(const ZRegister& zda,
4523 const PRegisterM& pg,
4524 const ZRegister& zn,
4525 const ZRegister& zm);
4526
4527 // Floating-point negated fused multiply-subtract vectors (predicated),
4528 // writing multiplicand [Zdn = -Za + Zdn * Zm].
4529 void fnmsb(const ZRegister& zdn,
4530 const PRegisterM& pg,
4531 const ZRegister& zm,
4532 const ZRegister& za);
4533
4534 // Floating-point reciprocal estimate (unpredicated).
4535 void frecpe(const ZRegister& zd, const ZRegister& zn);
4536
4537 // Floating-point reciprocal step (unpredicated).
4538 void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4539
4540 // Floating-point reciprocal exponent (predicated).
4541 void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4542
4543 // Floating-point round to integral value (predicated).
4544 void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4545
4546 // Floating-point round to integral value (predicated).
4547 void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4548
4549 // Floating-point round to integral value (predicated).
4550 void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4551
4552 // Floating-point round to integral value (predicated).
4553 void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4554
4555 // Floating-point round to integral value (predicated).
4556 void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4557
4558 // Floating-point round to integral value (predicated).
4559 void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4560
4561 // Floating-point round to integral value (predicated).
4562 void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4563
4564 // Floating-point reciprocal square root estimate (unpredicated).
4565 void frsqrte(const ZRegister& zd, const ZRegister& zn);
4566
4567 // Floating-point reciprocal square root step (unpredicated).
4568 void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4569
4570 // Floating-point adjust exponent by vector (predicated).
4571 void fscale(const ZRegister& zd,
4572 const PRegisterM& pg,
4573 const ZRegister& zn,
4574 const ZRegister& zm);
4575
4576 // Floating-point square root (predicated).
4577 void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4578
4579 // Floating-point subtract immediate (predicated).
4580 void fsub(const ZRegister& zd,
4581 const PRegisterM& pg,
4582 const ZRegister& zn,
4583 double imm);
4584
4585 // Floating-point subtract vectors (predicated).
4586 void fsub(const ZRegister& zd,
4587 const PRegisterM& pg,
4588 const ZRegister& zn,
4589 const ZRegister& zm);
4590
4591 // Floating-point subtract vectors (unpredicated).
4592 void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4593
4594 // Floating-point reversed subtract from immediate (predicated).
4595 void fsubr(const ZRegister& zd,
4596 const PRegisterM& pg,
4597 const ZRegister& zn,
4598 double imm);
4599
4600 // Floating-point reversed subtract vectors (predicated).
4601 void fsubr(const ZRegister& zd,
4602 const PRegisterM& pg,
4603 const ZRegister& zn,
4604 const ZRegister& zm);
4605
4606 // Floating-point trigonometric multiply-add coefficient.
4607 void ftmad(const ZRegister& zd,
4608 const ZRegister& zn,
4609 const ZRegister& zm,
4610 int imm3);
4611
4612 // Floating-point trigonometric starting value.
4613 void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4614
4615 // Floating-point trigonometric select coefficient.
4616 void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4617
4618 // Increment scalar by multiple of predicate constraint element count.
4619 void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4620
4621 // Increment scalar by multiple of predicate constraint element count.
4622 void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4623
4624 // Increment vector by multiple of predicate constraint element count.
4625 void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4626
4627 // Increment scalar by multiple of predicate constraint element count.
4628 void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4629
4630 // Increment vector by multiple of predicate constraint element count.
4631 void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4632
4633 // Increment scalar by active predicate element count.
4634 void incp(const Register& rdn, const PRegisterWithLaneSize& pg);
4635
4636 // Increment vector by active predicate element count.
4637 void incp(const ZRegister& zdn, const PRegister& pg);
4638
4639 // Increment scalar by multiple of predicate constraint element count.
4640 void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4641
4642 // Increment vector by multiple of predicate constraint element count.
4643 void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4644
4645 // Create index starting from and incremented by immediate.
4646 void index(const ZRegister& zd, int start, int step);
4647
4648 // Create index starting from and incremented by general-purpose register.
4649 void index(const ZRegister& zd, const Register& rn, const Register& rm);
4650
4651 // Create index starting from general-purpose register and incremented by
4652 // immediate.
4653 void index(const ZRegister& zd, const Register& rn, int imm5);
4654
4655 // Create index starting from immediate and incremented by general-purpose
4656 // register.
4657 void index(const ZRegister& zd, int imm5, const Register& rm);
4658
4659 // Insert general-purpose register in shifted vector.
4660 void insr(const ZRegister& zdn, const Register& rm);
4661
4662 // Insert SIMD&FP scalar register in shifted vector.
4663 void insr(const ZRegister& zdn, const VRegister& vm);
4664
4665 // Extract element after last to general-purpose register.
4666 void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);
4667
4668 // Extract element after last to SIMD&FP scalar register.
4669 void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4670
4671 // Extract last element to general-purpose register.
4672 void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);
4673
4674 // Extract last element to SIMD&FP scalar register.
4675 void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4676
4677 // Contiguous/gather load bytes to vector.
4678 void ld1b(const ZRegister& zt,
4679 const PRegisterZ& pg,
4680 const SVEMemOperand& addr);
4681
4682 // Contiguous/gather load halfwords to vector.
4683 void ld1h(const ZRegister& zt,
4684 const PRegisterZ& pg,
4685 const SVEMemOperand& addr);
4686
4687 // Contiguous/gather load words to vector.
4688 void ld1w(const ZRegister& zt,
4689 const PRegisterZ& pg,
4690 const SVEMemOperand& addr);
4691
4692 // Contiguous/gather load doublewords to vector.
4693 void ld1d(const ZRegister& zt,
4694 const PRegisterZ& pg,
4695 const SVEMemOperand& addr);
4696
4697 // TODO: Merge other loads into the SVEMemOperand versions.
4698
4699 // Load and broadcast unsigned byte to vector.
4700 void ld1rb(const ZRegister& zt,
4701 const PRegisterZ& pg,
4702 const SVEMemOperand& addr);
4703
4704 // Load and broadcast unsigned halfword to vector.
4705 void ld1rh(const ZRegister& zt,
4706 const PRegisterZ& pg,
4707 const SVEMemOperand& addr);
4708
4709 // Load and broadcast unsigned word to vector.
4710 void ld1rw(const ZRegister& zt,
4711 const PRegisterZ& pg,
4712 const SVEMemOperand& addr);
4713
4714 // Load and broadcast doubleword to vector.
4715 void ld1rd(const ZRegister& zt,
4716 const PRegisterZ& pg,
4717 const SVEMemOperand& addr);
4718
4719 // Contiguous load and replicate sixteen bytes.
4720 void ld1rqb(const ZRegister& zt,
4721 const PRegisterZ& pg,
4722 const SVEMemOperand& addr);
4723
4724 // Contiguous load and replicate eight halfwords.
4725 void ld1rqh(const ZRegister& zt,
4726 const PRegisterZ& pg,
4727 const SVEMemOperand& addr);
4728
4729 // Contiguous load and replicate four words.
4730 void ld1rqw(const ZRegister& zt,
4731 const PRegisterZ& pg,
4732 const SVEMemOperand& addr);
4733
4734 // Contiguous load and replicate two doublewords.
4735 void ld1rqd(const ZRegister& zt,
4736 const PRegisterZ& pg,
4737 const SVEMemOperand& addr);
4738
4739 // Contiguous load and replicate thirty-two bytes.
4740 void ld1rob(const ZRegister& zt,
4741 const PRegisterZ& pg,
4742 const SVEMemOperand& addr);
4743
4744 // Contiguous load and replicate sixteen halfwords.
4745 void ld1roh(const ZRegister& zt,
4746 const PRegisterZ& pg,
4747 const SVEMemOperand& addr);
4748
4749 // Contiguous load and replicate eight words.
4750 void ld1row(const ZRegister& zt,
4751 const PRegisterZ& pg,
4752 const SVEMemOperand& addr);
4753
4754 // Contiguous load and replicate four doublewords.
4755 void ld1rod(const ZRegister& zt,
4756 const PRegisterZ& pg,
4757 const SVEMemOperand& addr);
4758
4759 // Load and broadcast signed byte to vector.
4760 void ld1rsb(const ZRegister& zt,
4761 const PRegisterZ& pg,
4762 const SVEMemOperand& addr);
4763
4764 // Load and broadcast signed halfword to vector.
4765 void ld1rsh(const ZRegister& zt,
4766 const PRegisterZ& pg,
4767 const SVEMemOperand& addr);
4768
4769 // Load and broadcast signed word to vector.
4770 void ld1rsw(const ZRegister& zt,
4771 const PRegisterZ& pg,
4772 const SVEMemOperand& addr);
4773
4774 // Contiguous/gather load signed bytes to vector.
4775 void ld1sb(const ZRegister& zt,
4776 const PRegisterZ& pg,
4777 const SVEMemOperand& addr);
4778
4779 // Contiguous/gather load signed halfwords to vector.
4780 void ld1sh(const ZRegister& zt,
4781 const PRegisterZ& pg,
4782 const SVEMemOperand& addr);
4783
4784 // Contiguous/gather load signed words to vector.
4785 void ld1sw(const ZRegister& zt,
4786 const PRegisterZ& pg,
4787 const SVEMemOperand& addr);
4788
4789 // TODO: Merge other loads into the SVEMemOperand versions.
4790
4791 // Contiguous load two-byte structures to two vectors.
4792 void ld2b(const ZRegister& zt1,
4793 const ZRegister& zt2,
4794 const PRegisterZ& pg,
4795 const SVEMemOperand& addr);
4796
4797 // Contiguous load two-halfword structures to two vectors.
4798 void ld2h(const ZRegister& zt1,
4799 const ZRegister& zt2,
4800 const PRegisterZ& pg,
4801 const SVEMemOperand& addr);
4802
4803 // Contiguous load two-word structures to two vectors.
4804 void ld2w(const ZRegister& zt1,
4805 const ZRegister& zt2,
4806 const PRegisterZ& pg,
4807 const SVEMemOperand& addr);
4808
4809 // Contiguous load two-doubleword structures to two vectors.
4810 void ld2d(const ZRegister& zt1,
4811 const ZRegister& zt2,
4812 const PRegisterZ& pg,
4813 const SVEMemOperand& addr);
4814
4815 // Contiguous load three-byte structures to three vectors.
4816 void ld3b(const ZRegister& zt1,
4817 const ZRegister& zt2,
4818 const ZRegister& zt3,
4819 const PRegisterZ& pg,
4820 const SVEMemOperand& addr);
4821
4822 // Contiguous load three-halfword structures to three vectors.
4823 void ld3h(const ZRegister& zt1,
4824 const ZRegister& zt2,
4825 const ZRegister& zt3,
4826 const PRegisterZ& pg,
4827 const SVEMemOperand& addr);
4828
4829 // Contiguous load three-word structures to three vectors.
4830 void ld3w(const ZRegister& zt1,
4831 const ZRegister& zt2,
4832 const ZRegister& zt3,
4833 const PRegisterZ& pg,
4834 const SVEMemOperand& addr);
4835
4836 // Contiguous load three-doubleword structures to three vectors.
4837 void ld3d(const ZRegister& zt1,
4838 const ZRegister& zt2,
4839 const ZRegister& zt3,
4840 const PRegisterZ& pg,
4841 const SVEMemOperand& addr);
4842
4843 // Contiguous load four-byte structures to four vectors.
4844 void ld4b(const ZRegister& zt1,
4845 const ZRegister& zt2,
4846 const ZRegister& zt3,
4847 const ZRegister& zt4,
4848 const PRegisterZ& pg,
4849 const SVEMemOperand& addr);
4850
4851 // Contiguous load four-halfword structures to four vectors.
4852 void ld4h(const ZRegister& zt1,
4853 const ZRegister& zt2,
4854 const ZRegister& zt3,
4855 const ZRegister& zt4,
4856 const PRegisterZ& pg,
4857 const SVEMemOperand& addr);
4858
4859 // Contiguous load four-word structures to four vectors.
4860 void ld4w(const ZRegister& zt1,
4861 const ZRegister& zt2,
4862 const ZRegister& zt3,
4863 const ZRegister& zt4,
4864 const PRegisterZ& pg,
4865 const SVEMemOperand& addr);
4866
4867 // Contiguous load four-doubleword structures to four vectors.
4868 void ld4d(const ZRegister& zt1,
4869 const ZRegister& zt2,
4870 const ZRegister& zt3,
4871 const ZRegister& zt4,
4872 const PRegisterZ& pg,
4873 const SVEMemOperand& addr);
4874
4875 // Contiguous load first-fault unsigned bytes to vector.
4876 void ldff1b(const ZRegister& zt,
4877 const PRegisterZ& pg,
4878 const SVEMemOperand& addr);
4879
4880 // Contiguous load first-fault unsigned halfwords to vector.
4881 void ldff1h(const ZRegister& zt,
4882 const PRegisterZ& pg,
4883 const SVEMemOperand& addr);
4884
4885 // Contiguous load first-fault unsigned words to vector.
4886 void ldff1w(const ZRegister& zt,
4887 const PRegisterZ& pg,
4888 const SVEMemOperand& addr);
4889
4890 // Contiguous load first-fault doublewords to vector.
4891 void ldff1d(const ZRegister& zt,
4892 const PRegisterZ& pg,
4893 const SVEMemOperand& addr);
4894
4895 // Contiguous load first-fault signed bytes to vector.
4896 void ldff1sb(const ZRegister& zt,
4897 const PRegisterZ& pg,
4898 const SVEMemOperand& addr);
4899
4900 // Contiguous load first-fault signed halfwords to vector.
4901 void ldff1sh(const ZRegister& zt,
4902 const PRegisterZ& pg,
4903 const SVEMemOperand& addr);
4904
4905 // Contiguous load first-fault signed words to vector.
4906 void ldff1sw(const ZRegister& zt,
4907 const PRegisterZ& pg,
4908 const SVEMemOperand& addr);
4909
4910 // Gather load first-fault unsigned bytes to vector.
4911 void ldff1b(const ZRegister& zt,
4912 const PRegisterZ& pg,
4913 const Register& xn,
4914 const ZRegister& zm);
4915
4916 // Gather load first-fault unsigned bytes to vector (immediate index).
4917 void ldff1b(const ZRegister& zt,
4918 const PRegisterZ& pg,
4919 const ZRegister& zn,
4920 int imm5);
4921
4922 // Gather load first-fault doublewords to vector (vector index).
4923 void ldff1d(const ZRegister& zt,
4924 const PRegisterZ& pg,
4925 const Register& xn,
4926 const ZRegister& zm);
4927
4928 // Gather load first-fault doublewords to vector (immediate index).
4929 void ldff1d(const ZRegister& zt,
4930 const PRegisterZ& pg,
4931 const ZRegister& zn,
4932 int imm5);
4933
4934 // Gather load first-fault unsigned halfwords to vector (vector index).
4935 void ldff1h(const ZRegister& zt,
4936 const PRegisterZ& pg,
4937 const Register& xn,
4938 const ZRegister& zm);
4939
4940 // Gather load first-fault unsigned halfwords to vector (immediate index).
4941 void ldff1h(const ZRegister& zt,
4942 const PRegisterZ& pg,
4943 const ZRegister& zn,
4944 int imm5);
4945
4946 // Gather load first-fault signed bytes to vector (vector index).
4947 void ldff1sb(const ZRegister& zt,
4948 const PRegisterZ& pg,
4949 const Register& xn,
4950 const ZRegister& zm);
4951
4952 // Gather load first-fault signed bytes to vector (immediate index).
4953 void ldff1sb(const ZRegister& zt,
4954 const PRegisterZ& pg,
4955 const ZRegister& zn,
4956 int imm5);
4957
4958 // Gather load first-fault signed halfwords to vector (vector index).
4959 void ldff1sh(const ZRegister& zt,
4960 const PRegisterZ& pg,
4961 const Register& xn,
4962 const ZRegister& zm);
4963
4964 // Gather load first-fault signed halfwords to vector (immediate index).
4965 void ldff1sh(const ZRegister& zt,
4966 const PRegisterZ& pg,
4967 const ZRegister& zn,
4968 int imm5);
4969
4970 // Gather load first-fault signed words to vector (vector index).
4971 void ldff1sw(const ZRegister& zt,
4972 const PRegisterZ& pg,
4973 const Register& xn,
4974 const ZRegister& zm);
4975
4976 // Gather load first-fault signed words to vector (immediate index).
4977 void ldff1sw(const ZRegister& zt,
4978 const PRegisterZ& pg,
4979 const ZRegister& zn,
4980 int imm5);
4981
4982 // Gather load first-fault unsigned words to vector (vector index).
4983 void ldff1w(const ZRegister& zt,
4984 const PRegisterZ& pg,
4985 const Register& xn,
4986 const ZRegister& zm);
4987
4988 // Gather load first-fault unsigned words to vector (immediate index).
4989 void ldff1w(const ZRegister& zt,
4990 const PRegisterZ& pg,
4991 const ZRegister& zn,
4992 int imm5);
4993
4994 // Contiguous load non-fault unsigned bytes to vector (immediate index).
4995 void ldnf1b(const ZRegister& zt,
4996 const PRegisterZ& pg,
4997 const SVEMemOperand& addr);
4998
4999 // Contiguous load non-fault doublewords to vector (immediate index).
5000 void ldnf1d(const ZRegister& zt,
5001 const PRegisterZ& pg,
5002 const SVEMemOperand& addr);
5003
5004 // Contiguous load non-fault unsigned halfwords to vector (immediate
5005 // index).
5006 void ldnf1h(const ZRegister& zt,
5007 const PRegisterZ& pg,
5008 const SVEMemOperand& addr);
5009
5010 // Contiguous load non-fault signed bytes to vector (immediate index).
5011 void ldnf1sb(const ZRegister& zt,
5012 const PRegisterZ& pg,
5013 const SVEMemOperand& addr);
5014
5015 // Contiguous load non-fault signed halfwords to vector (immediate index).
5016 void ldnf1sh(const ZRegister& zt,
5017 const PRegisterZ& pg,
5018 const SVEMemOperand& addr);
5019
5020 // Contiguous load non-fault signed words to vector (immediate index).
5021 void ldnf1sw(const ZRegister& zt,
5022 const PRegisterZ& pg,
5023 const SVEMemOperand& addr);
5024
5025 // Contiguous load non-fault unsigned words to vector (immediate index).
5026 void ldnf1w(const ZRegister& zt,
5027 const PRegisterZ& pg,
5028 const SVEMemOperand& addr);
5029
5030 // Contiguous load non-temporal bytes to vector.
5031 void ldnt1b(const ZRegister& zt,
5032 const PRegisterZ& pg,
5033 const SVEMemOperand& addr);
5034
5035 // Contiguous load non-temporal halfwords to vector.
5036 void ldnt1h(const ZRegister& zt,
5037 const PRegisterZ& pg,
5038 const SVEMemOperand& addr);
5039
5040 // Contiguous load non-temporal words to vector.
5041 void ldnt1w(const ZRegister& zt,
5042 const PRegisterZ& pg,
5043 const SVEMemOperand& addr);
5044
5045 // Contiguous load non-temporal doublewords to vector.
5046 void ldnt1d(const ZRegister& zt,
5047 const PRegisterZ& pg,
5048 const SVEMemOperand& addr);
5049
5050 // Load SVE predicate/vector register.
5051 void ldr(const CPURegister& rt, const SVEMemOperand& addr);
5052
5053 // Logical shift left by immediate (predicated).
5054 void lsl(const ZRegister& zd,
5055 const PRegisterM& pg,
5056 const ZRegister& zn,
5057 int shift);
5058
5059 // Logical shift left by 64-bit wide elements (predicated).
5060 void lsl(const ZRegister& zd,
5061 const PRegisterM& pg,
5062 const ZRegister& zn,
5063 const ZRegister& zm);
5064
5065 // Logical shift left by immediate (unpredicated).
5066 void lsl(const ZRegister& zd, const ZRegister& zn, int shift);
5067
5068 // Logical shift left by 64-bit wide elements (unpredicated).
5069 void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5070
5071 // Reversed logical shift left by vector (predicated).
5072 void lslr(const ZRegister& zd,
5073 const PRegisterM& pg,
5074 const ZRegister& zn,
5075 const ZRegister& zm);
5076
5077 // Logical shift right by immediate (predicated).
5078 void lsr(const ZRegister& zd,
5079 const PRegisterM& pg,
5080 const ZRegister& zn,
5081 int shift);
5082
5083 // Logical shift right by 64-bit wide elements (predicated).
5084 void lsr(const ZRegister& zd,
5085 const PRegisterM& pg,
5086 const ZRegister& zn,
5087 const ZRegister& zm);
5088
5089 // Logical shift right by immediate (unpredicated).
5090 void lsr(const ZRegister& zd, const ZRegister& zn, int shift);
5091
5092 // Logical shift right by 64-bit wide elements (unpredicated).
5093 void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5094
5095 // Reversed logical shift right by vector (predicated).
5096 void lsrr(const ZRegister& zd,
5097 const PRegisterM& pg,
5098 const ZRegister& zn,
5099 const ZRegister& zm);
5100
5101 // Bitwise invert predicate.
5102 void not_(const PRegisterWithLaneSize& pd,
5103 const PRegisterZ& pg,
5104 const PRegisterWithLaneSize& pn);
5105
5106 // Bitwise invert predicate, setting the condition flags.
5107 void nots(const PRegisterWithLaneSize& pd,
5108 const PRegisterZ& pg,
5109 const PRegisterWithLaneSize& pn);
5110
5111 // Multiply-add vectors (predicated), writing multiplicand
5112 // [Zdn = Za + Zdn * Zm].
5113 void mad(const ZRegister& zdn,
5114 const PRegisterM& pg,
5115 const ZRegister& zm,
5116 const ZRegister& za);
5117
5118 // Multiply-add vectors (predicated), writing addend
5119 // [Zda = Zda + Zn * Zm].
5120 void mla(const ZRegister& zda,
5121 const PRegisterM& pg,
5122 const ZRegister& zn,
5123 const ZRegister& zm);
5124
5125 // Multiply-subtract vectors (predicated), writing addend
5126 // [Zda = Zda - Zn * Zm].
5127 void mls(const ZRegister& zda,
5128 const PRegisterM& pg,
5129 const ZRegister& zn,
5130 const ZRegister& zm);
5131
5132 // Move predicates (unpredicated)
5133 void mov(const PRegister& pd, const PRegister& pn);
5134
5135 // Move predicates (merging)
5136 void mov(const PRegisterWithLaneSize& pd,
5137 const PRegisterM& pg,
5138 const PRegisterWithLaneSize& pn);
5139
5140 // Move predicates (zeroing)
5141 void mov(const PRegisterWithLaneSize& pd,
5142 const PRegisterZ& pg,
5143 const PRegisterWithLaneSize& pn);
5144
5145 // Move general-purpose register to vector elements (unpredicated)
5146 void mov(const ZRegister& zd, const Register& xn);
5147
5148 // Move SIMD&FP scalar register to vector elements (unpredicated)
5149 void mov(const ZRegister& zd, const VRegister& vn);
5150
5151 // Move vector register (unpredicated)
5152 void mov(const ZRegister& zd, const ZRegister& zn);
5153
5154 // Move indexed element to vector elements (unpredicated)
5155 void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);
5156
5157 // Move general-purpose register to vector elements (predicated)
5158 void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
5159
5160 // Move SIMD&FP scalar register to vector elements (predicated)
5161 void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
5162
5163 // Move vector elements (predicated)
5164 void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5165
5166 // Move signed integer immediate to vector elements (predicated)
5167 void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
5168
5169 // Move signed immediate to vector elements (unpredicated).
5170 void mov(const ZRegister& zd, int imm8, int shift);
5171
5172 // Move logical bitmask immediate to vector (unpredicated).
5173 void mov(const ZRegister& zd, uint64_t imm);
5174
5175 // Move predicate (unpredicated), setting the condition flags
5176 void movs(const PRegister& pd, const PRegister& pn);
5177
5178 // Move predicates (zeroing), setting the condition flags
5179 void movs(const PRegisterWithLaneSize& pd,
5180 const PRegisterZ& pg,
5181 const PRegisterWithLaneSize& pn);
5182
5183 // Move prefix (predicated).
5184 void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
5185
5186 // Move prefix (unpredicated).
5187 void movprfx(const ZRegister& zd, const ZRegister& zn);
5188
5189 // Multiply-subtract vectors (predicated), writing multiplicand
5190 // [Zdn = Za - Zdn * Zm].
5191 void msb(const ZRegister& zdn,
5192 const PRegisterM& pg,
5193 const ZRegister& zm,
5194 const ZRegister& za);
5195
5196 // Multiply vectors (predicated).
5197 void mul(const ZRegister& zd,
5198 const PRegisterM& pg,
5199 const ZRegister& zn,
5200 const ZRegister& zm);
5201
5202 // Multiply by immediate (unpredicated).
5203 void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
5204
5205 // Bitwise NAND predicates.
5206 void nand(const PRegisterWithLaneSize& pd,
5207 const PRegisterZ& pg,
5208 const PRegisterWithLaneSize& pn,
5209 const PRegisterWithLaneSize& pm);
5210
  // Bitwise NAND predicates, setting the condition flags.
5212 void nands(const PRegisterWithLaneSize& pd,
5213 const PRegisterZ& pg,
5214 const PRegisterWithLaneSize& pn,
5215 const PRegisterWithLaneSize& pm);
5216
5217 // Negate (predicated).
5218 void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5219
5220 // Bitwise NOR predicates.
5221 void nor(const PRegisterWithLaneSize& pd,
5222 const PRegisterZ& pg,
5223 const PRegisterWithLaneSize& pn,
5224 const PRegisterWithLaneSize& pm);
5225
  // Bitwise NOR predicates, setting the condition flags.
5227 void nors(const PRegisterWithLaneSize& pd,
5228 const PRegisterZ& pg,
5229 const PRegisterWithLaneSize& pn,
5230 const PRegisterWithLaneSize& pm);
5231
5232 // Bitwise invert vector (predicated).
5233 void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5234
5235 // Bitwise OR inverted predicate.
5236 void orn(const PRegisterWithLaneSize& pd,
5237 const PRegisterZ& pg,
5238 const PRegisterWithLaneSize& pn,
5239 const PRegisterWithLaneSize& pm);
5240
  // Bitwise OR inverted predicate, setting the condition flags.
5242 void orns(const PRegisterWithLaneSize& pd,
5243 const PRegisterZ& pg,
5244 const PRegisterWithLaneSize& pn,
5245 const PRegisterWithLaneSize& pm);
5246
5247 // Bitwise OR with inverted immediate (unpredicated).
5248 void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5249
5250 // Bitwise OR predicate.
5251 void orr(const PRegisterWithLaneSize& pd,
5252 const PRegisterZ& pg,
5253 const PRegisterWithLaneSize& pn,
5254 const PRegisterWithLaneSize& pm);
5255
5256 // Bitwise OR vectors (predicated).
5257 void orr(const ZRegister& zd,
5258 const PRegisterM& pg,
5259 const ZRegister& zn,
5260 const ZRegister& zm);
5261
5262 // Bitwise OR with immediate (unpredicated).
5263 void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5264
5265 // Bitwise OR vectors (unpredicated).
5266 void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5267
  // Bitwise OR predicate, setting the condition flags.
5269 void orrs(const PRegisterWithLaneSize& pd,
5270 const PRegisterZ& pg,
5271 const PRegisterWithLaneSize& pn,
5272 const PRegisterWithLaneSize& pm);
5273
5274 // Bitwise OR reduction to scalar.
5275 void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5276
5277 // Set all predicate elements to false.
5278 void pfalse(const PRegisterWithLaneSize& pd);
5279
5280 // Set the first active predicate element to true.
5281 void pfirst(const PRegisterWithLaneSize& pd,
5282 const PRegister& pg,
5283 const PRegisterWithLaneSize& pn);
5284
5285 // Find next active predicate.
5286 void pnext(const PRegisterWithLaneSize& pd,
5287 const PRegister& pg,
5288 const PRegisterWithLaneSize& pn);
5289
5290 // Prefetch bytes.
5291 void prfb(PrefetchOperation prfop,
5292 const PRegister& pg,
5293 const SVEMemOperand& addr);
5294
5295 // Prefetch halfwords.
5296 void prfh(PrefetchOperation prfop,
5297 const PRegister& pg,
5298 const SVEMemOperand& addr);
5299
5300 // Prefetch words.
5301 void prfw(PrefetchOperation prfop,
5302 const PRegister& pg,
5303 const SVEMemOperand& addr);
5304
5305 // Prefetch doublewords.
5306 void prfd(PrefetchOperation prfop,
5307 const PRegister& pg,
5308 const SVEMemOperand& addr);
5309
5310 // Set condition flags for predicate.
5311 void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);
5312
5313 // Initialise predicate from named constraint.
5314 void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5315
5316 // Initialise predicate from named constraint.
5317 void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5318
5319 // Unpack and widen half of predicate.
5320 void punpkhi(const PRegisterWithLaneSize& pd,
5321 const PRegisterWithLaneSize& pn);
5322
5323 // Unpack and widen half of predicate.
5324 void punpklo(const PRegisterWithLaneSize& pd,
5325 const PRegisterWithLaneSize& pn);
5326
5327 // Reverse bits (predicated).
5328 void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5329
5330 // Read the first-fault register.
5331 void rdffr(const PRegisterWithLaneSize& pd);
5332
  // Return predicate of successfully loaded elements.
5334 void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5335
  // Return predicate of successfully loaded elements.
5337 void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5338
5339 // Read multiple of vector register size to scalar register.
5340 void rdvl(const Register& xd, int imm6);
5341
5342 // Reverse all elements in a predicate.
5343 void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);
5344
5345 // Reverse all elements in a vector (unpredicated).
5346 void rev(const ZRegister& zd, const ZRegister& zn);
5347
5348 // Reverse bytes / halfwords / words within elements (predicated).
5349 void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5350
5351 // Reverse bytes / halfwords / words within elements (predicated).
5352 void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5353
5354 // Reverse bytes / halfwords / words within elements (predicated).
5355 void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5356
5357 // Signed absolute difference (predicated).
5358 void sabd(const ZRegister& zd,
5359 const PRegisterM& pg,
5360 const ZRegister& zn,
5361 const ZRegister& zm);
5362
5363 // Signed add reduction to scalar.
5364 void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5365
5366 // Signed integer convert to floating-point (predicated).
5367 void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5368
5369 // Signed divide (predicated).
5370 void sdiv(const ZRegister& zd,
5371 const PRegisterM& pg,
5372 const ZRegister& zn,
5373 const ZRegister& zm);
5374
5375 // Signed reversed divide (predicated).
5376 void sdivr(const ZRegister& zd,
5377 const PRegisterM& pg,
5378 const ZRegister& zn,
5379 const ZRegister& zm);
5380
5381 // Signed dot product by indexed quadtuplet.
5382 void sdot(const ZRegister& zda,
5383 const ZRegister& zn,
5384 const ZRegister& zm,
5385 int index);
5386
5387 // Signed dot product.
5388 void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5389
5390 // Conditionally select elements from two predicates.
5391 void sel(const PRegisterWithLaneSize& pd,
5392 const PRegister& pg,
5393 const PRegisterWithLaneSize& pn,
5394 const PRegisterWithLaneSize& pm);
5395
5396 // Conditionally select elements from two vectors.
5397 void sel(const ZRegister& zd,
5398 const PRegister& pg,
5399 const ZRegister& zn,
5400 const ZRegister& zm);
5401
5402 // Initialise the first-fault register to all true.
5403 void setffr();
5404
5405 // Signed maximum vectors (predicated).
5406 void smax(const ZRegister& zd,
5407 const PRegisterM& pg,
5408 const ZRegister& zn,
5409 const ZRegister& zm);
5410
5411 // Signed maximum with immediate (unpredicated).
5412 void smax(const ZRegister& zd, const ZRegister& zn, int imm8);
5413
5414 // Signed maximum reduction to scalar.
5415 void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5416
5417 // Signed minimum vectors (predicated).
5418 void smin(const ZRegister& zd,
5419 const PRegisterM& pg,
5420 const ZRegister& zn,
5421 const ZRegister& zm);
5422
5423 // Signed minimum with immediate (unpredicated).
5424 void smin(const ZRegister& zd, const ZRegister& zn, int imm8);
5425
5426 // Signed minimum reduction to scalar.
5427 void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5428
5429 // Signed multiply returning high half (predicated).
5430 void smulh(const ZRegister& zd,
5431 const PRegisterM& pg,
5432 const ZRegister& zn,
5433 const ZRegister& zm);
5434
5435 // Splice two vectors under predicate control.
5436 void splice(const ZRegister& zd,
5437 const PRegister& pg,
5438 const ZRegister& zn,
5439 const ZRegister& zm);
5440
5441 // Splice two vectors under predicate control (constructive).
5442 void splice_con(const ZRegister& zd,
5443 const PRegister& pg,
5444 const ZRegister& zn,
5445 const ZRegister& zm);
5446
5447 // Signed saturating add vectors (unpredicated).
5448 void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5449
5450 // Signed saturating add immediate (unpredicated).
5451 void sqadd(const ZRegister& zd,
5452 const ZRegister& zn,
5453 int imm8,
5454 int shift = -1);
5455
5456 // Signed saturating decrement scalar by multiple of 8-bit predicate
5457 // constraint element count.
5458 void sqdecb(const Register& xd,
5459 const Register& wn,
5460 int pattern,
5461 int multiplier);
5462
5463 // Signed saturating decrement scalar by multiple of 8-bit predicate
5464 // constraint element count.
5465 void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5466
5467 // Signed saturating decrement scalar by multiple of 64-bit predicate
5468 // constraint element count.
5469 void sqdecd(const Register& xd,
5470 const Register& wn,
5471 int pattern = SVE_ALL,
5472 int multiplier = 1);
5473
5474 // Signed saturating decrement scalar by multiple of 64-bit predicate
5475 // constraint element count.
5476 void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5477
5478 // Signed saturating decrement vector by multiple of 64-bit predicate
5479 // constraint element count.
5480 void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5481
5482 // Signed saturating decrement scalar by multiple of 16-bit predicate
5483 // constraint element count.
5484 void sqdech(const Register& xd,
5485 const Register& wn,
5486 int pattern = SVE_ALL,
5487 int multiplier = 1);
5488
5489 // Signed saturating decrement scalar by multiple of 16-bit predicate
5490 // constraint element count.
5491 void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5492
5493 // Signed saturating decrement vector by multiple of 16-bit predicate
5494 // constraint element count.
5495 void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5496
5497 // Signed saturating decrement scalar by active predicate element count.
5498 void sqdecp(const Register& xd,
5499 const PRegisterWithLaneSize& pg,
5500 const Register& wn);
5501
5502 // Signed saturating decrement scalar by active predicate element count.
5503 void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);
5504
5505 // Signed saturating decrement vector by active predicate element count.
5506 void sqdecp(const ZRegister& zdn, const PRegister& pg);
5507
5508 // Signed saturating decrement scalar by multiple of 32-bit predicate
5509 // constraint element count.
5510 void sqdecw(const Register& xd,
5511 const Register& wn,
5512 int pattern = SVE_ALL,
5513 int multiplier = 1);
5514
5515 // Signed saturating decrement scalar by multiple of 32-bit predicate
5516 // constraint element count.
5517 void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5518
5519 // Signed saturating decrement vector by multiple of 32-bit predicate
5520 // constraint element count.
5521 void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5522
5523 // Signed saturating increment scalar by multiple of 8-bit predicate
5524 // constraint element count.
5525 void sqincb(const Register& xd,
5526 const Register& wn,
5527 int pattern = SVE_ALL,
5528 int multiplier = 1);
5529
5530 // Signed saturating increment scalar by multiple of 8-bit predicate
5531 // constraint element count.
5532 void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5533
5534 // Signed saturating increment scalar by multiple of 64-bit predicate
5535 // constraint element count.
5536 void sqincd(const Register& xd,
5537 const Register& wn,
5538 int pattern,
5539 int multiplier);
5540
5541 // Signed saturating increment scalar by multiple of 64-bit predicate
5542 // constraint element count.
5543 void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5544
5545 // Signed saturating increment vector by multiple of 64-bit predicate
5546 // constraint element count.
5547 void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5548
5549 // Signed saturating increment scalar by multiple of 16-bit predicate
5550 // constraint element count.
5551 void sqinch(const Register& xd,
5552 const Register& wn,
5553 int pattern = SVE_ALL,
5554 int multiplier = 1);
5555
5556 // Signed saturating increment scalar by multiple of 16-bit predicate
5557 // constraint element count.
5558 void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5559
5560 // Signed saturating increment vector by multiple of 16-bit predicate
5561 // constraint element count.
5562 void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5563
5564 // Signed saturating increment scalar by active predicate element count.
5565 void sqincp(const Register& xd,
5566 const PRegisterWithLaneSize& pg,
5567 const Register& wn);
5568
5569 // Signed saturating increment scalar by active predicate element count.
5570 void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);
5571
5572 // Signed saturating increment vector by active predicate element count.
5573 void sqincp(const ZRegister& zdn, const PRegister& pg);
5574
5575 // Signed saturating increment scalar by multiple of 32-bit predicate
5576 // constraint element count.
5577 void sqincw(const Register& xd,
5578 const Register& wn,
5579 int pattern = SVE_ALL,
5580 int multiplier = 1);
5581
5582 // Signed saturating increment scalar by multiple of 32-bit predicate
5583 // constraint element count.
5584 void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5585
5586 // Signed saturating increment vector by multiple of 32-bit predicate
5587 // constraint element count.
5588 void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5589
5590 // Signed saturating subtract vectors (unpredicated).
5591 void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5592
5593 // Signed saturating subtract immediate (unpredicated).
5594 void sqsub(const ZRegister& zd,
5595 const ZRegister& zn,
5596 int imm8,
5597 int shift = -1);
5598
5599 // Contiguous/scatter store bytes from vector.
5600 void st1b(const ZRegister& zt,
5601 const PRegister& pg,
5602 const SVEMemOperand& addr);
5603
5604 // Contiguous/scatter store halfwords from vector.
5605 void st1h(const ZRegister& zt,
5606 const PRegister& pg,
5607 const SVEMemOperand& addr);
5608
5609 // Contiguous/scatter store words from vector.
5610 void st1w(const ZRegister& zt,
5611 const PRegister& pg,
5612 const SVEMemOperand& addr);
5613
5614 // Contiguous/scatter store doublewords from vector.
5615 void st1d(const ZRegister& zt,
5616 const PRegister& pg,
5617 const SVEMemOperand& addr);
5618
5619 // Contiguous store two-byte structures from two vectors.
5620 void st2b(const ZRegister& zt1,
5621 const ZRegister& zt2,
5622 const PRegister& pg,
5623 const SVEMemOperand& addr);
5624
5625 // Contiguous store two-halfword structures from two vectors.
5626 void st2h(const ZRegister& zt1,
5627 const ZRegister& zt2,
5628 const PRegister& pg,
5629 const SVEMemOperand& addr);
5630
5631 // Contiguous store two-word structures from two vectors.
5632 void st2w(const ZRegister& zt1,
5633 const ZRegister& zt2,
5634 const PRegister& pg,
5635 const SVEMemOperand& addr);
5636
  // Contiguous store two-doubleword structures from two vectors.
5638 void st2d(const ZRegister& zt1,
5639 const ZRegister& zt2,
5640 const PRegister& pg,
5641 const SVEMemOperand& addr);
5642
5643 // Contiguous store three-byte structures from three vectors.
5644 void st3b(const ZRegister& zt1,
5645 const ZRegister& zt2,
5646 const ZRegister& zt3,
5647 const PRegister& pg,
5648 const SVEMemOperand& addr);
5649
5650 // Contiguous store three-halfword structures from three vectors.
5651 void st3h(const ZRegister& zt1,
5652 const ZRegister& zt2,
5653 const ZRegister& zt3,
5654 const PRegister& pg,
5655 const SVEMemOperand& addr);
5656
5657 // Contiguous store three-word structures from three vectors.
5658 void st3w(const ZRegister& zt1,
5659 const ZRegister& zt2,
5660 const ZRegister& zt3,
5661 const PRegister& pg,
5662 const SVEMemOperand& addr);
5663
5664 // Contiguous store three-doubleword structures from three vectors.
5665 void st3d(const ZRegister& zt1,
5666 const ZRegister& zt2,
5667 const ZRegister& zt3,
5668 const PRegister& pg,
5669 const SVEMemOperand& addr);
5670
5671 // Contiguous store four-byte structures from four vectors.
5672 void st4b(const ZRegister& zt1,
5673 const ZRegister& zt2,
5674 const ZRegister& zt3,
5675 const ZRegister& zt4,
5676 const PRegister& pg,
5677 const SVEMemOperand& addr);
5678
5679 // Contiguous store four-halfword structures from four vectors.
5680 void st4h(const ZRegister& zt1,
5681 const ZRegister& zt2,
5682 const ZRegister& zt3,
5683 const ZRegister& zt4,
5684 const PRegister& pg,
5685 const SVEMemOperand& addr);
5686
5687 // Contiguous store four-word structures from four vectors.
5688 void st4w(const ZRegister& zt1,
5689 const ZRegister& zt2,
5690 const ZRegister& zt3,
5691 const ZRegister& zt4,
5692 const PRegister& pg,
5693 const SVEMemOperand& addr);
5694
5695 // Contiguous store four-doubleword structures from four vectors.
5696 void st4d(const ZRegister& zt1,
5697 const ZRegister& zt2,
5698 const ZRegister& zt3,
5699 const ZRegister& zt4,
5700 const PRegister& pg,
5701 const SVEMemOperand& addr);
5702
5703 // Contiguous store non-temporal bytes from vector.
5704 void stnt1b(const ZRegister& zt,
5705 const PRegister& pg,
5706 const SVEMemOperand& addr);
5707
5708 // Contiguous store non-temporal halfwords from vector.
5709 void stnt1h(const ZRegister& zt,
5710 const PRegister& pg,
5711 const SVEMemOperand& addr);
5712
5713 // Contiguous store non-temporal words from vector.
5714 void stnt1w(const ZRegister& zt,
5715 const PRegister& pg,
5716 const SVEMemOperand& addr);
5717
5718 // Contiguous store non-temporal doublewords from vector.
5719 void stnt1d(const ZRegister& zt,
5720 const PRegister& pg,
5721 const SVEMemOperand& addr);
5722
5723 // Store SVE predicate/vector register.
5724 void str(const CPURegister& rt, const SVEMemOperand& addr);
5725
5726 // Subtract vectors (predicated).
5727 void sub(const ZRegister& zd,
5728 const PRegisterM& pg,
5729 const ZRegister& zn,
5730 const ZRegister& zm);
5731
5732 // Subtract vectors (unpredicated).
5733 void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5734
5735 // Subtract immediate (unpredicated).
5736 void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5737
5738 // Reversed subtract vectors (predicated).
5739 void subr(const ZRegister& zd,
5740 const PRegisterM& pg,
5741 const ZRegister& zn,
5742 const ZRegister& zm);
5743
5744 // Reversed subtract from immediate (unpredicated).
5745 void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5746
5747 // Signed unpack and extend half of vector.
5748 void sunpkhi(const ZRegister& zd, const ZRegister& zn);
5749
5750 // Signed unpack and extend half of vector.
5751 void sunpklo(const ZRegister& zd, const ZRegister& zn);
5752
5753 // Signed byte extend (predicated).
5754 void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5755
5756 // Signed halfword extend (predicated).
5757 void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5758
5759 // Signed word extend (predicated).
5760 void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5761
5762 // Programmable table lookup/permute using vector of indices into a
5763 // vector.
5764 void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5765
5766 // Interleave even or odd elements from two predicates.
5767 void trn1(const PRegisterWithLaneSize& pd,
5768 const PRegisterWithLaneSize& pn,
5769 const PRegisterWithLaneSize& pm);
5770
5771 // Interleave even or odd elements from two vectors.
5772 void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5773
5774 // Interleave even or odd elements from two predicates.
5775 void trn2(const PRegisterWithLaneSize& pd,
5776 const PRegisterWithLaneSize& pn,
5777 const PRegisterWithLaneSize& pm);
5778
5779 // Interleave even or odd elements from two vectors.
5780 void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5781
5782 // Unsigned absolute difference (predicated).
5783 void uabd(const ZRegister& zd,
5784 const PRegisterM& pg,
5785 const ZRegister& zn,
5786 const ZRegister& zm);
5787
5788 // Unsigned add reduction to scalar.
5789 void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5790
5791 // Unsigned integer convert to floating-point (predicated).
5792 void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5793
5794 // Unsigned divide (predicated).
5795 void udiv(const ZRegister& zd,
5796 const PRegisterM& pg,
5797 const ZRegister& zn,
5798 const ZRegister& zm);
5799
5800 // Unsigned reversed divide (predicated).
5801 void udivr(const ZRegister& zd,
5802 const PRegisterM& pg,
5803 const ZRegister& zn,
5804 const ZRegister& zm);
5805
5806 // Unsigned dot product by indexed quadtuplet.
5807 void udot(const ZRegister& zda,
5808 const ZRegister& zn,
5809 const ZRegister& zm,
5810 int index);
5811
5812 // Unsigned dot product.
5813 void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5814
5815 // Unsigned maximum vectors (predicated).
5816 void umax(const ZRegister& zd,
5817 const PRegisterM& pg,
5818 const ZRegister& zn,
5819 const ZRegister& zm);
5820
5821 // Unsigned maximum with immediate (unpredicated).
5822 void umax(const ZRegister& zd, const ZRegister& zn, int imm8);
5823
5824 // Unsigned maximum reduction to scalar.
5825 void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5826
5827 // Unsigned minimum vectors (predicated).
5828 void umin(const ZRegister& zd,
5829 const PRegisterM& pg,
5830 const ZRegister& zn,
5831 const ZRegister& zm);
5832
5833 // Unsigned minimum with immediate (unpredicated).
5834 void umin(const ZRegister& zd, const ZRegister& zn, int imm8);
5835
5836 // Unsigned minimum reduction to scalar.
5837 void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5838
5839 // Unsigned multiply returning high half (predicated).
5840 void umulh(const ZRegister& zd,
5841 const PRegisterM& pg,
5842 const ZRegister& zn,
5843 const ZRegister& zm);
5844
5845 // Unsigned saturating add vectors (unpredicated).
5846 void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5847
5848 // Unsigned saturating add immediate (unpredicated).
5849 void uqadd(const ZRegister& zd,
5850 const ZRegister& zn,
5851 int imm8,
5852 int shift = -1);
5853
5854 // Unsigned saturating decrement scalar by multiple of 8-bit predicate
5855 // constraint element count.
5856 void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5857
5858 // Unsigned saturating decrement scalar by multiple of 64-bit predicate
5859 // constraint element count.
5860 void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5861
5862 // Unsigned saturating decrement vector by multiple of 64-bit predicate
5863 // constraint element count.
5864 void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5865
5866 // Unsigned saturating decrement scalar by multiple of 16-bit predicate
5867 // constraint element count.
5868 void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5869
5870 // Unsigned saturating decrement vector by multiple of 16-bit predicate
5871 // constraint element count.
5872 void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5873
5874 // Unsigned saturating decrement scalar by active predicate element count.
5875 void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);
5876
5877 // Unsigned saturating decrement vector by active predicate element count.
5878 void uqdecp(const ZRegister& zdn, const PRegister& pg);
5879
5880 // Unsigned saturating decrement scalar by multiple of 32-bit predicate
5881 // constraint element count.
5882 void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5883
5884 // Unsigned saturating decrement vector by multiple of 32-bit predicate
5885 // constraint element count.
5886 void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5887
5888 // Unsigned saturating increment scalar by multiple of 8-bit predicate
5889 // constraint element count.
5890 void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5891
5892 // Unsigned saturating increment scalar by multiple of 64-bit predicate
5893 // constraint element count.
5894 void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5895
5896 // Unsigned saturating increment vector by multiple of 64-bit predicate
5897 // constraint element count.
5898 void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5899
5900 // Unsigned saturating increment scalar by multiple of 16-bit predicate
5901 // constraint element count.
5902 void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5903
5904 // Unsigned saturating increment vector by multiple of 16-bit predicate
5905 // constraint element count.
5906 void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5907
5908 // Unsigned saturating increment scalar by active predicate element count.
5909 void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);
5910
5911 // Unsigned saturating increment vector by active predicate element count.
5912 void uqincp(const ZRegister& zdn, const PRegister& pg);
5913
5914 // Unsigned saturating increment scalar by multiple of 32-bit predicate
5915 // constraint element count.
5916 void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5917
5918 // Unsigned saturating increment vector by multiple of 32-bit predicate
5919 // constraint element count.
5920 void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5921
5922 // Unsigned saturating subtract vectors (unpredicated).
5923 void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5924
5925 // Unsigned saturating subtract immediate (unpredicated).
5926 void uqsub(const ZRegister& zd,
5927 const ZRegister& zn,
5928 int imm8,
5929 int shift = -1);
5930
5931 // Unsigned unpack and extend half of vector.
5932 void uunpkhi(const ZRegister& zd, const ZRegister& zn);
5933
5934 // Unsigned unpack and extend half of vector.
5935 void uunpklo(const ZRegister& zd, const ZRegister& zn);
5936
5937 // Unsigned byte extend (predicated).
5938 void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5939
5940 // Unsigned halfword extend (predicated).
5941 void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5942
5943 // Unsigned word extend (predicated).
5944 void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5945
5946 // Concatenate even or odd elements from two predicates.
5947 void uzp1(const PRegisterWithLaneSize& pd,
5948 const PRegisterWithLaneSize& pn,
5949 const PRegisterWithLaneSize& pm);
5950
5951 // Concatenate even or odd elements from two vectors.
5952 void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5953
5954 // Concatenate even or odd elements from two predicates.
5955 void uzp2(const PRegisterWithLaneSize& pd,
5956 const PRegisterWithLaneSize& pn,
5957 const PRegisterWithLaneSize& pm);
5958
5959 // Concatenate even or odd elements from two vectors.
5960 void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5961
5962 // While incrementing signed scalar less than or equal to scalar.
5963 void whilele(const PRegisterWithLaneSize& pd,
5964 const Register& rn,
5965 const Register& rm);
5966
5967 // While incrementing unsigned scalar lower than scalar.
5968 void whilelo(const PRegisterWithLaneSize& pd,
5969 const Register& rn,
5970 const Register& rm);
5971
5972 // While incrementing unsigned scalar lower or same as scalar.
5973 void whilels(const PRegisterWithLaneSize& pd,
5974 const Register& rn,
5975 const Register& rm);
5976
5977 // While incrementing signed scalar less than scalar.
5978 void whilelt(const PRegisterWithLaneSize& pd,
5979 const Register& rn,
5980 const Register& rm);
5981
5982 // Write the first-fault register.
5983 void wrffr(const PRegisterWithLaneSize& pn);
5984
5985 // Interleave elements from two half predicates.
5986 void zip1(const PRegisterWithLaneSize& pd,
5987 const PRegisterWithLaneSize& pn,
5988 const PRegisterWithLaneSize& pm);
5989
5990 // Interleave elements from two half vectors.
5991 void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5992
5993 // Interleave elements from two half predicates.
5994 void zip2(const PRegisterWithLaneSize& pd,
5995 const PRegisterWithLaneSize& pn,
5996 const PRegisterWithLaneSize& pm);
5997
5998 // Interleave elements from two half vectors.
5999 void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6000
  // NOTE(review): the declarations below appear to correspond to SVE2
  // instructions. In their descriptions, "(bottom)" and "(top)"
  // conventionally denote the even- and odd-numbered source elements
  // respectively — confirm against the Arm Architecture Reference Manual.

  // Add with carry long (bottom).
  void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Add with carry long (top).
  void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Add narrow high part (bottom).
  void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Add narrow high part (top).
  void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Add pairwise.
  void addp(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Bitwise clear and exclusive OR.
  void bcax(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            const ZRegister& zk);

  // Scatter lower bits into positions selected by bitmask.
  void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Gather lower bits from positions selected by bitmask.
  void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Group bits to right or left as selected by bitmask.
  void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise select.
  void bsl(const ZRegister& zd,
           const ZRegister& zn,
           const ZRegister& zm,
           const ZRegister& zk);

  // Bitwise select with first input inverted.
  void bsl1n(const ZRegister& zd,
             const ZRegister& zn,
             const ZRegister& zm,
             const ZRegister& zk);

  // Bitwise select with second input inverted.
  void bsl2n(const ZRegister& zd,
             const ZRegister& zn,
             const ZRegister& zm,
             const ZRegister& zk);

  // Complex integer add with rotate.
  void cadd(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            int rot);

  // Complex integer dot product (indexed).
  void cdot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index,
            int rot);

  // Complex integer dot product.
  void cdot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int rot);

  // Complex integer multiply-add with rotate (indexed).
  void cmla(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index,
            int rot);

  // Complex integer multiply-add with rotate.
  void cmla(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int rot);

  // Bitwise exclusive OR of three vectors.
  void eor3(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            const ZRegister& zk);

  // Interleaving exclusive OR (bottom, top).
  void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleaving exclusive OR (top, bottom).
  void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point add pairwise.
  void faddp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point up convert long (top, predicated).
  void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point down convert and narrow (top, predicated).
  void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point down convert, rounding to odd (predicated).
  void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point down convert, rounding to odd (top, predicated).
  void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point base 2 logarithm as integer.
  void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point maximum number pairwise.
  void fmaxnmp(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Floating-point maximum pairwise.
  void fmaxp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point minimum number pairwise.
  void fminnmp(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Floating-point minimum pairwise.
  void fminp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Half-precision floating-point multiply-add long to single-precision
  // (bottom).
  void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Half-precision floating-point multiply-add long to single-precision
  // (top).
  void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (bottom).
  void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (top, indexed).
  void fmlslt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-add long to single-precision
  // (bottom, indexed).
  void fmlalb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-add long to single-precision
  // (top, indexed).
  void fmlalt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (bottom, indexed).
  void fmlslb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (top).
  void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Count matching elements in vector.
  void histcnt(const ZRegister& zd,
               const PRegisterZ& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Count matching elements in vector segments.
  void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Gather load non-temporal signed bytes.
  void ldnt1sb(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Gather load non-temporal signed halfwords.
  void ldnt1sh(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Gather load non-temporal signed words.
  void ldnt1sw(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Detect any matching elements, setting the condition flags.
  void match(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Multiply-add to accumulator (indexed).
  void mla(const ZRegister& zda,
           const ZRegister& zn,
           const ZRegister& zm,
           int index);

  // Multiply-subtract from accumulator (indexed).
  void mls(const ZRegister& zda,
           const ZRegister& zn,
           const ZRegister& zm,
           int index);

  // Multiply (indexed).
  void mul(const ZRegister& zd,
           const ZRegister& zn,
           const ZRegister& zm,
           int index);

  // Multiply vectors (unpredicated).
  void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise inverted select.
  void nbsl(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            const ZRegister& zk);

  // Detect no matching elements, setting the condition flags.
  void nmatch(const PRegisterWithLaneSize& pd,
              const PRegisterZ& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Polynomial multiply vectors (unpredicated).
  void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Polynomial multiply long (bottom).
  void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Polynomial multiply long (top).
  void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6256
  // Rounding add narrow high part (bottom).
  void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Rounding add narrow high part (top).
  void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Rounding shift right narrow by immediate (bottom).
  void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Rounding shift right narrow by immediate (top).
  void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Rounding subtract narrow high part (bottom).
  void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Rounding subtract narrow high part (top).
  void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference and accumulate.
  void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference and accumulate long (bottom).
  void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference and accumulate long (top).
  void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference long (bottom).
  void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed absolute difference long (top).
  void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add and accumulate long pairwise.
  void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);

  // Signed add long (bottom).
  void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add long (bottom + top).
  void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add long (top).
  void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add wide (bottom).
  void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed add wide (top).
  void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Subtract with carry long (bottom).
  void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Subtract with carry long (top).
  void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed halving addition.
  void shadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Shift right narrow by immediate (bottom).
  void shrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Shift right narrow by immediate (top).
  void shrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed halving subtract.
  void shsub(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed halving subtract reversed vectors.
  void shsubr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Shift left and insert (immediate).
  void sli(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed maximum pairwise.
  void smaxp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed minimum pairwise.
  void sminp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed multiply-add long to accumulator (bottom, indexed).
  void smlalb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply-add long to accumulator (bottom).
  void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply-add long to accumulator (top, indexed).
  void smlalt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply-add long to accumulator (top).
  void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply-subtract long from accumulator (bottom, indexed).
  void smlslb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply-subtract long from accumulator (bottom).
  void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply-subtract long from accumulator (top, indexed).
  void smlslt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply-subtract long from accumulator (top).
  void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply returning high half (unpredicated).
  void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply long (bottom, indexed).
  void smullb(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply long (bottom).
  void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed multiply long (top, indexed).
  void smullt(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Signed multiply long (top).
  void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating absolute value.
  void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed saturating addition (predicated).
  void sqadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Saturating complex integer add with rotate.
  void sqcadd(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int rot);

  // Signed saturating doubling multiply-add long to accumulator (bottom,
  // indexed).
  void sqdmlalb(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply-add long to accumulator (bottom).
  void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply-add long to accumulator (bottom x
  // top).
  void sqdmlalbt(const ZRegister& zda,
                 const ZRegister& zn,
                 const ZRegister& zm);

  // Signed saturating doubling multiply-add long to accumulator (top,
  // indexed).
  void sqdmlalt(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply-add long to accumulator (top).
  void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (bottom, indexed).
  void sqdmlslb(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (bottom).
  void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (bottom x top).
  void sqdmlslbt(const ZRegister& zda,
                 const ZRegister& zn,
                 const ZRegister& zm);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (top, indexed).
  void sqdmlslt(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply-subtract long from accumulator
  // (top).
  void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply high (indexed).
  void sqdmulh(const ZRegister& zd,
               const ZRegister& zn,
               const ZRegister& zm,
               int index);

  // Signed saturating doubling multiply high (unpredicated).
  void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply long (bottom, indexed).
  void sqdmullb(const ZRegister& zd,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply long (bottom).
  void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating doubling multiply long (top, indexed).
  void sqdmullt(const ZRegister& zd,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating doubling multiply long (top).
  void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6505
  // Signed saturating negate.
  void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Saturating rounding doubling complex integer multiply-add high with
  // rotate (indexed).
  void sqrdcmlah(const ZRegister& zda,
                 const ZRegister& zn,
                 const ZRegister& zm,
                 int index,
                 int rot);

  // Saturating rounding doubling complex integer multiply-add high with
  // rotate.
  void sqrdcmlah(const ZRegister& zda,
                 const ZRegister& zn,
                 const ZRegister& zm,
                 int rot);

  // Signed saturating rounding doubling multiply-add high to accumulator
  // (indexed).
  void sqrdmlah(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating rounding doubling multiply-add high to accumulator
  // (unpredicated).
  void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating rounding doubling multiply-subtract high from
  // accumulator (indexed).
  void sqrdmlsh(const ZRegister& zda,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating rounding doubling multiply-subtract high from
  // accumulator (unpredicated).
  void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating rounding doubling multiply high (indexed).
  void sqrdmulh(const ZRegister& zd,
                const ZRegister& zn,
                const ZRegister& zm,
                int index);

  // Signed saturating rounding doubling multiply high (unpredicated).
  void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating rounding shift left by vector (predicated).
  void sqrshl(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Signed saturating rounding shift left reversed vectors (predicated).
  void sqrshlr(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Signed saturating rounding shift right narrow by immediate (bottom).
  void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating rounding shift right narrow by immediate (top).
  void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating rounding shift right unsigned narrow by immediate
  // (bottom).
  void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating rounding shift right unsigned narrow by immediate
  // (top).
  void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             int shift);

  // Signed saturating shift left by vector (predicated).
  void sqshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed saturating shift left reversed vectors (predicated).
  void sqshlr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              int shift);

  // Signed saturating shift right narrow by immediate (bottom).
  void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating shift right narrow by immediate (top).
  void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (bottom).
  void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (top).
  void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed saturating subtraction (predicated).
  void sqsub(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed saturating subtraction reversed vectors (predicated).
  void sqsubr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Signed saturating extract narrow (bottom).
  void sqxtnb(const ZRegister& zd, const ZRegister& zn);

  // Signed saturating extract narrow (top).
  void sqxtnt(const ZRegister& zd, const ZRegister& zn);

  // Signed saturating unsigned extract narrow (bottom).
  void sqxtunb(const ZRegister& zd, const ZRegister& zn);

  // Signed saturating unsigned extract narrow (top).
  void sqxtunt(const ZRegister& zd, const ZRegister& zn);

  // Signed rounding halving addition.
  void srhadd(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Shift right and insert (immediate).
  void sri(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed rounding shift left by vector (predicated).
  void srshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed rounding shift left reversed vectors (predicated).
  void srshlr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Signed rounding shift right by immediate.
  void srshr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             int shift);

  // Signed rounding shift right and accumulate (immediate).
  void srsra(const ZRegister& zda, const ZRegister& zn, int shift);

  // Signed shift left long by immediate (bottom).
  void sshllb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed shift left long by immediate (top).
  void sshllt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Signed shift right and accumulate (immediate).
  void ssra(const ZRegister& zda, const ZRegister& zn, int shift);

  // Signed subtract long (bottom).
  void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract long (bottom - top).
  void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract long (top).
  void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract long (top - bottom).
  void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract wide (bottom).
  void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed subtract wide (top).
  void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Subtract narrow high part (bottom).
  void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Subtract narrow high part (top).
  void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating addition of unsigned value.
  void suqadd(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Programmable table lookup in one or two vector table (zeroing).
  void tbl(const ZRegister& zd,
           const ZRegister& zn1,
           const ZRegister& zn2,
           const ZRegister& zm);

  // Programmable table lookup in single vector table (merging).
  void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference and accumulate.
  void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference and accumulate long (bottom).
  void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference and accumulate long (top).
  void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference long (bottom).
  void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference long (top).
  void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned add and accumulate long pairwise.
  void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned add long (bottom).
  void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned add long (top).
  void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned add wide (bottom).
  void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned add wide (top).
  void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6748
  // Unsigned halving addition.
  void uhadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned halving subtract.
  void uhsub(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned halving subtract reversed vectors.
  void uhsubr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned maximum pairwise.
  void umaxp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned minimum pairwise.
  void uminp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned multiply-add long to accumulator (bottom, indexed).
  void umlalb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply-add long to accumulator (bottom).
  void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply-add long to accumulator (top, indexed).
  void umlalt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply-add long to accumulator (top).
  void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply-subtract long from accumulator (bottom, indexed).
  void umlslb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply-subtract long from accumulator (bottom).
  void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply-subtract long from accumulator (top, indexed).
  void umlslt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply-subtract long from accumulator (top).
  void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply returning high half (unpredicated).
  void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply long (bottom, indexed).
  void umullb(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply long (bottom).
  void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned multiply long (top, indexed).
  void umullt(const ZRegister& zd,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Unsigned multiply long (top).
  void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned saturating addition (predicated).
  void uqadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned saturating rounding shift left by vector (predicated).
  void uqrshl(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned saturating rounding shift left reversed vectors (predicated).
  void uqrshlr(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Unsigned saturating rounding shift right narrow by immediate (bottom).
  void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (top).
  void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             int shift);

  // Unsigned saturating shift left by vector (predicated).
  void uqshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned saturating shift left reversed vectors (predicated).
  void uqshlr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned saturating shift right narrow by immediate (bottom).
  void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating shift right narrow by immediate (top).
  void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating subtraction (predicated).
  void uqsub(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned saturating subtraction reversed vectors (predicated).
  void uqsubr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned saturating extract narrow (bottom).
  void uqxtnb(const ZRegister& zd, const ZRegister& zn);

  // Unsigned saturating extract narrow (top).
  void uqxtnt(const ZRegister& zd, const ZRegister& zn);

  // Unsigned reciprocal estimate (predicated).
  void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned rounding halving addition.
  void urhadd(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned rounding shift left by vector (predicated).
  void urshl(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned rounding shift left reversed vectors (predicated).
  void urshlr(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned rounding shift right by immediate.
  void urshr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             int shift);

  // Unsigned reciprocal square root estimate (predicated).
  void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned rounding shift right and accumulate (immediate).
  void ursra(const ZRegister& zda, const ZRegister& zn, int shift);

  // Unsigned shift left long by immediate (bottom).
  void ushllb(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned shift left long by immediate (top).
  void ushllt(const ZRegister& zd, const ZRegister& zn, int shift);

  // Unsigned saturating addition of signed value.
  void usqadd(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Unsigned shift right and accumulate (immediate).
  void usra(const ZRegister& zda, const ZRegister& zn, int shift);

  // Unsigned subtract long (bottom).
  void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned subtract long (top).
  void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned subtract wide (bottom).
  void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned subtract wide (top).
  void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // While decrementing signed scalar greater than or equal to scalar.
  void whilege(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While decrementing signed scalar greater than scalar.
  void whilegt(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While decrementing unsigned scalar higher than scalar.
  void whilehi(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While decrementing unsigned scalar higher or same as scalar.
  void whilehs(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While free of read-after-write conflicts.
  // NOTE(review): rn and rm presumably hold the two memory addresses being
  // compared for conflicts — confirm against the Arm ARM (WHILERW).
  void whilerw(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While free of write-after-read/write conflicts.
  void whilewr(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // Bitwise exclusive OR and rotate right by immediate.
  void xar(const ZRegister& zd,
           const ZRegister& zn,
           const ZRegister& zm,
           int shift);
6997
6998 // Floating-point matrix multiply-accumulate.
6999 void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
7000
7001 // Signed integer matrix multiply-accumulate.
7002 void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
7003
7004 // Unsigned by signed integer matrix multiply-accumulate.
7005 void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
7006
7007 // Unsigned integer matrix multiply-accumulate.
7008 void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
7009
7010 // Unsigned by signed integer dot product.
7011 void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
7012
7013 // Unsigned by signed integer indexed dot product.
7014 void usdot(const ZRegister& zda,
7015 const ZRegister& zn,
7016 const ZRegister& zm,
7017 int index);
7018
7019 // Signed by unsigned integer indexed dot product.
7020 void sudot(const ZRegister& zda,
7021 const ZRegister& zn,
7022 const ZRegister& zm,
7023 int index);
7024
7025 // Add with Tag.
7026 void addg(const Register& xd, const Register& xn, int offset, int tag_offset);
7027
7028 // Tag Mask Insert.
7029 void gmi(const Register& xd, const Register& xn, const Register& xm);
7030
7031 // Insert Random Tag.
7032 void irg(const Register& xd, const Register& xn, const Register& xm = xzr);
7033
7034 // Load Allocation Tag.
7035 void ldg(const Register& xt, const MemOperand& addr);
7036
7037 void StoreTagHelper(const Register& xt, const MemOperand& addr, Instr op);
7038
7039 // Store Allocation Tags.
7040 void st2g(const Register& xt, const MemOperand& addr);
7041
7042 // Store Allocation Tag.
7043 void stg(const Register& xt, const MemOperand& addr);
7044
7045 // Store Allocation Tag and Pair of registers.
7046 void stgp(const Register& xt1, const Register& xt2, const MemOperand& addr);
7047
7048 // Store Allocation Tags, Zeroing.
7049 void stz2g(const Register& xt, const MemOperand& addr);
7050
7051 // Store Allocation Tag, Zeroing.
7052 void stzg(const Register& xt, const MemOperand& addr);
7053
7054 // Subtract with Tag.
7055 void subg(const Register& xd, const Register& xn, int offset, int tag_offset);
7056
7057 // Subtract Pointer.
7058 void subp(const Register& xd, const Register& xn, const Register& xm);
7059
7060 // Subtract Pointer, setting Flags.
7061 void subps(const Register& xd, const Register& xn, const Register& xm);
7062
  // Compare with Tag.
  // Alias of SUBPS with the result discarded (written to xzr), so only the
  // condition flags are updated.
  void cmpp(const Register& xn, const Register& xm) { subps(xzr, xn, xm); }
7065
  // Memory Copy / Memory Set instructions.
  // NOTE(review): the p/m/e suffixes appear to select the
  // prologue/main/epilogue steps of the three-instruction sequence — confirm
  // against the Arm ARM (FEAT_MOPS).

  // Memory Copy.
  void cpye(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads and writes non-temporal.
  void cpyen(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads non-temporal.
  void cpyern(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, writes non-temporal.
  void cpyewn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only.
  void cpyfe(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads and writes non-temporal.
  void cpyfen(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads non-temporal.
  void cpyfern(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, writes non-temporal.
  void cpyfewn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only.
  void cpyfm(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads and writes non-temporal.
  void cpyfmn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads non-temporal.
  void cpyfmrn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, writes non-temporal.
  void cpyfmwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only.
  void cpyfp(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads and writes non-temporal.
  void cpyfpn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, reads non-temporal.
  void cpyfprn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, writes non-temporal.
  void cpyfpwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy.
  void cpym(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads and writes non-temporal.
  void cpymn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads non-temporal.
  void cpymrn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, writes non-temporal.
  void cpymwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy.
  void cpyp(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads and writes non-temporal.
  void cpypn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, reads non-temporal.
  void cpyprn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, writes non-temporal.
  void cpypwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Set.
  void sete(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, non-temporal.
  void seten(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting.
  void setge(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, non-temporal.
  void setgen(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting.
  void setgm(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, non-temporal.
  void setgmn(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting.
  void setgp(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, non-temporal.
  void setgpn(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set.
  void setm(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, non-temporal.
  void setmn(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set.
  void setp(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, non-temporal.
  void setpn(const Register& rd, const Register& rn, const Register& rs);

  // Scalar integer helper instructions.

  // Absolute value.
  void abs(const Register& rd, const Register& rn);

  // Count bits.
  void cnt(const Register& rd, const Register& rn);

  // Count Trailing Zeros.
  void ctz(const Register& rd, const Register& rn);

  // Signed Maximum.
  void smax(const Register& rd, const Register& rn, const Operand& op);

  // Signed Minimum.
  void smin(const Register& rd, const Register& rn, const Operand& op);

  // Unsigned Maximum.
  void umax(const Register& rd, const Register& rn, const Operand& op);

  // Unsigned Minimum.
  void umin(const Register& rd, const Register& rn, const Operand& op);

  // Check feature status.
  void chkfeat(const Register& rd);

  // Guarded Control Stack (GCS) instructions.

  // Guarded Control Stack Push.
  void gcspushm(const Register& rt);

  // Guarded Control Stack Pop.
  void gcspopm(const Register& rt);

  // Guarded Control Stack Switch Stack 1.
  void gcsss1(const Register& rt);

  // Guarded Control Stack Switch Stack 2.
  void gcsss2(const Register& rt);
7209
  // Emit generic instructions.

  // Emit raw instructions into the instruction stream.
  // No operand or encoding checks are applied: the caller is responsible for
  // supplying a valid instruction word.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
  void dc32(uint32_t data) { dc(data); }

  // Emit 64 bits of data into the instruction stream.
  void dc64(uint64_t data) { dc(data); }

  // Emit data in the instruction stream.
  // Only valid while the assembler accepts direct emission (AllowAssembler).
  template <typename T>
  void dc(T data) {
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }

  // Copy a string into the instruction stream, including the terminating NULL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
  void EmitString(const char* string) {
    VIXL_ASSERT(string != NULL);
    VIXL_ASSERT(AllowAssembler());

    GetBuffer()->EmitString(string);
    GetBuffer()->Align();
  }
7238
  // Code generation helpers.

  // Try to materialize `imm` into `dst` with a single instruction.
  // NOTE(review): presumably returns true on success — confirm the exact
  // contract in the implementation file.
  static bool OneInstrMoveImmediateHelper(Assembler* assm,
                                          const Register& dst,
                                          uint64_t imm);

  // Register encoding.
  // Encode the code of `rx` into instruction bits [hibit:lobit]. The stack
  // pointer cannot be encoded here: it uses a special internal register code
  // which the assertion rejects.
  template <int hibit, int lobit>
  static Instr Rx(CPURegister rx) {
    VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
    return ImmUnsignedField<hibit, lobit>(rx.GetCode());
  }

// Expand to the field encoders Rd(), Rn(), Rm(), Ra(), Rt(), Rt2() and Rs(),
// each forwarding to Rx<> with that field's offset and width constants.
#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
#define REGISTER_ENCODER(N)                                           \
  static Instr R##N(CPURegister r##N) {                               \
    return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
  }
  CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
#undef REGISTER_ENCODER
#undef CPU_REGISTER_FIELD_NAMES

  // As Rm(), but additionally disallow the zero register.
  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }
7265
7266 // These encoding functions allow the stack pointer to be encoded, and
7267 // disallow the zero register.
RdSP(Register rd)7268 static Instr RdSP(Register rd) {
7269 VIXL_ASSERT(!rd.IsZero());
7270 return (rd.GetCode() & kRegCodeMask) << Rd_offset;
7271 }
7272
RnSP(Register rn)7273 static Instr RnSP(Register rn) {
7274 VIXL_ASSERT(!rn.IsZero());
7275 return (rn.GetCode() & kRegCodeMask) << Rn_offset;
7276 }
7277
RmSP(Register rm)7278 static Instr RmSP(Register rm) {
7279 VIXL_ASSERT(!rm.IsZero());
7280 return (rm.GetCode() & kRegCodeMask) << Rm_offset;
7281 }
7282
Pd(PRegister pd)7283 static Instr Pd(PRegister pd) {
7284 return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
7285 }
7286
Pm(PRegister pm)7287 static Instr Pm(PRegister pm) {
7288 return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
7289 }
7290
Pn(PRegister pn)7291 static Instr Pn(PRegister pn) {
7292 return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
7293 }
7294
PgLow8(PRegister pg)7295 static Instr PgLow8(PRegister pg) {
7296 // Governing predicates can be merging, zeroing, or unqualified. They should
7297 // never have a lane size.
7298 VIXL_ASSERT(!pg.HasLaneSize());
7299 return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
7300 }
7301
7302 template <int hibit, int lobit>
Pg(PRegister pg)7303 static Instr Pg(PRegister pg) {
7304 // Governing predicates can be merging, zeroing, or unqualified. They should
7305 // never have a lane size.
7306 VIXL_ASSERT(!pg.HasLaneSize());
7307 return Rx<hibit, lobit>(pg);
7308 }
7309
7310 // Flags encoding.
Flags(FlagsUpdate S)7311 static Instr Flags(FlagsUpdate S) {
7312 if (S == SetFlags) {
7313 return 1 << FlagsUpdate_offset;
7314 } else if (S == LeaveFlags) {
7315 return 0 << FlagsUpdate_offset;
7316 }
7317 VIXL_UNREACHABLE();
7318 return 0;
7319 }
7320
Cond(Condition cond)7321 static Instr Cond(Condition cond) { return cond << Condition_offset; }
7322
  // Generic immediate encoding.
  // Encode a signed immediate into bits [hibit:lobit], truncating it to the
  // field width after asserting it fits.
  template <int hibit, int lobit>
  static Instr ImmField(int64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    int fieldsize = hibit - lobit + 1;
    VIXL_ASSERT(IsIntN(fieldsize, imm));
    return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
  }

  // For unsigned immediate encoding.
  // TODO: Handle signed and unsigned immediate in satisfactory way.
  template <int hibit, int lobit>
  static Instr ImmUnsignedField(uint64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
    return static_cast<Instr>(imm << lobit);
  }

  // PC-relative address encoding.
  // The 21-bit offset is split into a 19-bit high part and a 2-bit low part
  // (ADR/ADRP-style immhi:immlo layout).
  static Instr ImmPCRelAddress(int64_t imm21) {
    VIXL_ASSERT(IsInt21(imm21));
    Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
    Instr immlo = imm << ImmPCRelLo_offset;
    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
  }
7351
  // Branch encoding.
  // 26-bit signed offset for unconditional branches (B, BL).
  static Instr ImmUncondBranch(int64_t imm26) {
    VIXL_ASSERT(IsInt26(imm26));
    return TruncateToUint26(imm26) << ImmUncondBranch_offset;
  }

  // 19-bit signed offset for conditional branches.
  static Instr ImmCondBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCondBranch_offset;
  }

  // 19-bit signed offset for compare-and-branch (CBZ/CBNZ).
  static Instr ImmCmpBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCmpBranch_offset;
  }

  // 14-bit signed offset for test-and-branch (TBZ/TBNZ).
  static Instr ImmTestBranch(int64_t imm14) {
    VIXL_ASSERT(IsInt14(imm14));
    return TruncateToUint14(imm14) << ImmTestBranch_offset;
  }

  // Bit position for test-and-branch: bit 5 of bit_pos goes into the b5
  // field, bits 4:0 into the b40 field.
  static Instr ImmTestBranchBit(unsigned bit_pos) {
    VIXL_ASSERT(IsUint6(bit_pos));
    // Subtract five from the shift offset, as we need bit 5 from bit_pos.
    unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5);
    unsigned bit40 = bit_pos << ImmTestBranchBit40_offset;
    bit5 &= ImmTestBranchBit5_mask;
    bit40 &= ImmTestBranchBit40_mask;
    return bit5 | bit40;
  }
7382
7383 // Data Processing encoding.
SF(Register rd)7384 static Instr SF(Register rd) {
7385 return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
7386 }
7387
ImmAddSub(int imm)7388 static Instr ImmAddSub(int imm) {
7389 VIXL_ASSERT(IsImmAddSub(imm));
7390 if (IsUint12(imm)) { // No shift required.
7391 imm <<= ImmAddSub_offset;
7392 } else {
7393 imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
7394 }
7395 return imm;
7396 }
7397
SVEImmSetBits(unsigned imms,unsigned lane_size)7398 static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
7399 VIXL_ASSERT(IsUint6(imms));
7400 VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
7401 USE(lane_size);
7402 return imms << SVEImmSetBits_offset;
7403 }
7404
SVEImmRotate(unsigned immr,unsigned lane_size)7405 static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
7406 VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
7407 USE(lane_size);
7408 return immr << SVEImmRotate_offset;
7409 }
7410
SVEBitN(unsigned bitn)7411 static Instr SVEBitN(unsigned bitn) {
7412 VIXL_ASSERT(IsUint1(bitn));
7413 return bitn << SVEBitN_offset;
7414 }
7415
7416 static Instr SVEDtype(unsigned msize_in_bytes_log2,
7417 unsigned esize_in_bytes_log2,
7418 bool is_signed,
7419 int dtype_h_lsb = 23,
7420 int dtype_l_lsb = 21) {
7421 VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
7422 VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
7423 Instr dtype_h = msize_in_bytes_log2;
7424 Instr dtype_l = esize_in_bytes_log2;
7425 // Signed forms use the encodings where msize would be greater than esize.
7426 if (is_signed) {
7427 dtype_h = dtype_h ^ 0x3;
7428 dtype_l = dtype_l ^ 0x3;
7429 }
7430 VIXL_ASSERT(IsUint2(dtype_h));
7431 VIXL_ASSERT(IsUint2(dtype_l));
7432 VIXL_ASSERT((dtype_h > dtype_l) == is_signed);
7433
7434 return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
7435 }
7436
SVEDtypeSplit(unsigned msize_in_bytes_log2,unsigned esize_in_bytes_log2,bool is_signed)7437 static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
7438 unsigned esize_in_bytes_log2,
7439 bool is_signed) {
7440 return SVEDtype(msize_in_bytes_log2,
7441 esize_in_bytes_log2,
7442 is_signed,
7443 23,
7444 13);
7445 }
7446
ImmS(unsigned imms,unsigned reg_size)7447 static Instr ImmS(unsigned imms, unsigned reg_size) {
7448 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
7449 ((reg_size == kWRegSize) && IsUint5(imms)));
7450 USE(reg_size);
7451 return imms << ImmS_offset;
7452 }
7453
ImmR(unsigned immr,unsigned reg_size)7454 static Instr ImmR(unsigned immr, unsigned reg_size) {
7455 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7456 ((reg_size == kWRegSize) && IsUint5(immr)));
7457 USE(reg_size);
7458 VIXL_ASSERT(IsUint6(immr));
7459 return immr << ImmR_offset;
7460 }
7461
ImmSetBits(unsigned imms,unsigned reg_size)7462 static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
7463 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7464 VIXL_ASSERT(IsUint6(imms));
7465 VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
7466 USE(reg_size);
7467 return imms << ImmSetBits_offset;
7468 }
7469
ImmRotate(unsigned immr,unsigned reg_size)7470 static Instr ImmRotate(unsigned immr, unsigned reg_size) {
7471 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7472 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7473 ((reg_size == kWRegSize) && IsUint5(immr)));
7474 USE(reg_size);
7475 return immr << ImmRotate_offset;
7476 }
7477
ImmLLiteral(int64_t imm19)7478 static Instr ImmLLiteral(int64_t imm19) {
7479 VIXL_ASSERT(IsInt19(imm19));
7480 return TruncateToUint19(imm19) << ImmLLiteral_offset;
7481 }
7482
BitN(unsigned bitn,unsigned reg_size)7483 static Instr BitN(unsigned bitn, unsigned reg_size) {
7484 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7485 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
7486 USE(reg_size);
7487 return bitn << BitN_offset;
7488 }
7489
ShiftDP(Shift shift)7490 static Instr ShiftDP(Shift shift) {
7491 VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
7492 return shift << ShiftDP_offset;
7493 }
7494
ImmDPShift(unsigned amount)7495 static Instr ImmDPShift(unsigned amount) {
7496 VIXL_ASSERT(IsUint6(amount));
7497 return amount << ImmDPShift_offset;
7498 }
7499
ExtendMode(Extend extend)7500 static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }
7501
ImmExtendShift(unsigned left_shift)7502 static Instr ImmExtendShift(unsigned left_shift) {
7503 VIXL_ASSERT(left_shift <= 4);
7504 return left_shift << ImmExtendShift_offset;
7505 }
7506
ImmCondCmp(unsigned imm)7507 static Instr ImmCondCmp(unsigned imm) {
7508 VIXL_ASSERT(IsUint5(imm));
7509 return imm << ImmCondCmp_offset;
7510 }
7511
Nzcv(StatusFlags nzcv)7512 static Instr Nzcv(StatusFlags nzcv) {
7513 return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
7514 }
7515
  // MemOperand offset encoding.
  // 12-bit unsigned (scaled) load/store offset.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  // 9-bit signed (unscaled) load/store offset.
  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  // 7-bit signed load/store-pair offset, scaled down by the access size.
  static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
    const auto access_size_in_bytes = 1U << access_size_in_bytes_log2;
    VIXL_ASSERT(IsMultiple(imm7, access_size_in_bytes));
    int64_t scaled_imm7 = imm7 / access_size_in_bytes;
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }

  // Single-bit shift selector for register-offset loads/stores.
  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  // 10-bit signed offset for PAC-authenticated loads (LDRAA/LDRAB), scaled by
  // 8 and encoded with the sign bit in a separate high field.
  static Instr ImmLSPAC(int64_t imm10) {
    VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
    int64_t scaled_imm10 = imm10 / (1 << 3);
    VIXL_ASSERT(IsInt10(scaled_imm10));
    uint32_t s_bit = (scaled_imm10 >> 9) & 1;
    return (s_bit << ImmLSPACHi_offset) |
           (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
  }
7548
ImmPrefetchOperation(int imm5)7549 static Instr ImmPrefetchOperation(int imm5) {
7550 VIXL_ASSERT(IsUint5(imm5));
7551 return imm5 << ImmPrefetchOperation_offset;
7552 }
7553
ImmException(int imm16)7554 static Instr ImmException(int imm16) {
7555 VIXL_ASSERT(IsUint16(imm16));
7556 return imm16 << ImmException_offset;
7557 }
7558
ImmUdf(int imm16)7559 static Instr ImmUdf(int imm16) {
7560 VIXL_ASSERT(IsUint16(imm16));
7561 return imm16 << ImmUdf_offset;
7562 }
7563
ImmSystemRegister(int imm16)7564 static Instr ImmSystemRegister(int imm16) {
7565 VIXL_ASSERT(IsUint16(imm16));
7566 return imm16 << ImmSystemRegister_offset;
7567 }
7568
ImmRMIFRotation(int imm6)7569 static Instr ImmRMIFRotation(int imm6) {
7570 VIXL_ASSERT(IsUint6(imm6));
7571 return imm6 << ImmRMIFRotation_offset;
7572 }
7573
ImmHint(int imm7)7574 static Instr ImmHint(int imm7) {
7575 VIXL_ASSERT(IsUint7(imm7));
7576 return imm7 << ImmHint_offset;
7577 }
7578
CRm(int imm4)7579 static Instr CRm(int imm4) {
7580 VIXL_ASSERT(IsUint4(imm4));
7581 return imm4 << CRm_offset;
7582 }
7583
CRn(int imm4)7584 static Instr CRn(int imm4) {
7585 VIXL_ASSERT(IsUint4(imm4));
7586 return imm4 << CRn_offset;
7587 }
7588
SysOp(int imm14)7589 static Instr SysOp(int imm14) {
7590 VIXL_ASSERT(IsUint14(imm14));
7591 return imm14 << SysOp_offset;
7592 }
7593
ImmSysOp1(int imm3)7594 static Instr ImmSysOp1(int imm3) {
7595 VIXL_ASSERT(IsUint3(imm3));
7596 return imm3 << SysOp1_offset;
7597 }
7598
ImmSysOp2(int imm3)7599 static Instr ImmSysOp2(int imm3) {
7600 VIXL_ASSERT(IsUint3(imm3));
7601 return imm3 << SysOp2_offset;
7602 }
7603
ImmBarrierDomain(int imm2)7604 static Instr ImmBarrierDomain(int imm2) {
7605 VIXL_ASSERT(IsUint2(imm2));
7606 return imm2 << ImmBarrierDomain_offset;
7607 }
7608
ImmBarrierType(int imm2)7609 static Instr ImmBarrierType(int imm2) {
7610 VIXL_ASSERT(IsUint2(imm2));
7611 return imm2 << ImmBarrierType_offset;
7612 }
7613
  // Move immediates encoding.
  // 16-bit payload of MOVZ/MOVN/MOVK.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  // 2-bit shift selector (hw field) of MOVZ/MOVN/MOVK.
  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }

  // FP Immediates.
  // Encode an FMOV-style modified floating-point immediate; defined in the
  // implementation file.
  static Instr ImmFP16(Float16 imm);
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);
7629
7630 // FP register type.
FPType(VRegister fd)7631 static Instr FPType(VRegister fd) {
7632 VIXL_ASSERT(fd.IsScalar());
7633 switch (fd.GetSizeInBits()) {
7634 case 16:
7635 return FP16;
7636 case 32:
7637 return FP32;
7638 case 64:
7639 return FP64;
7640 default:
7641 VIXL_UNREACHABLE();
7642 return 0;
7643 }
7644 }
7645
FPScale(unsigned scale)7646 static Instr FPScale(unsigned scale) {
7647 VIXL_ASSERT(IsUint6(scale));
7648 return scale << FPScale_offset;
7649 }
7650
  // Immediate field checking helpers.
  // Can `immediate` be encoded as an add/sub immediate?
  static bool IsImmAddSub(int64_t immediate);
  // Can `immediate` be encoded in a conditional compare?
  static bool IsImmConditionalCompare(int64_t immediate);
  // Can `imm` be encoded as an FMOV-style modified FP immediate?
  static bool IsImmFP16(Float16 imm);

  // Convenience overload: checks the raw bit pattern of `imm`.
  static bool IsImmFP32(float imm) { return IsImmFP32(FloatToRawbits(imm)); }

  static bool IsImmFP32(uint32_t bits);

  // Convenience overload: checks the raw bit pattern of `imm`.
  static bool IsImmFP64(double imm) { return IsImmFP64(DoubleToRawbits(imm)); }

  static bool IsImmFP64(uint64_t bits);
  // Can `value` be encoded as a logical immediate for the given register
  // width? On success the optional out-parameters receive the n:imm_s:imm_r
  // encoding.
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);
7673
  // Instruction bits for vector format in data processing operations.
  // Returns 0xffffffff for lane counts that have no valid arrangement.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return NEON_1D;
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }
7705
  // Instruction bits for vector format in floating point data processing
  // operations. Dispatches first on lane count, then on register size;
  // unsupported combinations are unreachable.
  static Instr FPFormat(VRegister vd) {
    switch (vd.GetLanes()) {
      case 1:
        // Floating point scalar formats.
        switch (vd.GetSizeInBits()) {
          case 16:
            return FP16;
          case 32:
            return FP32;
          case 64:
            return FP64;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 2:
        // Two lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_2S;
          case 128:
            return NEON_FP_2D;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 4:
        // Four lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_4H;
          case 128:
            return NEON_FP_4S;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 8:
        // Eight lane floating point vector format.
        VIXL_ASSERT(vd.Is128Bits());
        return NEON_FP_8H;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    VIXL_UNREACHABLE();
    return 0;
  }
7756
  // Instruction bits for vector format in load and store operations.
  // Returns 0xffffffff for lane counts that have no valid arrangement.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }
7788
7789 // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)7790 static Instr SFormat(VRegister vd) {
7791 VIXL_ASSERT(vd.GetLanes() == 1);
7792 switch (vd.GetSizeInBytes()) {
7793 case 1:
7794 return NEON_B;
7795 case 2:
7796 return NEON_H;
7797 case 4:
7798 return NEON_S;
7799 case 8:
7800 return NEON_D;
7801 default:
7802 return 0xffffffff;
7803 }
7804 }
7805
  // Encode the SVE size field from the lane size of a Z or P register.
  // Returns 0xffffffff for lane sizes with no valid encoding.
  template <typename T>
  static Instr SVESize(const T& rd) {
    VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
    VIXL_ASSERT(rd.HasLaneSize());
    switch (rd.GetLaneSizeInBytes()) {
      case 1:
        return SVE_B;
      case 2:
        return SVE_H;
      case 4:
        return SVE_S;
      case 8:
        return SVE_D;
      default:
        return 0xffffffff;
    }
  }

  // Encode an SVE predicate-constraint pattern (5 bits), masked to the field.
  static Instr ImmSVEPredicateConstraint(int pattern) {
    VIXL_ASSERT(IsUint5(pattern));
    return (pattern << ImmSVEPredicateConstraint_offset) &
           ImmSVEPredicateConstraint_mask;
  }
7829
ImmNEONHLM(int index,int num_bits)7830 static Instr ImmNEONHLM(int index, int num_bits) {
7831 int h, l, m;
7832 if (num_bits == 3) {
7833 VIXL_ASSERT(IsUint3(index));
7834 h = (index >> 2) & 1;
7835 l = (index >> 1) & 1;
7836 m = (index >> 0) & 1;
7837 } else if (num_bits == 2) {
7838 VIXL_ASSERT(IsUint2(index));
7839 h = (index >> 1) & 1;
7840 l = (index >> 0) & 1;
7841 m = 0;
7842 } else {
7843 VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
7844 h = (index >> 0) & 1;
7845 l = 0;
7846 m = 0;
7847 }
7848 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
7849 }
7850
ImmRotFcadd(int rot)7851 static Instr ImmRotFcadd(int rot) {
7852 VIXL_ASSERT(rot == 90 || rot == 270);
7853 return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
7854 }
7855
ImmRotFcmlaSca(int rot)7856 static Instr ImmRotFcmlaSca(int rot) {
7857 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7858 return (rot / 90) << ImmRotFcmlaSca_offset;
7859 }
7860
ImmRotFcmlaVec(int rot)7861 static Instr ImmRotFcmlaVec(int rot) {
7862 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7863 return (rot / 90) << ImmRotFcmlaVec_offset;
7864 }
7865
ImmNEONExt(int imm4)7866 static Instr ImmNEONExt(int imm4) {
7867 VIXL_ASSERT(IsUint4(imm4));
7868 return imm4 << ImmNEONExt_offset;
7869 }
7870
ImmNEON5(Instr format,int index)7871 static Instr ImmNEON5(Instr format, int index) {
7872 VIXL_ASSERT(IsUint4(index));
7873 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7874 int imm5 = (index << (s + 1)) | (1 << s);
7875 return imm5 << ImmNEON5_offset;
7876 }
7877
ImmNEON4(Instr format,int index)7878 static Instr ImmNEON4(Instr format, int index) {
7879 VIXL_ASSERT(IsUint4(index));
7880 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7881 int imm4 = index << s;
7882 return imm4 << ImmNEON4_offset;
7883 }
7884
ImmNEONabcdefgh(int imm8)7885 static Instr ImmNEONabcdefgh(int imm8) {
7886 VIXL_ASSERT(IsUint8(imm8));
7887 Instr instr;
7888 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
7889 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
7890 return instr;
7891 }
7892
NEONCmode(int cmode)7893 static Instr NEONCmode(int cmode) {
7894 VIXL_ASSERT(IsUint4(cmode));
7895 return cmode << NEONCmode_offset;
7896 }
7897
NEONModImmOp(int op)7898 static Instr NEONModImmOp(int op) {
7899 VIXL_ASSERT(IsUint1(op));
7900 return op << NEONModImmOp_offset;
7901 }
7902
  // Return the size (in bytes) of the code generated between `label` and the
  // current buffer position. The label must already be bound.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  // Deprecated alias; use GetSizeOfCodeGeneratedSince() instead.
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }
7912
  // Deprecated buffer accessors; query the code buffer directly instead.
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
7929
  // Return the position-independent-code option this assembler was
  // configured with.
  PositionIndependentCodeOption GetPic() const { return pic_; }
  // Deprecated alias; use GetPic() instead.
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }
7934
  // Return a mutable pointer to the CPU features this assembler checks
  // against when emitting instructions.
  CPUFeatures* GetCPUFeatures() { return &cpu_features_; }
7936
  // Replace the set of CPU features this assembler checks against.
  void SetCPUFeatures(const CPUFeatures& cpu_features) {
    cpu_features_ = cpu_features;
  }
7940
AllowPageOffsetDependentCode()7941 bool AllowPageOffsetDependentCode() const {
7942 return (GetPic() == PageOffsetDependentCode) ||
7943 (GetPic() == PositionDependentCode);
7944 }
7945
AppropriateZeroRegFor(const CPURegister & reg)7946 static Register AppropriateZeroRegFor(const CPURegister& reg) {
7947 return reg.Is64Bits() ? Register(xzr) : Register(wzr);
7948 }
7949
 protected:
  // Emit a scalar load or store of `rt` at `addr`, with optional control
  // over immediate-offset scaling.
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  // Emit a pointer-authentication load/store of `xt` at `addr`.
  void LoadStorePAC(const Register& xt,
                    const MemOperand& addr,
                    LoadStorePACOp op);

  // Emit a load/store pair of `rt` and `rt2` at `addr`.
  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  // NEON multi-structure load/store helpers.
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  // NEON single-structure load/store helpers (one lane, or replicated to
  // all lanes).
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  // Debug-mode consistency checks for structure load/store operands.
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);
7980
  // Encode an SVE memory operand into instruction bits.
  // Set `is_load` to false in default as it's only used in the
  // scalar-plus-vector form.
  Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                            int num_regs,
                            const SVEMemOperand& addr,
                            bool is_load = false);

  // E.g. st1b, st1h, ...
  // This supports both contiguous and scatter stores.
  void SVESt1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegister& pg,
                    const SVEMemOperand& addr);

  // E.g. ld1b, ld1h, ...
  // This supports both contiguous and gather loads.
  void SVELd1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegisterZ& pg,
                    const SVEMemOperand& addr,
                    bool is_signed);

  // E.g. ld1rb, ld1rh, ...
  void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
                             const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr,
                             bool is_signed);

  // E.g. ldff1b, ldff1h, ...
  // This supports both contiguous and gather loads.
  void SVELdff1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr,
                      bool is_signed);

  // Common code for the helpers above.
  void SVELdSt1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegister& pg,
                      const SVEMemOperand& addr,
                      bool is_signed,
                      Instr op);

  // Common code for the helpers above.
  void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
                              const ZRegister& zt,
                              const PRegister& pg,
                              const SVEMemOperand& addr,
                              bool is_load,
                              bool is_signed,
                              bool is_first_fault);

  // E.g. st2b, st3h, ...
  void SVESt234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegister& pg,
                      const SVEMemOperand& addr);

  // E.g. ld2b, ld3h, ...
  void SVELd234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr);

  // Common code for the helpers above.
  void SVELdSt234Helper(int num_regs,
                        const ZRegister& zt1,
                        const PRegister& pg,
                        const SVEMemOperand& addr,
                        Instr op);

  // E.g. ld1qb, ld1qh, ldnt1b, ...
  void SVELd1St1ScaImmHelper(const ZRegister& zt,
                             const PRegister& pg,
                             const SVEMemOperand& addr,
                             Instr regoffset_op,
                             Instr immoffset_op,
                             int imm_divisor = 1);

  // Vector-plus-scalar load/store helpers.
  void SVELd1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize,
                          bool is_signed);
  void SVESt1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize);
8071
  // Emit a prefetch (prfm-style) for `addr`; the `int op` overload accepts
  // an unallocated hint value.
  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
  void Prefetch(int op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
  // reports a bogus uninitialised warning then.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);

  // SVE bitwise-logical with an immediate encoded as a bitmask pattern.
  void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);

  // Scalar logical operation with a pre-encoded bitmask immediate
  // (n:imm_s:imm_r fields).
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);

  // SVE integer compares: vector-vs-vector, and vector-vs-signed/unsigned
  // immediate forms.
  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      const ZRegister& zm,
                      SVEIntCompareVectorsOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      int imm,
                      SVEIntCompareSignedImmOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      unsigned imm,
                      SVEIntCompareUnsignedImmOp op);

  void SVEIntAddSubtractImmUnpredicatedHelper(
      SVEIntAddSubtractImm_UnpredicatedOp op,
      const ZRegister& zd,
      int imm8,
      int shift);

  void SVEElementCountToRegisterHelper(Instr op,
                                       const Register& rd,
                                       int pattern,
                                       int multiplier);

  // Encode SVE shift immediates; left and right shifts use different
  // encodings.
  Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits);

  Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits);

  void SVEBitwiseShiftImmediate(const ZRegister& zd,
                                const ZRegister& zn,
                                Instr encoded_imm,
                                Instr op);

  void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
                                    const PRegisterM& pg,
                                    Instr encoded_imm,
                                    Instr op);

  // Select the lane-size-specific opcode (op_h/op_s/op_d) and encode the
  // indexed multiplicand for SVE multiply-by-element forms.
  Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
                          const ZRegister& zm,
                          int index,
                          Instr op_h,
                          Instr op_s,
                          Instr op_d);

  Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);

  Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index);

  // SVE prefetch helpers for the various addressing modes.
  void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  // Common dispatcher for the SVE prefetch helpers above.
  void SVEPrefetchHelper(PrefetchOperation prfop,
                         const PRegister& pg,
                         const SVEMemOperand& addr,
                         int prefetch_size);
8185
SVEImmPrefetchOperation(PrefetchOperation prfop)8186 static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
8187 // SVE only supports PLD and PST, not PLI.
8188 VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
8189 ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
8190 // Check that we can simply map bits.
8191 VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
8192 VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
8193 // Remaining operations map directly.
8194 return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
8195 }
8196
  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // Table lookup (tbl/tbx-style) helper.
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
8233
  // Convenience pass-through for CPU feature checks; forwards up to four
  // features (kNone placeholders ignored) to cpu_features_.Has().
  bool CPUHas(CPUFeatures::Feature feature0,
              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
    return cpu_features_.Has(feature0, feature1, feature2, feature3);
  }
8241
  // Determine whether the target CPU has the specified registers, based on the
  // currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the same
  // entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  // As above, but for a system register.
  bool CPUHas(SystemRegister sysreg) const;
8253
 private:
  // Convert an FP value to its 8-bit immediate encoding. The value must be
  // representable in that form.
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);
8258
  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON across-lanes (reduction) helpers.
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  // NEON modified-immediate helpers (LSL and MSL shift variants).
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  // NEON three-same-operand helpers (integer, FP16 and FP forms).
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  // NEON three-different-operand helpers: long (L), wide (W) and
  // high-narrow (HN) forms.
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  // NEON two-register-miscellaneous helpers.
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  // NEON by-indexed-element helpers.
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  // NEON shift-by-immediate helpers.
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
  // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
  void ResolveSVEImm8Shift(int* imm8, int* shift);
8400
  // Encode the addressing-mode field for NEON structure load/stores.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size_in_bytes_log2,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
8422
8423 // Emit the instruction in buffer_.
  // Emit the instruction in buffer_. Instructions are always 32 bits wide on
  // AArch64, and emitting is only legal while the assembler is allowed to
  // generate code.
  void Emit(Instr instruction) {
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }
8429
  // Position-independence requirement for the generated code.
  PositionIndependentCodeOption pic_;

  // CPU features the emitted code is checked against (see CPUHas).
  CPUFeatures cpu_features_;
8433 };
8434
8435
template <typename T>
void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
  // Forward to the buffer-address overload, using the start of the
  // assembler's code buffer as the patch base address.
  return UpdateValue(new_value,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}
8441
8442
template <typename T>
void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
  // 128-bit variant: forward both halves to the buffer-address overload.
  return UpdateValue(high64,
                     low64,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}
8449
8450
8451 } // namespace aarch64
8452
8453 // Required InvalSet template specialisations.
8454 // TODO: These template specialisations should not live in this file. Move
8455 // Label out of the aarch64 namespace in order to share its implementation
8456 // later.
// Template parameters for the InvalSet used to track Label links.
#define INVAL_SET_TEMPLATE_PARAMETERS \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t, \
  aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
  aarch64::Label::kReclaimFactor
// Elements are raw ptrdiff_t offsets, so each element is its own key.
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS
8472
8473 } // namespace vixl
8474
8475 #endif // VIXL_AARCH64_ASSEMBLER_AARCH64_H_
8476