1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
28 #define VIXL_AARCH64_ASSEMBLER_AARCH64_H_
29
30 #include "../assembler-base-vixl.h"
31 #include "../code-generation-scopes-vixl.h"
32 #include "../cpu-features.h"
33 #include "../globals-vixl.h"
34 #include "../invalset-vixl.h"
35 #include "../utils-vixl.h"
36 #include "operands-aarch64.h"
37
38 namespace vixl {
39 namespace aarch64 {
40
41 class LabelTestHelper; // Forward declaration.
42
43
// A label represents a position (an offset from the start of the code buffer)
// that branch and PC-relative instructions can target. A label starts out
// unbound; instructions referring to it before it is bound are recorded as
// "links" and patched once the label's location is known.
class Label {
 public:
  // A default-constructed label is unbound and has no links.
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  // A label is bound once it has been given a location in the buffer.
  bool IsBound() const { return location_ >= 0; }
  // A label is linked while at least one referring instruction is unresolved.
  bool IsLinked() const { return !links_.empty(); }

  // Return the label's offset from the start of the buffer. Only meaningful
  // once the label is bound; otherwise this is kLocationUnbound.
  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  // Parameters for the InvalSet used to track links to this label.
  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor>
      LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  // Give the label a definitive location in the buffer.
  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  // Record the offset of an instruction that refers to this (unbound) label.
  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  // Forget a single link (used when a veneer takes over the branch).
  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, ie. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

  // It is not safe to copy labels, so disable the copy constructor and operator
  // by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
159
160
161 class Assembler;
162 class LiteralPool;
163
164 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
165 // stream and loaded through a pc relative load. The same literal can be
166 // referred to by multiple instructions but a literal can only reside at one
167 // place in memory. A literal can be used by a load before or after being
168 // placed in memory.
169 //
170 // Internally an offset of 0 is associated with a literal which has been
171 // neither used nor placed. Then two possibilities arise:
172 // 1) the label is placed, the offset (stored as offset + 1) is used to
173 // resolve any subsequent load using the label.
174 // 2) the label is not placed and offset is the offset of the last load using
175 // the literal (stored as -offset -1). If multiple loads refer to this
176 // literal then the last load holds the offset of the preceding load and
177 // all loads form a chain. Once the offset is placed all the loads in the
178 // chain are resolved and future loads fall back to possibility 1.
class RawLiteral {
 public:
  // Controls who is responsible for deleting the literal object.
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  // Size of the literal in bytes; restricted to W, X or Q register sizes.
  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  // Accessors for the raw bit pattern. Each one asserts that the literal
  // actually has the corresponding size.
  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  // See the class-level comment: negative offsets encode "used but not yet
  // placed", positive offsets encode "placed".
  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  // Offset of the literal from the start of the buffer. Only valid once the
  // literal has been placed.
  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  // Mark the literal as placed at `offset`; stored biased by +1 so that zero
  // remains the "neither used nor placed" state.
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  // Offset of the most recent load using this (not yet placed) literal; the
  // loads form a chain through these offsets.
  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  // Record a use at `offset`; stored as -offset - 1 so it is distinguishable
  // from both the "unused" (0) and "placed" (> 0) states.
  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  // Size in bytes (W, X or Q register size).
  size_t size_;
  // Encodes both placement state and an offset:
  //   0            : neither used nor placed,
  //   offset + 1   : placed at `offset`,
  //   -offset - 1  : not placed; `offset` is the last load using the literal.
  ptrdiff_t offset_;
  // Raw bit pattern; high64_ is only meaningful for 128-bit literals.
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};
289
290
// Typed wrapper around RawLiteral: stores a value of type T (or a 128-bit
// value as a high/low pair) as a raw bit pattern.
template <typename T>
class Literal : public RawLiteral {
 public:
  // 32- or 64-bit literal holding `value`.
  explicit Literal(T value,
                   LiteralPool* literal_pool = NULL,
                   RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(sizeof(value), literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
    UpdateValue(value);
  }

  // 128-bit literal built from two 64-bit halves.
  Literal(T high64,
          T low64,
          LiteralPool* literal_pool = NULL,
          RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
    UpdateValue(high64, low64);
  }

  virtual ~Literal() {}

  // Update the value of this literal, if necessary by rewriting the value in
  // the pool.
  // If the literal has already been placed in a literal pool, the address of
  // the start of the code buffer must be provided, as the literal only knows it
  // offset from there. This also allows patching the value after the code has
  // been moved in memory.
  void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(new_value) == size_);
    // memcpy rather than a cast: T may be a floating-point type, and this
    // captures its bit pattern without any value conversion.
    memcpy(&low64_, &new_value, sizeof(new_value));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  // 128-bit variant of the above.
  void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(low64) == size_ / 2);
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  // Convenience overloads that obtain the code buffer from the assembler
  // (defined elsewhere).
  void UpdateValue(T new_value, const Assembler* assembler);
  void UpdateValue(T high64, T low64, const Assembler* assembler);

 private:
  // Patch the already-emitted copy of the literal inside the code buffer.
  void RewriteValueInCode(uint8_t* code_buffer) {
    VIXL_ASSERT(IsPlaced());
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    switch (GetSize()) {
      case kSRegSizeInBytes:
        *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
            GetRawValue32();
        break;
      case kDRegSizeInBytes:
        *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
            GetRawValue64();
        break;
      default:
        // 128-bit literal: write the two 64-bit halves, low first.
        VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
        uint64_t* base_address =
            reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
        *base_address = GetRawValue128Low64();
        *(base_address + 1) = GetRawValue128High64();
    }
  }
};
363
364
365 // Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
383
384
385 // Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
401
402
403 // Assembler.
404 class Assembler : public vixl::internal::AssemblerBase {
405 public:
  // Construct an Assembler with no backing buffer; one must be provided (or
  // grown) by the AssemblerBase before code is emitted.
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  // Construct an Assembler owning a buffer of `capacity` bytes.
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  // Construct an Assembler emitting into an externally-owned `buffer`.
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
421
  // Upon destruction, the code will assert that one of the following is true:
  // * The Assembler object has not been used.
  // * Nothing has been emitted since the last Reset() call.
  // * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}
427
428 // System functions.
429
430 // Start generating code from the beginning of the buffer, discarding any code
431 // and data that has already been emitted into the buffer.
432 void Reset();
433
434 // Bind a label to the current PC.
435 void bind(Label* label);
436
437 // Bind a label to a specified offset from the start of the buffer.
438 void BindToOffset(Label* label, ptrdiff_t offset);
439
440 // Place a literal at the current PC.
441 void place(RawLiteral* literal);
442
443 VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
444 return GetCursorOffset();
445 }
446
447 VIXL_DEPRECATED("GetBuffer().GetCapacity()",
448 ptrdiff_t GetBufferEndOffset() const) {
449 return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
450 }
451 VIXL_DEPRECATED("GetBuffer().GetCapacity()",
452 ptrdiff_t BufferEndOffset() const) {
453 return GetBuffer().GetCapacity();
454 }
455
456 // Return the address of a bound label.
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    // Only bound labels have a meaningful location.
    VIXL_ASSERT(label->IsBound());
    // T must be wide enough to hold a pointer.
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }
463
  // Translate a byte offset from the start of the buffer into a pointer to
  // the instruction at that offset.
  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
467 VIXL_DEPRECATED("GetInstructionAt",
468 Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
469 return GetInstructionAt(instruction_offset);
470 }
471
GetInstructionOffset(Instruction * instruction)472 ptrdiff_t GetInstructionOffset(Instruction* instruction) {
473 VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
474 ptrdiff_t offset =
475 instruction - GetBuffer()->GetStartAddress<Instruction*>();
476 VIXL_ASSERT((0 <= offset) &&
477 (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
478 return offset;
479 }
480 VIXL_DEPRECATED("GetInstructionOffset",
481 ptrdiff_t InstructionOffset(Instruction* instruction)) {
482 return GetInstructionOffset(instruction);
483 }
484
485 // Instruction set functions.
486
487 // Branch / Jump instructions.
488
489 // Branch to register.
490 void br(const Register& xn);
491
492 // Branch with link to register.
493 void blr(const Register& xn);
494
495 // Branch to register with return hint.
496 void ret(const Register& xn = lr);
497
498 // Branch to register, with pointer authentication. Using key A and a modifier
499 // of zero [Armv8.3].
500 void braaz(const Register& xn);
501
502 // Branch to register, with pointer authentication. Using key B and a modifier
503 // of zero [Armv8.3].
504 void brabz(const Register& xn);
505
506 // Branch with link to register, with pointer authentication. Using key A and
507 // a modifier of zero [Armv8.3].
508 void blraaz(const Register& xn);
509
510 // Branch with link to register, with pointer authentication. Using key B and
511 // a modifier of zero [Armv8.3].
512 void blrabz(const Register& xn);
513
514 // Return from subroutine, with pointer authentication. Using key A [Armv8.3].
515 void retaa();
516
517 // Return from subroutine, with pointer authentication. Using key B [Armv8.3].
518 void retab();
519
520 // Branch to register, with pointer authentication. Using key A [Armv8.3].
521 void braa(const Register& xn, const Register& xm);
522
523 // Branch to register, with pointer authentication. Using key B [Armv8.3].
524 void brab(const Register& xn, const Register& xm);
525
526 // Branch with link to register, with pointer authentication. Using key A
527 // [Armv8.3].
528 void blraa(const Register& xn, const Register& xm);
529
530 // Branch with link to register, with pointer authentication. Using key B
531 // [Armv8.3].
532 void blrab(const Register& xn, const Register& xm);
533
534 // Unconditional branch to label.
535 void b(Label* label);
536
537 // Conditional branch to label.
538 void b(Label* label, Condition cond);
539
540 // Unconditional branch to PC offset.
541 void b(int64_t imm26);
542
543 // Conditional branch to PC offset.
544 void b(int64_t imm19, Condition cond);
545
546 // Branch with link to label.
547 void bl(Label* label);
548
549 // Branch with link to PC offset.
550 void bl(int64_t imm26);
551
552 // Compare and branch to label if zero.
553 void cbz(const Register& rt, Label* label);
554
555 // Compare and branch to PC offset if zero.
556 void cbz(const Register& rt, int64_t imm19);
557
558 // Compare and branch to label if not zero.
559 void cbnz(const Register& rt, Label* label);
560
561 // Compare and branch to PC offset if not zero.
562 void cbnz(const Register& rt, int64_t imm19);
563
564 // Table lookup from one register.
565 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
566
567 // Table lookup from two registers.
568 void tbl(const VRegister& vd,
569 const VRegister& vn,
570 const VRegister& vn2,
571 const VRegister& vm);
572
573 // Table lookup from three registers.
574 void tbl(const VRegister& vd,
575 const VRegister& vn,
576 const VRegister& vn2,
577 const VRegister& vn3,
578 const VRegister& vm);
579
580 // Table lookup from four registers.
581 void tbl(const VRegister& vd,
582 const VRegister& vn,
583 const VRegister& vn2,
584 const VRegister& vn3,
585 const VRegister& vn4,
586 const VRegister& vm);
587
588 // Table lookup extension from one register.
589 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
590
591 // Table lookup extension from two registers.
592 void tbx(const VRegister& vd,
593 const VRegister& vn,
594 const VRegister& vn2,
595 const VRegister& vm);
596
597 // Table lookup extension from three registers.
598 void tbx(const VRegister& vd,
599 const VRegister& vn,
600 const VRegister& vn2,
601 const VRegister& vn3,
602 const VRegister& vm);
603
604 // Table lookup extension from four registers.
605 void tbx(const VRegister& vd,
606 const VRegister& vn,
607 const VRegister& vn2,
608 const VRegister& vn3,
609 const VRegister& vn4,
610 const VRegister& vm);
611
612 // Test bit and branch to label if zero.
613 void tbz(const Register& rt, unsigned bit_pos, Label* label);
614
615 // Test bit and branch to PC offset if zero.
616 void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);
617
618 // Test bit and branch to label if not zero.
619 void tbnz(const Register& rt, unsigned bit_pos, Label* label);
620
621 // Test bit and branch to PC offset if not zero.
622 void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);
623
624 // Address calculation instructions.
625 // Calculate a PC-relative address. Unlike for branches the offset in adr is
626 // unscaled (i.e. the result can be unaligned).
627
628 // Calculate the address of a label.
629 void adr(const Register& xd, Label* label);
630
631 // Calculate the address of a PC offset.
632 void adr(const Register& xd, int64_t imm21);
633
634 // Calculate the page address of a label.
635 void adrp(const Register& xd, Label* label);
636
637 // Calculate the page address of a PC offset.
638 void adrp(const Register& xd, int64_t imm21);
639
640 // Data Processing instructions.
641
642 // Add.
643 void add(const Register& rd, const Register& rn, const Operand& operand);
644
645 // Add and update status flags.
646 void adds(const Register& rd, const Register& rn, const Operand& operand);
647
648 // Compare negative.
649 void cmn(const Register& rn, const Operand& operand);
650
651 // Subtract.
652 void sub(const Register& rd, const Register& rn, const Operand& operand);
653
654 // Subtract and update status flags.
655 void subs(const Register& rd, const Register& rn, const Operand& operand);
656
657 // Compare.
658 void cmp(const Register& rn, const Operand& operand);
659
660 // Negate.
661 void neg(const Register& rd, const Operand& operand);
662
663 // Negate and update status flags.
664 void negs(const Register& rd, const Operand& operand);
665
666 // Add with carry bit.
667 void adc(const Register& rd, const Register& rn, const Operand& operand);
668
669 // Add with carry bit and update status flags.
670 void adcs(const Register& rd, const Register& rn, const Operand& operand);
671
672 // Subtract with carry bit.
673 void sbc(const Register& rd, const Register& rn, const Operand& operand);
674
675 // Subtract with carry bit and update status flags.
676 void sbcs(const Register& rd, const Register& rn, const Operand& operand);
677
678 // Rotate register right and insert into NZCV flags under the control of a
679 // mask [Armv8.4].
680 void rmif(const Register& xn, unsigned rotation, StatusFlags flags);
681
682 // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
683 void setf8(const Register& rn);
684
685 // Set NZCV flags from register, treated as an 16-bit value [Armv8.4].
686 void setf16(const Register& rn);
687
688 // Negate with carry bit.
689 void ngc(const Register& rd, const Operand& operand);
690
691 // Negate with carry bit and update status flags.
692 void ngcs(const Register& rd, const Operand& operand);
693
694 // Logical instructions.
695
696 // Bitwise and (A & B).
697 void and_(const Register& rd, const Register& rn, const Operand& operand);
698
699 // Bitwise and (A & B) and update status flags.
700 void ands(const Register& rd, const Register& rn, const Operand& operand);
701
702 // Bit test and set flags.
703 void tst(const Register& rn, const Operand& operand);
704
705 // Bit clear (A & ~B).
706 void bic(const Register& rd, const Register& rn, const Operand& operand);
707
708 // Bit clear (A & ~B) and update status flags.
709 void bics(const Register& rd, const Register& rn, const Operand& operand);
710
711 // Bitwise or (A | B).
712 void orr(const Register& rd, const Register& rn, const Operand& operand);
713
714 // Bitwise nor (A | ~B).
715 void orn(const Register& rd, const Register& rn, const Operand& operand);
716
717 // Bitwise eor/xor (A ^ B).
718 void eor(const Register& rd, const Register& rn, const Operand& operand);
719
720 // Bitwise enor/xnor (A ^ ~B).
721 void eon(const Register& rd, const Register& rn, const Operand& operand);
722
723 // Logical shift left by variable.
724 void lslv(const Register& rd, const Register& rn, const Register& rm);
725
726 // Logical shift right by variable.
727 void lsrv(const Register& rd, const Register& rn, const Register& rm);
728
729 // Arithmetic shift right by variable.
730 void asrv(const Register& rd, const Register& rn, const Register& rm);
731
732 // Rotate right by variable.
733 void rorv(const Register& rd, const Register& rn, const Register& rm);
734
735 // Bitfield instructions.
736
737 // Bitfield move.
738 void bfm(const Register& rd,
739 const Register& rn,
740 unsigned immr,
741 unsigned imms);
742
743 // Signed bitfield move.
744 void sbfm(const Register& rd,
745 const Register& rn,
746 unsigned immr,
747 unsigned imms);
748
749 // Unsigned bitfield move.
750 void ubfm(const Register& rd,
751 const Register& rn,
752 unsigned immr,
753 unsigned imms);
754
755 // Bfm aliases.
756
757 // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)758 void bfi(const Register& rd,
759 const Register& rn,
760 unsigned lsb,
761 unsigned width) {
762 VIXL_ASSERT(width >= 1);
763 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
764 bfm(rd,
765 rn,
766 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
767 width - 1);
768 }
769
770 // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)771 void bfxil(const Register& rd,
772 const Register& rn,
773 unsigned lsb,
774 unsigned width) {
775 VIXL_ASSERT(width >= 1);
776 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
777 bfm(rd, rn, lsb, lsb + width - 1);
778 }
779
780 // Bitfield clear [Armv8.2].
bfc(const Register & rd,unsigned lsb,unsigned width)781 void bfc(const Register& rd, unsigned lsb, unsigned width) {
782 bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
783 }
784
785 // Sbfm aliases.
786
787 // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)788 void asr(const Register& rd, const Register& rn, unsigned shift) {
789 VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
790 sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
791 }
792
793 // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)794 void sbfiz(const Register& rd,
795 const Register& rn,
796 unsigned lsb,
797 unsigned width) {
798 VIXL_ASSERT(width >= 1);
799 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
800 sbfm(rd,
801 rn,
802 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
803 width - 1);
804 }
805
806 // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)807 void sbfx(const Register& rd,
808 const Register& rn,
809 unsigned lsb,
810 unsigned width) {
811 VIXL_ASSERT(width >= 1);
812 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
813 sbfm(rd, rn, lsb, lsb + width - 1);
814 }
815
816 // Signed extend byte.
sxtb(const Register & rd,const Register & rn)817 void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }
818
819 // Signed extend halfword.
sxth(const Register & rd,const Register & rn)820 void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }
821
822 // Signed extend word.
sxtw(const Register & rd,const Register & rn)823 void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
824
825 // Ubfm aliases.
826
827 // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)828 void lsl(const Register& rd, const Register& rn, unsigned shift) {
829 unsigned reg_size = rd.GetSizeInBits();
830 VIXL_ASSERT(shift < reg_size);
831 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
832 }
833
834 // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)835 void lsr(const Register& rd, const Register& rn, unsigned shift) {
836 VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
837 ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
838 }
839
840 // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)841 void ubfiz(const Register& rd,
842 const Register& rn,
843 unsigned lsb,
844 unsigned width) {
845 VIXL_ASSERT(width >= 1);
846 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
847 ubfm(rd,
848 rn,
849 (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
850 width - 1);
851 }
852
853 // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)854 void ubfx(const Register& rd,
855 const Register& rn,
856 unsigned lsb,
857 unsigned width) {
858 VIXL_ASSERT(width >= 1);
859 VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
860 ubfm(rd, rn, lsb, lsb + width - 1);
861 }
862
863 // Unsigned extend byte.
uxtb(const Register & rd,const Register & rn)864 void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }
865
866 // Unsigned extend halfword.
uxth(const Register & rd,const Register & rn)867 void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }
868
869 // Unsigned extend word.
uxtw(const Register & rd,const Register & rn)870 void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
871
872 // Extract.
873 void extr(const Register& rd,
874 const Register& rn,
875 const Register& rm,
876 unsigned lsb);
877
878 // Conditional select: rd = cond ? rn : rm.
879 void csel(const Register& rd,
880 const Register& rn,
881 const Register& rm,
882 Condition cond);
883
884 // Conditional select increment: rd = cond ? rn : rm + 1.
885 void csinc(const Register& rd,
886 const Register& rn,
887 const Register& rm,
888 Condition cond);
889
890 // Conditional select inversion: rd = cond ? rn : ~rm.
891 void csinv(const Register& rd,
892 const Register& rn,
893 const Register& rm,
894 Condition cond);
895
896 // Conditional select negation: rd = cond ? rn : -rm.
897 void csneg(const Register& rd,
898 const Register& rn,
899 const Register& rm,
900 Condition cond);
901
902 // Conditional set: rd = cond ? 1 : 0.
903 void cset(const Register& rd, Condition cond);
904
905 // Conditional set mask: rd = cond ? -1 : 0.
906 void csetm(const Register& rd, Condition cond);
907
908 // Conditional increment: rd = cond ? rn + 1 : rn.
909 void cinc(const Register& rd, const Register& rn, Condition cond);
910
911 // Conditional invert: rd = cond ? ~rn : rn.
912 void cinv(const Register& rd, const Register& rn, Condition cond);
913
914 // Conditional negate: rd = cond ? -rn : rn.
915 void cneg(const Register& rd, const Register& rn, Condition cond);
916
917 // Rotate right.
ror(const Register & rd,const Register & rs,unsigned shift)918 void ror(const Register& rd, const Register& rs, unsigned shift) {
919 extr(rd, rs, rs, shift);
920 }
921
922 // Conditional comparison.
923
924 // Conditional compare negative.
925 void ccmn(const Register& rn,
926 const Operand& operand,
927 StatusFlags nzcv,
928 Condition cond);
929
930 // Conditional compare.
931 void ccmp(const Register& rn,
932 const Operand& operand,
933 StatusFlags nzcv,
934 Condition cond);
935
936 // CRC-32 checksum from byte.
937 void crc32b(const Register& wd, const Register& wn, const Register& wm);
938
939 // CRC-32 checksum from half-word.
940 void crc32h(const Register& wd, const Register& wn, const Register& wm);
941
942 // CRC-32 checksum from word.
943 void crc32w(const Register& wd, const Register& wn, const Register& wm);
944
945 // CRC-32 checksum from double word.
946 void crc32x(const Register& wd, const Register& wn, const Register& xm);
947
948 // CRC-32 C checksum from byte.
949 void crc32cb(const Register& wd, const Register& wn, const Register& wm);
950
951 // CRC-32 C checksum from half-word.
952 void crc32ch(const Register& wd, const Register& wn, const Register& wm);
953
954 // CRC-32 C checksum from word.
955 void crc32cw(const Register& wd, const Register& wn, const Register& wm);
956
  // CRC-32 C checksum from double word.
958 void crc32cx(const Register& wd, const Register& wn, const Register& xm);
959
960 // Multiply.
961 void mul(const Register& rd, const Register& rn, const Register& rm);
962
963 // Negated multiply.
964 void mneg(const Register& rd, const Register& rn, const Register& rm);
965
966 // Signed long multiply: 32 x 32 -> 64-bit.
967 void smull(const Register& xd, const Register& wn, const Register& wm);
968
969 // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
970 void smulh(const Register& xd, const Register& xn, const Register& xm);
971
972 // Multiply and accumulate.
973 void madd(const Register& rd,
974 const Register& rn,
975 const Register& rm,
976 const Register& ra);
977
978 // Multiply and subtract.
979 void msub(const Register& rd,
980 const Register& rn,
981 const Register& rm,
982 const Register& ra);
983
984 // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
985 void smaddl(const Register& xd,
986 const Register& wn,
987 const Register& wm,
988 const Register& xa);
989
990 // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
991 void umaddl(const Register& xd,
992 const Register& wn,
993 const Register& wm,
994 const Register& xa);
995
996 // Unsigned long multiply: 32 x 32 -> 64-bit.
umull(const Register & xd,const Register & wn,const Register & wm)997 void umull(const Register& xd, const Register& wn, const Register& wm) {
998 umaddl(xd, wn, wm, xzr);
999 }
1000
1001 // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1002 void umulh(const Register& xd, const Register& xn, const Register& xm);
1003
1004 // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1005 void smsubl(const Register& xd,
1006 const Register& wn,
1007 const Register& wm,
1008 const Register& xa);
1009
1010 // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1011 void umsubl(const Register& xd,
1012 const Register& wn,
1013 const Register& wm,
1014 const Register& xa);
1015
1016 // Signed integer divide.
1017 void sdiv(const Register& rd, const Register& rn, const Register& rm);
1018
1019 // Unsigned integer divide.
1020 void udiv(const Register& rd, const Register& rn, const Register& rm);
1021
1022 // Bit reverse.
1023 void rbit(const Register& rd, const Register& rn);
1024
1025 // Reverse bytes in 16-bit half words.
1026 void rev16(const Register& rd, const Register& rn);
1027
1028 // Reverse bytes in 32-bit words.
1029 void rev32(const Register& xd, const Register& xn);
1030
1031 // Reverse bytes in 64-bit general purpose register, an alias for rev
1032 // [Armv8.2].
rev64(const Register & xd,const Register & xn)1033 void rev64(const Register& xd, const Register& xn) {
1034 VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
1035 rev(xd, xn);
1036 }
1037
1038 // Reverse bytes.
1039 void rev(const Register& rd, const Register& rn);
1040
1041 // Count leading zeroes.
1042 void clz(const Register& rd, const Register& rn);
1043
1044 // Count leading sign bits.
1045 void cls(const Register& rd, const Register& rn);
1046
1047 // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
1048 void pacia(const Register& xd, const Register& rn);
1049
1050 // Pointer Authentication Code for Instruction address, using key A and a
1051 // modifier of zero [Armv8.3].
1052 void paciza(const Register& xd);
1053
1054 // Pointer Authentication Code for Instruction address, using key A, with
1055 // address in x17 and modifier in x16 [Armv8.3].
1056 void pacia1716();
1057
1058 // Pointer Authentication Code for Instruction address, using key A, with
1059 // address in LR and modifier in SP [Armv8.3].
1060 void paciasp();
1061
1062 // Pointer Authentication Code for Instruction address, using key A, with
1063 // address in LR and a modifier of zero [Armv8.3].
1064 void paciaz();
1065
1066 // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
1067 void pacib(const Register& xd, const Register& xn);
1068
1069 // Pointer Authentication Code for Instruction address, using key B and a
1070 // modifier of zero [Armv8.3].
1071 void pacizb(const Register& xd);
1072
1073 // Pointer Authentication Code for Instruction address, using key B, with
1074 // address in x17 and modifier in x16 [Armv8.3].
1075 void pacib1716();
1076
1077 // Pointer Authentication Code for Instruction address, using key B, with
1078 // address in LR and modifier in SP [Armv8.3].
1079 void pacibsp();
1080
1081 // Pointer Authentication Code for Instruction address, using key B, with
1082 // address in LR and a modifier of zero [Armv8.3].
1083 void pacibz();
1084
1085 // Pointer Authentication Code for Data address, using key A [Armv8.3].
1086 void pacda(const Register& xd, const Register& xn);
1087
1088 // Pointer Authentication Code for Data address, using key A and a modifier of
1089 // zero [Armv8.3].
1090 void pacdza(const Register& xd);
1091
1092 // Pointer Authentication Code for Data address, using key B [Armv8.3].
1093 void pacdb(const Register& xd, const Register& xn);
1094
1095 // Pointer Authentication Code for Data address, using key B and a modifier of
1096 // zero [Armv8.3].
1097 void pacdzb(const Register& xd);
1098
1099 // Pointer Authentication Code, using Generic key [Armv8.3].
1100 void pacga(const Register& xd, const Register& xn, const Register& xm);
1101
1102 // Authenticate Instruction address, using key A [Armv8.3].
1103 void autia(const Register& xd, const Register& xn);
1104
1105 // Authenticate Instruction address, using key A and a modifier of zero
1106 // [Armv8.3].
1107 void autiza(const Register& xd);
1108
1109 // Authenticate Instruction address, using key A, with address in x17 and
1110 // modifier in x16 [Armv8.3].
1111 void autia1716();
1112
1113 // Authenticate Instruction address, using key A, with address in LR and
1114 // modifier in SP [Armv8.3].
1115 void autiasp();
1116
1117 // Authenticate Instruction address, using key A, with address in LR and a
1118 // modifier of zero [Armv8.3].
1119 void autiaz();
1120
1121 // Authenticate Instruction address, using key B [Armv8.3].
1122 void autib(const Register& xd, const Register& xn);
1123
1124 // Authenticate Instruction address, using key B and a modifier of zero
1125 // [Armv8.3].
1126 void autizb(const Register& xd);
1127
1128 // Authenticate Instruction address, using key B, with address in x17 and
1129 // modifier in x16 [Armv8.3].
1130 void autib1716();
1131
1132 // Authenticate Instruction address, using key B, with address in LR and
1133 // modifier in SP [Armv8.3].
1134 void autibsp();
1135
1136 // Authenticate Instruction address, using key B, with address in LR and a
1137 // modifier of zero [Armv8.3].
1138 void autibz();
1139
1140 // Authenticate Data address, using key A [Armv8.3].
1141 void autda(const Register& xd, const Register& xn);
1142
1143 // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
1144 void autdza(const Register& xd);
1145
1146 // Authenticate Data address, using key B [Armv8.3].
1147 void autdb(const Register& xd, const Register& xn);
1148
1149 // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
1150 void autdzb(const Register& xd);
1151
1152 // Strip Pointer Authentication Code of Data address [Armv8.3].
1153 void xpacd(const Register& xd);
1154
1155 // Strip Pointer Authentication Code of Instruction address [Armv8.3].
1156 void xpaci(const Register& xd);
1157
1158 // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
1159 void xpaclri();
1160
1161 // Memory instructions.
1162
1163 // Load integer or FP register.
1164 void ldr(const CPURegister& rt,
1165 const MemOperand& src,
1166 LoadStoreScalingOption option = PreferScaledOffset);
1167
1168 // Store integer or FP register.
1169 void str(const CPURegister& rt,
1170 const MemOperand& dst,
1171 LoadStoreScalingOption option = PreferScaledOffset);
1172
1173 // Load word with sign extension.
1174 void ldrsw(const Register& xt,
1175 const MemOperand& src,
1176 LoadStoreScalingOption option = PreferScaledOffset);
1177
1178 // Load byte.
1179 void ldrb(const Register& rt,
1180 const MemOperand& src,
1181 LoadStoreScalingOption option = PreferScaledOffset);
1182
1183 // Store byte.
1184 void strb(const Register& rt,
1185 const MemOperand& dst,
1186 LoadStoreScalingOption option = PreferScaledOffset);
1187
1188 // Load byte with sign extension.
1189 void ldrsb(const Register& rt,
1190 const MemOperand& src,
1191 LoadStoreScalingOption option = PreferScaledOffset);
1192
1193 // Load half-word.
1194 void ldrh(const Register& rt,
1195 const MemOperand& src,
1196 LoadStoreScalingOption option = PreferScaledOffset);
1197
1198 // Store half-word.
1199 void strh(const Register& rt,
1200 const MemOperand& dst,
1201 LoadStoreScalingOption option = PreferScaledOffset);
1202
1203 // Load half-word with sign extension.
1204 void ldrsh(const Register& rt,
1205 const MemOperand& src,
1206 LoadStoreScalingOption option = PreferScaledOffset);
1207
1208 // Load integer or FP register (with unscaled offset).
1209 void ldur(const CPURegister& rt,
1210 const MemOperand& src,
1211 LoadStoreScalingOption option = PreferUnscaledOffset);
1212
1213 // Store integer or FP register (with unscaled offset).
1214 void stur(const CPURegister& rt,
1215 const MemOperand& src,
1216 LoadStoreScalingOption option = PreferUnscaledOffset);
1217
  // Load word with sign extension (with unscaled offset).
1219 void ldursw(const Register& xt,
1220 const MemOperand& src,
1221 LoadStoreScalingOption option = PreferUnscaledOffset);
1222
1223 // Load byte (with unscaled offset).
1224 void ldurb(const Register& rt,
1225 const MemOperand& src,
1226 LoadStoreScalingOption option = PreferUnscaledOffset);
1227
1228 // Store byte (with unscaled offset).
1229 void sturb(const Register& rt,
1230 const MemOperand& dst,
1231 LoadStoreScalingOption option = PreferUnscaledOffset);
1232
1233 // Load byte with sign extension (and unscaled offset).
1234 void ldursb(const Register& rt,
1235 const MemOperand& src,
1236 LoadStoreScalingOption option = PreferUnscaledOffset);
1237
1238 // Load half-word (with unscaled offset).
1239 void ldurh(const Register& rt,
1240 const MemOperand& src,
1241 LoadStoreScalingOption option = PreferUnscaledOffset);
1242
1243 // Store half-word (with unscaled offset).
1244 void sturh(const Register& rt,
1245 const MemOperand& dst,
1246 LoadStoreScalingOption option = PreferUnscaledOffset);
1247
1248 // Load half-word with sign extension (and unscaled offset).
1249 void ldursh(const Register& rt,
1250 const MemOperand& src,
1251 LoadStoreScalingOption option = PreferUnscaledOffset);
1252
1253 // Load double-word with pointer authentication, using data key A and a
1254 // modifier of zero [Armv8.3].
1255 void ldraa(const Register& xt, const MemOperand& src);
1256
1257 // Load double-word with pointer authentication, using data key B and a
1258 // modifier of zero [Armv8.3].
1259 void ldrab(const Register& xt, const MemOperand& src);
1260
1261 // Load integer or FP register pair.
1262 void ldp(const CPURegister& rt,
1263 const CPURegister& rt2,
1264 const MemOperand& src);
1265
1266 // Store integer or FP register pair.
1267 void stp(const CPURegister& rt,
1268 const CPURegister& rt2,
1269 const MemOperand& dst);
1270
1271 // Load word pair with sign extension.
1272 void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);
1273
1274 // Load integer or FP register pair, non-temporal.
1275 void ldnp(const CPURegister& rt,
1276 const CPURegister& rt2,
1277 const MemOperand& src);
1278
1279 // Store integer or FP register pair, non-temporal.
1280 void stnp(const CPURegister& rt,
1281 const CPURegister& rt2,
1282 const MemOperand& dst);
1283
1284 // Load integer or FP register from literal pool.
1285 void ldr(const CPURegister& rt, RawLiteral* literal);
1286
1287 // Load word with sign extension from literal pool.
1288 void ldrsw(const Register& xt, RawLiteral* literal);
1289
1290 // Load integer or FP register from pc + imm19 << 2.
1291 void ldr(const CPURegister& rt, int64_t imm19);
1292
1293 // Load word with sign extension from pc + imm19 << 2.
1294 void ldrsw(const Register& xt, int64_t imm19);
1295
1296 // Store exclusive byte.
1297 void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1298
1299 // Store exclusive half-word.
1300 void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1301
1302 // Store exclusive register.
1303 void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1304
1305 // Load exclusive byte.
1306 void ldxrb(const Register& rt, const MemOperand& src);
1307
1308 // Load exclusive half-word.
1309 void ldxrh(const Register& rt, const MemOperand& src);
1310
1311 // Load exclusive register.
1312 void ldxr(const Register& rt, const MemOperand& src);
1313
1314 // Store exclusive register pair.
1315 void stxp(const Register& rs,
1316 const Register& rt,
1317 const Register& rt2,
1318 const MemOperand& dst);
1319
1320 // Load exclusive register pair.
1321 void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1322
1323 // Store-release exclusive byte.
1324 void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1325
1326 // Store-release exclusive half-word.
1327 void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1328
1329 // Store-release exclusive register.
1330 void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1331
1332 // Load-acquire exclusive byte.
1333 void ldaxrb(const Register& rt, const MemOperand& src);
1334
1335 // Load-acquire exclusive half-word.
1336 void ldaxrh(const Register& rt, const MemOperand& src);
1337
1338 // Load-acquire exclusive register.
1339 void ldaxr(const Register& rt, const MemOperand& src);
1340
1341 // Store-release exclusive register pair.
1342 void stlxp(const Register& rs,
1343 const Register& rt,
1344 const Register& rt2,
1345 const MemOperand& dst);
1346
1347 // Load-acquire exclusive register pair.
1348 void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1349
1350 // Store-release byte.
1351 void stlrb(const Register& rt, const MemOperand& dst);
1352
1353 // Store-release half-word.
1354 void stlrh(const Register& rt, const MemOperand& dst);
1355
1356 // Store-release register.
1357 void stlr(const Register& rt, const MemOperand& dst);
1358
1359 // Load-acquire byte.
1360 void ldarb(const Register& rt, const MemOperand& src);
1361
1362 // Load-acquire half-word.
1363 void ldarh(const Register& rt, const MemOperand& src);
1364
1365 // Load-acquire register.
1366 void ldar(const Register& rt, const MemOperand& src);
1367
1368 // Store LORelease byte [Armv8.1].
1369 void stllrb(const Register& rt, const MemOperand& dst);
1370
1371 // Store LORelease half-word [Armv8.1].
1372 void stllrh(const Register& rt, const MemOperand& dst);
1373
1374 // Store LORelease register [Armv8.1].
1375 void stllr(const Register& rt, const MemOperand& dst);
1376
  // Load LOAcquire byte [Armv8.1].
1378 void ldlarb(const Register& rt, const MemOperand& src);
1379
  // Load LOAcquire half-word [Armv8.1].
1381 void ldlarh(const Register& rt, const MemOperand& src);
1382
  // Load LOAcquire register [Armv8.1].
1384 void ldlar(const Register& rt, const MemOperand& src);
1385
1386 // Compare and Swap word or doubleword in memory [Armv8.1].
1387 void cas(const Register& rs, const Register& rt, const MemOperand& src);
1388
1389 // Compare and Swap word or doubleword in memory [Armv8.1].
1390 void casa(const Register& rs, const Register& rt, const MemOperand& src);
1391
1392 // Compare and Swap word or doubleword in memory [Armv8.1].
1393 void casl(const Register& rs, const Register& rt, const MemOperand& src);
1394
1395 // Compare and Swap word or doubleword in memory [Armv8.1].
1396 void casal(const Register& rs, const Register& rt, const MemOperand& src);
1397
1398 // Compare and Swap byte in memory [Armv8.1].
1399 void casb(const Register& rs, const Register& rt, const MemOperand& src);
1400
1401 // Compare and Swap byte in memory [Armv8.1].
1402 void casab(const Register& rs, const Register& rt, const MemOperand& src);
1403
1404 // Compare and Swap byte in memory [Armv8.1].
1405 void caslb(const Register& rs, const Register& rt, const MemOperand& src);
1406
1407 // Compare and Swap byte in memory [Armv8.1].
1408 void casalb(const Register& rs, const Register& rt, const MemOperand& src);
1409
1410 // Compare and Swap halfword in memory [Armv8.1].
1411 void cash(const Register& rs, const Register& rt, const MemOperand& src);
1412
1413 // Compare and Swap halfword in memory [Armv8.1].
1414 void casah(const Register& rs, const Register& rt, const MemOperand& src);
1415
1416 // Compare and Swap halfword in memory [Armv8.1].
1417 void caslh(const Register& rs, const Register& rt, const MemOperand& src);
1418
1419 // Compare and Swap halfword in memory [Armv8.1].
1420 void casalh(const Register& rs, const Register& rt, const MemOperand& src);
1421
1422 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1423 void casp(const Register& rs,
1424 const Register& rs2,
1425 const Register& rt,
1426 const Register& rt2,
1427 const MemOperand& src);
1428
1429 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1430 void caspa(const Register& rs,
1431 const Register& rs2,
1432 const Register& rt,
1433 const Register& rt2,
1434 const MemOperand& src);
1435
1436 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1437 void caspl(const Register& rs,
1438 const Register& rs2,
1439 const Register& rt,
1440 const Register& rt2,
1441 const MemOperand& src);
1442
1443 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1444 void caspal(const Register& rs,
1445 const Register& rs2,
1446 const Register& rt,
1447 const Register& rt2,
1448 const MemOperand& src);
1449
1450 // Store-release byte (with unscaled offset) [Armv8.4].
1451 void stlurb(const Register& rt, const MemOperand& dst);
1452
1453 // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
1454 void ldapurb(const Register& rt, const MemOperand& src);
1455
1456 // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
1457 void ldapursb(const Register& rt, const MemOperand& src);
1458
1459 // Store-release half-word (with unscaled offset) [Armv8.4].
1460 void stlurh(const Register& rt, const MemOperand& dst);
1461
1462 // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
1463 void ldapurh(const Register& rt, const MemOperand& src);
1464
1465 // Load-acquire RCpc Register signed half-word (with unscaled offset)
1466 // [Armv8.4].
1467 void ldapursh(const Register& rt, const MemOperand& src);
1468
1469 // Store-release word or double-word (with unscaled offset) [Armv8.4].
1470 void stlur(const Register& rt, const MemOperand& dst);
1471
1472 // Load-acquire RCpc Register word or double-word (with unscaled offset)
1473 // [Armv8.4].
1474 void ldapur(const Register& rt, const MemOperand& src);
1475
1476 // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
1477 void ldapursw(const Register& xt, const MemOperand& src);
1478
1479 // Atomic add on byte in memory [Armv8.1]
1480 void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
1481
1482 // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
1483 void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
1484
1485 // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
1486 void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
1487
1488 // Atomic add on byte in memory, with Load-acquire and Store-release semantics
1489 // [Armv8.1]
1490 void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
1491
1492 // Atomic add on halfword in memory [Armv8.1]
1493 void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
1494
1495 // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
1496 void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
1497
1498 // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
1499 void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
1500
1501 // Atomic add on halfword in memory, with Load-acquire and Store-release
1502 // semantics [Armv8.1]
1503 void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
1504
1505 // Atomic add on word or doubleword in memory [Armv8.1]
1506 void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
1507
1508 // Atomic add on word or doubleword in memory, with Load-acquire semantics
1509 // [Armv8.1]
1510 void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
1511
1512 // Atomic add on word or doubleword in memory, with Store-release semantics
1513 // [Armv8.1]
1514 void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
1515
1516 // Atomic add on word or doubleword in memory, with Load-acquire and
1517 // Store-release semantics [Armv8.1]
1518 void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
1519
1520 // Atomic bit clear on byte in memory [Armv8.1]
1521 void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
1522
1523 // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
1524 void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
1525
1526 // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
1527 void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
1528
1529 // Atomic bit clear on byte in memory, with Load-acquire and Store-release
1530 // semantics [Armv8.1]
1531 void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
1532
1533 // Atomic bit clear on halfword in memory [Armv8.1]
1534 void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
1535
1536 // Atomic bit clear on halfword in memory, with Load-acquire semantics
1537 // [Armv8.1]
1538 void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
1539
1540 // Atomic bit clear on halfword in memory, with Store-release semantics
1541 // [Armv8.1]
1542 void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
1543
1544 // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
1545 // semantics [Armv8.1]
1546 void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
1547
1548 // Atomic bit clear on word or doubleword in memory [Armv8.1]
1549 void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
1550
1551 // Atomic bit clear on word or doubleword in memory, with Load-acquire
1552 // semantics [Armv8.1]
1553 void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
1554
1555 // Atomic bit clear on word or doubleword in memory, with Store-release
1556 // semantics [Armv8.1]
1557 void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
1558
1559 // Atomic bit clear on word or doubleword in memory, with Load-acquire and
1560 // Store-release semantics [Armv8.1]
1561 void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
1562
1563 // Atomic exclusive OR on byte in memory [Armv8.1]
1564 void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
1565
1566 // Atomic exclusive OR on byte in memory, with Load-acquire semantics
1567 // [Armv8.1]
1568 void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
1569
1570 // Atomic exclusive OR on byte in memory, with Store-release semantics
1571 // [Armv8.1]
1572 void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
1573
1574 // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
1575 // semantics [Armv8.1]
1576 void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
1577
1578 // Atomic exclusive OR on halfword in memory [Armv8.1]
1579 void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
1580
1581 // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
1582 // [Armv8.1]
1583 void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
1584
1585 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1586 // [Armv8.1]
1587 void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
1588
1589 // Atomic exclusive OR on halfword in memory, with Load-acquire and
1590 // Store-release semantics [Armv8.1]
1591 void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
1592
1593 // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
1594 void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
1595
1596 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
1597 // semantics [Armv8.1]
1598 void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
1599
1600 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1601 // semantics [Armv8.1]
1602 void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
1603
1604 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
1605 // Store-release semantics [Armv8.1]
1606 void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1607
1608 // Atomic bit set on byte in memory [Armv8.1]
1609 void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
1610
1611 // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
1612 void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
1613
1614 // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
1615 void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
1616
1617 // Atomic bit set on byte in memory, with Load-acquire and Store-release
1618 // semantics [Armv8.1]
1619 void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
1620
1621 // Atomic bit set on halfword in memory [Armv8.1]
1622 void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
1623
1624 // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
1625 void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
1626
1627 // Atomic bit set on halfword in memory, with Store-release semantics
1628 // [Armv8.1]
1629 void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
1630
1631 // Atomic bit set on halfword in memory, with Load-acquire and Store-release
1632 // semantics [Armv8.1]
1633 void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
1634
1635 // Atomic bit set on word or doubleword in memory [Armv8.1]
1636 void ldset(const Register& rs, const Register& rt, const MemOperand& src);
1637
1638 // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
1639 // [Armv8.1]
1640 void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
1641
1642 // Atomic bit set on word or doubleword in memory, with Store-release
1643 // semantics [Armv8.1]
1644 void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
1645
1646 // Atomic bit set on word or doubleword in memory, with Load-acquire and
1647 // Store-release semantics [Armv8.1]
1648 void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
1649
1650 // Atomic signed maximum on byte in memory [Armv8.1]
1651 void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
1652
1653 // Atomic signed maximum on byte in memory, with Load-acquire semantics
1654 // [Armv8.1]
1655 void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
1656
1657 // Atomic signed maximum on byte in memory, with Store-release semantics
1658 // [Armv8.1]
1659 void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1660
1661 // Atomic signed maximum on byte in memory, with Load-acquire and
1662 // Store-release semantics [Armv8.1]
1663 void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1664
1665 // Atomic signed maximum on halfword in memory [Armv8.1]
1666 void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
1667
1668 // Atomic signed maximum on halfword in memory, with Load-acquire semantics
1669 // [Armv8.1]
1670 void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
1671
1672 // Atomic signed maximum on halfword in memory, with Store-release semantics
1673 // [Armv8.1]
1674 void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1675
1676 // Atomic signed maximum on halfword in memory, with Load-acquire and
1677 // Store-release semantics [Armv8.1]
1678 void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1679
1680 // Atomic signed maximum on word or doubleword in memory [Armv8.1]
1681 void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
1682
1683 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1684 // semantics [Armv8.1]
1685 void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
1686
1687 // Atomic signed maximum on word or doubleword in memory, with Store-release
1688 // semantics [Armv8.1]
1689 void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
1690
1691 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1692 // and Store-release semantics [Armv8.1]
1693 void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1694
1695 // Atomic signed minimum on byte in memory [Armv8.1]
1696 void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
1697
1698 // Atomic signed minimum on byte in memory, with Load-acquire semantics
1699 // [Armv8.1]
1700 void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
1701
1702 // Atomic signed minimum on byte in memory, with Store-release semantics
1703 // [Armv8.1]
1704 void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
1705
1706 // Atomic signed minimum on byte in memory, with Load-acquire and
1707 // Store-release semantics [Armv8.1]
1708 void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
1709
1710 // Atomic signed minimum on halfword in memory [Armv8.1]
1711 void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
1712
1713 // Atomic signed minimum on halfword in memory, with Load-acquire semantics
1714 // [Armv8.1]
1715 void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
1716
1717 // Atomic signed minimum on halfword in memory, with Store-release semantics
1718 // [Armv8.1]
1719 void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
1720
1721 // Atomic signed minimum on halfword in memory, with Load-acquire and
1722 // Store-release semantics [Armv8.1]
1723 void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
1724
1725 // Atomic signed minimum on word or doubleword in memory [Armv8.1]
1726 void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
1727
1728 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1729 // semantics [Armv8.1]
1730 void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
1731
1732 // Atomic signed minimum on word or doubleword in memory, with Store-release
1733 // semantics [Armv8.1]
1734 void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
1735
1736 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1737 // and Store-release semantics [Armv8.1]
1738 void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
1739
1740 // Atomic unsigned maximum on byte in memory [Armv8.1]
1741 void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
1742
1743 // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
1744 // [Armv8.1]
1745 void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
1746
1747 // Atomic unsigned maximum on byte in memory, with Store-release semantics
1748 // [Armv8.1]
1749 void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1750
1751 // Atomic unsigned maximum on byte in memory, with Load-acquire and
1752 // Store-release semantics [Armv8.1]
1753 void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1754
1755 // Atomic unsigned maximum on halfword in memory [Armv8.1]
1756 void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
1757
1758 // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
1759 // [Armv8.1]
1760 void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
1761
1762 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1763 // [Armv8.1]
1764 void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1765
1766 // Atomic unsigned maximum on halfword in memory, with Load-acquire and
1767 // Store-release semantics [Armv8.1]
1768 void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1769
1770 // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
1771 void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
1772
1773 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1774 // semantics [Armv8.1]
1775 void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
1776
1777 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1778 // semantics [Armv8.1]
1779 void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
1780
1781 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1782 // and Store-release semantics [Armv8.1]
1783 void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1784
1785 // Atomic unsigned minimum on byte in memory [Armv8.1]
1786 void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
1787
1788 // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
1789 // [Armv8.1]
1790 void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
1791
1792 // Atomic unsigned minimum on byte in memory, with Store-release semantics
1793 // [Armv8.1]
1794 void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
1795
1796 // Atomic unsigned minimum on byte in memory, with Load-acquire and
1797 // Store-release semantics [Armv8.1]
1798 void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
1799
1800 // Atomic unsigned minimum on halfword in memory [Armv8.1]
1801 void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
1802
1803 // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
1804 // [Armv8.1]
1805 void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
1806
1807 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1808 // [Armv8.1]
1809 void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
1810
1811 // Atomic unsigned minimum on halfword in memory, with Load-acquire and
1812 // Store-release semantics [Armv8.1]
1813 void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
1814
1815 // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
1816 void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
1817
1818 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1819 // semantics [Armv8.1]
1820 void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
1821
1822 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
1823 // semantics [Armv8.1]
1824 void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
1825
1826 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1827 // and Store-release semantics [Armv8.1]
1828 void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1829
1830 // Atomic add on byte in memory, without return. [Armv8.1]
1831 void staddb(const Register& rs, const MemOperand& src);
1832
1833 // Atomic add on byte in memory, with Store-release semantics and without
1834 // return. [Armv8.1]
1835 void staddlb(const Register& rs, const MemOperand& src);
1836
1837 // Atomic add on halfword in memory, without return. [Armv8.1]
1838 void staddh(const Register& rs, const MemOperand& src);
1839
1840 // Atomic add on halfword in memory, with Store-release semantics and without
1841 // return. [Armv8.1]
1842 void staddlh(const Register& rs, const MemOperand& src);
1843
1844 // Atomic add on word or doubleword in memory, without return. [Armv8.1]
1845 void stadd(const Register& rs, const MemOperand& src);
1846
1847 // Atomic add on word or doubleword in memory, with Store-release semantics
1848 // and without return. [Armv8.1]
1849 void staddl(const Register& rs, const MemOperand& src);
1850
1851 // Atomic bit clear on byte in memory, without return. [Armv8.1]
1852 void stclrb(const Register& rs, const MemOperand& src);
1853
1854 // Atomic bit clear on byte in memory, with Store-release semantics and
1855 // without return. [Armv8.1]
1856 void stclrlb(const Register& rs, const MemOperand& src);
1857
1858 // Atomic bit clear on halfword in memory, without return. [Armv8.1]
1859 void stclrh(const Register& rs, const MemOperand& src);
1860
1861 // Atomic bit clear on halfword in memory, with Store-release semantics and
1862 // without return. [Armv8.1]
1863 void stclrlh(const Register& rs, const MemOperand& src);
1864
1865 // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
1866 void stclr(const Register& rs, const MemOperand& src);
1867
1868 // Atomic bit clear on word or doubleword in memory, with Store-release
1869 // semantics and without return. [Armv8.1]
1870 void stclrl(const Register& rs, const MemOperand& src);
1871
1872 // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
1873 void steorb(const Register& rs, const MemOperand& src);
1874
1875 // Atomic exclusive OR on byte in memory, with Store-release semantics and
1876 // without return. [Armv8.1]
1877 void steorlb(const Register& rs, const MemOperand& src);
1878
1879 // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
1880 void steorh(const Register& rs, const MemOperand& src);
1881
1882 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1883 // and without return. [Armv8.1]
1884 void steorlh(const Register& rs, const MemOperand& src);
1885
1886 // Atomic exclusive OR on word or doubleword in memory, without return.
1887 // [Armv8.1]
1888 void steor(const Register& rs, const MemOperand& src);
1889
1890 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1891 // semantics and without return. [Armv8.1]
1892 void steorl(const Register& rs, const MemOperand& src);
1893
1894 // Atomic bit set on byte in memory, without return. [Armv8.1]
1895 void stsetb(const Register& rs, const MemOperand& src);
1896
1897 // Atomic bit set on byte in memory, with Store-release semantics and without
1898 // return. [Armv8.1]
1899 void stsetlb(const Register& rs, const MemOperand& src);
1900
1901 // Atomic bit set on halfword in memory, without return. [Armv8.1]
1902 void stseth(const Register& rs, const MemOperand& src);
1903
1904 // Atomic bit set on halfword in memory, with Store-release semantics and
1905 // without return. [Armv8.1]
1906 void stsetlh(const Register& rs, const MemOperand& src);
1907
1908 // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
1909 void stset(const Register& rs, const MemOperand& src);
1910
1911 // Atomic bit set on word or doubleword in memory, with Store-release
1912 // semantics and without return. [Armv8.1]
1913 void stsetl(const Register& rs, const MemOperand& src);
1914
1915 // Atomic signed maximum on byte in memory, without return. [Armv8.1]
1916 void stsmaxb(const Register& rs, const MemOperand& src);
1917
1918 // Atomic signed maximum on byte in memory, with Store-release semantics and
1919 // without return. [Armv8.1]
1920 void stsmaxlb(const Register& rs, const MemOperand& src);
1921
1922 // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
1923 void stsmaxh(const Register& rs, const MemOperand& src);
1924
1925 // Atomic signed maximum on halfword in memory, with Store-release semantics
1926 // and without return. [Armv8.1]
1927 void stsmaxlh(const Register& rs, const MemOperand& src);
1928
1929 // Atomic signed maximum on word or doubleword in memory, without return.
1930 // [Armv8.1]
1931 void stsmax(const Register& rs, const MemOperand& src);
1932
1933 // Atomic signed maximum on word or doubleword in memory, with Store-release
1934 // semantics and without return. [Armv8.1]
1935 void stsmaxl(const Register& rs, const MemOperand& src);
1936
1937 // Atomic signed minimum on byte in memory, without return. [Armv8.1]
1938 void stsminb(const Register& rs, const MemOperand& src);
1939
1940 // Atomic signed minimum on byte in memory, with Store-release semantics and
1941 // without return. [Armv8.1]
1942 void stsminlb(const Register& rs, const MemOperand& src);
1943
1944 // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
1945 void stsminh(const Register& rs, const MemOperand& src);
1946
1947 // Atomic signed minimum on halfword in memory, with Store-release semantics
1948 // and without return. [Armv8.1]
1949 void stsminlh(const Register& rs, const MemOperand& src);
1950
1951 // Atomic signed minimum on word or doubleword in memory, without return.
1952 // [Armv8.1]
1953 void stsmin(const Register& rs, const MemOperand& src);
1954
1955 // Atomic signed minimum on word or doubleword in memory, with Store-release
1956 // semantics and without return. [Armv8.1]
1957 void stsminl(const Register& rs, const MemOperand& src);
1958
1959 // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
1960 void stumaxb(const Register& rs, const MemOperand& src);
1961
1962 // Atomic unsigned maximum on byte in memory, with Store-release semantics and
1963 // without return. [Armv8.1]
1964 void stumaxlb(const Register& rs, const MemOperand& src);
1965
1966 // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
1967 void stumaxh(const Register& rs, const MemOperand& src);
1968
1969 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1970 // and without return. [Armv8.1]
1971 void stumaxlh(const Register& rs, const MemOperand& src);
1972
1973 // Atomic unsigned maximum on word or doubleword in memory, without return.
1974 // [Armv8.1]
1975 void stumax(const Register& rs, const MemOperand& src);
1976
1977 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1978 // semantics and without return. [Armv8.1]
1979 void stumaxl(const Register& rs, const MemOperand& src);
1980
1981 // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
1982 void stuminb(const Register& rs, const MemOperand& src);
1983
1984 // Atomic unsigned minimum on byte in memory, with Store-release semantics and
1985 // without return. [Armv8.1]
1986 void stuminlb(const Register& rs, const MemOperand& src);
1987
1988 // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
1989 void stuminh(const Register& rs, const MemOperand& src);
1990
1991 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1992 // and without return. [Armv8.1]
1993 void stuminlh(const Register& rs, const MemOperand& src);
1994
1995 // Atomic unsigned minimum on word or doubleword in memory, without return.
1996 // [Armv8.1]
1997 void stumin(const Register& rs, const MemOperand& src);
1998
1999 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
2000 // semantics and without return. [Armv8.1]
2001 void stuminl(const Register& rs, const MemOperand& src);
2002
2003 // Swap byte in memory [Armv8.1]
2004 void swpb(const Register& rs, const Register& rt, const MemOperand& src);
2005
2006 // Swap byte in memory, with Load-acquire semantics [Armv8.1]
2007 void swpab(const Register& rs, const Register& rt, const MemOperand& src);
2008
2009 // Swap byte in memory, with Store-release semantics [Armv8.1]
2010 void swplb(const Register& rs, const Register& rt, const MemOperand& src);
2011
2012 // Swap byte in memory, with Load-acquire and Store-release semantics
2013 // [Armv8.1]
2014 void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
2015
2016 // Swap halfword in memory [Armv8.1]
2017 void swph(const Register& rs, const Register& rt, const MemOperand& src);
2018
2019 // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
2020 void swpah(const Register& rs, const Register& rt, const MemOperand& src);
2021
2022 // Swap halfword in memory, with Store-release semantics [Armv8.1]
2023 void swplh(const Register& rs, const Register& rt, const MemOperand& src);
2024
2025 // Swap halfword in memory, with Load-acquire and Store-release semantics
2026 // [Armv8.1]
2027 void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
2028
2029 // Swap word or doubleword in memory [Armv8.1]
2030 void swp(const Register& rs, const Register& rt, const MemOperand& src);
2031
2032 // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
2033 void swpa(const Register& rs, const Register& rt, const MemOperand& src);
2034
2035 // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
2036 void swpl(const Register& rs, const Register& rt, const MemOperand& src);
2037
2038 // Swap word or doubleword in memory, with Load-acquire and Store-release
2039 // semantics [Armv8.1]
2040 void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2041
2042 // Load-Acquire RCpc Register byte [Armv8.3]
2043 void ldaprb(const Register& rt, const MemOperand& src);
2044
2045 // Load-Acquire RCpc Register halfword [Armv8.3]
2046 void ldaprh(const Register& rt, const MemOperand& src);
2047
2048 // Load-Acquire RCpc Register word or doubleword [Armv8.3]
2049 void ldapr(const Register& rt, const MemOperand& src);
2050
2051 // Prefetch memory.
2052 void prfm(PrefetchOperation op,
2053 const MemOperand& addr,
2054 LoadStoreScalingOption option = PreferScaledOffset);
2055
2056 // Prefetch memory (with unscaled offset).
2057 void prfum(PrefetchOperation op,
2058 const MemOperand& addr,
2059 LoadStoreScalingOption option = PreferUnscaledOffset);
2060
2061 // Prefetch memory in the literal pool.
2062 void prfm(PrefetchOperation op, RawLiteral* literal);
2063
2064 // Prefetch from pc + imm19 << 2.
2065 void prfm(PrefetchOperation op, int64_t imm19);
2066
2067 // Prefetch memory (allowing unallocated hints).
2068 void prfm(int op,
2069 const MemOperand& addr,
2070 LoadStoreScalingOption option = PreferScaledOffset);
2071
2072 // Prefetch memory (with unscaled offset, allowing unallocated hints).
2073 void prfum(int op,
2074 const MemOperand& addr,
2075 LoadStoreScalingOption option = PreferUnscaledOffset);
2076
2077 // Prefetch memory in the literal pool (allowing unallocated hints).
2078 void prfm(int op, RawLiteral* literal);
2079
2080 // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
2081 void prfm(int op, int64_t imm19);
2082
2083 // Move instructions. The default shift of -1 indicates that the move
2084 // instruction will calculate an appropriate 16-bit immediate and left shift
2085 // that is equal to the 64-bit immediate argument. If an explicit left shift
2086 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
2087 //
2088 // For movk, an explicit shift can be used to indicate which half word should
2089 // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
2090 // half word with zero, whereas movk(x0, 0, 48) will overwrite the
2091 // most-significant.
2092
  // Move immediate and keep (MOVK).
  //
  // Writes a 16-bit immediate into the half word of `rd` selected by `shift`
  // (0, 16, 32 or 48), leaving the other bits of `rd` unchanged. With the
  // default shift of -1, MoveWide derives an appropriate shift from `imm`,
  // which must then be expressible as a shifted 16-bit value.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }
2097
  // Move inverted immediate (MOVN).
  //
  // `shift` selects the half word position (0, 16, 32 or 48) of the 16-bit
  // immediate; the default of -1 lets MoveWide compute a suitable shift from
  // `imm`. The destination receives the bitwise inverse of the shifted
  // immediate (per the MOVN encoding emitted by MoveWide).
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }
2102
  // Move immediate (MOVZ).
  //
  // Writes a 16-bit immediate, left-shifted by `shift` (0, 16, 32 or 48),
  // zeroing the remaining bits of `rd`. With the default shift of -1,
  // MoveWide derives the shift from `imm`, which must then be expressible as
  // a shifted 16-bit value.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }
2107
2108 // Move immediate, aliases for movz, movn, orr.
mov(const Register & rd,uint64_t imm)2109 void mov(const Register& rd, uint64_t imm) {
2110 if (!OneInstrMoveImmediateHelper(this, rd, imm)) {
2111 VIXL_UNIMPLEMENTED();
2112 }
2113 }
2114
2115 // Misc instructions.
2116
2117 // Monitor debug-mode breakpoint.
2118 void brk(int code);
2119
2120 // Halting debug-mode breakpoint.
2121 void hlt(int code);
2122
2123 // Generate exception targeting EL1.
2124 void svc(int code);
2125
2126 // Generate undefined instruction exception.
2127 void udf(int code);
2128
2129 // Move register to register.
2130 void mov(const Register& rd, const Register& rn);
2131
2132 // Move inverted operand to register.
2133 void mvn(const Register& rd, const Operand& operand);
2134
2135 // System instructions.
2136
2137 // Move to register from system register.
2138 void mrs(const Register& xt, SystemRegister sysreg);
2139
2140 // Move from register to system register.
2141 void msr(SystemRegister sysreg, const Register& xt);
2142
2143 // Invert carry flag [Armv8.4].
2144 void cfinv();
2145
2146 // Convert floating-point condition flags from alternative format to Arm
2147 // format [Armv8.5].
2148 void xaflag();
2149
2150 // Convert floating-point condition flags from Arm format to alternative
2151 // format [Armv8.5].
2152 void axflag();
2153
2154 // System instruction.
2155 void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
2156
2157 // System instruction with pre-encoded op (op1:crn:crm:op2).
2158 void sys(int op, const Register& xt = xzr);
2159
2160 // System data cache operation.
2161 void dc(DataCacheOp op, const Register& rt);
2162
2163 // System instruction cache operation.
2164 void ic(InstructionCacheOp op, const Register& rt);
2165
2166 // System hint (named type).
2167 void hint(SystemHint code);
2168
2169 // System hint (numbered type).
2170 void hint(int imm7);
2171
2172 // Clear exclusive monitor.
2173 void clrex(int imm4 = 0xf);
2174
2175 // Data memory barrier.
2176 void dmb(BarrierDomain domain, BarrierType type);
2177
2178 // Data synchronization barrier.
2179 void dsb(BarrierDomain domain, BarrierType type);
2180
2181 // Instruction synchronization barrier.
2182 void isb();
2183
2184 // Error synchronization barrier.
2185 void esb();
2186
2187 // Conditional speculation dependency barrier.
2188 void csdb();
2189
  // No-op. Emitted as the NOP system hint.
  void nop() { hint(NOP); }
2192
2193 // Branch target identification.
2194 void bti(BranchTargetIdentifier id);
2195
2196 // FP and NEON instructions.
2197
2198 // Move double precision immediate to FP register.
2199 void fmov(const VRegister& vd, double imm);
2200
2201 // Move single precision immediate to FP register.
2202 void fmov(const VRegister& vd, float imm);
2203
2204 // Move half precision immediate to FP register [Armv8.2].
2205 void fmov(const VRegister& vd, Float16 imm);
2206
2207 // Move FP register to register.
2208 void fmov(const Register& rd, const VRegister& fn);
2209
2210 // Move register to FP register.
2211 void fmov(const VRegister& vd, const Register& rn);
2212
2213 // Move FP register to FP register.
2214 void fmov(const VRegister& vd, const VRegister& fn);
2215
2216 // Move 64-bit register to top half of 128-bit FP register.
2217 void fmov(const VRegister& vd, int index, const Register& rn);
2218
2219 // Move top half of 128-bit FP register to 64-bit register.
2220 void fmov(const Register& rd, const VRegister& vn, int index);
2221
2222 // FP add.
2223 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2224
2225 // FP subtract.
2226 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2227
2228 // FP multiply.
2229 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2230
2231 // FP fused multiply-add.
2232 void fmadd(const VRegister& vd,
2233 const VRegister& vn,
2234 const VRegister& vm,
2235 const VRegister& va);
2236
2237 // FP fused multiply-subtract.
2238 void fmsub(const VRegister& vd,
2239 const VRegister& vn,
2240 const VRegister& vm,
2241 const VRegister& va);
2242
2243 // FP fused multiply-add and negate.
2244 void fnmadd(const VRegister& vd,
2245 const VRegister& vn,
2246 const VRegister& vm,
2247 const VRegister& va);
2248
2249 // FP fused multiply-subtract and negate.
2250 void fnmsub(const VRegister& vd,
2251 const VRegister& vn,
2252 const VRegister& vm,
2253 const VRegister& va);
2254
2255 // FP multiply-negate scalar.
2256 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2257
2258 // FP reciprocal exponent scalar.
2259 void frecpx(const VRegister& vd, const VRegister& vn);
2260
2261 // FP divide.
2262 void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2263
2264 // FP maximum.
2265 void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2266
2267 // FP minimum.
2268 void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2269
2270 // FP maximum number.
2271 void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2272
2273 // FP minimum number.
2274 void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2275
2276 // FP absolute.
2277 void fabs(const VRegister& vd, const VRegister& vn);
2278
2279 // FP negate.
2280 void fneg(const VRegister& vd, const VRegister& vn);
2281
2282 // FP square root.
2283 void fsqrt(const VRegister& vd, const VRegister& vn);
2284
2285 // FP round to integer, nearest with ties to away.
2286 void frinta(const VRegister& vd, const VRegister& vn);
2287
2288 // FP round to integer, implicit rounding.
2289 void frinti(const VRegister& vd, const VRegister& vn);
2290
2291 // FP round to integer, toward minus infinity.
2292 void frintm(const VRegister& vd, const VRegister& vn);
2293
2294 // FP round to integer, nearest with ties to even.
2295 void frintn(const VRegister& vd, const VRegister& vn);
2296
2297 // FP round to integer, toward plus infinity.
2298 void frintp(const VRegister& vd, const VRegister& vn);
2299
2300 // FP round to integer, exact, implicit rounding.
2301 void frintx(const VRegister& vd, const VRegister& vn);
2302
2303 // FP round to integer, towards zero.
2304 void frintz(const VRegister& vd, const VRegister& vn);
2305
2306 // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
2307 void frint32x(const VRegister& vd, const VRegister& vn);
2308
2309 // FP round to 32-bit integer, towards zero [Armv8.5].
2310 void frint32z(const VRegister& vd, const VRegister& vn);
2311
2312 // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
2313 void frint64x(const VRegister& vd, const VRegister& vn);
2314
2315 // FP round to 64-bit integer, towards zero [Armv8.5].
2316 void frint64z(const VRegister& vd, const VRegister& vn);
2317
2318 void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);
2319
2320 void FPCompareMacro(const VRegister& vn,
2321 const VRegister& vm,
2322 FPTrapFlags trap);
2323
2324 // FP compare registers.
2325 void fcmp(const VRegister& vn, const VRegister& vm);
2326
2327 // FP compare immediate.
2328 void fcmp(const VRegister& vn, double value);
2329
2330 void FPCCompareMacro(const VRegister& vn,
2331 const VRegister& vm,
2332 StatusFlags nzcv,
2333 Condition cond,
2334 FPTrapFlags trap);
2335
2336 // FP conditional compare.
2337 void fccmp(const VRegister& vn,
2338 const VRegister& vm,
2339 StatusFlags nzcv,
2340 Condition cond);
2341
2342 // FP signaling compare registers.
2343 void fcmpe(const VRegister& vn, const VRegister& vm);
2344
2345 // FP signaling compare immediate.
2346 void fcmpe(const VRegister& vn, double value);
2347
2348 // FP conditional signaling compare.
2349 void fccmpe(const VRegister& vn,
2350 const VRegister& vm,
2351 StatusFlags nzcv,
2352 Condition cond);
2353
2354 // FP conditional select.
2355 void fcsel(const VRegister& vd,
2356 const VRegister& vn,
2357 const VRegister& vm,
2358 Condition cond);
2359
2360 // Common FP Convert functions.
2361 void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
2362 void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2363 void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2364
2365 // FP convert between precisions.
2366 void fcvt(const VRegister& vd, const VRegister& vn);
2367
2368 // FP convert to higher precision.
2369 void fcvtl(const VRegister& vd, const VRegister& vn);
2370
2371 // FP convert to higher precision (second part).
2372 void fcvtl2(const VRegister& vd, const VRegister& vn);
2373
2374 // FP convert to lower precision.
2375 void fcvtn(const VRegister& vd, const VRegister& vn);
2376
2377 // FP convert to lower precision (second part).
2378 void fcvtn2(const VRegister& vd, const VRegister& vn);
2379
2380 // FP convert to lower precision, rounding to odd.
2381 void fcvtxn(const VRegister& vd, const VRegister& vn);
2382
2383 // FP convert to lower precision, rounding to odd (second part).
2384 void fcvtxn2(const VRegister& vd, const VRegister& vn);
2385
2386 // FP convert to signed integer, nearest with ties to away.
2387 void fcvtas(const Register& rd, const VRegister& vn);
2388
2389 // FP convert to unsigned integer, nearest with ties to away.
2390 void fcvtau(const Register& rd, const VRegister& vn);
2391
2392 // FP convert to signed integer, nearest with ties to away.
2393 void fcvtas(const VRegister& vd, const VRegister& vn);
2394
2395 // FP convert to unsigned integer, nearest with ties to away.
2396 void fcvtau(const VRegister& vd, const VRegister& vn);
2397
2398 // FP convert to signed integer, round towards -infinity.
2399 void fcvtms(const Register& rd, const VRegister& vn);
2400
2401 // FP convert to unsigned integer, round towards -infinity.
2402 void fcvtmu(const Register& rd, const VRegister& vn);
2403
2404 // FP convert to signed integer, round towards -infinity.
2405 void fcvtms(const VRegister& vd, const VRegister& vn);
2406
2407 // FP convert to unsigned integer, round towards -infinity.
2408 void fcvtmu(const VRegister& vd, const VRegister& vn);
2409
2410 // FP convert to signed integer, nearest with ties to even.
2411 void fcvtns(const Register& rd, const VRegister& vn);
2412
2413 // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
2414 void fjcvtzs(const Register& rd, const VRegister& vn);
2415
2416 // FP convert to unsigned integer, nearest with ties to even.
2417 void fcvtnu(const Register& rd, const VRegister& vn);
2418
2419 // FP convert to signed integer, nearest with ties to even.
2420 void fcvtns(const VRegister& rd, const VRegister& vn);
2421
2422 // FP convert to unsigned integer, nearest with ties to even.
2423 void fcvtnu(const VRegister& rd, const VRegister& vn);
2424
2425 // FP convert to signed integer or fixed-point, round towards zero.
2426 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2427
2428 // FP convert to unsigned integer or fixed-point, round towards zero.
2429 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2430
2431 // FP convert to signed integer or fixed-point, round towards zero.
2432 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2433
2434 // FP convert to unsigned integer or fixed-point, round towards zero.
2435 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2436
2437 // FP convert to signed integer, round towards +infinity.
2438 void fcvtps(const Register& rd, const VRegister& vn);
2439
2440 // FP convert to unsigned integer, round towards +infinity.
2441 void fcvtpu(const Register& rd, const VRegister& vn);
2442
2443 // FP convert to signed integer, round towards +infinity.
2444 void fcvtps(const VRegister& vd, const VRegister& vn);
2445
2446 // FP convert to unsigned integer, round towards +infinity.
2447 void fcvtpu(const VRegister& vd, const VRegister& vn);
2448
2449 // Convert signed integer or fixed point to FP.
2450 void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2451
2452 // Convert unsigned integer or fixed point to FP.
2453 void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2454
2455 // Convert signed integer or fixed-point to FP.
2456 void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2457
2458 // Convert unsigned integer or fixed-point to FP.
2459 void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2460
  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving sub.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving sub.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pairwise.
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements (scalar form): two-operand overload producing a
  // scalar result from a pair of elements in vn.
  void addp(const VRegister& vd, const VRegister& vn);
2514
  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element. vm_index selects the element of vm that is
  // used for every lane of the result; this convention applies to all of the
  // by-element forms below.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply-sub by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-sub by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-sub by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-sub by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating double long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating double long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-sub by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-sub by element (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);
2649
  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare equal to zero. The immediate `value` must be 0 — it is the only
  // comparison value these encodings support (also true for the four
  // zero-comparison overloads below).
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);

  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2706
  // Bitwise and. (Trailing underscore avoids the C++ `and` keyword.)
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or with immediate: vd |= (imm8 << left_shift), applied per lane.
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register.
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise orn (or with inverted second operand).
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise eor (exclusive or).
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate: vd &= ~(imm8 << left_shift), applied per lane.
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Vector move immediate: imm optionally shifted by shift_amount using
  // the given shift operation.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);
2757
  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not. (Trailing underscore avoids the C++ `not` keyword.)
  void not_(const VRegister& vd, const VRegister& vn);

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);

  // Extract vector from pair of vectors, starting at byte `index` of the
  // vn:vm concatenation.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // Duplicate vector element vn[vn_index] to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar.
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to all lanes of a vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element vn[vn_index] into vd[vd_index].
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element (alias of ins).
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);
2844
  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);
2922
  // Count leading sign bits.
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
  void uxtl(const VRegister& vd, const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);
3009
  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);
3048
  // One-element structure store from one register.
  // NOTE: for all the st1/st2/st3/st4 overloads the MemOperand parameter is
  // the *destination* address, despite being named `src` for uniformity with
  // the ld1..ld4 declarations above.
  void st1(const VRegister& vt, const MemOperand& src);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure store from three registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& src);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);
3107
  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum.
  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd, const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd, const VRegister& vn);

  // Transpose vectors (primary).
  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Transpose vectors (secondary).
  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (primary).
  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (secondary).
  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (primary).
  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (secondary).
  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3191
  // Signed shift right by immediate.
  void sshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (second part).
  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
3263
  // FP reciprocal step.
  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root step.
  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long.
  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long.
  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed dot product [Armv8.2].
  void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply accumulate returning high
  // half [Armv8.1].
  void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned dot product [Armv8.2].
  void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Dot product with unsigned and signed integers (vector).
  void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Dot product with signed and unsigned integers (vector, by element).
  void sudot(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Dot product with unsigned and signed integers (vector, by element).
  void usdot(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);
3384
3385 // Signed saturating rounding doubling multiply subtract returning high half
3386 // [Armv8.1].
3387 void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3388
3389 // Signed saturating doubling multiply element returning high half.
3390 void sqdmulh(const VRegister& vd,
3391 const VRegister& vn,
3392 const VRegister& vm,
3393 int vm_index);
3394
3395 // Signed saturating rounding doubling multiply element returning high half.
3396 void sqrdmulh(const VRegister& vd,
3397 const VRegister& vn,
3398 const VRegister& vm,
3399 int vm_index);
3400
3401 // Signed dot product by element [Armv8.2].
3402 void sdot(const VRegister& vd,
3403 const VRegister& vn,
3404 const VRegister& vm,
3405 int vm_index);
3406
3407 // Signed saturating rounding doubling multiply accumulate element returning
3408 // high half [Armv8.1].
3409 void sqrdmlah(const VRegister& vd,
3410 const VRegister& vn,
3411 const VRegister& vm,
3412 int vm_index);
3413
3414 // Unsigned dot product by element [Armv8.2].
3415 void udot(const VRegister& vd,
3416 const VRegister& vn,
3417 const VRegister& vm,
3418 int vm_index);
3419
3420 // Signed saturating rounding doubling multiply subtract element returning
3421 // high half [Armv8.1].
3422 void sqrdmlsh(const VRegister& vd,
3423 const VRegister& vn,
3424 const VRegister& vm,
3425 int vm_index);
3426
3427 // Unsigned long multiply.
3428 void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3429
3430 // Unsigned long multiply (second part).
3431 void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3432
3433 // Add narrow returning high half.
3434 void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3435
3436 // Add narrow returning high half (second part).
3437 void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3438
3439 // Rounding add narrow returning high half.
3440 void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3441
3442 // Rounding add narrow returning high half (second part).
3443 void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3444
3445 // Subtract narrow returning high half.
3446 void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3447
3448 // Subtract narrow returning high half (second part).
3449 void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3450
3451 // Rounding subtract narrow returning high half.
3452 void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3453
3454 // Rounding subtract narrow returning high half (second part).
3455 void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3456
3457 // FP vector multiply accumulate.
3458 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3459
3460 // FP fused multiply-add long to accumulator.
3461 void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3462
3463 // FP fused multiply-add long to accumulator (second part).
3464 void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3465
3466 // FP fused multiply-add long to accumulator by element.
3467 void fmlal(const VRegister& vd,
3468 const VRegister& vn,
3469 const VRegister& vm,
3470 int vm_index);
3471
3472 // FP fused multiply-add long to accumulator by element (second part).
3473 void fmlal2(const VRegister& vd,
3474 const VRegister& vn,
3475 const VRegister& vm,
3476 int vm_index);
3477
3478 // FP vector multiply subtract.
3479 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3480
3481 // FP fused multiply-subtract long to accumulator.
3482 void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3483
3484 // FP fused multiply-subtract long to accumulator (second part).
3485 void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3486
3487 // FP fused multiply-subtract long to accumulator by element.
3488 void fmlsl(const VRegister& vd,
3489 const VRegister& vn,
3490 const VRegister& vm,
3491 int vm_index);
3492
3493 // FP fused multiply-subtract long to accumulator by element (second part).
3494 void fmlsl2(const VRegister& vd,
3495 const VRegister& vn,
3496 const VRegister& vm,
3497 int vm_index);
3498
3499 // FP vector multiply extended.
3500 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3501
3502 // FP absolute greater than or equal.
3503 void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3504
3505 // FP absolute greater than.
3506 void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3507
3508 // FP multiply by element.
3509 void fmul(const VRegister& vd,
3510 const VRegister& vn,
3511 const VRegister& vm,
3512 int vm_index);
3513
3514 // FP fused multiply-add to accumulator by element.
3515 void fmla(const VRegister& vd,
3516 const VRegister& vn,
3517 const VRegister& vm,
3518 int vm_index);
3519
3520 // FP fused multiply-sub from accumulator by element.
3521 void fmls(const VRegister& vd,
3522 const VRegister& vn,
3523 const VRegister& vm,
3524 int vm_index);
3525
3526 // FP multiply extended by element.
3527 void fmulx(const VRegister& vd,
3528 const VRegister& vn,
3529 const VRegister& vm,
3530 int vm_index);
3531
3532 // FP compare equal.
3533 void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3534
3535 // FP greater than.
3536 void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3537
3538 // FP greater than or equal.
3539 void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3540
3541 // FP compare equal to zero.
3542 void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
3543
3544 // FP greater than zero.
3545 void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
3546
3547 // FP greater than or equal to zero.
3548 void fcmge(const VRegister& vd, const VRegister& vn, double imm);
3549
3550 // FP less than or equal to zero.
3551 void fcmle(const VRegister& vd, const VRegister& vn, double imm);
3552
3553 // FP less than zero.
3554 void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
3555
3556 // FP absolute difference.
3557 void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3558
3559 // FP pairwise add vector.
3560 void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3561
3562 // FP pairwise add scalar.
3563 void faddp(const VRegister& vd, const VRegister& vn);
3564
3565 // FP pairwise maximum vector.
3566 void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3567
3568 // FP pairwise maximum scalar.
3569 void fmaxp(const VRegister& vd, const VRegister& vn);
3570
3571 // FP pairwise minimum vector.
3572 void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3573
3574 // FP pairwise minimum scalar.
3575 void fminp(const VRegister& vd, const VRegister& vn);
3576
3577 // FP pairwise maximum number vector.
3578 void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3579
3580 // FP pairwise maximum number scalar.
3581 void fmaxnmp(const VRegister& vd, const VRegister& vn);
3582
3583 // FP pairwise minimum number vector.
3584 void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3585
3586 // FP pairwise minimum number scalar.
3587 void fminnmp(const VRegister& vd, const VRegister& vn);
3588
3589 // Armv8.3 complex numbers - note that these are only partial/helper
3590 // functions and must be used in series in order to perform full CN
3590 // operations.
3591
3592 // FP complex multiply accumulate (by element) [Armv8.3].
3593 void fcmla(const VRegister& vd,
3594 const VRegister& vn,
3595 const VRegister& vm,
3596 int vm_index,
3597 int rot);
3598
3599 // FP complex multiply accumulate [Armv8.3].
3600 void fcmla(const VRegister& vd,
3601 const VRegister& vn,
3602 const VRegister& vm,
3603 int rot);
3604
3605 // FP complex add [Armv8.3].
3606 void fcadd(const VRegister& vd,
3607 const VRegister& vn,
3608 const VRegister& vm,
3609 int rot);
3610
3611 // Signed 8-bit integer matrix multiply-accumulate (vector).
3612 void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3613
3614 // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
3615 void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3616
3617 // Unsigned 8-bit integer matrix multiply-accumulate (vector).
3618 void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3619
3620 // Scalable Vector Extensions.
3621
3622 // Absolute value (predicated).
3623 void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3624
3625 // Add vectors (predicated).
3626 void add(const ZRegister& zd,
3627 const PRegisterM& pg,
3628 const ZRegister& zn,
3629 const ZRegister& zm);
3630
3631 // Add vectors (unpredicated).
3632 void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3633
3634 // Add immediate (unpredicated).
3635 void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
3636
3637 // Add multiple of predicate register size to scalar register.
3638 void addpl(const Register& xd, const Register& xn, int imm6);
3639
3640 // Add multiple of vector register size to scalar register.
3641 void addvl(const Register& xd, const Register& xn, int imm6);
3642
3643 // Compute vector address.
3644 void adr(const ZRegister& zd, const SVEMemOperand& addr);
3645
3646 // Bitwise AND predicates.
3647 void and_(const PRegisterWithLaneSize& pd,
3648 const PRegisterZ& pg,
3649 const PRegisterWithLaneSize& pn,
3650 const PRegisterWithLaneSize& pm);
3651
3652 // Bitwise AND vectors (predicated).
3653 void and_(const ZRegister& zd,
3654 const PRegisterM& pg,
3655 const ZRegister& zn,
3656 const ZRegister& zm);
3657
3658 // Bitwise AND with immediate (unpredicated).
3659 void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3660
3661 // Bitwise AND vectors (unpredicated).
3662 void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3663
3664 // Bitwise AND predicates, setting the condition flags.
3665 void ands(const PRegisterWithLaneSize& pd,
3666 const PRegisterZ& pg,
3667 const PRegisterWithLaneSize& pn,
3668 const PRegisterWithLaneSize& pm);
3669
3670 // Bitwise AND reduction to scalar.
3671 void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
3672
3673 // Arithmetic shift right by immediate (predicated).
3674 void asr(const ZRegister& zd,
3675 const PRegisterM& pg,
3676 const ZRegister& zn,
3677 int shift);
3678
3679 // Arithmetic shift right by 64-bit wide elements (predicated).
3680 void asr(const ZRegister& zd,
3681 const PRegisterM& pg,
3682 const ZRegister& zn,
3683 const ZRegister& zm);
3684
3685 // Arithmetic shift right by immediate (unpredicated).
3686 void asr(const ZRegister& zd, const ZRegister& zn, int shift);
3687
3688 // Arithmetic shift right by 64-bit wide elements (unpredicated).
3689 void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3690
3691 // Arithmetic shift right for divide by immediate (predicated).
3692 void asrd(const ZRegister& zd,
3693 const PRegisterM& pg,
3694 const ZRegister& zn,
3695 int shift);
3696
3697 // Reversed arithmetic shift right by vector (predicated).
3698 void asrr(const ZRegister& zd,
3699 const PRegisterM& pg,
3700 const ZRegister& zn,
3701 const ZRegister& zm);
3702
3703 // Bitwise clear predicates.
3704 void bic(const PRegisterWithLaneSize& pd,
3705 const PRegisterZ& pg,
3706 const PRegisterWithLaneSize& pn,
3707 const PRegisterWithLaneSize& pm);
3708
3709 // Bitwise clear vectors (predicated).
3710 void bic(const ZRegister& zd,
3711 const PRegisterM& pg,
3712 const ZRegister& zn,
3713 const ZRegister& zm);
3714
3715 // Bitwise clear bits using immediate (unpredicated).
3716 void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3717
3718 // Bitwise clear vectors (unpredicated).
3719 void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3720
3721 // Bitwise clear predicates, setting the condition flags.
3722 void bics(const PRegisterWithLaneSize& pd,
3723 const PRegisterZ& pg,
3724 const PRegisterWithLaneSize& pn,
3725 const PRegisterWithLaneSize& pm);
3726
3727 // Break after first true condition.
3728 void brka(const PRegisterWithLaneSize& pd,
3729 const PRegister& pg,
3730 const PRegisterWithLaneSize& pn);
3731
3732 // Break after first true condition, setting the condition flags.
3733 void brkas(const PRegisterWithLaneSize& pd,
3734 const PRegisterZ& pg,
3735 const PRegisterWithLaneSize& pn);
3736
3737 // Break before first true condition.
3738 void brkb(const PRegisterWithLaneSize& pd,
3739 const PRegister& pg,
3740 const PRegisterWithLaneSize& pn);
3741
3742 // Break before first true condition, setting the condition flags.
3743 void brkbs(const PRegisterWithLaneSize& pd,
3744 const PRegisterZ& pg,
3745 const PRegisterWithLaneSize& pn);
3746
3747 // Propagate break to next partition.
3748 void brkn(const PRegisterWithLaneSize& pd,
3749 const PRegisterZ& pg,
3750 const PRegisterWithLaneSize& pn,
3751 const PRegisterWithLaneSize& pm);
3752
3753 // Propagate break to next partition, setting the condition flags.
3754 void brkns(const PRegisterWithLaneSize& pd,
3755 const PRegisterZ& pg,
3756 const PRegisterWithLaneSize& pn,
3757 const PRegisterWithLaneSize& pm);
3758
3759 // Break after first true condition, propagating from previous partition.
3760 void brkpa(const PRegisterWithLaneSize& pd,
3761 const PRegisterZ& pg,
3762 const PRegisterWithLaneSize& pn,
3763 const PRegisterWithLaneSize& pm);
3764
3765 // Break after first true condition, propagating from previous partition.
3766 void brkpas(const PRegisterWithLaneSize& pd,
3767 const PRegisterZ& pg,
3768 const PRegisterWithLaneSize& pn,
3769 const PRegisterWithLaneSize& pm);
3770
3771 // Break before first true condition, propagating from previous partition.
3772 void brkpb(const PRegisterWithLaneSize& pd,
3773 const PRegisterZ& pg,
3774 const PRegisterWithLaneSize& pn,
3775 const PRegisterWithLaneSize& pm);
3776
3777 // Break before first true condition, propagating from previous partition.
3778 void brkpbs(const PRegisterWithLaneSize& pd,
3779 const PRegisterZ& pg,
3780 const PRegisterWithLaneSize& pn,
3781 const PRegisterWithLaneSize& pm);
3782
3783 // Conditionally extract element after last to general-purpose register.
3784 void clasta(const Register& rd,
3785 const PRegister& pg,
3786 const Register& rn,
3787 const ZRegister& zm);
3788
3789 // Conditionally extract element after last to SIMD&FP scalar register.
3790 void clasta(const VRegister& vd,
3791 const PRegister& pg,
3792 const VRegister& vn,
3793 const ZRegister& zm);
3794
3795 // Conditionally extract element after last to vector register.
3796 void clasta(const ZRegister& zd,
3797 const PRegister& pg,
3798 const ZRegister& zn,
3799 const ZRegister& zm);
3800
3801 // Conditionally extract last element to general-purpose register.
3802 void clastb(const Register& rd,
3803 const PRegister& pg,
3804 const Register& rn,
3805 const ZRegister& zm);
3806
3807 // Conditionally extract last element to SIMD&FP scalar register.
3808 void clastb(const VRegister& vd,
3809 const PRegister& pg,
3810 const VRegister& vn,
3811 const ZRegister& zm);
3812
3813 // Conditionally extract last element to vector register.
3814 void clastb(const ZRegister& zd,
3815 const PRegister& pg,
3816 const ZRegister& zn,
3817 const ZRegister& zm);
3818
3819 // Count leading sign bits (predicated).
3820 void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3821
3822 // Count leading zero bits (predicated).
3823 void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3824
3825 void cmp(Condition cond,
3826 const PRegisterWithLaneSize& pd,
3827 const PRegisterZ& pg,
3828 const ZRegister& zn,
3829 const ZRegister& zm);
3830
3831 // Compare vector to 64-bit wide elements.
3832 void cmpeq(const PRegisterWithLaneSize& pd,
3833 const PRegisterZ& pg,
3834 const ZRegister& zn,
3835 const ZRegister& zm);
3836
3837 // Compare vector to immediate.
3838 void cmpeq(const PRegisterWithLaneSize& pd,
3839 const PRegisterZ& pg,
3840 const ZRegister& zn,
3841 int imm5);
3842
3843 // Compare vector to 64-bit wide elements.
3844 void cmpge(const PRegisterWithLaneSize& pd,
3845 const PRegisterZ& pg,
3846 const ZRegister& zn,
3847 const ZRegister& zm);
3848
3849 // Compare vector to immediate.
3850 void cmpge(const PRegisterWithLaneSize& pd,
3851 const PRegisterZ& pg,
3852 const ZRegister& zn,
3853 int imm5);
3854
3855 // Compare vector to 64-bit wide elements.
3856 void cmpgt(const PRegisterWithLaneSize& pd,
3857 const PRegisterZ& pg,
3858 const ZRegister& zn,
3859 const ZRegister& zm);
3860
3861 // Compare vector to immediate.
3862 void cmpgt(const PRegisterWithLaneSize& pd,
3863 const PRegisterZ& pg,
3864 const ZRegister& zn,
3865 int imm5);
3866
3867 // Compare vector to 64-bit wide elements.
3868 void cmphi(const PRegisterWithLaneSize& pd,
3869 const PRegisterZ& pg,
3870 const ZRegister& zn,
3871 const ZRegister& zm);
3872
3873 // Compare vector to immediate.
3874 void cmphi(const PRegisterWithLaneSize& pd,
3875 const PRegisterZ& pg,
3876 const ZRegister& zn,
3877 unsigned imm7);
3878
3879 // Compare vector to 64-bit wide elements.
3880 void cmphs(const PRegisterWithLaneSize& pd,
3881 const PRegisterZ& pg,
3882 const ZRegister& zn,
3883 const ZRegister& zm);
3884
3885 // Compare vector to immediate.
3886 void cmphs(const PRegisterWithLaneSize& pd,
3887 const PRegisterZ& pg,
3888 const ZRegister& zn,
3889 unsigned imm7);
3890
3891 // Compare vector to 64-bit wide elements.
3892 void cmple(const PRegisterWithLaneSize& pd,
3893 const PRegisterZ& pg,
3894 const ZRegister& zn,
3895 const ZRegister& zm);
3896
3897 // Compare vector to immediate.
3898 void cmple(const PRegisterWithLaneSize& pd,
3899 const PRegisterZ& pg,
3900 const ZRegister& zn,
3901 int imm5);
3902
3903 // Compare vector to 64-bit wide elements.
3904 void cmplo(const PRegisterWithLaneSize& pd,
3905 const PRegisterZ& pg,
3906 const ZRegister& zn,
3907 const ZRegister& zm);
3908
3909 // Compare vector to immediate.
3910 void cmplo(const PRegisterWithLaneSize& pd,
3911 const PRegisterZ& pg,
3912 const ZRegister& zn,
3913 unsigned imm7);
3914
3915 // Compare vector to 64-bit wide elements.
3916 void cmpls(const PRegisterWithLaneSize& pd,
3917 const PRegisterZ& pg,
3918 const ZRegister& zn,
3919 const ZRegister& zm);
3920
3921 // Compare vector to immediate.
3922 void cmpls(const PRegisterWithLaneSize& pd,
3923 const PRegisterZ& pg,
3924 const ZRegister& zn,
3925 unsigned imm7);
3926
3927 // Compare vector to 64-bit wide elements.
3928 void cmplt(const PRegisterWithLaneSize& pd,
3929 const PRegisterZ& pg,
3930 const ZRegister& zn,
3931 const ZRegister& zm);
3932
3933 // Compare vector to immediate.
3934 void cmplt(const PRegisterWithLaneSize& pd,
3935 const PRegisterZ& pg,
3936 const ZRegister& zn,
3937 int imm5);
3938
3939 // Compare vector to 64-bit wide elements.
3940 void cmpne(const PRegisterWithLaneSize& pd,
3941 const PRegisterZ& pg,
3942 const ZRegister& zn,
3943 const ZRegister& zm);
3944
3945 // Compare vector to immediate.
3946 void cmpne(const PRegisterWithLaneSize& pd,
3947 const PRegisterZ& pg,
3948 const ZRegister& zn,
3949 int imm5);
3950
3951 // Logically invert boolean condition in vector (predicated).
3952 void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3953
3954 // Count non-zero bits (predicated).
3955 void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3956
3957 // Set scalar to multiple of predicate constraint element count.
3958 void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3959
3960 // Set scalar to multiple of predicate constraint element count.
3961 void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3962
3963 // Set scalar to multiple of predicate constraint element count.
3964 void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3965
3966 // Set scalar to active predicate element count.
3967 void cntp(const Register& xd,
3968 const PRegister& pg,
3969 const PRegisterWithLaneSize& pn);
3970
3971 // Set scalar to multiple of predicate constraint element count.
3972 void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3973
3974 // Shuffle active elements of vector to the right and fill with zero.
3975 void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
3976
3977 // Copy signed integer immediate to vector elements (predicated).
3978 void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
3979
3980 // Copy general-purpose register to vector elements (predicated).
3981 void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
3982
3983 // Copy SIMD&FP scalar register to vector elements (predicated).
3984 void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
3985
3986 // Compare and terminate loop.
3987 void ctermeq(const Register& rn, const Register& rm);
3988
3989 // Compare and terminate loop.
3990 void ctermne(const Register& rn, const Register& rm);
3991
3992 // Decrement scalar by multiple of predicate constraint element count.
3993 void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
3994
3995 // Decrement scalar by multiple of predicate constraint element count.
3996 void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
3997
3998 // Decrement vector by multiple of predicate constraint element count.
3999 void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4000
4001 // Decrement scalar by multiple of predicate constraint element count.
4002 void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4003
4004 // Decrement vector by multiple of predicate constraint element count.
4005 void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4006
4007 // Decrement scalar by active predicate element count.
4008 void decp(const Register& rdn, const PRegisterWithLaneSize& pg);
4009
4010 // Decrement vector by active predicate element count.
4011 void decp(const ZRegister& zdn, const PRegister& pg);
4012
4013 // Decrement scalar by multiple of predicate constraint element count.
4014 void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4015
4016 // Decrement vector by multiple of predicate constraint element count.
4017 void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4018
4019 // Broadcast general-purpose register to vector elements (unpredicated).
4020 void dup(const ZRegister& zd, const Register& xn);
4021
4022 // Broadcast indexed element to vector (unpredicated).
4023 void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);
4024
4025 // As for movz/movk/movn, if the default shift of -1 is specified to dup, the
4026 // assembler will pick an appropriate immediate and left shift that is
4027 // equivalent to the immediate argument. If an explicit left shift is
4028 // specified (0 or 8), the immediate must be a signed 8-bit integer.
4029
4030 // Broadcast signed immediate to vector elements (unpredicated).
4031 void dup(const ZRegister& zd, int imm8, int shift = -1);
4032
4033 // Broadcast logical bitmask immediate to vector (unpredicated).
4034 void dupm(const ZRegister& zd, uint64_t imm);
4035
4036 // Bitwise exclusive OR with inverted immediate (unpredicated).
4037 void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4038
4039 // Bitwise exclusive OR predicates.
4040 void eor(const PRegisterWithLaneSize& pd,
4041 const PRegisterZ& pg,
4042 const PRegisterWithLaneSize& pn,
4043 const PRegisterWithLaneSize& pm);
4044
4045 // Bitwise exclusive OR vectors (predicated).
4046 void eor(const ZRegister& zd,
4047 const PRegisterM& pg,
4048 const ZRegister& zn,
4049 const ZRegister& zm);
4050
4051 // Bitwise exclusive OR with immediate (unpredicated).
4052 void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4053
4054 // Bitwise exclusive OR vectors (unpredicated).
4055 void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4056
4057 // Bitwise exclusive OR predicates, setting the condition flags.
4058 void eors(const PRegisterWithLaneSize& pd,
4059 const PRegisterZ& pg,
4060 const PRegisterWithLaneSize& pn,
4061 const PRegisterWithLaneSize& pm);
4062
4063 // Bitwise XOR reduction to scalar.
4064 void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4065
4066 // Extract vector from pair of vectors.
4067 void ext(const ZRegister& zd,
4068 const ZRegister& zn,
4069 const ZRegister& zm,
4070 unsigned offset);
4071
4072 // Floating-point absolute difference (predicated).
4073 void fabd(const ZRegister& zd,
4074 const PRegisterM& pg,
4075 const ZRegister& zn,
4076 const ZRegister& zm);
4077
4078 // Floating-point absolute value (predicated).
4079 void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4080
4081 // Floating-point absolute compare vectors.
4082 void facge(const PRegisterWithLaneSize& pd,
4083 const PRegisterZ& pg,
4084 const ZRegister& zn,
4085 const ZRegister& zm);
4086
4087 // Floating-point absolute compare vectors.
4088 void facgt(const PRegisterWithLaneSize& pd,
4089 const PRegisterZ& pg,
4090 const ZRegister& zn,
4091 const ZRegister& zm);
4092
4093 // Floating-point add immediate (predicated).
4094 void fadd(const ZRegister& zd,
4095 const PRegisterM& pg,
4096 const ZRegister& zn,
4097 double imm);
4098
4099 // Floating-point add vector (predicated).
4100 void fadd(const ZRegister& zd,
4101 const PRegisterM& pg,
4102 const ZRegister& zn,
4103 const ZRegister& zm);
4104
4105 // Floating-point add vector (unpredicated).
4106 void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4107
4108 // Floating-point add strictly-ordered reduction, accumulating in scalar.
4109 void fadda(const VRegister& vd,
4110 const PRegister& pg,
4111 const VRegister& vn,
4112 const ZRegister& zm);
4113
4114 // Floating-point add recursive reduction to scalar.
4115 void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4116
4117 // Floating-point complex add with rotate (predicated).
4118 void fcadd(const ZRegister& zd,
4119 const PRegisterM& pg,
4120 const ZRegister& zn,
4121 const ZRegister& zm,
4122 int rot);
4123
4124 // Floating-point compare vector with zero.
4125 void fcmeq(const PRegisterWithLaneSize& pd,
4126 const PRegisterZ& pg,
4127 const ZRegister& zn,
4128 double zero);
4129
4130 // Floating-point compare vectors.
4131 void fcmeq(const PRegisterWithLaneSize& pd,
4132 const PRegisterZ& pg,
4133 const ZRegister& zn,
4134 const ZRegister& zm);
4135
4136 // Floating-point compare vector with zero.
4137 void fcmge(const PRegisterWithLaneSize& pd,
4138 const PRegisterZ& pg,
4139 const ZRegister& zn,
4140 double zero);
4141
4142 // Floating-point compare vectors.
4143 void fcmge(const PRegisterWithLaneSize& pd,
4144 const PRegisterZ& pg,
4145 const ZRegister& zn,
4146 const ZRegister& zm);
4147
4148 // Floating-point compare vector with zero.
4149 void fcmgt(const PRegisterWithLaneSize& pd,
4150 const PRegisterZ& pg,
4151 const ZRegister& zn,
4152 double zero);
4153
4154 // Floating-point compare vectors.
4155 void fcmgt(const PRegisterWithLaneSize& pd,
4156 const PRegisterZ& pg,
4157 const ZRegister& zn,
4158 const ZRegister& zm);
4159
4160 // Floating-point complex multiply-add with rotate (predicated).
4161 void fcmla(const ZRegister& zda,
4162 const PRegisterM& pg,
4163 const ZRegister& zn,
4164 const ZRegister& zm,
4165 int rot);
4166
4167 // Floating-point complex multiply-add by indexed values with rotate.
4168 void fcmla(const ZRegister& zda,
4169 const ZRegister& zn,
4170 const ZRegister& zm,
4171 int index,
4172 int rot);
4173
4174 // Floating-point compare vector with zero.
4175 void fcmle(const PRegisterWithLaneSize& pd,
4176 const PRegisterZ& pg,
4177 const ZRegister& zn,
4178 double zero);
4179
4180 // Floating-point compare vector with zero.
4181 void fcmlt(const PRegisterWithLaneSize& pd,
4182 const PRegisterZ& pg,
4183 const ZRegister& zn,
4184 double zero);
4185
4186 // Floating-point compare vector with zero.
4187 void fcmne(const PRegisterWithLaneSize& pd,
4188 const PRegisterZ& pg,
4189 const ZRegister& zn,
4190 double zero);
4191
4192 // Floating-point compare vectors.
4193 void fcmne(const PRegisterWithLaneSize& pd,
4194 const PRegisterZ& pg,
4195 const ZRegister& zn,
4196 const ZRegister& zm);
4197
4198 // Floating-point compare vectors.
4199 void fcmuo(const PRegisterWithLaneSize& pd,
4200 const PRegisterZ& pg,
4201 const ZRegister& zn,
4202 const ZRegister& zm);
4203
4204 // Copy floating-point immediate to vector elements (predicated).
4205 void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
4206
4207 // Copy half-precision floating-point immediate to vector elements
4208 // (predicated).
fcpy(const ZRegister & zd,const PRegisterM & pg,Float16 imm)4209 void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
4210 fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
4211 }
4212
4213 // Floating-point convert precision (predicated).
4214 void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4215
4216 // Floating-point convert to signed integer, rounding toward zero
4217 // (predicated).
4218 void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4219
4220 // Floating-point convert to unsigned integer, rounding toward zero
4221 // (predicated).
4222 void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4223
4224 // Floating-point divide by vector (predicated).
4225 void fdiv(const ZRegister& zd,
4226 const PRegisterM& pg,
4227 const ZRegister& zn,
4228 const ZRegister& zm);
4229
4230 // Floating-point reversed divide by vector (predicated).
4231 void fdivr(const ZRegister& zd,
4232 const PRegisterM& pg,
4233 const ZRegister& zn,
4234 const ZRegister& zm);
4235
4236 // Broadcast floating-point immediate to vector elements.
4237 void fdup(const ZRegister& zd, double imm);
4238
4239 // Broadcast half-precision floating-point immediate to vector elements.
fdup(const ZRegister & zd,Float16 imm)4240 void fdup(const ZRegister& zd, Float16 imm) {
4241 fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
4242 }
4243
4244 // Floating-point exponential accelerator.
4245 void fexpa(const ZRegister& zd, const ZRegister& zn);
4246
4247 // Floating-point fused multiply-add vectors (predicated), writing
4248 // multiplicand [Zdn = Za + Zdn * Zm].
4249 void fmad(const ZRegister& zdn,
4250 const PRegisterM& pg,
4251 const ZRegister& zm,
4252 const ZRegister& za);
4253
4254 // Floating-point maximum with immediate (predicated).
4255 void fmax(const ZRegister& zd,
4256 const PRegisterM& pg,
4257 const ZRegister& zn,
4258 double imm);
4259
4260 // Floating-point maximum (predicated).
4261 void fmax(const ZRegister& zd,
4262 const PRegisterM& pg,
4263 const ZRegister& zn,
4264 const ZRegister& zm);
4265
4266 // Floating-point maximum number with immediate (predicated).
4267 void fmaxnm(const ZRegister& zd,
4268 const PRegisterM& pg,
4269 const ZRegister& zn,
4270 double imm);
4271
4272 // Floating-point maximum number (predicated).
4273 void fmaxnm(const ZRegister& zd,
4274 const PRegisterM& pg,
4275 const ZRegister& zn,
4276 const ZRegister& zm);
4277
4278 // Floating-point maximum number recursive reduction to scalar.
4279 void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4280
4281 // Floating-point maximum recursive reduction to scalar.
4282 void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4283
4284 // Floating-point minimum with immediate (predicated).
4285 void fmin(const ZRegister& zd,
4286 const PRegisterM& pg,
4287 const ZRegister& zn,
4288 double imm);
4289
4290 // Floating-point minimum (predicated).
4291 void fmin(const ZRegister& zd,
4292 const PRegisterM& pg,
4293 const ZRegister& zn,
4294 const ZRegister& zm);
4295
4296 // Floating-point minimum number with immediate (predicated).
4297 void fminnm(const ZRegister& zd,
4298 const PRegisterM& pg,
4299 const ZRegister& zn,
4300 double imm);
4301
4302 // Floating-point minimum number (predicated).
4303 void fminnm(const ZRegister& zd,
4304 const PRegisterM& pg,
4305 const ZRegister& zn,
4306 const ZRegister& zm);
4307
4308 // Floating-point minimum number recursive reduction to scalar.
4309 void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4310
4311 // Floating-point minimum recursive reduction to scalar.
4312 void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4313
4314 // Floating-point fused multiply-add vectors (predicated), writing addend
4315 // [Zda = Zda + Zn * Zm].
4316 void fmla(const ZRegister& zda,
4317 const PRegisterM& pg,
4318 const ZRegister& zn,
4319 const ZRegister& zm);
4320
4321 // Floating-point fused multiply-add by indexed elements
4322 // (Zda = Zda + Zn * Zm[indexed]).
4323 void fmla(const ZRegister& zda,
4324 const ZRegister& zn,
4325 const ZRegister& zm,
4326 int index);
4327
4328 // Floating-point fused multiply-subtract vectors (predicated), writing
4329 // addend [Zda = Zda + -Zn * Zm].
4330 void fmls(const ZRegister& zda,
4331 const PRegisterM& pg,
4332 const ZRegister& zn,
4333 const ZRegister& zm);
4334
4335 // Floating-point fused multiply-subtract by indexed elements
4336 // (Zda = Zda + -Zn * Zm[indexed]).
4337 void fmls(const ZRegister& zda,
4338 const ZRegister& zn,
4339 const ZRegister& zm,
4340 int index);
4341
4342 // Move 8-bit floating-point immediate to vector elements (unpredicated).
4343 void fmov(const ZRegister& zd, double imm);
4344
4345 // Move 8-bit floating-point immediate to vector elements (predicated).
4346 void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);
4347
4348 // Floating-point fused multiply-subtract vectors (predicated), writing
4349 // multiplicand [Zdn = Za + -Zdn * Zm].
4350 void fmsb(const ZRegister& zdn,
4351 const PRegisterM& pg,
4352 const ZRegister& zm,
4353 const ZRegister& za);
4354
4355 // Floating-point multiply by immediate (predicated).
4356 void fmul(const ZRegister& zd,
4357 const PRegisterM& pg,
4358 const ZRegister& zn,
4359 double imm);
4360
4361 // Floating-point multiply vectors (predicated).
4362 void fmul(const ZRegister& zd,
4363 const PRegisterM& pg,
4364 const ZRegister& zn,
4365 const ZRegister& zm);
4366
4367 // Floating-point multiply by indexed elements.
4368 void fmul(const ZRegister& zd,
4369 const ZRegister& zn,
4370 const ZRegister& zm,
4371 unsigned index);
4372
4373 // Floating-point multiply vectors (unpredicated).
4374 void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4375
4376 // Floating-point multiply-extended vectors (predicated).
4377 void fmulx(const ZRegister& zd,
4378 const PRegisterM& pg,
4379 const ZRegister& zn,
4380 const ZRegister& zm);
4381
4382 // Floating-point negate (predicated).
4383 void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4384
4385 // Floating-point negated fused multiply-add vectors (predicated), writing
4386 // multiplicand [Zdn = -Za + -Zdn * Zm].
4387 void fnmad(const ZRegister& zdn,
4388 const PRegisterM& pg,
4389 const ZRegister& zm,
4390 const ZRegister& za);
4391
4392 // Floating-point negated fused multiply-add vectors (predicated), writing
4393 // addend [Zda = -Zda + -Zn * Zm].
4394 void fnmla(const ZRegister& zda,
4395 const PRegisterM& pg,
4396 const ZRegister& zn,
4397 const ZRegister& zm);
4398
4399 // Floating-point negated fused multiply-subtract vectors (predicated),
4400 // writing addend [Zda = -Zda + Zn * Zm].
4401 void fnmls(const ZRegister& zda,
4402 const PRegisterM& pg,
4403 const ZRegister& zn,
4404 const ZRegister& zm);
4405
4406 // Floating-point negated fused multiply-subtract vectors (predicated),
4407 // writing multiplicand [Zdn = -Za + Zdn * Zm].
4408 void fnmsb(const ZRegister& zdn,
4409 const PRegisterM& pg,
4410 const ZRegister& zm,
4411 const ZRegister& za);
4412
4413 // Floating-point reciprocal estimate (unpredicated).
4414 void frecpe(const ZRegister& zd, const ZRegister& zn);
4415
4416 // Floating-point reciprocal step (unpredicated).
4417 void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4418
4419 // Floating-point reciprocal exponent (predicated).
4420 void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4421
4422 // Floating-point round to integral value (predicated).
4423 void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4424
4425 // Floating-point round to integral value (predicated).
4426 void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4427
4428 // Floating-point round to integral value (predicated).
4429 void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4430
4431 // Floating-point round to integral value (predicated).
4432 void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4433
4434 // Floating-point round to integral value (predicated).
4435 void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4436
4437 // Floating-point round to integral value (predicated).
4438 void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4439
4440 // Floating-point round to integral value (predicated).
4441 void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4442
4443 // Floating-point reciprocal square root estimate (unpredicated).
4444 void frsqrte(const ZRegister& zd, const ZRegister& zn);
4445
4446 // Floating-point reciprocal square root step (unpredicated).
4447 void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4448
4449 // Floating-point adjust exponent by vector (predicated).
4450 void fscale(const ZRegister& zd,
4451 const PRegisterM& pg,
4452 const ZRegister& zn,
4453 const ZRegister& zm);
4454
4455 // Floating-point square root (predicated).
4456 void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4457
4458 // Floating-point subtract immediate (predicated).
4459 void fsub(const ZRegister& zd,
4460 const PRegisterM& pg,
4461 const ZRegister& zn,
4462 double imm);
4463
4464 // Floating-point subtract vectors (predicated).
4465 void fsub(const ZRegister& zd,
4466 const PRegisterM& pg,
4467 const ZRegister& zn,
4468 const ZRegister& zm);
4469
4470 // Floating-point subtract vectors (unpredicated).
4471 void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4472
4473 // Floating-point reversed subtract from immediate (predicated).
4474 void fsubr(const ZRegister& zd,
4475 const PRegisterM& pg,
4476 const ZRegister& zn,
4477 double imm);
4478
4479 // Floating-point reversed subtract vectors (predicated).
4480 void fsubr(const ZRegister& zd,
4481 const PRegisterM& pg,
4482 const ZRegister& zn,
4483 const ZRegister& zm);
4484
4485 // Floating-point trigonometric multiply-add coefficient.
4486 void ftmad(const ZRegister& zd,
4487 const ZRegister& zn,
4488 const ZRegister& zm,
4489 int imm3);
4490
4491 // Floating-point trigonometric starting value.
4492 void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4493
4494 // Floating-point trigonometric select coefficient.
4495 void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4496
4497 // Increment scalar by multiple of predicate constraint element count.
4498 void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4499
4500 // Increment scalar by multiple of predicate constraint element count.
4501 void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4502
4503 // Increment vector by multiple of predicate constraint element count.
4504 void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4505
4506 // Increment scalar by multiple of predicate constraint element count.
4507 void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4508
4509 // Increment vector by multiple of predicate constraint element count.
4510 void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4511
4512 // Increment scalar by active predicate element count.
4513 void incp(const Register& rdn, const PRegisterWithLaneSize& pg);
4514
4515 // Increment vector by active predicate element count.
4516 void incp(const ZRegister& zdn, const PRegister& pg);
4517
4518 // Increment scalar by multiple of predicate constraint element count.
4519 void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4520
4521 // Increment vector by multiple of predicate constraint element count.
4522 void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4523
4524 // Create index starting from and incremented by immediate.
4525 void index(const ZRegister& zd, int start, int step);
4526
4527 // Create index starting from and incremented by general-purpose register.
4528 void index(const ZRegister& zd, const Register& rn, const Register& rm);
4529
4530 // Create index starting from general-purpose register and incremented by
4531 // immediate.
4532 void index(const ZRegister& zd, const Register& rn, int imm5);
4533
4534 // Create index starting from immediate and incremented by general-purpose
4535 // register.
4536 void index(const ZRegister& zd, int imm5, const Register& rm);
4537
4538 // Insert general-purpose register in shifted vector.
4539 void insr(const ZRegister& zdn, const Register& rm);
4540
4541 // Insert SIMD&FP scalar register in shifted vector.
4542 void insr(const ZRegister& zdn, const VRegister& vm);
4543
4544 // Extract element after last to general-purpose register.
4545 void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);
4546
4547 // Extract element after last to SIMD&FP scalar register.
4548 void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4549
4550 // Extract last element to general-purpose register.
4551 void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);
4552
4553 // Extract last element to SIMD&FP scalar register.
4554 void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4555
4556 // Contiguous/gather load bytes to vector.
4557 void ld1b(const ZRegister& zt,
4558 const PRegisterZ& pg,
4559 const SVEMemOperand& addr);
4560
4561 // Contiguous/gather load halfwords to vector.
4562 void ld1h(const ZRegister& zt,
4563 const PRegisterZ& pg,
4564 const SVEMemOperand& addr);
4565
4566 // Contiguous/gather load words to vector.
4567 void ld1w(const ZRegister& zt,
4568 const PRegisterZ& pg,
4569 const SVEMemOperand& addr);
4570
4571 // Contiguous/gather load doublewords to vector.
4572 void ld1d(const ZRegister& zt,
4573 const PRegisterZ& pg,
4574 const SVEMemOperand& addr);
4575
4576 // TODO: Merge other loads into the SVEMemOperand versions.
4577
4578 // Load and broadcast unsigned byte to vector.
4579 void ld1rb(const ZRegister& zt,
4580 const PRegisterZ& pg,
4581 const SVEMemOperand& addr);
4582
4583 // Load and broadcast unsigned halfword to vector.
4584 void ld1rh(const ZRegister& zt,
4585 const PRegisterZ& pg,
4586 const SVEMemOperand& addr);
4587
4588 // Load and broadcast unsigned word to vector.
4589 void ld1rw(const ZRegister& zt,
4590 const PRegisterZ& pg,
4591 const SVEMemOperand& addr);
4592
4593 // Load and broadcast doubleword to vector.
4594 void ld1rd(const ZRegister& zt,
4595 const PRegisterZ& pg,
4596 const SVEMemOperand& addr);
4597
4598 // Contiguous load and replicate sixteen bytes.
4599 void ld1rqb(const ZRegister& zt,
4600 const PRegisterZ& pg,
4601 const SVEMemOperand& addr);
4602
4603 // Contiguous load and replicate eight halfwords.
4604 void ld1rqh(const ZRegister& zt,
4605 const PRegisterZ& pg,
4606 const SVEMemOperand& addr);
4607
4608 // Contiguous load and replicate four words.
4609 void ld1rqw(const ZRegister& zt,
4610 const PRegisterZ& pg,
4611 const SVEMemOperand& addr);
4612
4613 // Contiguous load and replicate two doublewords.
4614 void ld1rqd(const ZRegister& zt,
4615 const PRegisterZ& pg,
4616 const SVEMemOperand& addr);
4617
4618 // Contiguous load and replicate thirty-two bytes.
4619 void ld1rob(const ZRegister& zt,
4620 const PRegisterZ& pg,
4621 const SVEMemOperand& addr);
4622
4623 // Contiguous load and replicate sixteen halfwords.
4624 void ld1roh(const ZRegister& zt,
4625 const PRegisterZ& pg,
4626 const SVEMemOperand& addr);
4627
4628 // Contiguous load and replicate eight words.
4629 void ld1row(const ZRegister& zt,
4630 const PRegisterZ& pg,
4631 const SVEMemOperand& addr);
4632
4633 // Contiguous load and replicate four doublewords.
4634 void ld1rod(const ZRegister& zt,
4635 const PRegisterZ& pg,
4636 const SVEMemOperand& addr);
4637
4638 // Load and broadcast signed byte to vector.
4639 void ld1rsb(const ZRegister& zt,
4640 const PRegisterZ& pg,
4641 const SVEMemOperand& addr);
4642
4643 // Load and broadcast signed halfword to vector.
4644 void ld1rsh(const ZRegister& zt,
4645 const PRegisterZ& pg,
4646 const SVEMemOperand& addr);
4647
4648 // Load and broadcast signed word to vector.
4649 void ld1rsw(const ZRegister& zt,
4650 const PRegisterZ& pg,
4651 const SVEMemOperand& addr);
4652
4653 // Contiguous/gather load signed bytes to vector.
4654 void ld1sb(const ZRegister& zt,
4655 const PRegisterZ& pg,
4656 const SVEMemOperand& addr);
4657
4658 // Contiguous/gather load signed halfwords to vector.
4659 void ld1sh(const ZRegister& zt,
4660 const PRegisterZ& pg,
4661 const SVEMemOperand& addr);
4662
4663 // Contiguous/gather load signed words to vector.
4664 void ld1sw(const ZRegister& zt,
4665 const PRegisterZ& pg,
4666 const SVEMemOperand& addr);
4667
4668 // TODO: Merge other loads into the SVEMemOperand versions.
4669
4670 // Contiguous load two-byte structures to two vectors.
4671 void ld2b(const ZRegister& zt1,
4672 const ZRegister& zt2,
4673 const PRegisterZ& pg,
4674 const SVEMemOperand& addr);
4675
4676 // Contiguous load two-halfword structures to two vectors.
4677 void ld2h(const ZRegister& zt1,
4678 const ZRegister& zt2,
4679 const PRegisterZ& pg,
4680 const SVEMemOperand& addr);
4681
4682 // Contiguous load two-word structures to two vectors.
4683 void ld2w(const ZRegister& zt1,
4684 const ZRegister& zt2,
4685 const PRegisterZ& pg,
4686 const SVEMemOperand& addr);
4687
4688 // Contiguous load two-doubleword structures to two vectors.
4689 void ld2d(const ZRegister& zt1,
4690 const ZRegister& zt2,
4691 const PRegisterZ& pg,
4692 const SVEMemOperand& addr);
4693
4694 // Contiguous load three-byte structures to three vectors.
4695 void ld3b(const ZRegister& zt1,
4696 const ZRegister& zt2,
4697 const ZRegister& zt3,
4698 const PRegisterZ& pg,
4699 const SVEMemOperand& addr);
4700
4701 // Contiguous load three-halfword structures to three vectors.
4702 void ld3h(const ZRegister& zt1,
4703 const ZRegister& zt2,
4704 const ZRegister& zt3,
4705 const PRegisterZ& pg,
4706 const SVEMemOperand& addr);
4707
4708 // Contiguous load three-word structures to three vectors.
4709 void ld3w(const ZRegister& zt1,
4710 const ZRegister& zt2,
4711 const ZRegister& zt3,
4712 const PRegisterZ& pg,
4713 const SVEMemOperand& addr);
4714
4715 // Contiguous load three-doubleword structures to three vectors.
4716 void ld3d(const ZRegister& zt1,
4717 const ZRegister& zt2,
4718 const ZRegister& zt3,
4719 const PRegisterZ& pg,
4720 const SVEMemOperand& addr);
4721
4722 // Contiguous load four-byte structures to four vectors.
4723 void ld4b(const ZRegister& zt1,
4724 const ZRegister& zt2,
4725 const ZRegister& zt3,
4726 const ZRegister& zt4,
4727 const PRegisterZ& pg,
4728 const SVEMemOperand& addr);
4729
4730 // Contiguous load four-halfword structures to four vectors.
4731 void ld4h(const ZRegister& zt1,
4732 const ZRegister& zt2,
4733 const ZRegister& zt3,
4734 const ZRegister& zt4,
4735 const PRegisterZ& pg,
4736 const SVEMemOperand& addr);
4737
4738 // Contiguous load four-word structures to four vectors.
4739 void ld4w(const ZRegister& zt1,
4740 const ZRegister& zt2,
4741 const ZRegister& zt3,
4742 const ZRegister& zt4,
4743 const PRegisterZ& pg,
4744 const SVEMemOperand& addr);
4745
4746 // Contiguous load four-doubleword structures to four vectors.
4747 void ld4d(const ZRegister& zt1,
4748 const ZRegister& zt2,
4749 const ZRegister& zt3,
4750 const ZRegister& zt4,
4751 const PRegisterZ& pg,
4752 const SVEMemOperand& addr);
4753
4754 // Contiguous load first-fault unsigned bytes to vector.
4755 void ldff1b(const ZRegister& zt,
4756 const PRegisterZ& pg,
4757 const SVEMemOperand& addr);
4758
4759 // Contiguous load first-fault unsigned halfwords to vector.
4760 void ldff1h(const ZRegister& zt,
4761 const PRegisterZ& pg,
4762 const SVEMemOperand& addr);
4763
4764 // Contiguous load first-fault unsigned words to vector.
4765 void ldff1w(const ZRegister& zt,
4766 const PRegisterZ& pg,
4767 const SVEMemOperand& addr);
4768
4769 // Contiguous load first-fault doublewords to vector.
4770 void ldff1d(const ZRegister& zt,
4771 const PRegisterZ& pg,
4772 const SVEMemOperand& addr);
4773
4774 // Contiguous load first-fault signed bytes to vector.
4775 void ldff1sb(const ZRegister& zt,
4776 const PRegisterZ& pg,
4777 const SVEMemOperand& addr);
4778
4779 // Contiguous load first-fault signed halfwords to vector.
4780 void ldff1sh(const ZRegister& zt,
4781 const PRegisterZ& pg,
4782 const SVEMemOperand& addr);
4783
4784 // Contiguous load first-fault signed words to vector.
4785 void ldff1sw(const ZRegister& zt,
4786 const PRegisterZ& pg,
4787 const SVEMemOperand& addr);
4788
4789 // Gather load first-fault unsigned bytes to vector.
4790 void ldff1b(const ZRegister& zt,
4791 const PRegisterZ& pg,
4792 const Register& xn,
4793 const ZRegister& zm);
4794
4795 // Gather load first-fault unsigned bytes to vector (immediate index).
4796 void ldff1b(const ZRegister& zt,
4797 const PRegisterZ& pg,
4798 const ZRegister& zn,
4799 int imm5);
4800
4801 // Gather load first-fault doublewords to vector (vector index).
4802 void ldff1d(const ZRegister& zt,
4803 const PRegisterZ& pg,
4804 const Register& xn,
4805 const ZRegister& zm);
4806
4807 // Gather load first-fault doublewords to vector (immediate index).
4808 void ldff1d(const ZRegister& zt,
4809 const PRegisterZ& pg,
4810 const ZRegister& zn,
4811 int imm5);
4812
4813 // Gather load first-fault unsigned halfwords to vector (vector index).
4814 void ldff1h(const ZRegister& zt,
4815 const PRegisterZ& pg,
4816 const Register& xn,
4817 const ZRegister& zm);
4818
4819 // Gather load first-fault unsigned halfwords to vector (immediate index).
4820 void ldff1h(const ZRegister& zt,
4821 const PRegisterZ& pg,
4822 const ZRegister& zn,
4823 int imm5);
4824
4825 // Gather load first-fault signed bytes to vector (vector index).
4826 void ldff1sb(const ZRegister& zt,
4827 const PRegisterZ& pg,
4828 const Register& xn,
4829 const ZRegister& zm);
4830
4831 // Gather load first-fault signed bytes to vector (immediate index).
4832 void ldff1sb(const ZRegister& zt,
4833 const PRegisterZ& pg,
4834 const ZRegister& zn,
4835 int imm5);
4836
4837 // Gather load first-fault signed halfwords to vector (vector index).
4838 void ldff1sh(const ZRegister& zt,
4839 const PRegisterZ& pg,
4840 const Register& xn,
4841 const ZRegister& zm);
4842
4843 // Gather load first-fault signed halfwords to vector (immediate index).
4844 void ldff1sh(const ZRegister& zt,
4845 const PRegisterZ& pg,
4846 const ZRegister& zn,
4847 int imm5);
4848
4849 // Gather load first-fault signed words to vector (vector index).
4850 void ldff1sw(const ZRegister& zt,
4851 const PRegisterZ& pg,
4852 const Register& xn,
4853 const ZRegister& zm);
4854
4855 // Gather load first-fault signed words to vector (immediate index).
4856 void ldff1sw(const ZRegister& zt,
4857 const PRegisterZ& pg,
4858 const ZRegister& zn,
4859 int imm5);
4860
4861 // Gather load first-fault unsigned words to vector (vector index).
4862 void ldff1w(const ZRegister& zt,
4863 const PRegisterZ& pg,
4864 const Register& xn,
4865 const ZRegister& zm);
4866
4867 // Gather load first-fault unsigned words to vector (immediate index).
4868 void ldff1w(const ZRegister& zt,
4869 const PRegisterZ& pg,
4870 const ZRegister& zn,
4871 int imm5);
4872
4873 // Contiguous load non-fault unsigned bytes to vector (immediate index).
4874 void ldnf1b(const ZRegister& zt,
4875 const PRegisterZ& pg,
4876 const SVEMemOperand& addr);
4877
4878 // Contiguous load non-fault doublewords to vector (immediate index).
4879 void ldnf1d(const ZRegister& zt,
4880 const PRegisterZ& pg,
4881 const SVEMemOperand& addr);
4882
4883 // Contiguous load non-fault unsigned halfwords to vector (immediate
4884 // index).
4885 void ldnf1h(const ZRegister& zt,
4886 const PRegisterZ& pg,
4887 const SVEMemOperand& addr);
4888
4889 // Contiguous load non-fault signed bytes to vector (immediate index).
4890 void ldnf1sb(const ZRegister& zt,
4891 const PRegisterZ& pg,
4892 const SVEMemOperand& addr);
4893
4894 // Contiguous load non-fault signed halfwords to vector (immediate index).
4895 void ldnf1sh(const ZRegister& zt,
4896 const PRegisterZ& pg,
4897 const SVEMemOperand& addr);
4898
4899 // Contiguous load non-fault signed words to vector (immediate index).
4900 void ldnf1sw(const ZRegister& zt,
4901 const PRegisterZ& pg,
4902 const SVEMemOperand& addr);
4903
4904 // Contiguous load non-fault unsigned words to vector (immediate index).
4905 void ldnf1w(const ZRegister& zt,
4906 const PRegisterZ& pg,
4907 const SVEMemOperand& addr);
4908
4909 // Contiguous load non-temporal bytes to vector.
4910 void ldnt1b(const ZRegister& zt,
4911 const PRegisterZ& pg,
4912 const SVEMemOperand& addr);
4913
4914 // Contiguous load non-temporal halfwords to vector.
4915 void ldnt1h(const ZRegister& zt,
4916 const PRegisterZ& pg,
4917 const SVEMemOperand& addr);
4918
4919 // Contiguous load non-temporal words to vector.
4920 void ldnt1w(const ZRegister& zt,
4921 const PRegisterZ& pg,
4922 const SVEMemOperand& addr);
4923
4924 // Contiguous load non-temporal doublewords to vector.
4925 void ldnt1d(const ZRegister& zt,
4926 const PRegisterZ& pg,
4927 const SVEMemOperand& addr);
4928
4929 // Load SVE predicate/vector register.
4930 void ldr(const CPURegister& rt, const SVEMemOperand& addr);
4931
4932 // Logical shift left by immediate (predicated).
4933 void lsl(const ZRegister& zd,
4934 const PRegisterM& pg,
4935 const ZRegister& zn,
4936 int shift);
4937
4938 // Logical shift left by 64-bit wide elements (predicated).
4939 void lsl(const ZRegister& zd,
4940 const PRegisterM& pg,
4941 const ZRegister& zn,
4942 const ZRegister& zm);
4943
4944 // Logical shift left by immediate (unpredicated).
4945 void lsl(const ZRegister& zd, const ZRegister& zn, int shift);
4946
4947 // Logical shift left by 64-bit wide elements (unpredicated).
4948 void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4949
4950 // Reversed logical shift left by vector (predicated).
4951 void lslr(const ZRegister& zd,
4952 const PRegisterM& pg,
4953 const ZRegister& zn,
4954 const ZRegister& zm);
4955
4956 // Logical shift right by immediate (predicated).
4957 void lsr(const ZRegister& zd,
4958 const PRegisterM& pg,
4959 const ZRegister& zn,
4960 int shift);
4961
4962 // Logical shift right by 64-bit wide elements (predicated).
4963 void lsr(const ZRegister& zd,
4964 const PRegisterM& pg,
4965 const ZRegister& zn,
4966 const ZRegister& zm);
4967
4968 // Logical shift right by immediate (unpredicated).
4969 void lsr(const ZRegister& zd, const ZRegister& zn, int shift);
4970
4971 // Logical shift right by 64-bit wide elements (unpredicated).
4972 void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4973
4974 // Reversed logical shift right by vector (predicated).
4975 void lsrr(const ZRegister& zd,
4976 const PRegisterM& pg,
4977 const ZRegister& zn,
4978 const ZRegister& zm);
4979
4980 // Bitwise invert predicate.
4981 void not_(const PRegisterWithLaneSize& pd,
4982 const PRegisterZ& pg,
4983 const PRegisterWithLaneSize& pn);
4984
4985 // Bitwise invert predicate, setting the condition flags.
4986 void nots(const PRegisterWithLaneSize& pd,
4987 const PRegisterZ& pg,
4988 const PRegisterWithLaneSize& pn);
4989
4990 // Multiply-add vectors (predicated), writing multiplicand
4991 // [Zdn = Za + Zdn * Zm].
4992 void mad(const ZRegister& zdn,
4993 const PRegisterM& pg,
4994 const ZRegister& zm,
4995 const ZRegister& za);
4996
4997 // Multiply-add vectors (predicated), writing addend
4998 // [Zda = Zda + Zn * Zm].
4999 void mla(const ZRegister& zda,
5000 const PRegisterM& pg,
5001 const ZRegister& zn,
5002 const ZRegister& zm);
5003
5004 // Multiply-subtract vectors (predicated), writing addend
5005 // [Zda = Zda - Zn * Zm].
5006 void mls(const ZRegister& zda,
5007 const PRegisterM& pg,
5008 const ZRegister& zn,
5009 const ZRegister& zm);
5010
5011 // Move predicates (unpredicated)
5012 void mov(const PRegister& pd, const PRegister& pn);
5013
5014 // Move predicates (merging)
5015 void mov(const PRegisterWithLaneSize& pd,
5016 const PRegisterM& pg,
5017 const PRegisterWithLaneSize& pn);
5018
5019 // Move predicates (zeroing)
5020 void mov(const PRegisterWithLaneSize& pd,
5021 const PRegisterZ& pg,
5022 const PRegisterWithLaneSize& pn);
5023
5024 // Move general-purpose register to vector elements (unpredicated)
5025 void mov(const ZRegister& zd, const Register& xn);
5026
5027 // Move SIMD&FP scalar register to vector elements (unpredicated)
5028 void mov(const ZRegister& zd, const VRegister& vn);
5029
5030 // Move vector register (unpredicated)
5031 void mov(const ZRegister& zd, const ZRegister& zn);
5032
5033 // Move indexed element to vector elements (unpredicated)
5034 void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);
5035
5036 // Move general-purpose register to vector elements (predicated)
5037 void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
5038
5039 // Move SIMD&FP scalar register to vector elements (predicated)
5040 void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
5041
5042 // Move vector elements (predicated)
5043 void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5044
5045 // Move signed integer immediate to vector elements (predicated)
5046 void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
5047
5048 // Move signed immediate to vector elements (unpredicated).
5049 void mov(const ZRegister& zd, int imm8, int shift);
5050
5051 // Move logical bitmask immediate to vector (unpredicated).
5052 void mov(const ZRegister& zd, uint64_t imm);
5053
5054 // Move predicate (unpredicated), setting the condition flags
5055 void movs(const PRegister& pd, const PRegister& pn);
5056
5057 // Move predicates (zeroing), setting the condition flags
5058 void movs(const PRegisterWithLaneSize& pd,
5059 const PRegisterZ& pg,
5060 const PRegisterWithLaneSize& pn);
5061
5062 // Move prefix (predicated).
5063 void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
5064
5065 // Move prefix (unpredicated).
5066 void movprfx(const ZRegister& zd, const ZRegister& zn);
5067
5068 // Multiply-subtract vectors (predicated), writing multiplicand
5069 // [Zdn = Za - Zdn * Zm].
5070 void msb(const ZRegister& zdn,
5071 const PRegisterM& pg,
5072 const ZRegister& zm,
5073 const ZRegister& za);
5074
5075 // Multiply vectors (predicated).
5076 void mul(const ZRegister& zd,
5077 const PRegisterM& pg,
5078 const ZRegister& zn,
5079 const ZRegister& zm);
5080
5081 // Multiply by immediate (unpredicated).
5082 void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
5083
5084 // Bitwise NAND predicates.
5085 void nand(const PRegisterWithLaneSize& pd,
5086 const PRegisterZ& pg,
5087 const PRegisterWithLaneSize& pn,
5088 const PRegisterWithLaneSize& pm);
5089
5090 // Bitwise NAND predicates.
5091 void nands(const PRegisterWithLaneSize& pd,
5092 const PRegisterZ& pg,
5093 const PRegisterWithLaneSize& pn,
5094 const PRegisterWithLaneSize& pm);
5095
5096 // Negate (predicated).
5097 void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5098
5099 // Bitwise NOR predicates.
5100 void nor(const PRegisterWithLaneSize& pd,
5101 const PRegisterZ& pg,
5102 const PRegisterWithLaneSize& pn,
5103 const PRegisterWithLaneSize& pm);
5104
5105 // Bitwise NOR predicates.
5106 void nors(const PRegisterWithLaneSize& pd,
5107 const PRegisterZ& pg,
5108 const PRegisterWithLaneSize& pn,
5109 const PRegisterWithLaneSize& pm);
5110
5111 // Bitwise invert vector (predicated).
5112 void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5113
5114 // Bitwise OR inverted predicate.
5115 void orn(const PRegisterWithLaneSize& pd,
5116 const PRegisterZ& pg,
5117 const PRegisterWithLaneSize& pn,
5118 const PRegisterWithLaneSize& pm);
5119
5120 // Bitwise OR inverted predicate.
5121 void orns(const PRegisterWithLaneSize& pd,
5122 const PRegisterZ& pg,
5123 const PRegisterWithLaneSize& pn,
5124 const PRegisterWithLaneSize& pm);
5125
5126 // Bitwise OR with inverted immediate (unpredicated).
5127 void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5128
5129 // Bitwise OR predicate.
5130 void orr(const PRegisterWithLaneSize& pd,
5131 const PRegisterZ& pg,
5132 const PRegisterWithLaneSize& pn,
5133 const PRegisterWithLaneSize& pm);
5134
5135 // Bitwise OR vectors (predicated).
5136 void orr(const ZRegister& zd,
5137 const PRegisterM& pg,
5138 const ZRegister& zn,
5139 const ZRegister& zm);
5140
5141 // Bitwise OR with immediate (unpredicated).
5142 void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5143
5144 // Bitwise OR vectors (unpredicated).
5145 void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5146
5147 // Bitwise OR predicate.
5148 void orrs(const PRegisterWithLaneSize& pd,
5149 const PRegisterZ& pg,
5150 const PRegisterWithLaneSize& pn,
5151 const PRegisterWithLaneSize& pm);
5152
5153 // Bitwise OR reduction to scalar.
5154 void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5155
5156 // Set all predicate elements to false.
5157 void pfalse(const PRegisterWithLaneSize& pd);
5158
5159 // Set the first active predicate element to true.
5160 void pfirst(const PRegisterWithLaneSize& pd,
5161 const PRegister& pg,
5162 const PRegisterWithLaneSize& pn);
5163
5164 // Find next active predicate.
5165 void pnext(const PRegisterWithLaneSize& pd,
5166 const PRegister& pg,
5167 const PRegisterWithLaneSize& pn);
5168
5169 // Prefetch bytes.
5170 void prfb(PrefetchOperation prfop,
5171 const PRegister& pg,
5172 const SVEMemOperand& addr);
5173
5174 // Prefetch halfwords.
5175 void prfh(PrefetchOperation prfop,
5176 const PRegister& pg,
5177 const SVEMemOperand& addr);
5178
5179 // Prefetch words.
5180 void prfw(PrefetchOperation prfop,
5181 const PRegister& pg,
5182 const SVEMemOperand& addr);
5183
5184 // Prefetch doublewords.
5185 void prfd(PrefetchOperation prfop,
5186 const PRegister& pg,
5187 const SVEMemOperand& addr);
5188
5189 // Set condition flags for predicate.
5190 void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);
5191
5192 // Initialise predicate from named constraint.
5193 void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5194
5195 // Initialise predicate from named constraint.
5196 void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5197
5198 // Unpack and widen half of predicate.
5199 void punpkhi(const PRegisterWithLaneSize& pd,
5200 const PRegisterWithLaneSize& pn);
5201
5202 // Unpack and widen half of predicate.
5203 void punpklo(const PRegisterWithLaneSize& pd,
5204 const PRegisterWithLaneSize& pn);
5205
5206 // Reverse bits (predicated).
5207 void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5208
5209 // Read the first-fault register.
5210 void rdffr(const PRegisterWithLaneSize& pd);
5211
  // Return predicate of successfully loaded elements.
5213 void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5214
  // Return predicate of successfully loaded elements.
5216 void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5217
5218 // Read multiple of vector register size to scalar register.
5219 void rdvl(const Register& xd, int imm6);
5220
5221 // Reverse all elements in a predicate.
5222 void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);
5223
5224 // Reverse all elements in a vector (unpredicated).
5225 void rev(const ZRegister& zd, const ZRegister& zn);
5226
5227 // Reverse bytes / halfwords / words within elements (predicated).
5228 void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5229
5230 // Reverse bytes / halfwords / words within elements (predicated).
5231 void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5232
5233 // Reverse bytes / halfwords / words within elements (predicated).
5234 void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5235
5236 // Signed absolute difference (predicated).
5237 void sabd(const ZRegister& zd,
5238 const PRegisterM& pg,
5239 const ZRegister& zn,
5240 const ZRegister& zm);
5241
5242 // Signed add reduction to scalar.
5243 void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5244
5245 // Signed integer convert to floating-point (predicated).
5246 void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5247
5248 // Signed divide (predicated).
5249 void sdiv(const ZRegister& zd,
5250 const PRegisterM& pg,
5251 const ZRegister& zn,
5252 const ZRegister& zm);
5253
5254 // Signed reversed divide (predicated).
5255 void sdivr(const ZRegister& zd,
5256 const PRegisterM& pg,
5257 const ZRegister& zn,
5258 const ZRegister& zm);
5259
5260 // Signed dot product by indexed quadtuplet.
5261 void sdot(const ZRegister& zda,
5262 const ZRegister& zn,
5263 const ZRegister& zm,
5264 int index);
5265
5266 // Signed dot product.
5267 void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5268
5269 // Conditionally select elements from two predicates.
5270 void sel(const PRegisterWithLaneSize& pd,
5271 const PRegister& pg,
5272 const PRegisterWithLaneSize& pn,
5273 const PRegisterWithLaneSize& pm);
5274
5275 // Conditionally select elements from two vectors.
5276 void sel(const ZRegister& zd,
5277 const PRegister& pg,
5278 const ZRegister& zn,
5279 const ZRegister& zm);
5280
5281 // Initialise the first-fault register to all true.
5282 void setffr();
5283
5284 // Signed maximum vectors (predicated).
5285 void smax(const ZRegister& zd,
5286 const PRegisterM& pg,
5287 const ZRegister& zn,
5288 const ZRegister& zm);
5289
5290 // Signed maximum with immediate (unpredicated).
5291 void smax(const ZRegister& zd, const ZRegister& zn, int imm8);
5292
5293 // Signed maximum reduction to scalar.
5294 void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5295
5296 // Signed minimum vectors (predicated).
5297 void smin(const ZRegister& zd,
5298 const PRegisterM& pg,
5299 const ZRegister& zn,
5300 const ZRegister& zm);
5301
5302 // Signed minimum with immediate (unpredicated).
5303 void smin(const ZRegister& zd, const ZRegister& zn, int imm8);
5304
5305 // Signed minimum reduction to scalar.
5306 void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5307
5308 // Signed multiply returning high half (predicated).
5309 void smulh(const ZRegister& zd,
5310 const PRegisterM& pg,
5311 const ZRegister& zn,
5312 const ZRegister& zm);
5313
5314 // Splice two vectors under predicate control.
5315 void splice(const ZRegister& zd,
5316 const PRegister& pg,
5317 const ZRegister& zn,
5318 const ZRegister& zm);
5319
5320 // Splice two vectors under predicate control (constructive).
5321 void splice_con(const ZRegister& zd,
5322 const PRegister& pg,
5323 const ZRegister& zn,
5324 const ZRegister& zm);
5325
5326 // Signed saturating add vectors (unpredicated).
5327 void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5328
5329 // Signed saturating add immediate (unpredicated).
5330 void sqadd(const ZRegister& zd,
5331 const ZRegister& zn,
5332 int imm8,
5333 int shift = -1);
5334
5335 // Signed saturating decrement scalar by multiple of 8-bit predicate
5336 // constraint element count.
5337 void sqdecb(const Register& xd,
5338 const Register& wn,
5339 int pattern,
5340 int multiplier);
5341
5342 // Signed saturating decrement scalar by multiple of 8-bit predicate
5343 // constraint element count.
5344 void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5345
5346 // Signed saturating decrement scalar by multiple of 64-bit predicate
5347 // constraint element count.
5348 void sqdecd(const Register& xd,
5349 const Register& wn,
5350 int pattern = SVE_ALL,
5351 int multiplier = 1);
5352
5353 // Signed saturating decrement scalar by multiple of 64-bit predicate
5354 // constraint element count.
5355 void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5356
5357 // Signed saturating decrement vector by multiple of 64-bit predicate
5358 // constraint element count.
5359 void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5360
5361 // Signed saturating decrement scalar by multiple of 16-bit predicate
5362 // constraint element count.
5363 void sqdech(const Register& xd,
5364 const Register& wn,
5365 int pattern = SVE_ALL,
5366 int multiplier = 1);
5367
5368 // Signed saturating decrement scalar by multiple of 16-bit predicate
5369 // constraint element count.
5370 void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5371
5372 // Signed saturating decrement vector by multiple of 16-bit predicate
5373 // constraint element count.
5374 void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5375
5376 // Signed saturating decrement scalar by active predicate element count.
5377 void sqdecp(const Register& xd,
5378 const PRegisterWithLaneSize& pg,
5379 const Register& wn);
5380
5381 // Signed saturating decrement scalar by active predicate element count.
5382 void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);
5383
5384 // Signed saturating decrement vector by active predicate element count.
5385 void sqdecp(const ZRegister& zdn, const PRegister& pg);
5386
5387 // Signed saturating decrement scalar by multiple of 32-bit predicate
5388 // constraint element count.
5389 void sqdecw(const Register& xd,
5390 const Register& wn,
5391 int pattern = SVE_ALL,
5392 int multiplier = 1);
5393
5394 // Signed saturating decrement scalar by multiple of 32-bit predicate
5395 // constraint element count.
5396 void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5397
5398 // Signed saturating decrement vector by multiple of 32-bit predicate
5399 // constraint element count.
5400 void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5401
5402 // Signed saturating increment scalar by multiple of 8-bit predicate
5403 // constraint element count.
5404 void sqincb(const Register& xd,
5405 const Register& wn,
5406 int pattern = SVE_ALL,
5407 int multiplier = 1);
5408
5409 // Signed saturating increment scalar by multiple of 8-bit predicate
5410 // constraint element count.
5411 void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5412
5413 // Signed saturating increment scalar by multiple of 64-bit predicate
5414 // constraint element count.
5415 void sqincd(const Register& xd,
5416 const Register& wn,
5417 int pattern,
5418 int multiplier);
5419
5420 // Signed saturating increment scalar by multiple of 64-bit predicate
5421 // constraint element count.
5422 void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5423
5424 // Signed saturating increment vector by multiple of 64-bit predicate
5425 // constraint element count.
5426 void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5427
5428 // Signed saturating increment scalar by multiple of 16-bit predicate
5429 // constraint element count.
5430 void sqinch(const Register& xd,
5431 const Register& wn,
5432 int pattern = SVE_ALL,
5433 int multiplier = 1);
5434
5435 // Signed saturating increment scalar by multiple of 16-bit predicate
5436 // constraint element count.
5437 void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5438
5439 // Signed saturating increment vector by multiple of 16-bit predicate
5440 // constraint element count.
5441 void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5442
5443 // Signed saturating increment scalar by active predicate element count.
5444 void sqincp(const Register& xd,
5445 const PRegisterWithLaneSize& pg,
5446 const Register& wn);
5447
5448 // Signed saturating increment scalar by active predicate element count.
5449 void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);
5450
5451 // Signed saturating increment vector by active predicate element count.
5452 void sqincp(const ZRegister& zdn, const PRegister& pg);
5453
5454 // Signed saturating increment scalar by multiple of 32-bit predicate
5455 // constraint element count.
5456 void sqincw(const Register& xd,
5457 const Register& wn,
5458 int pattern = SVE_ALL,
5459 int multiplier = 1);
5460
5461 // Signed saturating increment scalar by multiple of 32-bit predicate
5462 // constraint element count.
5463 void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5464
5465 // Signed saturating increment vector by multiple of 32-bit predicate
5466 // constraint element count.
5467 void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5468
5469 // Signed saturating subtract vectors (unpredicated).
5470 void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5471
5472 // Signed saturating subtract immediate (unpredicated).
5473 void sqsub(const ZRegister& zd,
5474 const ZRegister& zn,
5475 int imm8,
5476 int shift = -1);
5477
5478 // Contiguous/scatter store bytes from vector.
5479 void st1b(const ZRegister& zt,
5480 const PRegister& pg,
5481 const SVEMemOperand& addr);
5482
5483 // Contiguous/scatter store halfwords from vector.
5484 void st1h(const ZRegister& zt,
5485 const PRegister& pg,
5486 const SVEMemOperand& addr);
5487
5488 // Contiguous/scatter store words from vector.
5489 void st1w(const ZRegister& zt,
5490 const PRegister& pg,
5491 const SVEMemOperand& addr);
5492
5493 // Contiguous/scatter store doublewords from vector.
5494 void st1d(const ZRegister& zt,
5495 const PRegister& pg,
5496 const SVEMemOperand& addr);
5497
5498 // Contiguous store two-byte structures from two vectors.
5499 void st2b(const ZRegister& zt1,
5500 const ZRegister& zt2,
5501 const PRegister& pg,
5502 const SVEMemOperand& addr);
5503
5504 // Contiguous store two-halfword structures from two vectors.
5505 void st2h(const ZRegister& zt1,
5506 const ZRegister& zt2,
5507 const PRegister& pg,
5508 const SVEMemOperand& addr);
5509
5510 // Contiguous store two-word structures from two vectors.
5511 void st2w(const ZRegister& zt1,
5512 const ZRegister& zt2,
5513 const PRegister& pg,
5514 const SVEMemOperand& addr);
5515
  // Contiguous store two-doubleword structures from two vectors.
5517 void st2d(const ZRegister& zt1,
5518 const ZRegister& zt2,
5519 const PRegister& pg,
5520 const SVEMemOperand& addr);
5521
5522 // Contiguous store three-byte structures from three vectors.
5523 void st3b(const ZRegister& zt1,
5524 const ZRegister& zt2,
5525 const ZRegister& zt3,
5526 const PRegister& pg,
5527 const SVEMemOperand& addr);
5528
5529 // Contiguous store three-halfword structures from three vectors.
5530 void st3h(const ZRegister& zt1,
5531 const ZRegister& zt2,
5532 const ZRegister& zt3,
5533 const PRegister& pg,
5534 const SVEMemOperand& addr);
5535
5536 // Contiguous store three-word structures from three vectors.
5537 void st3w(const ZRegister& zt1,
5538 const ZRegister& zt2,
5539 const ZRegister& zt3,
5540 const PRegister& pg,
5541 const SVEMemOperand& addr);
5542
5543 // Contiguous store three-doubleword structures from three vectors.
5544 void st3d(const ZRegister& zt1,
5545 const ZRegister& zt2,
5546 const ZRegister& zt3,
5547 const PRegister& pg,
5548 const SVEMemOperand& addr);
5549
5550 // Contiguous store four-byte structures from four vectors.
5551 void st4b(const ZRegister& zt1,
5552 const ZRegister& zt2,
5553 const ZRegister& zt3,
5554 const ZRegister& zt4,
5555 const PRegister& pg,
5556 const SVEMemOperand& addr);
5557
5558 // Contiguous store four-halfword structures from four vectors.
5559 void st4h(const ZRegister& zt1,
5560 const ZRegister& zt2,
5561 const ZRegister& zt3,
5562 const ZRegister& zt4,
5563 const PRegister& pg,
5564 const SVEMemOperand& addr);
5565
5566 // Contiguous store four-word structures from four vectors.
5567 void st4w(const ZRegister& zt1,
5568 const ZRegister& zt2,
5569 const ZRegister& zt3,
5570 const ZRegister& zt4,
5571 const PRegister& pg,
5572 const SVEMemOperand& addr);
5573
5574 // Contiguous store four-doubleword structures from four vectors.
5575 void st4d(const ZRegister& zt1,
5576 const ZRegister& zt2,
5577 const ZRegister& zt3,
5578 const ZRegister& zt4,
5579 const PRegister& pg,
5580 const SVEMemOperand& addr);
5581
5582 // Contiguous store non-temporal bytes from vector.
5583 void stnt1b(const ZRegister& zt,
5584 const PRegister& pg,
5585 const SVEMemOperand& addr);
5586
5587 // Contiguous store non-temporal halfwords from vector.
5588 void stnt1h(const ZRegister& zt,
5589 const PRegister& pg,
5590 const SVEMemOperand& addr);
5591
5592 // Contiguous store non-temporal words from vector.
5593 void stnt1w(const ZRegister& zt,
5594 const PRegister& pg,
5595 const SVEMemOperand& addr);
5596
5597 // Contiguous store non-temporal doublewords from vector.
5598 void stnt1d(const ZRegister& zt,
5599 const PRegister& pg,
5600 const SVEMemOperand& addr);
5601
5602 // Store SVE predicate/vector register.
5603 void str(const CPURegister& rt, const SVEMemOperand& addr);
5604
5605 // Subtract vectors (predicated).
5606 void sub(const ZRegister& zd,
5607 const PRegisterM& pg,
5608 const ZRegister& zn,
5609 const ZRegister& zm);
5610
5611 // Subtract vectors (unpredicated).
5612 void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5613
5614 // Subtract immediate (unpredicated).
5615 void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5616
5617 // Reversed subtract vectors (predicated).
5618 void subr(const ZRegister& zd,
5619 const PRegisterM& pg,
5620 const ZRegister& zn,
5621 const ZRegister& zm);
5622
5623 // Reversed subtract from immediate (unpredicated).
5624 void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5625
5626 // Signed unpack and extend half of vector.
5627 void sunpkhi(const ZRegister& zd, const ZRegister& zn);
5628
5629 // Signed unpack and extend half of vector.
5630 void sunpklo(const ZRegister& zd, const ZRegister& zn);
5631
5632 // Signed byte extend (predicated).
5633 void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5634
5635 // Signed halfword extend (predicated).
5636 void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5637
5638 // Signed word extend (predicated).
5639 void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5640
5641 // Programmable table lookup/permute using vector of indices into a
5642 // vector.
5643 void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5644
5645 // Interleave even or odd elements from two predicates.
5646 void trn1(const PRegisterWithLaneSize& pd,
5647 const PRegisterWithLaneSize& pn,
5648 const PRegisterWithLaneSize& pm);
5649
5650 // Interleave even or odd elements from two vectors.
5651 void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5652
5653 // Interleave even or odd elements from two predicates.
5654 void trn2(const PRegisterWithLaneSize& pd,
5655 const PRegisterWithLaneSize& pn,
5656 const PRegisterWithLaneSize& pm);
5657
5658 // Interleave even or odd elements from two vectors.
5659 void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5660
5661 // Unsigned absolute difference (predicated).
5662 void uabd(const ZRegister& zd,
5663 const PRegisterM& pg,
5664 const ZRegister& zn,
5665 const ZRegister& zm);
5666
5667 // Unsigned add reduction to scalar.
5668 void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5669
5670 // Unsigned integer convert to floating-point (predicated).
5671 void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5672
5673 // Unsigned divide (predicated).
5674 void udiv(const ZRegister& zd,
5675 const PRegisterM& pg,
5676 const ZRegister& zn,
5677 const ZRegister& zm);
5678
5679 // Unsigned reversed divide (predicated).
5680 void udivr(const ZRegister& zd,
5681 const PRegisterM& pg,
5682 const ZRegister& zn,
5683 const ZRegister& zm);
5684
5685 // Unsigned dot product by indexed quadtuplet.
5686 void udot(const ZRegister& zda,
5687 const ZRegister& zn,
5688 const ZRegister& zm,
5689 int index);
5690
5691 // Unsigned dot product.
5692 void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5693
5694 // Unsigned maximum vectors (predicated).
5695 void umax(const ZRegister& zd,
5696 const PRegisterM& pg,
5697 const ZRegister& zn,
5698 const ZRegister& zm);
5699
5700 // Unsigned maximum with immediate (unpredicated).
5701 void umax(const ZRegister& zd, const ZRegister& zn, int imm8);
5702
5703 // Unsigned maximum reduction to scalar.
5704 void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5705
5706 // Unsigned minimum vectors (predicated).
5707 void umin(const ZRegister& zd,
5708 const PRegisterM& pg,
5709 const ZRegister& zn,
5710 const ZRegister& zm);
5711
5712 // Unsigned minimum with immediate (unpredicated).
5713 void umin(const ZRegister& zd, const ZRegister& zn, int imm8);
5714
5715 // Unsigned minimum reduction to scalar.
5716 void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5717
5718 // Unsigned multiply returning high half (predicated).
5719 void umulh(const ZRegister& zd,
5720 const PRegisterM& pg,
5721 const ZRegister& zn,
5722 const ZRegister& zm);
5723
5724 // Unsigned saturating add vectors (unpredicated).
5725 void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5726
5727 // Unsigned saturating add immediate (unpredicated).
5728 void uqadd(const ZRegister& zd,
5729 const ZRegister& zn,
5730 int imm8,
5731 int shift = -1);
5732
5733 // Unsigned saturating decrement scalar by multiple of 8-bit predicate
5734 // constraint element count.
5735 void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5736
5737 // Unsigned saturating decrement scalar by multiple of 64-bit predicate
5738 // constraint element count.
5739 void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5740
5741 // Unsigned saturating decrement vector by multiple of 64-bit predicate
5742 // constraint element count.
5743 void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5744
5745 // Unsigned saturating decrement scalar by multiple of 16-bit predicate
5746 // constraint element count.
5747 void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5748
5749 // Unsigned saturating decrement vector by multiple of 16-bit predicate
5750 // constraint element count.
5751 void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5752
5753 // Unsigned saturating decrement scalar by active predicate element count.
5754 void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);
5755
5756 // Unsigned saturating decrement vector by active predicate element count.
5757 void uqdecp(const ZRegister& zdn, const PRegister& pg);
5758
5759 // Unsigned saturating decrement scalar by multiple of 32-bit predicate
5760 // constraint element count.
5761 void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5762
5763 // Unsigned saturating decrement vector by multiple of 32-bit predicate
5764 // constraint element count.
5765 void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5766
5767 // Unsigned saturating increment scalar by multiple of 8-bit predicate
5768 // constraint element count.
5769 void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5770
5771 // Unsigned saturating increment scalar by multiple of 64-bit predicate
5772 // constraint element count.
5773 void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5774
5775 // Unsigned saturating increment vector by multiple of 64-bit predicate
5776 // constraint element count.
5777 void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5778
5779 // Unsigned saturating increment scalar by multiple of 16-bit predicate
5780 // constraint element count.
5781 void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5782
5783 // Unsigned saturating increment vector by multiple of 16-bit predicate
5784 // constraint element count.
5785 void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5786
5787 // Unsigned saturating increment scalar by active predicate element count.
5788 void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);
5789
5790 // Unsigned saturating increment vector by active predicate element count.
5791 void uqincp(const ZRegister& zdn, const PRegister& pg);
5792
5793 // Unsigned saturating increment scalar by multiple of 32-bit predicate
5794 // constraint element count.
5795 void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5796
5797 // Unsigned saturating increment vector by multiple of 32-bit predicate
5798 // constraint element count.
5799 void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5800
5801 // Unsigned saturating subtract vectors (unpredicated).
5802 void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5803
5804 // Unsigned saturating subtract immediate (unpredicated).
5805 void uqsub(const ZRegister& zd,
5806 const ZRegister& zn,
5807 int imm8,
5808 int shift = -1);
5809
5810 // Unsigned unpack and extend half of vector.
5811 void uunpkhi(const ZRegister& zd, const ZRegister& zn);
5812
5813 // Unsigned unpack and extend half of vector.
5814 void uunpklo(const ZRegister& zd, const ZRegister& zn);
5815
5816 // Unsigned byte extend (predicated).
5817 void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5818
5819 // Unsigned halfword extend (predicated).
5820 void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5821
5822 // Unsigned word extend (predicated).
5823 void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5824
5825 // Concatenate even or odd elements from two predicates.
5826 void uzp1(const PRegisterWithLaneSize& pd,
5827 const PRegisterWithLaneSize& pn,
5828 const PRegisterWithLaneSize& pm);
5829
5830 // Concatenate even or odd elements from two vectors.
5831 void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5832
5833 // Concatenate even or odd elements from two predicates.
5834 void uzp2(const PRegisterWithLaneSize& pd,
5835 const PRegisterWithLaneSize& pn,
5836 const PRegisterWithLaneSize& pm);
5837
5838 // Concatenate even or odd elements from two vectors.
5839 void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5840
5841 // While incrementing signed scalar less than or equal to scalar.
5842 void whilele(const PRegisterWithLaneSize& pd,
5843 const Register& rn,
5844 const Register& rm);
5845
5846 // While incrementing unsigned scalar lower than scalar.
5847 void whilelo(const PRegisterWithLaneSize& pd,
5848 const Register& rn,
5849 const Register& rm);
5850
5851 // While incrementing unsigned scalar lower or same as scalar.
5852 void whilels(const PRegisterWithLaneSize& pd,
5853 const Register& rn,
5854 const Register& rm);
5855
5856 // While incrementing signed scalar less than scalar.
5857 void whilelt(const PRegisterWithLaneSize& pd,
5858 const Register& rn,
5859 const Register& rm);
5860
5861 // Write the first-fault register.
5862 void wrffr(const PRegisterWithLaneSize& pn);
5863
5864 // Interleave elements from two half predicates.
5865 void zip1(const PRegisterWithLaneSize& pd,
5866 const PRegisterWithLaneSize& pn,
5867 const PRegisterWithLaneSize& pm);
5868
5869 // Interleave elements from two half vectors.
5870 void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5871
5872 // Interleave elements from two half predicates.
5873 void zip2(const PRegisterWithLaneSize& pd,
5874 const PRegisterWithLaneSize& pn,
5875 const PRegisterWithLaneSize& pm);
5876
5877 // Interleave elements from two half vectors.
5878 void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5879
5880 // Add with carry long (bottom).
5881 void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5882
5883 // Add with carry long (top).
5884 void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5885
5886 // Add narrow high part (bottom).
5887 void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5888
5889 // Add narrow high part (top).
5890 void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5891
5892 // Add pairwise.
5893 void addp(const ZRegister& zd,
5894 const PRegisterM& pg,
5895 const ZRegister& zn,
5896 const ZRegister& zm);
5897
5898 // Bitwise clear and exclusive OR.
5899 void bcax(const ZRegister& zd,
5900 const ZRegister& zn,
5901 const ZRegister& zm,
5902 const ZRegister& zk);
5903
5904 // Scatter lower bits into positions selected by bitmask.
5905 void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5906
5907 // Gather lower bits from positions selected by bitmask.
5908 void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5909
5910 // Group bits to right or left as selected by bitmask.
5911 void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5912
5913 // Bitwise select.
5914 void bsl(const ZRegister& zd,
5915 const ZRegister& zn,
5916 const ZRegister& zm,
5917 const ZRegister& zk);
5918
5919 // Bitwise select with first input inverted.
5920 void bsl1n(const ZRegister& zd,
5921 const ZRegister& zn,
5922 const ZRegister& zm,
5923 const ZRegister& zk);
5924
5925 // Bitwise select with second input inverted.
5926 void bsl2n(const ZRegister& zd,
5927 const ZRegister& zn,
5928 const ZRegister& zm,
5929 const ZRegister& zk);
5930
5931 // Complex integer add with rotate.
5932 void cadd(const ZRegister& zd,
5933 const ZRegister& zn,
5934 const ZRegister& zm,
5935 int rot);
5936
5937 // Complex integer dot product (indexed).
5938 void cdot(const ZRegister& zda,
5939 const ZRegister& zn,
5940 const ZRegister& zm,
5941 int index,
5942 int rot);
5943
5944 // Complex integer dot product.
5945 void cdot(const ZRegister& zda,
5946 const ZRegister& zn,
5947 const ZRegister& zm,
5948 int rot);
5949
5950 // Complex integer multiply-add with rotate (indexed).
5951 void cmla(const ZRegister& zda,
5952 const ZRegister& zn,
5953 const ZRegister& zm,
5954 int index,
5955 int rot);
5956
5957 // Complex integer multiply-add with rotate.
5958 void cmla(const ZRegister& zda,
5959 const ZRegister& zn,
5960 const ZRegister& zm,
5961 int rot);
5962
5963 // Bitwise exclusive OR of three vectors.
5964 void eor3(const ZRegister& zd,
5965 const ZRegister& zn,
5966 const ZRegister& zm,
5967 const ZRegister& zk);
5968
5969 // Interleaving exclusive OR (bottom, top).
5970 void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5971
5972 // Interleaving exclusive OR (top, bottom).
5973 void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5974
5975 // Floating-point add pairwise.
5976 void faddp(const ZRegister& zd,
5977 const PRegisterM& pg,
5978 const ZRegister& zn,
5979 const ZRegister& zm);
5980
5981 // Floating-point up convert long (top, predicated).
5982 void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5983
5984 // Floating-point down convert and narrow (top, predicated).
5985 void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5986
5987 // Floating-point down convert, rounding to odd (predicated).
5988 void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5989
5990 // Floating-point down convert, rounding to odd (top, predicated).
5991 void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5992
5993 // Floating-point base 2 logarithm as integer.
5994 void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5995
5996 // Floating-point maximum number pairwise.
5997 void fmaxnmp(const ZRegister& zd,
5998 const PRegisterM& pg,
5999 const ZRegister& zn,
6000 const ZRegister& zm);
6001
6002 // Floating-point maximum pairwise.
6003 void fmaxp(const ZRegister& zd,
6004 const PRegisterM& pg,
6005 const ZRegister& zn,
6006 const ZRegister& zm);
6007
6008 // Floating-point minimum number pairwise.
6009 void fminnmp(const ZRegister& zd,
6010 const PRegisterM& pg,
6011 const ZRegister& zn,
6012 const ZRegister& zm);
6013
6014 // Floating-point minimum pairwise.
6015 void fminp(const ZRegister& zd,
6016 const PRegisterM& pg,
6017 const ZRegister& zn,
6018 const ZRegister& zm);
6019
6020 // Half-precision floating-point multiply-add long to single-precision
6021 // (bottom).
6022 void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6023
6024 // Half-precision floating-point multiply-add long to single-precision
6025 // (top).
6026 void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6027
6028 // Half-precision floating-point multiply-subtract long from
6029 // single-precision (bottom).
6030 void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6031
6032 // Half-precision floating-point multiply-subtract long from
6033 // single-precision (top, indexed).
6034 void fmlslt(const ZRegister& zda,
6035 const ZRegister& zn,
6036 const ZRegister& zm,
6037 int index);
6038
6039 // Half-precision floating-point multiply-add long to single-precision
6040 // (bottom, indexed).
6041 void fmlalb(const ZRegister& zda,
6042 const ZRegister& zn,
6043 const ZRegister& zm,
6044 int index);
6045
6046 // Half-precision floating-point multiply-add long to single-precision
6047 // (top, indexed).
6048 void fmlalt(const ZRegister& zda,
6049 const ZRegister& zn,
6050 const ZRegister& zm,
6051 int index);
6052
6053 // Half-precision floating-point multiply-subtract long from
6054 // single-precision (bottom, indexed).
6055 void fmlslb(const ZRegister& zda,
6056 const ZRegister& zn,
6057 const ZRegister& zm,
6058 int index);
6059
6060 // Half-precision floating-point multiply-subtract long from
6061 // single-precision (top).
6062 void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6063
6064 // Count matching elements in vector.
6065 void histcnt(const ZRegister& zd,
6066 const PRegisterZ& pg,
6067 const ZRegister& zn,
6068 const ZRegister& zm);
6069
6070 // Count matching elements in vector segments.
6071 void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6072
6073 // Gather load non-temporal signed bytes.
6074 void ldnt1sb(const ZRegister& zt,
6075 const PRegisterZ& pg,
6076 const SVEMemOperand& addr);
6077
6078 // Gather load non-temporal signed halfwords.
6079 void ldnt1sh(const ZRegister& zt,
6080 const PRegisterZ& pg,
6081 const SVEMemOperand& addr);
6082
6083 // Gather load non-temporal signed words.
6084 void ldnt1sw(const ZRegister& zt,
6085 const PRegisterZ& pg,
6086 const SVEMemOperand& addr);
6087
6088 // Detect any matching elements, setting the condition flags.
6089 void match(const PRegisterWithLaneSize& pd,
6090 const PRegisterZ& pg,
6091 const ZRegister& zn,
6092 const ZRegister& zm);
6093
6094 // Multiply-add to accumulator (indexed).
6095 void mla(const ZRegister& zda,
6096 const ZRegister& zn,
6097 const ZRegister& zm,
6098 int index);
6099
6100 // Multiply-subtract from accumulator (indexed).
6101 void mls(const ZRegister& zda,
6102 const ZRegister& zn,
6103 const ZRegister& zm,
6104 int index);
6105
6106 // Multiply (indexed).
6107 void mul(const ZRegister& zd,
6108 const ZRegister& zn,
6109 const ZRegister& zm,
6110 int index);
6111
6112 // Multiply vectors (unpredicated).
6113 void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6114
6115 // Bitwise inverted select.
6116 void nbsl(const ZRegister& zd,
6117 const ZRegister& zn,
6118 const ZRegister& zm,
6119 const ZRegister& zk);
6120
6121 // Detect no matching elements, setting the condition flags.
6122 void nmatch(const PRegisterWithLaneSize& pd,
6123 const PRegisterZ& pg,
6124 const ZRegister& zn,
6125 const ZRegister& zm);
6126
6127 // Polynomial multiply vectors (unpredicated).
6128 void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6129
6130 // Polynomial multiply long (bottom).
6131 void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6132
6133 // Polynomial multiply long (top).
6134 void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6135
6136 // Rounding add narrow high part (bottom).
6137 void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6138
6139 // Rounding add narrow high part (top).
6140 void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6141
6142 // Rounding shift right narrow by immediate (bottom).
6143 void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6144
6145 // Rounding shift right narrow by immediate (top).
6146 void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6147
6148 // Rounding subtract narrow high part (bottom).
6149 void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6150
6151 // Rounding subtract narrow high part (top).
6152 void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6153
6154 // Signed absolute difference and accumulate.
6155 void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6156
6157 // Signed absolute difference and accumulate long (bottom).
6158 void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6159
6160 // Signed absolute difference and accumulate long (top).
6161 void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6162
6163 // Signed absolute difference long (bottom).
6164 void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6165
6166 // Signed absolute difference long (top).
6167 void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6168
6169 // Signed add and accumulate long pairwise.
6170 void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6171
6172 // Signed add long (bottom).
6173 void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6174
6175 // Signed add long (bottom + top).
6176 void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6177
6178 // Signed add long (top).
6179 void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6180
6181 // Signed add wide (bottom).
6182 void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6183
6184 // Signed add wide (top).
6185 void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6186
6187 // Subtract with carry long (bottom).
6188 void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6189
6190 // Subtract with carry long (top).
6191 void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6192
6193 // Signed halving addition.
6194 void shadd(const ZRegister& zd,
6195 const PRegisterM& pg,
6196 const ZRegister& zn,
6197 const ZRegister& zm);
6198
6199 // Shift right narrow by immediate (bottom).
6200 void shrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6201
6202 // Shift right narrow by immediate (top).
6203 void shrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6204
6205 // Signed halving subtract.
6206 void shsub(const ZRegister& zd,
6207 const PRegisterM& pg,
6208 const ZRegister& zn,
6209 const ZRegister& zm);
6210
6211 // Signed halving subtract reversed vectors.
6212 void shsubr(const ZRegister& zd,
6213 const PRegisterM& pg,
6214 const ZRegister& zn,
6215 const ZRegister& zm);
6216
6217 // Shift left and insert (immediate).
6218 void sli(const ZRegister& zd, const ZRegister& zn, int shift);
6219
6220 // Signed maximum pairwise.
6221 void smaxp(const ZRegister& zd,
6222 const PRegisterM& pg,
6223 const ZRegister& zn,
6224 const ZRegister& zm);
6225
6226 // Signed minimum pairwise.
6227 void sminp(const ZRegister& zd,
6228 const PRegisterM& pg,
6229 const ZRegister& zn,
6230 const ZRegister& zm);
6231
6232 // Signed multiply-add long to accumulator (bottom, indexed).
6233 void smlalb(const ZRegister& zda,
6234 const ZRegister& zn,
6235 const ZRegister& zm,
6236 int index);
6237
6238 // Signed multiply-add long to accumulator (bottom).
6239 void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6240
6241 // Signed multiply-add long to accumulator (top, indexed).
6242 void smlalt(const ZRegister& zda,
6243 const ZRegister& zn,
6244 const ZRegister& zm,
6245 int index);
6246
6247 // Signed multiply-add long to accumulator (top).
6248 void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6249
6250 // Signed multiply-subtract long from accumulator (bottom, indexed).
6251 void smlslb(const ZRegister& zda,
6252 const ZRegister& zn,
6253 const ZRegister& zm,
6254 int index);
6255
6256 // Signed multiply-subtract long from accumulator (bottom).
6257 void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6258
6259 // Signed multiply-subtract long from accumulator (top, indexed).
6260 void smlslt(const ZRegister& zda,
6261 const ZRegister& zn,
6262 const ZRegister& zm,
6263 int index);
6264
6265 // Signed multiply-subtract long from accumulator (top).
6266 void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6267
6268 // Signed multiply returning high half (unpredicated).
6269 void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6270
6271 // Signed multiply long (bottom, indexed).
6272 void smullb(const ZRegister& zd,
6273 const ZRegister& zn,
6274 const ZRegister& zm,
6275 int index);
6276
6277 // Signed multiply long (bottom).
6278 void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6279
6280 // Signed multiply long (top, indexed).
6281 void smullt(const ZRegister& zd,
6282 const ZRegister& zn,
6283 const ZRegister& zm,
6284 int index);
6285
6286 // Signed multiply long (top).
6287 void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6288
6289 // Signed saturating absolute value.
6290 void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6291
6292 // Signed saturating addition (predicated).
6293 void sqadd(const ZRegister& zd,
6294 const PRegisterM& pg,
6295 const ZRegister& zn,
6296 const ZRegister& zm);
6297
6298 // Saturating complex integer add with rotate.
6299 void sqcadd(const ZRegister& zd,
6300 const ZRegister& zn,
6301 const ZRegister& zm,
6302 int rot);
6303
6304 // Signed saturating doubling multiply-add long to accumulator (bottom,
6305 // indexed).
6306 void sqdmlalb(const ZRegister& zda,
6307 const ZRegister& zn,
6308 const ZRegister& zm,
6309 int index);
6310
6311 // Signed saturating doubling multiply-add long to accumulator (bottom).
6312 void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6313
6314 // Signed saturating doubling multiply-add long to accumulator (bottom x
6315 // top).
6316 void sqdmlalbt(const ZRegister& zda,
6317 const ZRegister& zn,
6318 const ZRegister& zm);
6319
6320 // Signed saturating doubling multiply-add long to accumulator (top,
6321 // indexed).
6322 void sqdmlalt(const ZRegister& zda,
6323 const ZRegister& zn,
6324 const ZRegister& zm,
6325 int index);
6326
6327 // Signed saturating doubling multiply-add long to accumulator (top).
6328 void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6329
6330 // Signed saturating doubling multiply-subtract long from accumulator
6331 // (bottom, indexed).
6332 void sqdmlslb(const ZRegister& zda,
6333 const ZRegister& zn,
6334 const ZRegister& zm,
6335 int index);
6336
6337 // Signed saturating doubling multiply-subtract long from accumulator
6338 // (bottom).
6339 void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6340
6341 // Signed saturating doubling multiply-subtract long from accumulator
6342 // (bottom x top).
6343 void sqdmlslbt(const ZRegister& zda,
6344 const ZRegister& zn,
6345 const ZRegister& zm);
6346
6347 // Signed saturating doubling multiply-subtract long from accumulator
6348 // (top, indexed).
6349 void sqdmlslt(const ZRegister& zda,
6350 const ZRegister& zn,
6351 const ZRegister& zm,
6352 int index);
6353
6354 // Signed saturating doubling multiply-subtract long from accumulator
6355 // (top).
6356 void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6357
6358 // Signed saturating doubling multiply high (indexed).
6359 void sqdmulh(const ZRegister& zd,
6360 const ZRegister& zn,
6361 const ZRegister& zm,
6362 int index);
6363
6364 // Signed saturating doubling multiply high (unpredicated).
6365 void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6366
6367 // Signed saturating doubling multiply long (bottom, indexed).
6368 void sqdmullb(const ZRegister& zd,
6369 const ZRegister& zn,
6370 const ZRegister& zm,
6371 int index);
6372
6373 // Signed saturating doubling multiply long (bottom).
6374 void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6375
6376 // Signed saturating doubling multiply long (top, indexed).
6377 void sqdmullt(const ZRegister& zd,
6378 const ZRegister& zn,
6379 const ZRegister& zm,
6380 int index);
6381
6382 // Signed saturating doubling multiply long (top).
6383 void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6384
6385 // Signed saturating negate.
6386 void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6387
6388 // Saturating rounding doubling complex integer multiply-add high with
6389 // rotate (indexed).
6390 void sqrdcmlah(const ZRegister& zda,
6391 const ZRegister& zn,
6392 const ZRegister& zm,
6393 int index,
6394 int rot);
6395
6396 // Saturating rounding doubling complex integer multiply-add high with
6397 // rotate.
6398 void sqrdcmlah(const ZRegister& zda,
6399 const ZRegister& zn,
6400 const ZRegister& zm,
6401 int rot);
6402
6403 // Signed saturating rounding doubling multiply-add high to accumulator
6404 // (indexed).
6405 void sqrdmlah(const ZRegister& zda,
6406 const ZRegister& zn,
6407 const ZRegister& zm,
6408 int index);
6409
6410 // Signed saturating rounding doubling multiply-add high to accumulator
6411 // (unpredicated).
6412 void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6413
6414 // Signed saturating rounding doubling multiply-subtract high from
6415 // accumulator (indexed).
6416 void sqrdmlsh(const ZRegister& zda,
6417 const ZRegister& zn,
6418 const ZRegister& zm,
6419 int index);
6420
6421 // Signed saturating rounding doubling multiply-subtract high from
6422 // accumulator (unpredicated).
6423 void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6424
6425 // Signed saturating rounding doubling multiply high (indexed).
6426 void sqrdmulh(const ZRegister& zd,
6427 const ZRegister& zn,
6428 const ZRegister& zm,
6429 int index);
6430
6431 // Signed saturating rounding doubling multiply high (unpredicated).
6432 void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6433
6434 // Signed saturating rounding shift left by vector (predicated).
6435 void sqrshl(const ZRegister& zd,
6436 const PRegisterM& pg,
6437 const ZRegister& zn,
6438 const ZRegister& zm);
6439
6440 // Signed saturating rounding shift left reversed vectors (predicated).
6441 void sqrshlr(const ZRegister& zd,
6442 const PRegisterM& pg,
6443 const ZRegister& zn,
6444 const ZRegister& zm);
6445
6446 // Signed saturating rounding shift right narrow by immediate (bottom).
6447 void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6448
6449 // Signed saturating rounding shift right narrow by immediate (top).
6450 void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6451
6452 // Signed saturating rounding shift right unsigned narrow by immediate
6453 // (bottom).
6454 void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6455
6456 // Signed saturating rounding shift right unsigned narrow by immediate
6457 // (top).
6458 void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6459
6460 // Signed saturating shift left by immediate.
6461 void sqshl(const ZRegister& zd,
6462 const PRegisterM& pg,
6463 const ZRegister& zn,
6464 int shift);
6465
6466 // Signed saturating shift left by vector (predicated).
6467 void sqshl(const ZRegister& zd,
6468 const PRegisterM& pg,
6469 const ZRegister& zn,
6470 const ZRegister& zm);
6471
6472 // Signed saturating shift left reversed vectors (predicated).
6473 void sqshlr(const ZRegister& zd,
6474 const PRegisterM& pg,
6475 const ZRegister& zn,
6476 const ZRegister& zm);
6477
6478 // Signed saturating shift left unsigned by immediate.
6479 void sqshlu(const ZRegister& zd,
6480 const PRegisterM& pg,
6481 const ZRegister& zn,
6482 int shift);
6483
6484 // Signed saturating shift right narrow by immediate (bottom).
6485 void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6486
6487 // Signed saturating shift right narrow by immediate (top).
6488 void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6489
6490 // Signed saturating shift right unsigned narrow by immediate (bottom).
6491 void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6492
6493 // Signed saturating shift right unsigned narrow by immediate (top).
6494 void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6495
6496 // Signed saturating subtraction (predicated).
6497 void sqsub(const ZRegister& zd,
6498 const PRegisterM& pg,
6499 const ZRegister& zn,
6500 const ZRegister& zm);
6501
6502 // Signed saturating subtraction reversed vectors (predicated).
6503 void sqsubr(const ZRegister& zd,
6504 const PRegisterM& pg,
6505 const ZRegister& zn,
6506 const ZRegister& zm);
6507
6508 // Signed saturating extract narrow (bottom).
6509 void sqxtnb(const ZRegister& zd, const ZRegister& zn);
6510
6511 // Signed saturating extract narrow (top).
6512 void sqxtnt(const ZRegister& zd, const ZRegister& zn);
6513
6514 // Signed saturating unsigned extract narrow (bottom).
6515 void sqxtunb(const ZRegister& zd, const ZRegister& zn);
6516
6517 // Signed saturating unsigned extract narrow (top).
6518 void sqxtunt(const ZRegister& zd, const ZRegister& zn);
6519
6520 // Signed rounding halving addition.
6521 void srhadd(const ZRegister& zd,
6522 const PRegisterM& pg,
6523 const ZRegister& zn,
6524 const ZRegister& zm);
6525
6526 // Shift right and insert (immediate).
6527 void sri(const ZRegister& zd, const ZRegister& zn, int shift);
6528
6529 // Signed rounding shift left by vector (predicated).
6530 void srshl(const ZRegister& zd,
6531 const PRegisterM& pg,
6532 const ZRegister& zn,
6533 const ZRegister& zm);
6534
6535 // Signed rounding shift left reversed vectors (predicated).
6536 void srshlr(const ZRegister& zd,
6537 const PRegisterM& pg,
6538 const ZRegister& zn,
6539 const ZRegister& zm);
6540
6541 // Signed rounding shift right by immediate.
6542 void srshr(const ZRegister& zd,
6543 const PRegisterM& pg,
6544 const ZRegister& zn,
6545 int shift);
6546
6547 // Signed rounding shift right and accumulate (immediate).
6548 void srsra(const ZRegister& zda, const ZRegister& zn, int shift);
6549
6550 // Signed shift left long by immediate (bottom).
6551 void sshllb(const ZRegister& zd, const ZRegister& zn, int shift);
6552
6553 // Signed shift left long by immediate (top).
6554 void sshllt(const ZRegister& zd, const ZRegister& zn, int shift);
6555
6556 // Signed shift right and accumulate (immediate).
6557 void ssra(const ZRegister& zda, const ZRegister& zn, int shift);
6558
6559 // Signed subtract long (bottom).
6560 void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6561
6562 // Signed subtract long (bottom - top).
6563 void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6564
6565 // Signed subtract long (top).
6566 void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6567
6568 // Signed subtract long (top - bottom).
6569 void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6570
6571 // Signed subtract wide (bottom).
6572 void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6573
6574 // Signed subtract wide (top).
6575 void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6576
6577 // Subtract narrow high part (bottom).
6578 void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6579
6580 // Subtract narrow high part (top).
6581 void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6582
6583 // Signed saturating addition of unsigned value.
6584 void suqadd(const ZRegister& zd,
6585 const PRegisterM& pg,
6586 const ZRegister& zn,
6587 const ZRegister& zm);
6588
  // Programmable table lookup in one or two vector tables (zeroing).
6590 void tbl(const ZRegister& zd,
6591 const ZRegister& zn1,
6592 const ZRegister& zn2,
6593 const ZRegister& zm);
6594
  // Programmable table lookup in a single vector table (merging).
6596 void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6597
6598 // Unsigned absolute difference and accumulate.
6599 void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6600
6601 // Unsigned absolute difference and accumulate long (bottom).
6602 void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6603
6604 // Unsigned absolute difference and accumulate long (top).
6605 void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6606
6607 // Unsigned absolute difference long (bottom).
6608 void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6609
6610 // Unsigned absolute difference long (top).
6611 void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6612
6613 // Unsigned add and accumulate long pairwise.
6614 void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6615
6616 // Unsigned add long (bottom).
6617 void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6618
6619 // Unsigned add long (top).
6620 void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6621
6622 // Unsigned add wide (bottom).
6623 void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6624
6625 // Unsigned add wide (top).
6626 void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6627
6628 // Unsigned halving addition.
6629 void uhadd(const ZRegister& zd,
6630 const PRegisterM& pg,
6631 const ZRegister& zn,
6632 const ZRegister& zm);
6633
6634 // Unsigned halving subtract.
6635 void uhsub(const ZRegister& zd,
6636 const PRegisterM& pg,
6637 const ZRegister& zn,
6638 const ZRegister& zm);
6639
6640 // Unsigned halving subtract reversed vectors.
6641 void uhsubr(const ZRegister& zd,
6642 const PRegisterM& pg,
6643 const ZRegister& zn,
6644 const ZRegister& zm);
6645
6646 // Unsigned maximum pairwise.
6647 void umaxp(const ZRegister& zd,
6648 const PRegisterM& pg,
6649 const ZRegister& zn,
6650 const ZRegister& zm);
6651
6652 // Unsigned minimum pairwise.
6653 void uminp(const ZRegister& zd,
6654 const PRegisterM& pg,
6655 const ZRegister& zn,
6656 const ZRegister& zm);
6657
6658 // Unsigned multiply-add long to accumulator (bottom, indexed).
6659 void umlalb(const ZRegister& zda,
6660 const ZRegister& zn,
6661 const ZRegister& zm,
6662 int index);
6663
6664 // Unsigned multiply-add long to accumulator (bottom).
6665 void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6666
6667 // Unsigned multiply-add long to accumulator (top, indexed).
6668 void umlalt(const ZRegister& zda,
6669 const ZRegister& zn,
6670 const ZRegister& zm,
6671 int index);
6672
6673 // Unsigned multiply-add long to accumulator (top).
6674 void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6675
6676 // Unsigned multiply-subtract long from accumulator (bottom, indexed).
6677 void umlslb(const ZRegister& zda,
6678 const ZRegister& zn,
6679 const ZRegister& zm,
6680 int index);
6681
6682 // Unsigned multiply-subtract long from accumulator (bottom).
6683 void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6684
6685 // Unsigned multiply-subtract long from accumulator (top, indexed).
6686 void umlslt(const ZRegister& zda,
6687 const ZRegister& zn,
6688 const ZRegister& zm,
6689 int index);
6690
6691 // Unsigned multiply-subtract long from accumulator (top).
6692 void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6693
6694 // Unsigned multiply returning high half (unpredicated).
6695 void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6696
6697 // Unsigned multiply long (bottom, indexed).
6698 void umullb(const ZRegister& zd,
6699 const ZRegister& zn,
6700 const ZRegister& zm,
6701 int index);
6702
6703 // Unsigned multiply long (bottom).
6704 void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6705
6706 // Unsigned multiply long (top, indexed).
6707 void umullt(const ZRegister& zd,
6708 const ZRegister& zn,
6709 const ZRegister& zm,
6710 int index);
6711
6712 // Unsigned multiply long (top).
6713 void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6714
6715 // Unsigned saturating addition (predicated).
6716 void uqadd(const ZRegister& zd,
6717 const PRegisterM& pg,
6718 const ZRegister& zn,
6719 const ZRegister& zm);
6720
6721 // Unsigned saturating rounding shift left by vector (predicated).
6722 void uqrshl(const ZRegister& zd,
6723 const PRegisterM& pg,
6724 const ZRegister& zn,
6725 const ZRegister& zm);
6726
6727 // Unsigned saturating rounding shift left reversed vectors (predicated).
6728 void uqrshlr(const ZRegister& zd,
6729 const PRegisterM& pg,
6730 const ZRegister& zn,
6731 const ZRegister& zm);
6732
6733 // Unsigned saturating rounding shift right narrow by immediate (bottom).
6734 void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6735
6736 // Unsigned saturating rounding shift right narrow by immediate (top).
6737 void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6738
6739 // Unsigned saturating shift left by immediate.
6740 void uqshl(const ZRegister& zd,
6741 const PRegisterM& pg,
6742 const ZRegister& zn,
6743 int shift);
6744
6745 // Unsigned saturating shift left by vector (predicated).
6746 void uqshl(const ZRegister& zd,
6747 const PRegisterM& pg,
6748 const ZRegister& zn,
6749 const ZRegister& zm);
6750
6751 // Unsigned saturating shift left reversed vectors (predicated).
6752 void uqshlr(const ZRegister& zd,
6753 const PRegisterM& pg,
6754 const ZRegister& zn,
6755 const ZRegister& zm);
6756
6757 // Unsigned saturating shift right narrow by immediate (bottom).
6758 void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6759
6760 // Unsigned saturating shift right narrow by immediate (top).
6761 void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6762
6763 // Unsigned saturating subtraction (predicated).
6764 void uqsub(const ZRegister& zd,
6765 const PRegisterM& pg,
6766 const ZRegister& zn,
6767 const ZRegister& zm);
6768
6769 // Unsigned saturating subtraction reversed vectors (predicated).
6770 void uqsubr(const ZRegister& zd,
6771 const PRegisterM& pg,
6772 const ZRegister& zn,
6773 const ZRegister& zm);
6774
6775 // Unsigned saturating extract narrow (bottom).
6776 void uqxtnb(const ZRegister& zd, const ZRegister& zn);
6777
6778 // Unsigned saturating extract narrow (top).
6779 void uqxtnt(const ZRegister& zd, const ZRegister& zn);
6780
6781 // Unsigned reciprocal estimate (predicated).
6782 void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6783
6784 // Unsigned rounding halving addition.
6785 void urhadd(const ZRegister& zd,
6786 const PRegisterM& pg,
6787 const ZRegister& zn,
6788 const ZRegister& zm);
6789
6790 // Unsigned rounding shift left by vector (predicated).
6791 void urshl(const ZRegister& zd,
6792 const PRegisterM& pg,
6793 const ZRegister& zn,
6794 const ZRegister& zm);
6795
6796 // Unsigned rounding shift left reversed vectors (predicated).
6797 void urshlr(const ZRegister& zd,
6798 const PRegisterM& pg,
6799 const ZRegister& zn,
6800 const ZRegister& zm);
6801
6802 // Unsigned rounding shift right by immediate.
6803 void urshr(const ZRegister& zd,
6804 const PRegisterM& pg,
6805 const ZRegister& zn,
6806 int shift);
6807
6808 // Unsigned reciprocal square root estimate (predicated).
6809 void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6810
6811 // Unsigned rounding shift right and accumulate (immediate).
6812 void ursra(const ZRegister& zda, const ZRegister& zn, int shift);
6813
6814 // Unsigned shift left long by immediate (bottom).
6815 void ushllb(const ZRegister& zd, const ZRegister& zn, int shift);
6816
6817 // Unsigned shift left long by immediate (top).
6818 void ushllt(const ZRegister& zd, const ZRegister& zn, int shift);
6819
6820 // Unsigned saturating addition of signed value.
6821 void usqadd(const ZRegister& zd,
6822 const PRegisterM& pg,
6823 const ZRegister& zn,
6824 const ZRegister& zm);
6825
6826 // Unsigned shift right and accumulate (immediate).
6827 void usra(const ZRegister& zda, const ZRegister& zn, int shift);
6828
6829 // Unsigned subtract long (bottom).
6830 void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6831
6832 // Unsigned subtract long (top).
6833 void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6834
6835 // Unsigned subtract wide (bottom).
6836 void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6837
6838 // Unsigned subtract wide (top).
6839 void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6840
6841 // While decrementing signed scalar greater than or equal to scalar.
6842 void whilege(const PRegisterWithLaneSize& pd,
6843 const Register& rn,
6844 const Register& rm);
6845
6846 // While decrementing signed scalar greater than scalar.
6847 void whilegt(const PRegisterWithLaneSize& pd,
6848 const Register& rn,
6849 const Register& rm);
6850
6851 // While decrementing unsigned scalar higher than scalar.
6852 void whilehi(const PRegisterWithLaneSize& pd,
6853 const Register& rn,
6854 const Register& rm);
6855
6856 // While decrementing unsigned scalar higher or same as scalar.
6857 void whilehs(const PRegisterWithLaneSize& pd,
6858 const Register& rn,
6859 const Register& rm);
6860
6861 // While free of read-after-write conflicts.
6862 void whilerw(const PRegisterWithLaneSize& pd,
6863 const Register& rn,
6864 const Register& rm);
6865
6866 // While free of write-after-read/write conflicts.
6867 void whilewr(const PRegisterWithLaneSize& pd,
6868 const Register& rn,
6869 const Register& rm);
6870
6871 // Bitwise exclusive OR and rotate right by immediate.
6872 void xar(const ZRegister& zd,
6873 const ZRegister& zn,
6874 const ZRegister& zm,
6875 int shift);
6876
6877 // Floating-point matrix multiply-accumulate.
6878 void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6879
6880 // Signed integer matrix multiply-accumulate.
6881 void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6882
6883 // Unsigned by signed integer matrix multiply-accumulate.
6884 void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6885
6886 // Unsigned integer matrix multiply-accumulate.
6887 void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6888
6889 // Unsigned by signed integer dot product.
6890 void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6891
6892 // Unsigned by signed integer indexed dot product.
6893 void usdot(const ZRegister& zda,
6894 const ZRegister& zn,
6895 const ZRegister& zm,
6896 int index);
6897
6898 // Signed by unsigned integer indexed dot product.
6899 void sudot(const ZRegister& zda,
6900 const ZRegister& zn,
6901 const ZRegister& zm,
6902 int index);
6903
6904 // Emit generic instructions.
6905
  // Emit raw instructions into the instruction stream.
  //
  // The 32-bit value `raw_inst` is passed straight to Emit() with no
  // mnemonic-level checking.
  void dci(Instr raw_inst) { Emit(raw_inst); }
6908
6909 // Emit 32 bits of data into the instruction stream.
dc32(uint32_t data)6910 void dc32(uint32_t data) { dc(data); }
6911
6912 // Emit 64 bits of data into the instruction stream.
dc64(uint64_t data)6913 void dc64(uint64_t data) { dc(data); }
6914
  // Emit data in the instruction stream.
  //
  // Copies sizeof(T) bytes of `data` into the code buffer. The assembler
  // must currently allow direct emission (checked via AllowAssembler()).
  template <typename T>
  void dc(T data) {
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }
6921
  // Copy a string into the instruction stream, including the terminating NULL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
  void EmitString(const char* string) {
    VIXL_ASSERT(string != NULL);
    VIXL_ASSERT(AllowAssembler());

    GetBuffer()->EmitString(string);
    // Pad the buffer so the next emission starts at a correctly aligned
    // position for instructions.
    GetBuffer()->Align();
  }
6932
  // Code generation helpers.
  // Returns true if `imm` could be moved into `dst` with a single instruction
  // (and, when `assm` is non-trivial, emits it) — see the implementation for
  // the exact contract.
  static bool OneInstrMoveImmediateHelper(Assembler* assm,
                                          const Register& dst,
                                          uint64_t imm);

  // Register encoding.
  // Place the register code of `rx` in instruction bits [hibit:lobit].
  // The stack pointer cannot be encoded through this path.
  template <int hibit, int lobit>
  static Instr Rx(CPURegister rx) {
    VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
    return ImmUnsignedField<hibit, lobit>(rx.GetCode());
  }
6944
// Generate the field encoders Rd, Rn, Rm, Ra, Rt, Rt2 and Rs. Each one places
// a register code in the correspondingly named instruction field, using the
// R<x>_offset and R<x>_width constants.
#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
#define REGISTER_ENCODER(N)                                       \
  static Instr R##N(CPURegister r##N) {                           \
    return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
  }
  CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
#undef REGISTER_ENCODER
#undef CPU_REGISTER_FIELD_NAMES

  // As Rm(), but additionally disallows the zero register.
  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }
6959
6960 // These encoding functions allow the stack pointer to be encoded, and
6961 // disallow the zero register.
RdSP(Register rd)6962 static Instr RdSP(Register rd) {
6963 VIXL_ASSERT(!rd.IsZero());
6964 return (rd.GetCode() & kRegCodeMask) << Rd_offset;
6965 }
6966
RnSP(Register rn)6967 static Instr RnSP(Register rn) {
6968 VIXL_ASSERT(!rn.IsZero());
6969 return (rn.GetCode() & kRegCodeMask) << Rn_offset;
6970 }
6971
RmSP(Register rm)6972 static Instr RmSP(Register rm) {
6973 VIXL_ASSERT(!rm.IsZero());
6974 return (rm.GetCode() & kRegCodeMask) << Rm_offset;
6975 }
6976
  // Encode the destination predicate register in the Pd field.
  static Instr Pd(PRegister pd) {
    return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
  }

  // Encode the second source predicate register in the Pm field.
  static Instr Pm(PRegister pm) {
    return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
  }

  // Encode the first source predicate register in the Pn field.
  static Instr Pn(PRegister pn) {
    return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
  }

  // Encode a governing predicate in the restricted PgLow8 field (the name
  // suggests only the low-numbered predicate registers are encodable here).
  static Instr PgLow8(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
  }

  // Encode a governing predicate in an arbitrary field [hibit:lobit].
  template <int hibit, int lobit>
  static Instr Pg(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<hibit, lobit>(pg);
  }
7003
7004 // Flags encoding.
Flags(FlagsUpdate S)7005 static Instr Flags(FlagsUpdate S) {
7006 if (S == SetFlags) {
7007 return 1 << FlagsUpdate_offset;
7008 } else if (S == LeaveFlags) {
7009 return 0 << FlagsUpdate_offset;
7010 }
7011 VIXL_UNREACHABLE();
7012 return 0;
7013 }
7014
Cond(Condition cond)7015 static Instr Cond(Condition cond) { return cond << Condition_offset; }
7016
  // Generic immediate encoding.
  // Encode a signed immediate into instruction bits [hibit:lobit].
  template <int hibit, int lobit>
  static Instr ImmField(int64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    int fieldsize = hibit - lobit + 1;
    // The immediate must be representable in `fieldsize` bits as a signed
    // value; it is then truncated to that many bits for encoding.
    VIXL_ASSERT(IsIntN(fieldsize, imm));
    return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
  }

  // For unsigned immediate encoding.
  // TODO: Handle signed and unsigned immediate in satisfactory way.
  // Encode an unsigned immediate into instruction bits [hibit:lobit].
  template <int hibit, int lobit>
  static Instr ImmUnsignedField(uint64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
    return static_cast<Instr>(imm << lobit);
  }
7036
  // PC-relative address encoding.
  // The 21-bit offset is split across two fields: the low ImmPCRelLo_width
  // bits go in the immlo field and the remaining high bits in immhi.
  static Instr ImmPCRelAddress(int64_t imm21) {
    VIXL_ASSERT(IsInt21(imm21));
    Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
    Instr immlo = imm << ImmPCRelLo_offset;
    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
  }
7045
7046 // Branch encoding.
ImmUncondBranch(int64_t imm26)7047 static Instr ImmUncondBranch(int64_t imm26) {
7048 VIXL_ASSERT(IsInt26(imm26));
7049 return TruncateToUint26(imm26) << ImmUncondBranch_offset;
7050 }
7051
ImmCondBranch(int64_t imm19)7052 static Instr ImmCondBranch(int64_t imm19) {
7053 VIXL_ASSERT(IsInt19(imm19));
7054 return TruncateToUint19(imm19) << ImmCondBranch_offset;
7055 }
7056
ImmCmpBranch(int64_t imm19)7057 static Instr ImmCmpBranch(int64_t imm19) {
7058 VIXL_ASSERT(IsInt19(imm19));
7059 return TruncateToUint19(imm19) << ImmCmpBranch_offset;
7060 }
7061
ImmTestBranch(int64_t imm14)7062 static Instr ImmTestBranch(int64_t imm14) {
7063 VIXL_ASSERT(IsInt14(imm14));
7064 return TruncateToUint14(imm14) << ImmTestBranch_offset;
7065 }
7066
ImmTestBranchBit(unsigned bit_pos)7067 static Instr ImmTestBranchBit(unsigned bit_pos) {
7068 VIXL_ASSERT(IsUint6(bit_pos));
7069 // Subtract five from the shift offset, as we need bit 5 from bit_pos.
7070 unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5);
7071 unsigned bit40 = bit_pos << ImmTestBranchBit40_offset;
7072 bit5 &= ImmTestBranchBit5_mask;
7073 bit40 &= ImmTestBranchBit40_mask;
7074 return bit5 | bit40;
7075 }
7076
7077 // Data Processing encoding.
SF(Register rd)7078 static Instr SF(Register rd) {
7079 return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
7080 }
7081
ImmAddSub(int imm)7082 static Instr ImmAddSub(int imm) {
7083 VIXL_ASSERT(IsImmAddSub(imm));
7084 if (IsUint12(imm)) { // No shift required.
7085 imm <<= ImmAddSub_offset;
7086 } else {
7087 imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
7088 }
7089 return imm;
7090 }
7091
  // Encode the imms field of an SVE bitmask immediate. For lane sizes below
  // 64 bits the top bits of imms are constrained (hence the imms + 3 check).
  static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
    USE(lane_size);
    return imms << SVEImmSetBits_offset;
  }

  // Encode the immr (rotate) field; the rotation must fit within the lane
  // size.
  static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
    VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
    USE(lane_size);
    return immr << SVEImmRotate_offset;
  }

  // Encode the single N bit of an SVE bitmask immediate.
  static Instr SVEBitN(unsigned bitn) {
    VIXL_ASSERT(IsUint1(bitn));
    return bitn << SVEBitN_offset;
  }
7109
  // Encode the SVE dtype field from the memory access size (msize) and the
  // element size (esize), both given as log2 of the size in bytes. The two
  // 2-bit halves are placed at dtype_h_lsb and dtype_l_lsb respectively.
  static Instr SVEDtype(unsigned msize_in_bytes_log2,
                        unsigned esize_in_bytes_log2,
                        bool is_signed,
                        int dtype_h_lsb = 23,
                        int dtype_l_lsb = 21) {
    VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    Instr dtype_h = msize_in_bytes_log2;
    Instr dtype_l = esize_in_bytes_log2;
    // Signed forms use the encodings where msize would be greater than esize.
    if (is_signed) {
      dtype_h = dtype_h ^ 0x3;
      dtype_l = dtype_l ^ 0x3;
    }
    VIXL_ASSERT(IsUint2(dtype_h));
    VIXL_ASSERT(IsUint2(dtype_l));
    // The inversion above must have produced a "signed" bit pattern exactly
    // when one was requested.
    VIXL_ASSERT((dtype_h > dtype_l) == is_signed);

    return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
  }

  // As SVEDtype, but with the two halves of the field placed at the
  // non-adjacent positions used by some encodings (bits 23 and 13).
  static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
                             unsigned esize_in_bytes_log2,
                             bool is_signed) {
    return SVEDtype(msize_in_bytes_log2,
                    esize_in_bytes_log2,
                    is_signed,
                    23,
                    13);
  }
7140
  // Encode the imms field. X-sized operations take a 6-bit value, W-sized
  // operations a 5-bit one; the placement is the same in both cases.
  static Instr ImmS(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
                ((reg_size == kWRegSize) && IsUint5(imms)));
    USE(reg_size);
    return imms << ImmS_offset;
  }
7147
ImmR(unsigned immr,unsigned reg_size)7148 static Instr ImmR(unsigned immr, unsigned reg_size) {
7149 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7150 ((reg_size == kWRegSize) && IsUint5(immr)));
7151 USE(reg_size);
7152 VIXL_ASSERT(IsUint6(immr));
7153 return immr << ImmR_offset;
7154 }
7155
  // Encode the set-bits count of a bitmask (logical) immediate. For W-sized
  // operations the top bits of imms are constrained (hence the imms + 3
  // check).
  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
    USE(reg_size);
    return imms << ImmSetBits_offset;
  }

  // Encode the rotate amount of a bitmask (logical) immediate.
  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    return immr << ImmRotate_offset;
  }

  // Signed 19-bit offset of a load-literal instruction.
  static Instr ImmLLiteral(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmLLiteral_offset;
  }

  // Encode the N bit of a bitmask immediate; it must be zero for W-sized
  // operations.
  static Instr BitN(unsigned bitn, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
    USE(reg_size);
    return bitn << BitN_offset;
  }
7183
  // Encode the shift type of a shifted-register data-processing operand.
  static Instr ShiftDP(Shift shift) {
    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
    return shift << ShiftDP_offset;
  }

  // Encode the shift amount of a shifted-register operand (up to 63).
  static Instr ImmDPShift(unsigned amount) {
    VIXL_ASSERT(IsUint6(amount));
    return amount << ImmDPShift_offset;
  }

  // Encode the extend mode of an extended-register operand.
  static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }

  // Encode the left shift (at most four) applied after extension.
  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  // Encode the 5-bit immediate of a conditional-compare instruction.
  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(IsUint5(imm));
    return imm << ImmCondCmp_offset;
  }

  // Extract the four N, Z, C and V bits from a StatusFlags value and place
  // them in the instruction's nzcv field.
  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }
7209
  // MemOperand offset encoding.

  // Scaled, unsigned 12-bit load/store offset.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  // Unscaled, signed 9-bit load/store offset.
  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  // Load/store-pair offset. The byte offset is scaled down by the access size
  // before being encoded as a signed 7-bit field.
  static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
    VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2));
    int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2);
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }

  // Single-bit shift flag for register-offset loads and stores.
  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  // PAC load/store offset: a multiple of eight, scaled to a signed 10-bit
  // value whose top (sign) bit and low nine bits are encoded separately.
  static Instr ImmLSPAC(int64_t imm10) {
    VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
    int64_t scaled_imm10 = imm10 / (1 << 3);
    VIXL_ASSERT(IsInt10(scaled_imm10));
    uint32_t s_bit = (scaled_imm10 >> 9) & 1;
    return (s_bit << ImmLSPACHi_offset) |
           (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
  }

  // 5-bit prefetch operation encoding.
  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }
7246
  // 16-bit payload of an exception-generating instruction.
  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  // 16-bit payload of the permanently-undefined instruction (udf).
  static Instr ImmUdf(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmUdf_offset;
  }

  // 16-bit system register specifier.
  static Instr ImmSystemRegister(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmSystemRegister_offset;
  }

  // 6-bit rotation used by flag-manipulation (RMIF) instructions.
  static Instr ImmRMIFRotation(int imm6) {
    VIXL_ASSERT(IsUint6(imm6));
    return imm6 << ImmRMIFRotation_offset;
  }

  // 7-bit hint number.
  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  // 4-bit CRm field of system instructions.
  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  // 4-bit CRn field of system instructions.
  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  // 14-bit combined operand of a sys instruction.
  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  // 3-bit op1 field of system instructions.
  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  // 3-bit op2 field of system instructions.
  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  // 2-bit barrier domain.
  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  // 2-bit barrier type.
  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }
7306
  // Move immediates encoding.

  // 16-bit payload of a move-wide instruction.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  // 2-bit shift selector (in units of 16 bits) for move-wide instructions.
  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }

  // FP Immediates.
  // Encode a floating-point value as the 8-bit immediate field used by FP
  // instructions; the value must be exactly representable (see the IsImmFP*
  // helpers below).
  static Instr ImmFP16(Float16 imm);
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);
7322
7323 // FP register type.
FPType(VRegister fd)7324 static Instr FPType(VRegister fd) {
7325 VIXL_ASSERT(fd.IsScalar());
7326 switch (fd.GetSizeInBits()) {
7327 case 16:
7328 return FP16;
7329 case 32:
7330 return FP32;
7331 case 64:
7332 return FP64;
7333 default:
7334 VIXL_UNREACHABLE();
7335 return 0;
7336 }
7337 }
7338
FPScale(unsigned scale)7339 static Instr FPScale(unsigned scale) {
7340 VIXL_ASSERT(IsUint6(scale));
7341 return scale << FPScale_offset;
7342 }
7343
  // Immediate field checking helpers.
  // Each returns true if the value can be encoded in the corresponding
  // immediate field.
  static bool IsImmAddSub(int64_t immediate);
  static bool IsImmConditionalCompare(int64_t immediate);
  static bool IsImmFP16(Float16 imm);
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);
  // Checks whether `value` is a valid bitmask (logical) immediate for the
  // given register width; the optional out-parameters, when non-NULL, receive
  // the encoding fields.
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);
7360
7361 // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)7362 static Instr VFormat(VRegister vd) {
7363 if (vd.Is64Bits()) {
7364 switch (vd.GetLanes()) {
7365 case 2:
7366 return NEON_2S;
7367 case 4:
7368 return NEON_4H;
7369 case 8:
7370 return NEON_8B;
7371 default:
7372 return 0xffffffff;
7373 }
7374 } else {
7375 VIXL_ASSERT(vd.Is128Bits());
7376 switch (vd.GetLanes()) {
7377 case 2:
7378 return NEON_2D;
7379 case 4:
7380 return NEON_4S;
7381 case 8:
7382 return NEON_8H;
7383 case 16:
7384 return NEON_16B;
7385 default:
7386 return 0xffffffff;
7387 }
7388 }
7389 }
7390
  // Instruction bits for vector format in floating point data processing
  // operations. Dispatches first on lane count, then on total register size.
  static Instr FPFormat(VRegister vd) {
    switch (vd.GetLanes()) {
      case 1:
        // Floating point scalar formats.
        switch (vd.GetSizeInBits()) {
          case 16:
            return FP16;
          case 32:
            return FP32;
          case 64:
            return FP64;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 2:
        // Two lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_2S;
          case 128:
            return NEON_FP_2D;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 4:
        // Four lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_4H;
          case 128:
            return NEON_FP_4S;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 8:
        // Eight lane floating point vector format.
        VIXL_ASSERT(vd.Is128Bits());
        return NEON_FP_8H;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    // Not reachable for any supported format.
    VIXL_UNREACHABLE();
    return 0;
  }
7441
7442 // Instruction bits for vector format in load and store operations.
LSVFormat(VRegister vd)7443 static Instr LSVFormat(VRegister vd) {
7444 if (vd.Is64Bits()) {
7445 switch (vd.GetLanes()) {
7446 case 1:
7447 return LS_NEON_1D;
7448 case 2:
7449 return LS_NEON_2S;
7450 case 4:
7451 return LS_NEON_4H;
7452 case 8:
7453 return LS_NEON_8B;
7454 default:
7455 return 0xffffffff;
7456 }
7457 } else {
7458 VIXL_ASSERT(vd.Is128Bits());
7459 switch (vd.GetLanes()) {
7460 case 2:
7461 return LS_NEON_2D;
7462 case 4:
7463 return LS_NEON_4S;
7464 case 8:
7465 return LS_NEON_8H;
7466 case 16:
7467 return LS_NEON_16B;
7468 default:
7469 return 0xffffffff;
7470 }
7471 }
7472 }
7473
7474 // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)7475 static Instr SFormat(VRegister vd) {
7476 VIXL_ASSERT(vd.GetLanes() == 1);
7477 switch (vd.GetSizeInBytes()) {
7478 case 1:
7479 return NEON_B;
7480 case 2:
7481 return NEON_H;
7482 case 4:
7483 return NEON_S;
7484 case 8:
7485 return NEON_D;
7486 default:
7487 return 0xffffffff;
7488 }
7489 }
7490
  // Encode the size field for an SVE Z or P register from its lane size.
  // Unsupported lane sizes yield the invalid all-ones pattern.
  template <typename T>
  static Instr SVESize(const T& rd) {
    VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
    VIXL_ASSERT(rd.HasLaneSize());
    switch (rd.GetLaneSizeInBytes()) {
      case 1:
        return SVE_B;
      case 2:
        return SVE_H;
      case 4:
        return SVE_S;
      case 8:
        return SVE_D;
      default:
        return 0xffffffff;
    }
  }

  // 5-bit SVE predicate-constraint pattern.
  static Instr ImmSVEPredicateConstraint(int pattern) {
    VIXL_ASSERT(IsUint5(pattern));
    return (pattern << ImmSVEPredicateConstraint_offset) &
           ImmSVEPredicateConstraint_mask;
  }
7514
ImmNEONHLM(int index,int num_bits)7515 static Instr ImmNEONHLM(int index, int num_bits) {
7516 int h, l, m;
7517 if (num_bits == 3) {
7518 VIXL_ASSERT(IsUint3(index));
7519 h = (index >> 2) & 1;
7520 l = (index >> 1) & 1;
7521 m = (index >> 0) & 1;
7522 } else if (num_bits == 2) {
7523 VIXL_ASSERT(IsUint2(index));
7524 h = (index >> 1) & 1;
7525 l = (index >> 0) & 1;
7526 m = 0;
7527 } else {
7528 VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
7529 h = (index >> 0) & 1;
7530 l = 0;
7531 m = 0;
7532 }
7533 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
7534 }
7535
ImmRotFcadd(int rot)7536 static Instr ImmRotFcadd(int rot) {
7537 VIXL_ASSERT(rot == 90 || rot == 270);
7538 return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
7539 }
7540
ImmRotFcmlaSca(int rot)7541 static Instr ImmRotFcmlaSca(int rot) {
7542 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7543 return (rot / 90) << ImmRotFcmlaSca_offset;
7544 }
7545
ImmRotFcmlaVec(int rot)7546 static Instr ImmRotFcmlaVec(int rot) {
7547 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7548 return (rot / 90) << ImmRotFcmlaVec_offset;
7549 }
7550
ImmNEONExt(int imm4)7551 static Instr ImmNEONExt(int imm4) {
7552 VIXL_ASSERT(IsUint4(imm4));
7553 return imm4 << ImmNEONExt_offset;
7554 }
7555
  // Encode the imm5 lane selector: a single one bit at position s (the
  // lane-size log2) with the lane index in the bits above it.
  static Instr ImmNEON5(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }

  // Encode the imm4 lane selector: the lane index shifted by the lane-size
  // log2.
  static Instr ImmNEON4(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }

  // Split an 8-bit modified immediate into the a:b:c and d:e:f:g:h fields.
  static Instr ImmNEONabcdefgh(int imm8) {
    VIXL_ASSERT(IsUint8(imm8));
    Instr instr;
    instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
    return instr;
  }

  // 4-bit cmode field of NEON modified-immediate instructions.
  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(IsUint4(cmode));
    return cmode << NEONCmode_offset;
  }

  // Single op bit of NEON modified-immediate instructions.
  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(IsUint1(op));
    return op << NEONModImmOp_offset;
  }
7587
  // Size of the code generated since label to the current position.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    // The label must already be bound to a buffer location.
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  // Deprecated alias for GetSizeOfCodeGeneratedSince().
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }

  // Deprecated buffer-query wrappers; query GetBuffer() directly instead.
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }

  // The position-independence policy this assembler was configured with.
  PositionIndependentCodeOption GetPic() const { return pic_; }
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }

  // The set of CPU features the assembler is allowed to rely on.
  CPUFeatures* GetCPUFeatures() { return &cpu_features_; }

  void SetCPUFeatures(const CPUFeatures& cpu_features) {
    cpu_features_ = cpu_features;
  }

  // True for the policies that permit page-offset-dependent code sequences.
  bool AllowPageOffsetDependentCode() const {
    return (GetPic() == PageOffsetDependentCode) ||
           (GetPic() == PositionDependentCode);
  }

  // Return the zero register (wzr or xzr) matching the size of `reg`.
  static Register AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? Register(xzr) : Register(wzr);
  }
7634
 protected:
  // Shared load/store emission helpers used by the public mnemonics.
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  void LoadStorePAC(const Register& xt,
                    const MemOperand& addr,
                    LoadStorePACOp op);

  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  // `is_load` defaults to false because it is only used in the
  // scalar-plus-vector form.
  Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                            int num_regs,
                            const SVEMemOperand& addr,
                            bool is_load = false);

  // E.g. st1b, st1h, ...
  // This supports both contiguous and scatter stores.
  void SVESt1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegister& pg,
                    const SVEMemOperand& addr);

  // E.g. ld1b, ld1h, ...
  // This supports both contiguous and gather loads.
  void SVELd1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegisterZ& pg,
                    const SVEMemOperand& addr,
                    bool is_signed);

  // E.g. ld1rb, ld1rh, ...
  void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
                             const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr,
                             bool is_signed);

  // E.g. ldff1b, ldff1h, ...
  // This supports both contiguous and gather loads.
  void SVELdff1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr,
                      bool is_signed);

  // Common code for the helpers above.
  void SVELdSt1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegister& pg,
                      const SVEMemOperand& addr,
                      bool is_signed,
                      Instr op);

  // Common code for the helpers above.
  void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
                              const ZRegister& zt,
                              const PRegister& pg,
                              const SVEMemOperand& addr,
                              bool is_load,
                              bool is_signed,
                              bool is_first_fault);

  // E.g. st2b, st3h, ...
  void SVESt234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegister& pg,
                      const SVEMemOperand& addr);

  // E.g. ld2b, ld3h, ...
  void SVELd234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr);

  // Common code for the helpers above.
  void SVELdSt234Helper(int num_regs,
                        const ZRegister& zt1,
                        const PRegister& pg,
                        const SVEMemOperand& addr,
                        Instr op);

  // E.g. ld1qb, ld1qh, ldnt1b, ...
  void SVELd1St1ScaImmHelper(const ZRegister& zt,
                             const PRegister& pg,
                             const SVEMemOperand& addr,
                             Instr regoffset_op,
                             Instr immoffset_op,
                             int imm_divisor = 1);

  void SVELd1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize,
                          bool is_signed);
  void SVESt1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize);

  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
  void Prefetch(int op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
7763
  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
  // reports a bogus uninitialised warning then.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);

  void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);

  // Emit a logical instruction with a pre-computed n:imm_s:imm_r bitmask
  // immediate encoding.
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);

  // SVE integer comparison, in vector, signed-immediate and unsigned-immediate
  // operand forms.
  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      const ZRegister& zm,
                      SVEIntCompareVectorsOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      int imm,
                      SVEIntCompareSignedImmOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      unsigned imm,
                      SVEIntCompareUnsignedImmOp op);

  void SVEIntAddSubtractImmUnpredicatedHelper(
      SVEIntAddSubtractImm_UnpredicatedOp op,
      const ZRegister& zd,
      int imm8,
      int shift);

  void SVEElementCountToRegisterHelper(Instr op,
                                       const Register& rd,
                                       int pattern,
                                       int multiplier);

  // Encode an SVE immediate shift amount relative to the lane size.
  Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits);

  Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits);

  void SVEBitwiseShiftImmediate(const ZRegister& zd,
                                const ZRegister& zn,
                                Instr encoded_imm,
                                Instr op);

  void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
                                    const PRegisterM& pg,
                                    Instr encoded_imm,
                                    Instr op);

  // Select and apply the lane-size-specific opcode (op_h/op_s/op_d) for an
  // indexed multiply, encoding zm and the index.
  Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
                          const ZRegister& zm,
                          int index,
                          Instr op_h,
                          Instr op_s,
                          Instr op_d);

  Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);

  Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index);

  // SVE prefetch emission helpers for the various addressing forms.
  void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEPrefetchHelper(PrefetchOperation prfop,
                         const PRegister& pg,
                         const SVEMemOperand& addr,
                         int prefetch_size);
7870
SVEImmPrefetchOperation(PrefetchOperation prfop)7871 static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
7872 // SVE only supports PLD and PST, not PLI.
7873 VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
7874 ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
7875 // Check that we can simply map bits.
7876 VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
7877 VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
7878 // Remaining operations map directly.
7879 return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
7880 }
7881
  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  // Common implementation behind the add/sub family; `S` selects whether the
  // flags are updated.
  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // Common implementation for the NEON table-lookup instructions (TBL/TBX).
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
7918
7919 // Convenience pass-through for CPU feature checks.
7920 bool CPUHas(CPUFeatures::Feature feature0,
7921 CPUFeatures::Feature feature1 = CPUFeatures::kNone,
7922 CPUFeatures::Feature feature2 = CPUFeatures::kNone,
7923 CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
7924 return cpu_features_.Has(feature0, feature1, feature2, feature3);
7925 }
7926
  // Determine whether the target CPU has the specified registers, based on the
  // currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the same
  // entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  // As above, but for a system register access.
  bool CPUHas(SystemRegister sysreg) const;
7938
 private:
  // Convert an FP immediate to its 8-bit encoded form (the "imm8" field used
  // by FMOV-immediate style encodings). One overload per source precision.
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);
7943
  // Instruction helpers. These are the common encoding back-ends shared by the
  // public mnemonic-level methods; each takes a fully-resolved operation code.

  // Scalar (general-purpose register) data-processing helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);

  // Load/store helpers.
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);

  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);

  // FP data-processing helpers.
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);

  // NEON helpers. Names mirror the instruction-group naming used by the op
  // enums; suffixes such as L/W/HN presumably denote the long/wide/narrow
  // forms of the groups — confirm against the encodings in the implementation.
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  // `op_half` is an alternative encoding, used for half-precision cases.
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  // `value` is a default comparison operand for the FP 2-register-misc forms.
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  // "ByElement" helpers: `vm_index` selects the lane of `vm` to operate on.
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  // Shift-immediate helpers; `immh_immb` is the raw encoded immediate field,
  // while the `shift` variants take the logical shift amount.
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
8081
  // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
  // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
  void ResolveSVEImm8Shift(int* imm8, int* shift);

  // Encode the addressing-mode field for NEON load/store-structure
  // instructions from `addr`.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size_in_bytes_log2,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  // `element_shift` scales the byte offset down to the unit of the caller
  // (instructions, pages, ...).
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
8107
  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    // All A64 instructions are one word: `Instr` must match kInstructionSize.
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    // Emitting is only legal while the assembler is allowed to generate code.
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }
8114
  // Position-independent-code policy selected for this assembler.
  PositionIndependentCodeOption pic_;

  // CPU features assumed available; queried by the CPUHas() helpers.
  CPUFeatures cpu_features_;
8118 };
8119
8120
8121 template <typename T>
UpdateValue(T new_value,const Assembler * assembler)8122 void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
8123 return UpdateValue(new_value,
8124 assembler->GetBuffer().GetStartAddress<uint8_t*>());
8125 }
8126
8127
8128 template <typename T>
UpdateValue(T high64,T low64,const Assembler * assembler)8129 void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
8130 return UpdateValue(high64,
8131 low64,
8132 assembler->GetBuffer().GetStartAddress<uint8_t*>());
8133 }
8134
8135
8136 } // namespace aarch64
8137
// Required InvalSet template specialisations.
// TODO: These template specialisations should not live in this file. Move
// Label out of the aarch64 namespace in order to share its implementation
// later.
#define INVAL_SET_TEMPLATE_PARAMETERS                                       \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,                \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom,        \
      aarch64::Label::kReclaimFactor
// Label link elements are plain ptrdiff_t offsets, so each element is its own
// key.
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
// Setting the key overwrites the element itself, since element and key are
// one and the same.
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS
8157
8158 } // namespace vixl
8159
8160 #endif // VIXL_AARCH64_ASSEMBLER_AARCH64_H_
8161