1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
28 #define VIXL_AARCH64_ASSEMBLER_AARCH64_H_
29
30 #include "../assembler-base-vixl.h"
31 #include "../code-generation-scopes-vixl.h"
32 #include "../cpu-features.h"
33 #include "../globals-vixl.h"
34 #include "../invalset-vixl.h"
35 #include "../utils-vixl.h"
36 #include "operands-aarch64.h"
37
38 namespace vixl {
39 namespace aarch64 {
40
41 class LabelTestHelper; // Forward declaration.
42
43
44 class Label {
45 public:
46 #ifndef PANDA_BUILD
Label()47 Label() : location_(kLocationUnbound) {}
48 #else
49 Label() = delete;
50 Label(AllocatorWrapper allocator) : links_(allocator), location_(kLocationUnbound) {}
51 #endif
~Label()52 virtual ~Label() {
53 // All links to a label must have been resolved before it is destructed.
54 #ifndef PANDA_BUILD
55 VIXL_ASSERT(!IsLinked());
56 #else
57 // Codegen may create unlinked labels
58 #endif
59 }
60
IsBound()61 bool IsBound() const { return location_ >= 0; }
IsLinked()62 bool IsLinked() const { return !links_.empty(); }
63
GetLocation()64 ptrdiff_t GetLocation() const { return location_; }
65 VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
66 return GetLocation();
67 }
68
69 static const int kNPreallocatedLinks = 4;
70 static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
71 static const size_t kReclaimFrom = 512;
72 static const size_t kReclaimFactor = 2;
73
74 typedef InvalSet<ptrdiff_t,
75 kNPreallocatedLinks,
76 ptrdiff_t,
77 kInvalidLinkKey,
78 kReclaimFrom,
79 kReclaimFactor>
80 LinksSetBase;
81 typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;
82
83 private:
84 class LinksSet : public LinksSetBase {
85 public:
86 #ifndef PANDA_BUILD
LinksSet()87 LinksSet() : LinksSetBase() {}
88 #else
89 LinksSet() = delete;
90 LinksSet(AllocatorWrapper allocator) : LinksSetBase(allocator) {}
91 #endif
92 };
93
94 // Allows iterating over the links of a label. The behaviour is undefined if
95 // the list of links is modified in any way while iterating.
96 class LabelLinksIterator : public LabelLinksIteratorBase {
97 public:
LabelLinksIterator(Label * label)98 explicit LabelLinksIterator(Label* label)
99 : LabelLinksIteratorBase(&label->links_) {}
100
101 // TODO: Remove these and use the STL-like interface instead.
102 using LabelLinksIteratorBase::Advance;
103 using LabelLinksIteratorBase::Current;
104 };
105
Bind(ptrdiff_t location)106 void Bind(ptrdiff_t location) {
107 // Labels can only be bound once.
108 #ifndef PANDA_BUILD
109 VIXL_ASSERT(!IsBound());
110 #else
111 // Disabled for unit-tests (it bind non-bound locs)
112 #endif
113 location_ = location;
114 }
115
AddLink(ptrdiff_t instruction)116 void AddLink(ptrdiff_t instruction) {
117 // If a label is bound, the assembler already has the information it needs
118 // to write the instruction, so there is no need to add it to links_.
119 VIXL_ASSERT(!IsBound());
120 links_.insert(instruction);
121 }
122
DeleteLink(ptrdiff_t instruction)123 void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }
124
ClearAllLinks()125 void ClearAllLinks() { links_.clear(); }
126
127 // TODO: The comment below considers average case complexity for our
128 // usual use-cases. The elements of interest are:
129 // - Branches to a label are emitted in order: branch instructions to a label
130 // are generated at an offset in the code generation buffer greater than any
131 // other branch to that same label already generated. As an example, this can
132 // be broken when an instruction is patched to become a branch. Note that the
133 // code will still work, but the complexity considerations below may locally
134 // not apply any more.
135 // - Veneers are generated in order: for multiple branches of the same type
136 // branching to the same unbound label going out of range, veneers are
137 // generated in growing order of the branch instruction offset from the start
138 // of the buffer.
139 //
140 // When creating a veneer for a branch going out of range, the link for this
141 // branch needs to be removed from this `links_`. Since all branches are
142 // tracked in one underlying InvalSet, the complexity for this deletion is the
143 // same as for finding the element, ie. O(n), where n is the number of links
144 // in the set.
145 // This could be reduced to O(1) by using the same trick as used when tracking
146 // branch information for veneers: split the container to use one set per type
147 // of branch. With that setup, when a veneer is created and the link needs to
148 // be deleted, if the two points above hold, it must be the minimum element of
149 // the set for its type of branch, and that minimum element will be accessible
150 // in O(1).
151
152 // The offsets of the instructions that have linked to this label.
153 LinksSet links_;
154 // The label location.
155 ptrdiff_t location_;
156
157 static const ptrdiff_t kLocationUnbound = -1;
158
159 // It is not safe to copy labels, so disable the copy constructor and operator
160 // by declaring them private (without an implementation).
161 #if __cplusplus >= 201103L
162 Label(const Label&) = delete;
163 void operator=(const Label&) = delete;
164 #else
165 Label(const Label&);
166 void operator=(const Label&);
167 #endif
168
169 // The Assembler class is responsible for binding and linking labels, since
170 // the stored offsets need to be consistent with the Assembler's buffer.
171 friend class Assembler;
172 // The MacroAssembler and VeneerPool handle resolution of branches to distant
173 // targets.
174 friend class MacroAssembler;
175 friend class VeneerPool;
176 };
177
178 class Assembler;
179 class LiteralPool;
180
181 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
182 // stream and loaded through a pc relative load. The same literal can be
183 // referred to by multiple instructions but a literal can only reside at one
184 // place in memory. A literal can be used by a load before or after being
185 // placed in memory.
186 //
187 // Internally an offset of 0 is associated with a literal which has been
188 // neither used nor placed. Then two possibilities arise:
189 // 1) the label is placed, the offset (stored as offset + 1) is used to
190 // resolve any subsequent load using the label.
191 // 2) the label is not placed and offset is the offset of the last load using
192 // the literal (stored as -offset -1). If multiple loads refer to this
193 // literal then the last load holds the offset of the preceding load and
194 // all loads form a chain. Once the offset is placed all the loads in the
195 // chain are resolved and future loads fall back to possibility 1.
196 class RawLiteral {
197 public:
198 enum DeletionPolicy {
199 kDeletedOnPlacementByPool,
200 kDeletedOnPoolDestruction,
201 kManuallyDeleted
202 };
203
204 RawLiteral(size_t size,
205 LiteralPool* literal_pool,
206 DeletionPolicy deletion_policy = kManuallyDeleted);
207
208 // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
209 // actually pointing to `Literal<T>` objects.
~RawLiteral()210 virtual ~RawLiteral() {}
211
GetSize()212 size_t GetSize() const {
213 VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
214 VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
215 VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
216 (size_ == kQRegSizeInBytes));
217 return size_;
218 }
219 VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }
220
GetRawValue128Low64()221 uint64_t GetRawValue128Low64() const {
222 VIXL_ASSERT(size_ == kQRegSizeInBytes);
223 return low64_;
224 }
225 VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
226 return GetRawValue128Low64();
227 }
228
GetRawValue128High64()229 uint64_t GetRawValue128High64() const {
230 VIXL_ASSERT(size_ == kQRegSizeInBytes);
231 return high64_;
232 }
233 VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
234 return GetRawValue128High64();
235 }
236
GetRawValue64()237 uint64_t GetRawValue64() const {
238 VIXL_ASSERT(size_ == kXRegSizeInBytes);
239 VIXL_ASSERT(high64_ == 0);
240 return low64_;
241 }
242 VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
243 return GetRawValue64();
244 }
245
GetRawValue32()246 uint32_t GetRawValue32() const {
247 VIXL_ASSERT(size_ == kWRegSizeInBytes);
248 VIXL_ASSERT(high64_ == 0);
249 VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
250 return static_cast<uint32_t>(low64_);
251 }
252 VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
253 return GetRawValue32();
254 }
255
IsUsed()256 bool IsUsed() const { return offset_ < 0; }
IsPlaced()257 bool IsPlaced() const { return offset_ > 0; }
258
GetLiteralPool()259 LiteralPool* GetLiteralPool() const { return literal_pool_; }
260
GetOffset()261 ptrdiff_t GetOffset() const {
262 VIXL_ASSERT(IsPlaced());
263 return offset_ - 1;
264 }
265 VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }
266
267 protected:
SetOffset(ptrdiff_t offset)268 void SetOffset(ptrdiff_t offset) {
269 VIXL_ASSERT(offset >= 0);
270 VIXL_ASSERT(IsWordAligned(offset));
271 VIXL_ASSERT(!IsPlaced());
272 offset_ = offset + 1;
273 }
set_offset(ptrdiff_t offset)274 VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
275 SetOffset(offset);
276 }
277
GetLastUse()278 ptrdiff_t GetLastUse() const {
279 VIXL_ASSERT(IsUsed());
280 return -offset_ - 1;
281 }
282 VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }
283
SetLastUse(ptrdiff_t offset)284 void SetLastUse(ptrdiff_t offset) {
285 VIXL_ASSERT(offset >= 0);
286 VIXL_ASSERT(IsWordAligned(offset));
287 VIXL_ASSERT(!IsPlaced());
288 offset_ = -offset - 1;
289 }
set_last_use(ptrdiff_t offset)290 VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
291 SetLastUse(offset);
292 }
293
294 size_t size_;
295 ptrdiff_t offset_;
296 uint64_t low64_;
297 uint64_t high64_;
298
299 private:
300 LiteralPool* literal_pool_;
301 DeletionPolicy deletion_policy_;
302
303 friend class Assembler;
304 friend class LiteralPool;
305 };
306
307
308 template <typename T>
309 class Literal : public RawLiteral {
310 public:
311 explicit Literal(T value,
312 LiteralPool* literal_pool = NULL,
313 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(sizeof (value),literal_pool,ownership)314 : RawLiteral(sizeof(value), literal_pool, ownership) {
315 VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
316 UpdateValue(value);
317 }
318
319 Literal(T high64,
320 T low64,
321 LiteralPool* literal_pool = NULL,
322 RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(kQRegSizeInBytes,literal_pool,ownership)323 : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
324 VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
325 UpdateValue(high64, low64);
326 }
327
~Literal()328 virtual ~Literal() {}
329
330 // Update the value of this literal, if necessary by rewriting the value in
331 // the pool.
332 // If the literal has already been placed in a literal pool, the address of
333 // the start of the code buffer must be provided, as the literal only knows it
334 // offset from there. This also allows patching the value after the code has
335 // been moved in memory.
336 void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
337 VIXL_ASSERT(sizeof(new_value) == size_);
338 memcpy(&low64_, &new_value, sizeof(new_value));
339 if (IsPlaced()) {
340 VIXL_ASSERT(code_buffer != NULL);
341 RewriteValueInCode(code_buffer);
342 }
343 }
344
345 void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
346 VIXL_ASSERT(sizeof(low64) == size_ / 2);
347 memcpy(&low64_, &low64, sizeof(low64));
348 memcpy(&high64_, &high64, sizeof(high64));
349 if (IsPlaced()) {
350 VIXL_ASSERT(code_buffer != NULL);
351 RewriteValueInCode(code_buffer);
352 }
353 }
354
355 void UpdateValue(T new_value, const Assembler* assembler);
356 void UpdateValue(T high64, T low64, const Assembler* assembler);
357
358 private:
RewriteValueInCode(uint8_t * code_buffer)359 void RewriteValueInCode(uint8_t* code_buffer) {
360 VIXL_ASSERT(IsPlaced());
361 VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
362 switch (GetSize()) {
363 case kSRegSizeInBytes:
364 *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
365 GetRawValue32();
366 break;
367 case kDRegSizeInBytes:
368 *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
369 GetRawValue64();
370 break;
371 default:
372 VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
373 uint64_t* base_address =
374 reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
375 *base_address = GetRawValue128Low64();
376 *(base_address + 1) = GetRawValue128High64();
377 }
378 }
379 };
380
381
// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
400
401
// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions; no fallback is attempted.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions; no fallback is attempted.
  RequireUnscaledOffset
};
418
419
420 // Assembler.
421 class Assembler : public vixl::internal::AssemblerBase {
422 public:
  // Construct an assembler; the code buffer is managed by the AssemblerBase
  // default-constructed here.
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}

#ifdef PANDA_BUILD
  // PANDA builds disallow the capacity-only constructor.
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode) = delete;
#else
  // Construct an assembler with an internally-allocated buffer of `capacity`
  // bytes.
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
#endif
  // Construct an assembler over a caller-provided `buffer` of `capacity`
  // bytes (ownership stays with the caller — see AssemblerBase).
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}

  // Upon destruction, the code will assert that one of the following is true:
  // * The Assembler object has not been used.
  // * Nothing has been emitted since the last Reset() call.
  // * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}
451
452 // System functions.
453
454 // Start generating code from the beginning of the buffer, discarding any code
455 // and data that has already been emitted into the buffer.
456 void Reset();
457
458 // Bind a label to the current PC.
459 void bind(Label* label);
460
461 // Bind a label to a specified offset from the start of the buffer.
462 void BindToOffset(Label* label, ptrdiff_t offset);
463
464 // Place a literal at the current PC.
465 void place(RawLiteral* literal);
466
  // Deprecated forwarding accessors, kept for source compatibility.
  VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
    return GetCursorOffset();
  }

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t GetBufferEndOffset() const) {
    return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t BufferEndOffset() const) {
    return GetBuffer().GetCapacity();
  }
479
  // Return the address of a bound label, cast to T. T must be wide enough to
  // hold a pointer (enforced by the static assert below).
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }
487
  // Return a pointer to the instruction at `instruction_offset` bytes from
  // the start of the buffer.
  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
  VIXL_DEPRECATED("GetInstructionAt",
                  Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
    return GetInstructionAt(instruction_offset);
  }
495
  // Return the byte offset of `instruction` from the start of the buffer.
  // `instruction` must point inside this assembler's buffer (asserted below).
  ptrdiff_t GetInstructionOffset(Instruction* instruction) {
    // Pointer arithmetic below relies on Instruction being byte-sized.
    VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
    ptrdiff_t offset =
        instruction - GetBuffer()->GetStartAddress<Instruction*>();
    VIXL_ASSERT((0 <= offset) &&
                (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
    return offset;
  }
  VIXL_DEPRECATED("GetInstructionOffset",
                  ptrdiff_t InstructionOffset(Instruction* instruction)) {
    return GetInstructionOffset(instruction);
  }
508
509 // Instruction set functions.
510
511 // Branch / Jump instructions.
512
513 // Branch to register.
514 void br(const Register& xn);
515
516 // Branch with link to register.
517 void blr(const Register& xn);
518
519 // Branch to register with return hint.
520 void ret(const Register& xn = lr);
521
522 // Branch to register, with pointer authentication. Using key A and a modifier
523 // of zero [Armv8.3].
524 void braaz(const Register& xn);
525
526 // Branch to register, with pointer authentication. Using key B and a modifier
527 // of zero [Armv8.3].
528 void brabz(const Register& xn);
529
530 // Branch with link to register, with pointer authentication. Using key A and
531 // a modifier of zero [Armv8.3].
532 void blraaz(const Register& xn);
533
534 // Branch with link to register, with pointer authentication. Using key B and
535 // a modifier of zero [Armv8.3].
536 void blrabz(const Register& xn);
537
538 // Return from subroutine, with pointer authentication. Using key A [Armv8.3].
539 void retaa();
540
541 // Return from subroutine, with pointer authentication. Using key B [Armv8.3].
542 void retab();
543
544 // Branch to register, with pointer authentication. Using key A [Armv8.3].
545 void braa(const Register& xn, const Register& xm);
546
547 // Branch to register, with pointer authentication. Using key B [Armv8.3].
548 void brab(const Register& xn, const Register& xm);
549
550 // Branch with link to register, with pointer authentication. Using key A
551 // [Armv8.3].
552 void blraa(const Register& xn, const Register& xm);
553
554 // Branch with link to register, with pointer authentication. Using key B
555 // [Armv8.3].
556 void blrab(const Register& xn, const Register& xm);
557
558 // Unconditional branch to label.
559 void b(Label* label);
560
561 // Conditional branch to label.
562 void b(Label* label, Condition cond);
563
564 // Unconditional branch to PC offset.
565 void b(int64_t imm26);
566
567 // Conditional branch to PC offset.
568 void b(int64_t imm19, Condition cond);
569
570 // Branch with link to label.
571 void bl(Label* label);
572
573 // Branch with link to PC offset.
574 void bl(int64_t imm26);
575
576 // Compare and branch to label if zero.
577 void cbz(const Register& rt, Label* label);
578
579 // Compare and branch to PC offset if zero.
580 void cbz(const Register& rt, int64_t imm19);
581
582 // Compare and branch to label if not zero.
583 void cbnz(const Register& rt, Label* label);
584
585 // Compare and branch to PC offset if not zero.
586 void cbnz(const Register& rt, int64_t imm19);
587
588 // Table lookup from one register.
589 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
590
591 // Table lookup from two registers.
592 void tbl(const VRegister& vd,
593 const VRegister& vn,
594 const VRegister& vn2,
595 const VRegister& vm);
596
597 // Table lookup from three registers.
598 void tbl(const VRegister& vd,
599 const VRegister& vn,
600 const VRegister& vn2,
601 const VRegister& vn3,
602 const VRegister& vm);
603
604 // Table lookup from four registers.
605 void tbl(const VRegister& vd,
606 const VRegister& vn,
607 const VRegister& vn2,
608 const VRegister& vn3,
609 const VRegister& vn4,
610 const VRegister& vm);
611
612 // Table lookup extension from one register.
613 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
614
615 // Table lookup extension from two registers.
616 void tbx(const VRegister& vd,
617 const VRegister& vn,
618 const VRegister& vn2,
619 const VRegister& vm);
620
621 // Table lookup extension from three registers.
622 void tbx(const VRegister& vd,
623 const VRegister& vn,
624 const VRegister& vn2,
625 const VRegister& vn3,
626 const VRegister& vm);
627
628 // Table lookup extension from four registers.
629 void tbx(const VRegister& vd,
630 const VRegister& vn,
631 const VRegister& vn2,
632 const VRegister& vn3,
633 const VRegister& vn4,
634 const VRegister& vm);
635
636 // Test bit and branch to label if zero.
637 void tbz(const Register& rt, unsigned bit_pos, Label* label);
638
639 // Test bit and branch to PC offset if zero.
640 void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);
641
642 // Test bit and branch to label if not zero.
643 void tbnz(const Register& rt, unsigned bit_pos, Label* label);
644
645 // Test bit and branch to PC offset if not zero.
646 void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);
647
648 // Address calculation instructions.
649 // Calculate a PC-relative address. Unlike for branches the offset in adr is
650 // unscaled (i.e. the result can be unaligned).
651
652 // Calculate the address of a label.
653 void adr(const Register& xd, Label* label);
654
655 // Calculate the address of a PC offset.
656 void adr(const Register& xd, int64_t imm21);
657
658 // Calculate the page address of a label.
659 void adrp(const Register& xd, Label* label);
660
661 // Calculate the page address of a PC offset.
662 void adrp(const Register& xd, int64_t imm21);
663
664 // Data Processing instructions.
665
666 // Add.
667 void add(const Register& rd, const Register& rn, const Operand& operand);
668
669 // Add and update status flags.
670 void adds(const Register& rd, const Register& rn, const Operand& operand);
671
672 // Compare negative.
673 void cmn(const Register& rn, const Operand& operand);
674
675 // Subtract.
676 void sub(const Register& rd, const Register& rn, const Operand& operand);
677
678 // Subtract and update status flags.
679 void subs(const Register& rd, const Register& rn, const Operand& operand);
680
681 // Compare.
682 void cmp(const Register& rn, const Operand& operand);
683
684 // Negate.
685 void neg(const Register& rd, const Operand& operand);
686
687 // Negate and update status flags.
688 void negs(const Register& rd, const Operand& operand);
689
690 // Add with carry bit.
691 void adc(const Register& rd, const Register& rn, const Operand& operand);
692
693 // Add with carry bit and update status flags.
694 void adcs(const Register& rd, const Register& rn, const Operand& operand);
695
696 // Subtract with carry bit.
697 void sbc(const Register& rd, const Register& rn, const Operand& operand);
698
699 // Subtract with carry bit and update status flags.
700 void sbcs(const Register& rd, const Register& rn, const Operand& operand);
701
702 // Rotate register right and insert into NZCV flags under the control of a
703 // mask [Armv8.4].
704 void rmif(const Register& xn, unsigned rotation, StatusFlags flags);
705
706 // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
707 void setf8(const Register& rn);
708
709 // Set NZCV flags from register, treated as an 16-bit value [Armv8.4].
710 void setf16(const Register& rn);
711
712 // Negate with carry bit.
713 void ngc(const Register& rd, const Operand& operand);
714
715 // Negate with carry bit and update status flags.
716 void ngcs(const Register& rd, const Operand& operand);
717
718 // Logical instructions.
719
720 // Bitwise and (A & B).
721 void and_(const Register& rd, const Register& rn, const Operand& operand);
722
723 // Bitwise and (A & B) and update status flags.
724 void ands(const Register& rd, const Register& rn, const Operand& operand);
725
726 // Bit test and set flags.
727 void tst(const Register& rn, const Operand& operand);
728
729 // Bit clear (A & ~B).
730 void bic(const Register& rd, const Register& rn, const Operand& operand);
731
732 // Bit clear (A & ~B) and update status flags.
733 void bics(const Register& rd, const Register& rn, const Operand& operand);
734
735 // Bitwise or (A | B).
736 void orr(const Register& rd, const Register& rn, const Operand& operand);
737
738 // Bitwise nor (A | ~B).
739 void orn(const Register& rd, const Register& rn, const Operand& operand);
740
741 // Bitwise eor/xor (A ^ B).
742 void eor(const Register& rd, const Register& rn, const Operand& operand);
743
744 // Bitwise enor/xnor (A ^ ~B).
745 void eon(const Register& rd, const Register& rn, const Operand& operand);
746
747 // Logical shift left by variable.
748 void lslv(const Register& rd, const Register& rn, const Register& rm);
749
750 // Logical shift right by variable.
751 void lsrv(const Register& rd, const Register& rn, const Register& rm);
752
753 // Arithmetic shift right by variable.
754 void asrv(const Register& rd, const Register& rn, const Register& rm);
755
756 // Rotate right by variable.
757 void rorv(const Register& rd, const Register& rn, const Register& rm);
758
759 // Bitfield instructions.
760
761 // Bitfield move.
762 void bfm(const Register& rd,
763 const Register& rn,
764 unsigned immr,
765 unsigned imms);
766
767 // Signed bitfield move.
768 void sbfm(const Register& rd,
769 const Register& rn,
770 unsigned immr,
771 unsigned imms);
772
773 // Unsigned bitfield move.
774 void ubfm(const Register& rd,
775 const Register& rn,
776 unsigned immr,
777 unsigned imms);
778
779 // Bfm aliases.
780
  // Bitfield insert: copy the low `width` bits of rn into rd at position
  // `lsb`, leaving other bits of rd unchanged. Alias of bfm with
  // immr = (-lsb mod reg_size) and imms = width - 1.
  void bfi(const Register& rd,
           const Register& rn,
           unsigned lsb,
           unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd,
        rn,
        (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
        width - 1);
  }
793
  // Bitfield extract and insert low: copy `width` bits of rn starting at
  // `lsb` into the low bits of rd, leaving other bits of rd unchanged.
  // Alias of bfm(rd, rn, lsb, lsb + width - 1).
  void bfxil(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd, rn, lsb, lsb + width - 1);
  }
803
  // Bitfield clear [Armv8.2]: zero `width` bits of rd starting at `lsb`,
  // implemented as a bfi from the zero register.
  void bfc(const Register& rd, unsigned lsb, unsigned width) {
    bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
  }
808
809 // Sbfm aliases.
810
  // Arithmetic shift right by immediate `shift`. Alias of
  // sbfm(rd, rn, shift, reg_size - 1).
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }
816
  // Signed bitfield insert with zero at right: take the low `width` bits of
  // rn, place them at `lsb` in rd, sign-extend above and zero below. Alias
  // of sbfm with immr = (-lsb mod reg_size) and imms = width - 1.
  void sbfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }
829
  // Signed bitfield extract: extract `width` bits of rn starting at `lsb`
  // into the low bits of rd, sign-extending the result. Alias of
  // sbfm(rd, rn, lsb, lsb + width - 1).
  void sbfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd, rn, lsb, lsb + width - 1);
  }
839
  // Signed extend byte: sign-extend bits [7:0] of rn. Alias of sbfm.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }

  // Signed extend halfword: sign-extend bits [15:0] of rn. Alias of sbfm.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }

  // Signed extend word: sign-extend bits [31:0] of rn. Alias of sbfm.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
848
849 // Ubfm aliases.
850
  // Logical shift left by immediate `shift`. Alias of ubfm; the modulo keeps
  // immr in range when shift == 0 (reg_size would otherwise be out of range).
  void lsl(const Register& rd, const Register& rn, unsigned shift) {
    unsigned reg_size = rd.GetSizeInBits();
    VIXL_ASSERT(shift < reg_size);
    // NOLINTNEXTLINE(clang-analyzer-core.DivideZero)
    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
  }
858
  // Logical shift right by immediate `shift`. Alias of
  // ubfm(rd, rn, shift, reg_size - 1).
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }
864
  // Unsigned bitfield insert with zero at right: take the low `width` bits of
  // rn, place them at `lsb` in rd, zeroing all other bits. Alias of ubfm
  // with immr = (-lsb mod reg_size) and imms = width - 1.
  void ubfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }
877
  // Unsigned bitfield extract: extract `width` bits of rn starting at `lsb`
  // into the low bits of rd, zero-extending the result. Alias of
  // ubfm(rd, rn, lsb, lsb + width - 1).
  void ubfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd, rn, lsb, lsb + width - 1);
  }
887
888 // Unsigned extend byte.
uxtb(const Register & rd,const Register & rn)889 void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }
890
891 // Unsigned extend halfword.
uxth(const Register & rd,const Register & rn)892 void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }
893
894 // Unsigned extend word.
uxtw(const Register & rd,const Register & rn)895 void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
896
897 // Extract.
898 void extr(const Register& rd,
899 const Register& rn,
900 const Register& rm,
901 unsigned lsb);
902
903 // Conditional select: rd = cond ? rn : rm.
904 void csel(const Register& rd,
905 const Register& rn,
906 const Register& rm,
907 Condition cond);
908
909 // Conditional select increment: rd = cond ? rn : rm + 1.
910 void csinc(const Register& rd,
911 const Register& rn,
912 const Register& rm,
913 Condition cond);
914
915 // Conditional select inversion: rd = cond ? rn : ~rm.
916 void csinv(const Register& rd,
917 const Register& rn,
918 const Register& rm,
919 Condition cond);
920
921 // Conditional select negation: rd = cond ? rn : -rm.
922 void csneg(const Register& rd,
923 const Register& rn,
924 const Register& rm,
925 Condition cond);
926
927 // Conditional set: rd = cond ? 1 : 0.
928 void cset(const Register& rd, Condition cond);
929
930 // Conditional set mask: rd = cond ? -1 : 0.
931 void csetm(const Register& rd, Condition cond);
932
933 // Conditional increment: rd = cond ? rn + 1 : rn.
934 void cinc(const Register& rd, const Register& rn, Condition cond);
935
936 // Conditional invert: rd = cond ? ~rn : rn.
937 void cinv(const Register& rd, const Register& rn, Condition cond);
938
939 // Conditional negate: rd = cond ? -rn : rn.
940 void cneg(const Register& rd, const Register& rn, Condition cond);
941
942 // Rotate right.
ror(const Register & rd,const Register & rs,unsigned shift)943 void ror(const Register& rd, const Register& rs, unsigned shift) {
944 extr(rd, rs, rs, shift);
945 }
946
947 // Conditional comparison.
948
949 // Conditional compare negative.
950 void ccmn(const Register& rn,
951 const Operand& operand,
952 StatusFlags nzcv,
953 Condition cond);
954
955 // Conditional compare.
956 void ccmp(const Register& rn,
957 const Operand& operand,
958 StatusFlags nzcv,
959 Condition cond);
960
961 // CRC-32 checksum from byte.
962 void crc32b(const Register& wd, const Register& wn, const Register& wm);
963
964 // CRC-32 checksum from half-word.
965 void crc32h(const Register& wd, const Register& wn, const Register& wm);
966
967 // CRC-32 checksum from word.
968 void crc32w(const Register& wd, const Register& wn, const Register& wm);
969
970 // CRC-32 checksum from double word.
971 void crc32x(const Register& wd, const Register& wn, const Register& xm);
972
973 // CRC-32 C checksum from byte.
974 void crc32cb(const Register& wd, const Register& wn, const Register& wm);
975
976 // CRC-32 C checksum from half-word.
977 void crc32ch(const Register& wd, const Register& wn, const Register& wm);
978
979 // CRC-32 C checksum from word.
980 void crc32cw(const Register& wd, const Register& wn, const Register& wm);
981
982 // CRC-32C checksum from double word.
983 void crc32cx(const Register& wd, const Register& wn, const Register& xm);
984
985 // Multiply.
986 void mul(const Register& rd, const Register& rn, const Register& rm);
987
988 // Negated multiply.
989 void mneg(const Register& rd, const Register& rn, const Register& rm);
990
991 // Signed long multiply: 32 x 32 -> 64-bit.
992 void smull(const Register& xd, const Register& wn, const Register& wm);
993
994 // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
995 void smulh(const Register& xd, const Register& xn, const Register& xm);
996
997 // Multiply and accumulate.
998 void madd(const Register& rd,
999 const Register& rn,
1000 const Register& rm,
1001 const Register& ra);
1002
1003 // Multiply and subtract.
1004 void msub(const Register& rd,
1005 const Register& rn,
1006 const Register& rm,
1007 const Register& ra);
1008
1009 // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1010 void smaddl(const Register& xd,
1011 const Register& wn,
1012 const Register& wm,
1013 const Register& xa);
1014
1015 // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1016 void umaddl(const Register& xd,
1017 const Register& wn,
1018 const Register& wm,
1019 const Register& xa);
1020
1021 // Unsigned long multiply: 32 x 32 -> 64-bit.
umull(const Register & xd,const Register & wn,const Register & wm)1022 void umull(const Register& xd, const Register& wn, const Register& wm) {
1023 umaddl(xd, wn, wm, xzr);
1024 }
1025
1026 // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1027 void umulh(const Register& xd, const Register& xn, const Register& xm);
1028
1029 // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1030 void smsubl(const Register& xd,
1031 const Register& wn,
1032 const Register& wm,
1033 const Register& xa);
1034
1035 // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1036 void umsubl(const Register& xd,
1037 const Register& wn,
1038 const Register& wm,
1039 const Register& xa);
1040
1041 // Signed integer divide.
1042 void sdiv(const Register& rd, const Register& rn, const Register& rm);
1043
1044 // Unsigned integer divide.
1045 void udiv(const Register& rd, const Register& rn, const Register& rm);
1046
1047 // Bit reverse.
1048 void rbit(const Register& rd, const Register& rn);
1049
1050 // Reverse bytes in 16-bit half words.
1051 void rev16(const Register& rd, const Register& rn);
1052
1053 // Reverse bytes in 32-bit words.
1054 void rev32(const Register& xd, const Register& xn);
1055
1056 // Reverse bytes in 64-bit general purpose register, an alias for rev
1057 // [Armv8.2].
rev64(const Register & xd,const Register & xn)1058 void rev64(const Register& xd, const Register& xn) {
1059 VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
1060 rev(xd, xn);
1061 }
1062
1063 // Reverse bytes.
1064 void rev(const Register& rd, const Register& rn);
1065
1066 // Count leading zeroes.
1067 void clz(const Register& rd, const Register& rn);
1068
1069 // Count leading sign bits.
1070 void cls(const Register& rd, const Register& rn);
1071
1072 // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
1073 void pacia(const Register& xd, const Register& rn);
1074
1075 // Pointer Authentication Code for Instruction address, using key A and a
1076 // modifier of zero [Armv8.3].
1077 void paciza(const Register& xd);
1078
1079 // Pointer Authentication Code for Instruction address, using key A, with
1080 // address in x17 and modifier in x16 [Armv8.3].
1081 void pacia1716();
1082
1083 // Pointer Authentication Code for Instruction address, using key A, with
1084 // address in LR and modifier in SP [Armv8.3].
1085 void paciasp();
1086
1087 // Pointer Authentication Code for Instruction address, using key A, with
1088 // address in LR and a modifier of zero [Armv8.3].
1089 void paciaz();
1090
1091 // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
1092 void pacib(const Register& xd, const Register& xn);
1093
1094 // Pointer Authentication Code for Instruction address, using key B and a
1095 // modifier of zero [Armv8.3].
1096 void pacizb(const Register& xd);
1097
1098 // Pointer Authentication Code for Instruction address, using key B, with
1099 // address in x17 and modifier in x16 [Armv8.3].
1100 void pacib1716();
1101
1102 // Pointer Authentication Code for Instruction address, using key B, with
1103 // address in LR and modifier in SP [Armv8.3].
1104 void pacibsp();
1105
1106 // Pointer Authentication Code for Instruction address, using key B, with
1107 // address in LR and a modifier of zero [Armv8.3].
1108 void pacibz();
1109
1110 // Pointer Authentication Code for Data address, using key A [Armv8.3].
1111 void pacda(const Register& xd, const Register& xn);
1112
1113 // Pointer Authentication Code for Data address, using key A and a modifier of
1114 // zero [Armv8.3].
1115 void pacdza(const Register& xd);
1116
1117 // Pointer Authentication Code for Data address, using key B [Armv8.3].
1118 void pacdb(const Register& xd, const Register& xn);
1119
1120 // Pointer Authentication Code for Data address, using key B and a modifier of
1121 // zero [Armv8.3].
1122 void pacdzb(const Register& xd);
1123
1124 // Pointer Authentication Code, using Generic key [Armv8.3].
1125 void pacga(const Register& xd, const Register& xn, const Register& xm);
1126
1127 // Authenticate Instruction address, using key A [Armv8.3].
1128 void autia(const Register& xd, const Register& xn);
1129
1130 // Authenticate Instruction address, using key A and a modifier of zero
1131 // [Armv8.3].
1132 void autiza(const Register& xd);
1133
1134 // Authenticate Instruction address, using key A, with address in x17 and
1135 // modifier in x16 [Armv8.3].
1136 void autia1716();
1137
1138 // Authenticate Instruction address, using key A, with address in LR and
1139 // modifier in SP [Armv8.3].
1140 void autiasp();
1141
1142 // Authenticate Instruction address, using key A, with address in LR and a
1143 // modifier of zero [Armv8.3].
1144 void autiaz();
1145
1146 // Authenticate Instruction address, using key B [Armv8.3].
1147 void autib(const Register& xd, const Register& xn);
1148
1149 // Authenticate Instruction address, using key B and a modifier of zero
1150 // [Armv8.3].
1151 void autizb(const Register& xd);
1152
1153 // Authenticate Instruction address, using key B, with address in x17 and
1154 // modifier in x16 [Armv8.3].
1155 void autib1716();
1156
1157 // Authenticate Instruction address, using key B, with address in LR and
1158 // modifier in SP [Armv8.3].
1159 void autibsp();
1160
1161 // Authenticate Instruction address, using key B, with address in LR and a
1162 // modifier of zero [Armv8.3].
1163 void autibz();
1164
1165 // Authenticate Data address, using key A [Armv8.3].
1166 void autda(const Register& xd, const Register& xn);
1167
1168 // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
1169 void autdza(const Register& xd);
1170
1171 // Authenticate Data address, using key B [Armv8.3].
1172 void autdb(const Register& xd, const Register& xn);
1173
1174 // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
1175 void autdzb(const Register& xd);
1176
1177 // Strip Pointer Authentication Code of Data address [Armv8.3].
1178 void xpacd(const Register& xd);
1179
1180 // Strip Pointer Authentication Code of Instruction address [Armv8.3].
1181 void xpaci(const Register& xd);
1182
1183 // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
1184 void xpaclri();
1185
1186 // Memory instructions.
1187
1188 // Load integer or FP register.
1189 void ldr(const CPURegister& rt,
1190 const MemOperand& src,
1191 LoadStoreScalingOption option = PreferScaledOffset);
1192
1193 // Store integer or FP register.
1194 void str(const CPURegister& rt,
1195 const MemOperand& dst,
1196 LoadStoreScalingOption option = PreferScaledOffset);
1197
1198 // Load word with sign extension.
1199 void ldrsw(const Register& xt,
1200 const MemOperand& src,
1201 LoadStoreScalingOption option = PreferScaledOffset);
1202
1203 // Load byte.
1204 void ldrb(const Register& rt,
1205 const MemOperand& src,
1206 LoadStoreScalingOption option = PreferScaledOffset);
1207
1208 // Store byte.
1209 void strb(const Register& rt,
1210 const MemOperand& dst,
1211 LoadStoreScalingOption option = PreferScaledOffset);
1212
1213 // Load byte with sign extension.
1214 void ldrsb(const Register& rt,
1215 const MemOperand& src,
1216 LoadStoreScalingOption option = PreferScaledOffset);
1217
1218 // Load half-word.
1219 void ldrh(const Register& rt,
1220 const MemOperand& src,
1221 LoadStoreScalingOption option = PreferScaledOffset);
1222
1223 // Store half-word.
1224 void strh(const Register& rt,
1225 const MemOperand& dst,
1226 LoadStoreScalingOption option = PreferScaledOffset);
1227
1228 // Load half-word with sign extension.
1229 void ldrsh(const Register& rt,
1230 const MemOperand& src,
1231 LoadStoreScalingOption option = PreferScaledOffset);
1232
1233 // Load integer or FP register (with unscaled offset).
1234 void ldur(const CPURegister& rt,
1235 const MemOperand& src,
1236 LoadStoreScalingOption option = PreferUnscaledOffset);
1237
1238 // Store integer or FP register (with unscaled offset).
1239 void stur(const CPURegister& rt,
1240 const MemOperand& src,
1241 LoadStoreScalingOption option = PreferUnscaledOffset);
1242
1243 // Load word with sign extension.
1244 void ldursw(const Register& xt,
1245 const MemOperand& src,
1246 LoadStoreScalingOption option = PreferUnscaledOffset);
1247
1248 // Load byte (with unscaled offset).
1249 void ldurb(const Register& rt,
1250 const MemOperand& src,
1251 LoadStoreScalingOption option = PreferUnscaledOffset);
1252
1253 // Store byte (with unscaled offset).
1254 void sturb(const Register& rt,
1255 const MemOperand& dst,
1256 LoadStoreScalingOption option = PreferUnscaledOffset);
1257
1258 // Load byte with sign extension (and unscaled offset).
1259 void ldursb(const Register& rt,
1260 const MemOperand& src,
1261 LoadStoreScalingOption option = PreferUnscaledOffset);
1262
1263 // Load half-word (with unscaled offset).
1264 void ldurh(const Register& rt,
1265 const MemOperand& src,
1266 LoadStoreScalingOption option = PreferUnscaledOffset);
1267
1268 // Store half-word (with unscaled offset).
1269 void sturh(const Register& rt,
1270 const MemOperand& dst,
1271 LoadStoreScalingOption option = PreferUnscaledOffset);
1272
1273 // Load half-word with sign extension (and unscaled offset).
1274 void ldursh(const Register& rt,
1275 const MemOperand& src,
1276 LoadStoreScalingOption option = PreferUnscaledOffset);
1277
1278 // Load double-word with pointer authentication, using data key A and a
1279 // modifier of zero [Armv8.3].
1280 void ldraa(const Register& xt, const MemOperand& src);
1281
1282 // Load double-word with pointer authentication, using data key B and a
1283 // modifier of zero [Armv8.3].
1284 void ldrab(const Register& xt, const MemOperand& src);
1285
1286 // Load integer or FP register pair.
1287 void ldp(const CPURegister& rt,
1288 const CPURegister& rt2,
1289 const MemOperand& src);
1290
1291 // Store integer or FP register pair.
1292 void stp(const CPURegister& rt,
1293 const CPURegister& rt2,
1294 const MemOperand& dst);
1295
1296 // Load word pair with sign extension.
1297 void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);
1298
1299 // Load integer or FP register pair, non-temporal.
1300 void ldnp(const CPURegister& rt,
1301 const CPURegister& rt2,
1302 const MemOperand& src);
1303
1304 // Store integer or FP register pair, non-temporal.
1305 void stnp(const CPURegister& rt,
1306 const CPURegister& rt2,
1307 const MemOperand& dst);
1308
1309 // Load integer or FP register from literal pool.
1310 void ldr(const CPURegister& rt, RawLiteral* literal);
1311
1312 // Load word with sign extension from literal pool.
1313 void ldrsw(const Register& xt, RawLiteral* literal);
1314
1315 // Load integer or FP register from pc + imm19 << 2.
1316 void ldr(const CPURegister& rt, int64_t imm19);
1317
1318 // Load word with sign extension from pc + imm19 << 2.
1319 void ldrsw(const Register& xt, int64_t imm19);
1320
1321 // Store exclusive byte.
1322 void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1323
1324 // Store exclusive half-word.
1325 void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1326
1327 // Store exclusive register.
1328 void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1329
1330 // Load exclusive byte.
1331 void ldxrb(const Register& rt, const MemOperand& src);
1332
1333 // Load exclusive half-word.
1334 void ldxrh(const Register& rt, const MemOperand& src);
1335
1336 // Load exclusive register.
1337 void ldxr(const Register& rt, const MemOperand& src);
1338
1339 // Store exclusive register pair.
1340 void stxp(const Register& rs,
1341 const Register& rt,
1342 const Register& rt2,
1343 const MemOperand& dst);
1344
1345 // Load exclusive register pair.
1346 void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1347
1348 // Store-release exclusive byte.
1349 void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1350
1351 // Store-release exclusive half-word.
1352 void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1353
1354 // Store-release exclusive register.
1355 void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1356
1357 // Load-acquire exclusive byte.
1358 void ldaxrb(const Register& rt, const MemOperand& src);
1359
1360 // Load-acquire exclusive half-word.
1361 void ldaxrh(const Register& rt, const MemOperand& src);
1362
1363 // Load-acquire exclusive register.
1364 void ldaxr(const Register& rt, const MemOperand& src);
1365
1366 // Store-release exclusive register pair.
1367 void stlxp(const Register& rs,
1368 const Register& rt,
1369 const Register& rt2,
1370 const MemOperand& dst);
1371
1372 // Load-acquire exclusive register pair.
1373 void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1374
1375 // Store-release byte.
1376 void stlrb(const Register& rt, const MemOperand& dst);
1377
1378 // Store-release half-word.
1379 void stlrh(const Register& rt, const MemOperand& dst);
1380
1381 // Store-release register.
1382 void stlr(const Register& rt, const MemOperand& dst);
1383
1384 // Load-acquire byte.
1385 void ldarb(const Register& rt, const MemOperand& src);
1386
1387 // Load-acquire half-word.
1388 void ldarh(const Register& rt, const MemOperand& src);
1389
1390 // Load-acquire register.
1391 void ldar(const Register& rt, const MemOperand& src);
1392
1393 // Store LORelease byte [Armv8.1].
1394 void stllrb(const Register& rt, const MemOperand& dst);
1395
1396 // Store LORelease half-word [Armv8.1].
1397 void stllrh(const Register& rt, const MemOperand& dst);
1398
1399 // Store LORelease register [Armv8.1].
1400 void stllr(const Register& rt, const MemOperand& dst);
1401
1402 // Load LORelease byte [Armv8.1].
1403 void ldlarb(const Register& rt, const MemOperand& src);
1404
1405 // Load LORelease half-word [Armv8.1].
1406 void ldlarh(const Register& rt, const MemOperand& src);
1407
1408 // Load LORelease register [Armv8.1].
1409 void ldlar(const Register& rt, const MemOperand& src);
1410
1411 // Compare and Swap word or doubleword in memory [Armv8.1].
1412 void cas(const Register& rs, const Register& rt, const MemOperand& src);
1413
1414 // Compare and Swap word or doubleword in memory [Armv8.1].
1415 void casa(const Register& rs, const Register& rt, const MemOperand& src);
1416
1417 // Compare and Swap word or doubleword in memory [Armv8.1].
1418 void casl(const Register& rs, const Register& rt, const MemOperand& src);
1419
1420 // Compare and Swap word or doubleword in memory [Armv8.1].
1421 void casal(const Register& rs, const Register& rt, const MemOperand& src);
1422
1423 // Compare and Swap byte in memory [Armv8.1].
1424 void casb(const Register& rs, const Register& rt, const MemOperand& src);
1425
1426 // Compare and Swap byte in memory [Armv8.1].
1427 void casab(const Register& rs, const Register& rt, const MemOperand& src);
1428
1429 // Compare and Swap byte in memory [Armv8.1].
1430 void caslb(const Register& rs, const Register& rt, const MemOperand& src);
1431
1432 // Compare and Swap byte in memory [Armv8.1].
1433 void casalb(const Register& rs, const Register& rt, const MemOperand& src);
1434
1435 // Compare and Swap halfword in memory [Armv8.1].
1436 void cash(const Register& rs, const Register& rt, const MemOperand& src);
1437
1438 // Compare and Swap halfword in memory [Armv8.1].
1439 void casah(const Register& rs, const Register& rt, const MemOperand& src);
1440
1441 // Compare and Swap halfword in memory [Armv8.1].
1442 void caslh(const Register& rs, const Register& rt, const MemOperand& src);
1443
1444 // Compare and Swap halfword in memory [Armv8.1].
1445 void casalh(const Register& rs, const Register& rt, const MemOperand& src);
1446
1447 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1448 void casp(const Register& rs,
1449 const Register& rs2,
1450 const Register& rt,
1451 const Register& rt2,
1452 const MemOperand& src);
1453
1454 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1455 void caspa(const Register& rs,
1456 const Register& rs2,
1457 const Register& rt,
1458 const Register& rt2,
1459 const MemOperand& src);
1460
1461 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1462 void caspl(const Register& rs,
1463 const Register& rs2,
1464 const Register& rt,
1465 const Register& rt2,
1466 const MemOperand& src);
1467
1468 // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1469 void caspal(const Register& rs,
1470 const Register& rs2,
1471 const Register& rt,
1472 const Register& rt2,
1473 const MemOperand& src);
1474
1475 // Store-release byte (with unscaled offset) [Armv8.4].
1476 void stlurb(const Register& rt, const MemOperand& dst);
1477
1478 // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
1479 void ldapurb(const Register& rt, const MemOperand& src);
1480
1481 // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
1482 void ldapursb(const Register& rt, const MemOperand& src);
1483
1484 // Store-release half-word (with unscaled offset) [Armv8.4].
1485 void stlurh(const Register& rt, const MemOperand& dst);
1486
1487 // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
1488 void ldapurh(const Register& rt, const MemOperand& src);
1489
1490 // Load-acquire RCpc Register signed half-word (with unscaled offset)
1491 // [Armv8.4].
1492 void ldapursh(const Register& rt, const MemOperand& src);
1493
1494 // Store-release word or double-word (with unscaled offset) [Armv8.4].
1495 void stlur(const Register& rt, const MemOperand& dst);
1496
1497 // Load-acquire RCpc Register word or double-word (with unscaled offset)
1498 // [Armv8.4].
1499 void ldapur(const Register& rt, const MemOperand& src);
1500
1501 // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
1502 void ldapursw(const Register& xt, const MemOperand& src);
1503
1504 // Atomic add on byte in memory [Armv8.1]
1505 void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
1506
1507 // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
1508 void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
1509
1510 // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
1511 void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
1512
1513 // Atomic add on byte in memory, with Load-acquire and Store-release semantics
1514 // [Armv8.1]
1515 void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
1516
1517 // Atomic add on halfword in memory [Armv8.1]
1518 void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
1519
1520 // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
1521 void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
1522
1523 // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
1524 void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
1525
1526 // Atomic add on halfword in memory, with Load-acquire and Store-release
1527 // semantics [Armv8.1]
1528 void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
1529
1530 // Atomic add on word or doubleword in memory [Armv8.1]
1531 void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
1532
1533 // Atomic add on word or doubleword in memory, with Load-acquire semantics
1534 // [Armv8.1]
1535 void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
1536
1537 // Atomic add on word or doubleword in memory, with Store-release semantics
1538 // [Armv8.1]
1539 void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
1540
1541 // Atomic add on word or doubleword in memory, with Load-acquire and
1542 // Store-release semantics [Armv8.1]
1543 void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
1544
1545 // Atomic bit clear on byte in memory [Armv8.1]
1546 void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
1547
1548 // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
1549 void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
1550
1551 // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
1552 void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
1553
1554 // Atomic bit clear on byte in memory, with Load-acquire and Store-release
1555 // semantics [Armv8.1]
1556 void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
1557
1558 // Atomic bit clear on halfword in memory [Armv8.1]
1559 void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
1560
1561 // Atomic bit clear on halfword in memory, with Load-acquire semantics
1562 // [Armv8.1]
1563 void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
1564
1565 // Atomic bit clear on halfword in memory, with Store-release semantics
1566 // [Armv8.1]
1567 void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
1568
1569 // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
1570 // semantics [Armv8.1]
1571 void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
1572
1573 // Atomic bit clear on word or doubleword in memory [Armv8.1]
1574 void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
1575
1576 // Atomic bit clear on word or doubleword in memory, with Load-acquire
1577 // semantics [Armv8.1]
1578 void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
1579
1580 // Atomic bit clear on word or doubleword in memory, with Store-release
1581 // semantics [Armv8.1]
1582 void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
1583
1584 // Atomic bit clear on word or doubleword in memory, with Load-acquire and
1585 // Store-release semantics [Armv8.1]
1586 void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
1587
1588 // Atomic exclusive OR on byte in memory [Armv8.1]
1589 void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
1590
1591 // Atomic exclusive OR on byte in memory, with Load-acquire semantics
1592 // [Armv8.1]
1593 void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
1594
1595 // Atomic exclusive OR on byte in memory, with Store-release semantics
1596 // [Armv8.1]
1597 void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
1598
1599 // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
1600 // semantics [Armv8.1]
1601 void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
1602
1603 // Atomic exclusive OR on halfword in memory [Armv8.1]
1604 void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
1605
1606 // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
1607 // [Armv8.1]
1608 void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
1609
1610 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1611 // [Armv8.1]
1612 void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
1613
1614 // Atomic exclusive OR on halfword in memory, with Load-acquire and
1615 // Store-release semantics [Armv8.1]
1616 void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
1617
1618 // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
1619 void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
1620
1621 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
1622 // semantics [Armv8.1]
1623 void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
1624
1625 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1626 // semantics [Armv8.1]
1627 void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
1628
1629 // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
1630 // Store-release semantics [Armv8.1]
1631 void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1632
1633 // Atomic bit set on byte in memory [Armv8.1]
1634 void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
1635
1636 // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
1637 void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
1638
1639 // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
1640 void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
1641
1642 // Atomic bit set on byte in memory, with Load-acquire and Store-release
1643 // semantics [Armv8.1]
1644 void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
1645
1646 // Atomic bit set on halfword in memory [Armv8.1]
1647 void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
1648
1649 // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
1650 void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
1651
1652 // Atomic bit set on halfword in memory, with Store-release semantics
1653 // [Armv8.1]
1654 void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
1655
1656 // Atomic bit set on halfword in memory, with Load-acquire and Store-release
1657 // semantics [Armv8.1]
1658 void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
1659
1660 // Atomic bit set on word or doubleword in memory [Armv8.1]
1661 void ldset(const Register& rs, const Register& rt, const MemOperand& src);
1662
1663 // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
1664 // [Armv8.1]
1665 void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
1666
1667 // Atomic bit set on word or doubleword in memory, with Store-release
1668 // semantics [Armv8.1]
1669 void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
1670
1671 // Atomic bit set on word or doubleword in memory, with Load-acquire and
1672 // Store-release semantics [Armv8.1]
1673 void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
1674
1675 // Atomic signed maximum on byte in memory [Armv8.1]
1676 void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
1677
1678 // Atomic signed maximum on byte in memory, with Load-acquire semantics
1679 // [Armv8.1]
1680 void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
1681
1682 // Atomic signed maximum on byte in memory, with Store-release semantics
1683 // [Armv8.1]
1684 void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1685
1686 // Atomic signed maximum on byte in memory, with Load-acquire and
1687 // Store-release semantics [Armv8.1]
1688 void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1689
1690 // Atomic signed maximum on halfword in memory [Armv8.1]
1691 void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
1692
1693 // Atomic signed maximum on halfword in memory, with Load-acquire semantics
1694 // [Armv8.1]
1695 void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
1696
1697 // Atomic signed maximum on halfword in memory, with Store-release semantics
1698 // [Armv8.1]
1699 void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1700
1701 // Atomic signed maximum on halfword in memory, with Load-acquire and
1702 // Store-release semantics [Armv8.1]
1703 void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1704
1705 // Atomic signed maximum on word or doubleword in memory [Armv8.1]
1706 void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
1707
1708 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1709 // semantics [Armv8.1]
1710 void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
1711
1712 // Atomic signed maximum on word or doubleword in memory, with Store-release
1713 // semantics [Armv8.1]
1714 void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
1715
1716 // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1717 // and Store-release semantics [Armv8.1]
1718 void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1719
1720 // Atomic signed minimum on byte in memory [Armv8.1]
1721 void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
1722
1723 // Atomic signed minimum on byte in memory, with Load-acquire semantics
1724 // [Armv8.1]
1725 void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
1726
1727 // Atomic signed minimum on byte in memory, with Store-release semantics
1728 // [Armv8.1]
1729 void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
1730
1731 // Atomic signed minimum on byte in memory, with Load-acquire and
1732 // Store-release semantics [Armv8.1]
1733 void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
1734
1735 // Atomic signed minimum on halfword in memory [Armv8.1]
1736 void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
1737
1738 // Atomic signed minimum on halfword in memory, with Load-acquire semantics
1739 // [Armv8.1]
1740 void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
1741
1742 // Atomic signed minimum on halfword in memory, with Store-release semantics
1743 // [Armv8.1]
1744 void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
1745
1746 // Atomic signed minimum on halfword in memory, with Load-acquire and
1747 // Store-release semantics [Armv8.1]
1748 void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
1749
1750 // Atomic signed minimum on word or doubleword in memory [Armv8.1]
1751 void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
1752
1753 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1754 // semantics [Armv8.1]
1755 void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
1756
1757 // Atomic signed minimum on word or doubleword in memory, with Store-release
1758 // semantics [Armv8.1]
1759 void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
1760
1761 // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1762 // and Store-release semantics [Armv8.1]
1763 void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
1764
1765 // Atomic unsigned maximum on byte in memory [Armv8.1]
1766 void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
1767
1768 // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
1769 // [Armv8.1]
1770 void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
1771
1772 // Atomic unsigned maximum on byte in memory, with Store-release semantics
1773 // [Armv8.1]
1774 void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1775
1776 // Atomic unsigned maximum on byte in memory, with Load-acquire and
1777 // Store-release semantics [Armv8.1]
1778 void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1779
1780 // Atomic unsigned maximum on halfword in memory [Armv8.1]
1781 void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
1782
1783 // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
1784 // [Armv8.1]
1785 void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
1786
1787 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1788 // [Armv8.1]
1789 void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1790
1791 // Atomic unsigned maximum on halfword in memory, with Load-acquire and
1792 // Store-release semantics [Armv8.1]
1793 void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1794
1795 // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
1796 void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
1797
1798 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1799 // semantics [Armv8.1]
1800 void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
1801
1802 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1803 // semantics [Armv8.1]
1804 void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
1805
1806 // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1807 // and Store-release semantics [Armv8.1]
1808 void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1809
1810 // Atomic unsigned minimum on byte in memory [Armv8.1]
1811 void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
1812
1813 // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
1814 // [Armv8.1]
1815 void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
1816
1817 // Atomic unsigned minimum on byte in memory, with Store-release semantics
1818 // [Armv8.1]
1819 void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
1820
1821 // Atomic unsigned minimum on byte in memory, with Load-acquire and
1822 // Store-release semantics [Armv8.1]
1823 void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
1824
1825 // Atomic unsigned minimum on halfword in memory [Armv8.1]
1826 void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
1827
1828 // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
1829 // [Armv8.1]
1830 void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
1831
1832 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1833 // [Armv8.1]
1834 void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
1835
1836 // Atomic unsigned minimum on halfword in memory, with Load-acquire and
1837 // Store-release semantics [Armv8.1]
1838 void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
1839
1840 // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
1841 void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
1842
1843 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1844 // semantics [Armv8.1]
1845 void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
1846
1847 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
1848 // semantics [Armv8.1]
1849 void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
1850
1851 // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1852 // and Store-release semantics [Armv8.1]
1853 void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1854
1855 // Atomic add on byte in memory, without return. [Armv8.1]
1856 void staddb(const Register& rs, const MemOperand& src);
1857
1858 // Atomic add on byte in memory, with Store-release semantics and without
1859 // return. [Armv8.1]
1860 void staddlb(const Register& rs, const MemOperand& src);
1861
1862 // Atomic add on halfword in memory, without return. [Armv8.1]
1863 void staddh(const Register& rs, const MemOperand& src);
1864
1865 // Atomic add on halfword in memory, with Store-release semantics and without
1866 // return. [Armv8.1]
1867 void staddlh(const Register& rs, const MemOperand& src);
1868
1869 // Atomic add on word or doubleword in memory, without return. [Armv8.1]
1870 void stadd(const Register& rs, const MemOperand& src);
1871
1872 // Atomic add on word or doubleword in memory, with Store-release semantics
1873 // and without return. [Armv8.1]
1874 void staddl(const Register& rs, const MemOperand& src);
1875
1876 // Atomic bit clear on byte in memory, without return. [Armv8.1]
1877 void stclrb(const Register& rs, const MemOperand& src);
1878
1879 // Atomic bit clear on byte in memory, with Store-release semantics and
1880 // without return. [Armv8.1]
1881 void stclrlb(const Register& rs, const MemOperand& src);
1882
1883 // Atomic bit clear on halfword in memory, without return. [Armv8.1]
1884 void stclrh(const Register& rs, const MemOperand& src);
1885
1886 // Atomic bit clear on halfword in memory, with Store-release semantics and
1887 // without return. [Armv8.1]
1888 void stclrlh(const Register& rs, const MemOperand& src);
1889
1890 // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
1891 void stclr(const Register& rs, const MemOperand& src);
1892
1893 // Atomic bit clear on word or doubleword in memory, with Store-release
1894 // semantics and without return. [Armv8.1]
1895 void stclrl(const Register& rs, const MemOperand& src);
1896
1897 // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
1898 void steorb(const Register& rs, const MemOperand& src);
1899
1900 // Atomic exclusive OR on byte in memory, with Store-release semantics and
1901 // without return. [Armv8.1]
1902 void steorlb(const Register& rs, const MemOperand& src);
1903
1904 // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
1905 void steorh(const Register& rs, const MemOperand& src);
1906
1907 // Atomic exclusive OR on halfword in memory, with Store-release semantics
1908 // and without return. [Armv8.1]
1909 void steorlh(const Register& rs, const MemOperand& src);
1910
1911 // Atomic exclusive OR on word or doubleword in memory, without return.
1912 // [Armv8.1]
1913 void steor(const Register& rs, const MemOperand& src);
1914
1915 // Atomic exclusive OR on word or doubleword in memory, with Store-release
1916 // semantics and without return. [Armv8.1]
1917 void steorl(const Register& rs, const MemOperand& src);
1918
1919 // Atomic bit set on byte in memory, without return. [Armv8.1]
1920 void stsetb(const Register& rs, const MemOperand& src);
1921
1922 // Atomic bit set on byte in memory, with Store-release semantics and without
1923 // return. [Armv8.1]
1924 void stsetlb(const Register& rs, const MemOperand& src);
1925
1926 // Atomic bit set on halfword in memory, without return. [Armv8.1]
1927 void stseth(const Register& rs, const MemOperand& src);
1928
1929 // Atomic bit set on halfword in memory, with Store-release semantics and
1930 // without return. [Armv8.1]
1931 void stsetlh(const Register& rs, const MemOperand& src);
1932
1933 // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
1934 void stset(const Register& rs, const MemOperand& src);
1935
1936 // Atomic bit set on word or doubleword in memory, with Store-release
1937 // semantics and without return. [Armv8.1]
1938 void stsetl(const Register& rs, const MemOperand& src);
1939
1940 // Atomic signed maximum on byte in memory, without return. [Armv8.1]
1941 void stsmaxb(const Register& rs, const MemOperand& src);
1942
1943 // Atomic signed maximum on byte in memory, with Store-release semantics and
1944 // without return. [Armv8.1]
1945 void stsmaxlb(const Register& rs, const MemOperand& src);
1946
1947 // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
1948 void stsmaxh(const Register& rs, const MemOperand& src);
1949
1950 // Atomic signed maximum on halfword in memory, with Store-release semantics
1951 // and without return. [Armv8.1]
1952 void stsmaxlh(const Register& rs, const MemOperand& src);
1953
1954 // Atomic signed maximum on word or doubleword in memory, without return.
1955 // [Armv8.1]
1956 void stsmax(const Register& rs, const MemOperand& src);
1957
1958 // Atomic signed maximum on word or doubleword in memory, with Store-release
1959 // semantics and without return. [Armv8.1]
1960 void stsmaxl(const Register& rs, const MemOperand& src);
1961
1962 // Atomic signed minimum on byte in memory, without return. [Armv8.1]
1963 void stsminb(const Register& rs, const MemOperand& src);
1964
1965 // Atomic signed minimum on byte in memory, with Store-release semantics and
1966 // without return. [Armv8.1]
1967 void stsminlb(const Register& rs, const MemOperand& src);
1968
1969 // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
1970 void stsminh(const Register& rs, const MemOperand& src);
1971
1972 // Atomic signed minimum on halfword in memory, with Store-release semantics
1973 // and without return. [Armv8.1]
1974 void stsminlh(const Register& rs, const MemOperand& src);
1975
1976 // Atomic signed minimum on word or doubleword in memory, without return.
1977 // [Armv8.1]
1978 void stsmin(const Register& rs, const MemOperand& src);
1979
1980 // Atomic signed minimum on word or doubleword in memory, with Store-release
1981 // semantics and without return. [Armv8.1]
1982 void stsminl(const Register& rs, const MemOperand& src);
1983
1984 // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
1985 void stumaxb(const Register& rs, const MemOperand& src);
1986
1987 // Atomic unsigned maximum on byte in memory, with Store-release semantics and
1988 // without return. [Armv8.1]
1989 void stumaxlb(const Register& rs, const MemOperand& src);
1990
1991 // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
1992 void stumaxh(const Register& rs, const MemOperand& src);
1993
1994 // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1995 // and without return. [Armv8.1]
1996 void stumaxlh(const Register& rs, const MemOperand& src);
1997
1998 // Atomic unsigned maximum on word or doubleword in memory, without return.
1999 // [Armv8.1]
2000 void stumax(const Register& rs, const MemOperand& src);
2001
2002 // Atomic unsigned maximum on word or doubleword in memory, with Store-release
2003 // semantics and without return. [Armv8.1]
2004 void stumaxl(const Register& rs, const MemOperand& src);
2005
2006 // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
2007 void stuminb(const Register& rs, const MemOperand& src);
2008
2009 // Atomic unsigned minimum on byte in memory, with Store-release semantics and
2010 // without return. [Armv8.1]
2011 void stuminlb(const Register& rs, const MemOperand& src);
2012
2013 // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
2014 void stuminh(const Register& rs, const MemOperand& src);
2015
2016 // Atomic unsigned minimum on halfword in memory, with Store-release semantics
2017 // and without return. [Armv8.1]
2018 void stuminlh(const Register& rs, const MemOperand& src);
2019
2020 // Atomic unsigned minimum on word or doubleword in memory, without return.
2021 // [Armv8.1]
2022 void stumin(const Register& rs, const MemOperand& src);
2023
2024 // Atomic unsigned minimum on word or doubleword in memory, with Store-release
2025 // semantics and without return. [Armv8.1]
2026 void stuminl(const Register& rs, const MemOperand& src);
2027
2028 // Swap byte in memory [Armv8.1]
2029 void swpb(const Register& rs, const Register& rt, const MemOperand& src);
2030
2031 // Swap byte in memory, with Load-acquire semantics [Armv8.1]
2032 void swpab(const Register& rs, const Register& rt, const MemOperand& src);
2033
2034 // Swap byte in memory, with Store-release semantics [Armv8.1]
2035 void swplb(const Register& rs, const Register& rt, const MemOperand& src);
2036
2037 // Swap byte in memory, with Load-acquire and Store-release semantics
2038 // [Armv8.1]
2039 void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
2040
2041 // Swap halfword in memory [Armv8.1]
2042 void swph(const Register& rs, const Register& rt, const MemOperand& src);
2043
2044 // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
2045 void swpah(const Register& rs, const Register& rt, const MemOperand& src);
2046
2047 // Swap halfword in memory, with Store-release semantics [Armv8.1]
2048 void swplh(const Register& rs, const Register& rt, const MemOperand& src);
2049
2050 // Swap halfword in memory, with Load-acquire and Store-release semantics
2051 // [Armv8.1]
2052 void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
2053
2054 // Swap word or doubleword in memory [Armv8.1]
2055 void swp(const Register& rs, const Register& rt, const MemOperand& src);
2056
2057 // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
2058 void swpa(const Register& rs, const Register& rt, const MemOperand& src);
2059
2060 // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
2061 void swpl(const Register& rs, const Register& rt, const MemOperand& src);
2062
2063 // Swap word or doubleword in memory, with Load-acquire and Store-release
2064 // semantics [Armv8.1]
2065 void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2066
2067 // Load-Acquire RCpc Register byte [Armv8.3]
2068 void ldaprb(const Register& rt, const MemOperand& src);
2069
2070 // Load-Acquire RCpc Register halfword [Armv8.3]
2071 void ldaprh(const Register& rt, const MemOperand& src);
2072
2073 // Load-Acquire RCpc Register word or doubleword [Armv8.3]
2074 void ldapr(const Register& rt, const MemOperand& src);
2075
2076 // Prefetch memory.
2077 void prfm(PrefetchOperation op,
2078 const MemOperand& addr,
2079 LoadStoreScalingOption option = PreferScaledOffset);
2080
2081 // Prefetch memory (with unscaled offset).
2082 void prfum(PrefetchOperation op,
2083 const MemOperand& addr,
2084 LoadStoreScalingOption option = PreferUnscaledOffset);
2085
2086 // Prefetch memory in the literal pool.
2087 void prfm(PrefetchOperation op, RawLiteral* literal);
2088
2089 // Prefetch from pc + imm19 << 2.
2090 void prfm(PrefetchOperation op, int64_t imm19);
2091
2092 // Prefetch memory (allowing unallocated hints).
2093 void prfm(int op,
2094 const MemOperand& addr,
2095 LoadStoreScalingOption option = PreferScaledOffset);
2096
2097 // Prefetch memory (with unscaled offset, allowing unallocated hints).
2098 void prfum(int op,
2099 const MemOperand& addr,
2100 LoadStoreScalingOption option = PreferUnscaledOffset);
2101
2102 // Prefetch memory in the literal pool (allowing unallocated hints).
2103 void prfm(int op, RawLiteral* literal);
2104
2105 // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
2106 void prfm(int op, int64_t imm19);
2107
2108 // Move instructions. The default shift of -1 indicates that the move
2109 // instruction will calculate an appropriate 16-bit immediate and left shift
2110 // that is equal to the 64-bit immediate argument. If an explicit left shift
2111 // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
2112 //
2113 // For movk, an explicit shift can be used to indicate which half word should
2114 // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
2115 // half word with zero, whereas movk(x0, 0, 48) will overwrite the
2116 // most-significant.
2117
2118 // Move immediate and keep.
2119 void movk(const Register& rd, uint64_t imm, int shift = -1) {
2120 MoveWide(rd, imm, shift, MOVK);
2121 }
2122
2123 // Move inverted immediate.
2124 void movn(const Register& rd, uint64_t imm, int shift = -1) {
2125 MoveWide(rd, imm, shift, MOVN);
2126 }
2127
2128 // Move immediate.
2129 void movz(const Register& rd, uint64_t imm, int shift = -1) {
2130 MoveWide(rd, imm, shift, MOVZ);
2131 }
2132
2133 // Move immediate, aliases for movz, movn, orr.
mov(const Register & rd,uint64_t imm)2134 void mov(const Register& rd, uint64_t imm) {
2135 if (!OneInstrMoveImmediateHelper(this, rd, imm)) {
2136 VIXL_UNIMPLEMENTED();
2137 }
2138 }
2139
2140 // Misc instructions.
2141
2142 // Monitor debug-mode breakpoint.
2143 void brk(int code);
2144
2145 // Halting debug-mode breakpoint.
2146 void hlt(int code);
2147
2148 // Generate exception targeting EL1.
2149 void svc(int code);
2150
2151 // Generate undefined instruction exception.
2152 void udf(int code);
2153
2154 // Move register to register.
2155 void mov(const Register& rd, const Register& rn);
2156
2157 // Move inverted operand to register.
2158 void mvn(const Register& rd, const Operand& operand);
2159
2160 // System instructions.
2161
2162 // Move to register from system register.
2163 void mrs(const Register& xt, SystemRegister sysreg);
2164
2165 // Move from register to system register.
2166 void msr(SystemRegister sysreg, const Register& xt);
2167
2168 // Invert carry flag [Armv8.4].
2169 void cfinv();
2170
2171 // Convert floating-point condition flags from alternative format to Arm
2172 // format [Armv8.5].
2173 void xaflag();
2174
2175 // Convert floating-point condition flags from Arm format to alternative
2176 // format [Armv8.5].
2177 void axflag();
2178
2179 // System instruction.
2180 void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
2181
2182 // System instruction with pre-encoded op (op1:crn:crm:op2).
2183 void sys(int op, const Register& xt = xzr);
2184
2185 // System data cache operation.
2186 void dc(DataCacheOp op, const Register& rt);
2187
2188 // System instruction cache operation.
2189 void ic(InstructionCacheOp op, const Register& rt);
2190
2191 // System hint (named type).
2192 void hint(SystemHint code);
2193
2194 // System hint (numbered type).
2195 void hint(int imm7);
2196
2197 // Clear exclusive monitor.
2198 void clrex(int imm4 = 0xf);
2199
2200 // Data memory barrier.
2201 void dmb(BarrierDomain domain, BarrierType type);
2202
2203 // Data synchronization barrier.
2204 void dsb(BarrierDomain domain, BarrierType type);
2205
2206 // Instruction synchronization barrier.
2207 void isb();
2208
2209 // Error synchronization barrier.
2210 void esb();
2211
2212 // Conditional speculation dependency barrier.
2213 void csdb();
2214
2215 // No-op.
nop()2216 void nop() { hint(NOP); }
2217
2218 // Branch target identification.
2219 void bti(BranchTargetIdentifier id);
2220
2221 // FP and NEON instructions.
2222
2223 // Move double precision immediate to FP register.
2224 void fmov(const VRegister& vd, double imm);
2225
2226 // Move single precision immediate to FP register.
2227 void fmov(const VRegister& vd, float imm);
2228
2229 // Move half precision immediate to FP register [Armv8.2].
2230 void fmov(const VRegister& vd, Float16 imm);
2231
2232 // Move FP register to register.
2233 void fmov(const Register& rd, const VRegister& fn);
2234
2235 // Move register to FP register.
2236 void fmov(const VRegister& vd, const Register& rn);
2237
2238 // Move FP register to FP register.
2239 void fmov(const VRegister& vd, const VRegister& fn);
2240
2241 // Move 64-bit register to top half of 128-bit FP register.
2242 void fmov(const VRegister& vd, int index, const Register& rn);
2243
2244 // Move top half of 128-bit FP register to 64-bit register.
2245 void fmov(const Register& rd, const VRegister& vn, int index);
2246
2247 // FP add.
2248 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2249
2250 // FP subtract.
2251 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2252
2253 // FP multiply.
2254 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2255
2256 // FP fused multiply-add.
2257 void fmadd(const VRegister& vd,
2258 const VRegister& vn,
2259 const VRegister& vm,
2260 const VRegister& va);
2261
2262 // FP fused multiply-subtract.
2263 void fmsub(const VRegister& vd,
2264 const VRegister& vn,
2265 const VRegister& vm,
2266 const VRegister& va);
2267
2268 // FP fused multiply-add and negate.
2269 void fnmadd(const VRegister& vd,
2270 const VRegister& vn,
2271 const VRegister& vm,
2272 const VRegister& va);
2273
2274 // FP fused multiply-subtract and negate.
2275 void fnmsub(const VRegister& vd,
2276 const VRegister& vn,
2277 const VRegister& vm,
2278 const VRegister& va);
2279
2280 // FP multiply-negate scalar.
2281 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2282
2283 // FP reciprocal exponent scalar.
2284 void frecpx(const VRegister& vd, const VRegister& vn);
2285
2286 // FP divide.
2287 void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2288
2289 // FP maximum.
2290 void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2291
2292 // FP minimum.
2293 void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2294
2295 // FP maximum number.
2296 void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2297
2298 // FP minimum number.
2299 void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2300
2301 // FP absolute.
2302 void fabs(const VRegister& vd, const VRegister& vn);
2303
2304 // FP negate.
2305 void fneg(const VRegister& vd, const VRegister& vn);
2306
2307 // FP square root.
2308 void fsqrt(const VRegister& vd, const VRegister& vn);
2309
2310 // FP round to integer, nearest with ties to away.
2311 void frinta(const VRegister& vd, const VRegister& vn);
2312
2313 // FP round to integer, implicit rounding.
2314 void frinti(const VRegister& vd, const VRegister& vn);
2315
2316 // FP round to integer, toward minus infinity.
2317 void frintm(const VRegister& vd, const VRegister& vn);
2318
2319 // FP round to integer, nearest with ties to even.
2320 void frintn(const VRegister& vd, const VRegister& vn);
2321
2322 // FP round to integer, toward plus infinity.
2323 void frintp(const VRegister& vd, const VRegister& vn);
2324
2325 // FP round to integer, exact, implicit rounding.
2326 void frintx(const VRegister& vd, const VRegister& vn);
2327
2328 // FP round to integer, towards zero.
2329 void frintz(const VRegister& vd, const VRegister& vn);
2330
2331 // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
2332 void frint32x(const VRegister& vd, const VRegister& vn);
2333
2334 // FP round to 32-bit integer, towards zero [Armv8.5].
2335 void frint32z(const VRegister& vd, const VRegister& vn);
2336
2337 // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
2338 void frint64x(const VRegister& vd, const VRegister& vn);
2339
2340 // FP round to 64-bit integer, towards zero [Armv8.5].
2341 void frint64z(const VRegister& vd, const VRegister& vn);
2342
2343 void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);
2344
2345 void FPCompareMacro(const VRegister& vn,
2346 const VRegister& vm,
2347 FPTrapFlags trap);
2348
2349 // FP compare registers.
2350 void fcmp(const VRegister& vn, const VRegister& vm);
2351
2352 // FP compare immediate.
2353 void fcmp(const VRegister& vn, double value);
2354
2355 void FPCCompareMacro(const VRegister& vn,
2356 const VRegister& vm,
2357 StatusFlags nzcv,
2358 Condition cond,
2359 FPTrapFlags trap);
2360
2361 // FP conditional compare.
2362 void fccmp(const VRegister& vn,
2363 const VRegister& vm,
2364 StatusFlags nzcv,
2365 Condition cond);
2366
2367 // FP signaling compare registers.
2368 void fcmpe(const VRegister& vn, const VRegister& vm);
2369
2370 // FP signaling compare immediate.
2371 void fcmpe(const VRegister& vn, double value);
2372
2373 // FP conditional signaling compare.
2374 void fccmpe(const VRegister& vn,
2375 const VRegister& vm,
2376 StatusFlags nzcv,
2377 Condition cond);
2378
2379 // FP conditional select.
2380 void fcsel(const VRegister& vd,
2381 const VRegister& vn,
2382 const VRegister& vm,
2383 Condition cond);
2384
2385 // Common FP Convert functions.
2386 void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
2387 void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2388 void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2389
2390 // FP convert between precisions.
2391 void fcvt(const VRegister& vd, const VRegister& vn);
2392
2393 // FP convert to higher precision.
2394 void fcvtl(const VRegister& vd, const VRegister& vn);
2395
2396 // FP convert to higher precision (second part).
2397 void fcvtl2(const VRegister& vd, const VRegister& vn);
2398
2399 // FP convert to lower precision.
2400 void fcvtn(const VRegister& vd, const VRegister& vn);
2401
2402 // FP convert to lower precision (second part).
2403 void fcvtn2(const VRegister& vd, const VRegister& vn);
2404
2405 // FP convert to lower precision, rounding to odd.
2406 void fcvtxn(const VRegister& vd, const VRegister& vn);
2407
2408 // FP convert to lower precision, rounding to odd (second part).
2409 void fcvtxn2(const VRegister& vd, const VRegister& vn);
2410
2411 // FP convert to signed integer, nearest with ties to away.
2412 void fcvtas(const Register& rd, const VRegister& vn);
2413
2414 // FP convert to unsigned integer, nearest with ties to away.
2415 void fcvtau(const Register& rd, const VRegister& vn);
2416
2417 // FP convert to signed integer, nearest with ties to away.
2418 void fcvtas(const VRegister& vd, const VRegister& vn);
2419
2420 // FP convert to unsigned integer, nearest with ties to away.
2421 void fcvtau(const VRegister& vd, const VRegister& vn);
2422
2423 // FP convert to signed integer, round towards -infinity.
2424 void fcvtms(const Register& rd, const VRegister& vn);
2425
2426 // FP convert to unsigned integer, round towards -infinity.
2427 void fcvtmu(const Register& rd, const VRegister& vn);
2428
2429 // FP convert to signed integer, round towards -infinity.
2430 void fcvtms(const VRegister& vd, const VRegister& vn);
2431
2432 // FP convert to unsigned integer, round towards -infinity.
2433 void fcvtmu(const VRegister& vd, const VRegister& vn);
2434
2435 // FP convert to signed integer, nearest with ties to even.
2436 void fcvtns(const Register& rd, const VRegister& vn);
2437
2438 // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
2439 void fjcvtzs(const Register& rd, const VRegister& vn);
2440
2441 // FP convert to unsigned integer, nearest with ties to even.
2442 void fcvtnu(const Register& rd, const VRegister& vn);
2443
2444 // FP convert to signed integer, nearest with ties to even.
2445 void fcvtns(const VRegister& rd, const VRegister& vn);
2446
2447 // FP convert to unsigned integer, nearest with ties to even.
2448 void fcvtnu(const VRegister& rd, const VRegister& vn);
2449
2450 // FP convert to signed integer or fixed-point, round towards zero.
2451 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2452
2453 // FP convert to unsigned integer or fixed-point, round towards zero.
2454 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2455
2456 // FP convert to signed integer or fixed-point, round towards zero.
2457 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2458
2459 // FP convert to unsigned integer or fixed-point, round towards zero.
2460 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2461
2462 // FP convert to signed integer, round towards +infinity.
2463 void fcvtps(const Register& rd, const VRegister& vn);
2464
2465 // FP convert to unsigned integer, round towards +infinity.
2466 void fcvtpu(const Register& rd, const VRegister& vn);
2467
2468 // FP convert to signed integer, round towards +infinity.
2469 void fcvtps(const VRegister& vd, const VRegister& vn);
2470
2471 // FP convert to unsigned integer, round towards +infinity.
2472 void fcvtpu(const VRegister& vd, const VRegister& vn);
2473
2474 // Convert signed integer or fixed-point to FP.
2475 void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2476
2477 // Convert unsigned integer or fixed-point to FP.
2478 void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2479
2480 // Convert signed integer or fixed-point to FP.
2481 void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2482
2483 // Convert unsigned integer or fixed-point to FP.
2484 void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2485
2486 // Unsigned absolute difference.
2487 void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2488
2489 // Signed absolute difference.
2490 void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2491
2492 // Unsigned absolute difference and accumulate.
2493 void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2494
2495 // Signed absolute difference and accumulate.
2496 void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2497
2498 // Add.
2499 void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2500
2501 // Subtract.
2502 void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2503
2504 // Unsigned halving add.
2505 void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2506
2507 // Signed halving add.
2508 void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2509
2510 // Unsigned rounding halving add.
2511 void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2512
2513 // Signed rounding halving add.
2514 void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2515
2516 // Unsigned halving sub.
2517 void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2518
2519 // Signed halving sub.
2520 void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2521
2522 // Unsigned saturating add.
2523 void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2524
2525 // Signed saturating add.
2526 void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2527
2528 // Unsigned saturating subtract.
2529 void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2530
2531 // Signed saturating subtract.
2532 void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2533
2534 // Add pairwise.
2535 void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2536
2537 // Add pair of elements scalar.
2538 void addp(const VRegister& vd, const VRegister& vn);
2539
2540 // Multiply-add to accumulator.
2541 void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2542
2543 // Multiply-subtract to accumulator.
2544 void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2545
2546 // Multiply.
2547 void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2548
2549 // Multiply by scalar element.
2550 void mul(const VRegister& vd,
2551 const VRegister& vn,
2552 const VRegister& vm,
2553 int vm_index);
2554
2555 // Multiply-add by scalar element.
2556 void mla(const VRegister& vd,
2557 const VRegister& vn,
2558 const VRegister& vm,
2559 int vm_index);
2560
2561 // Multiply-subtract by scalar element.
2562 void mls(const VRegister& vd,
2563 const VRegister& vn,
2564 const VRegister& vm,
2565 int vm_index);
2566
2567 // Signed long multiply-add by scalar element.
2568 void smlal(const VRegister& vd,
2569 const VRegister& vn,
2570 const VRegister& vm,
2571 int vm_index);
2572
2573 // Signed long multiply-add by scalar element (second part).
2574 void smlal2(const VRegister& vd,
2575 const VRegister& vn,
2576 const VRegister& vm,
2577 int vm_index);
2578
2579 // Unsigned long multiply-add by scalar element.
2580 void umlal(const VRegister& vd,
2581 const VRegister& vn,
2582 const VRegister& vm,
2583 int vm_index);
2584
2585 // Unsigned long multiply-add by scalar element (second part).
2586 void umlal2(const VRegister& vd,
2587 const VRegister& vn,
2588 const VRegister& vm,
2589 int vm_index);
2590
2591 // Signed long multiply-sub by scalar element.
2592 void smlsl(const VRegister& vd,
2593 const VRegister& vn,
2594 const VRegister& vm,
2595 int vm_index);
2596
2597 // Signed long multiply-sub by scalar element (second part).
2598 void smlsl2(const VRegister& vd,
2599 const VRegister& vn,
2600 const VRegister& vm,
2601 int vm_index);
2602
2603 // Unsigned long multiply-sub by scalar element.
2604 void umlsl(const VRegister& vd,
2605 const VRegister& vn,
2606 const VRegister& vm,
2607 int vm_index);
2608
2609 // Unsigned long multiply-sub by scalar element (second part).
2610 void umlsl2(const VRegister& vd,
2611 const VRegister& vn,
2612 const VRegister& vm,
2613 int vm_index);
2614
2615 // Signed long multiply by scalar element.
2616 void smull(const VRegister& vd,
2617 const VRegister& vn,
2618 const VRegister& vm,
2619 int vm_index);
2620
2621 // Signed long multiply by scalar element (second part).
2622 void smull2(const VRegister& vd,
2623 const VRegister& vn,
2624 const VRegister& vm,
2625 int vm_index);
2626
2627 // Unsigned long multiply by scalar element.
2628 void umull(const VRegister& vd,
2629 const VRegister& vn,
2630 const VRegister& vm,
2631 int vm_index);
2632
2633 // Unsigned long multiply by scalar element (second part).
2634 void umull2(const VRegister& vd,
2635 const VRegister& vn,
2636 const VRegister& vm,
2637 int vm_index);
2638
2639 // Signed saturating doubling long multiply by element.
2640 void sqdmull(const VRegister& vd,
2641 const VRegister& vn,
2642 const VRegister& vm,
2643 int vm_index);
2644
2645 // Signed saturating doubling long multiply by element (second part).
2646 void sqdmull2(const VRegister& vd,
2647 const VRegister& vn,
2648 const VRegister& vm,
2649 int vm_index);
2650
2651 // Signed saturating doubling long multiply-add by element.
2652 void sqdmlal(const VRegister& vd,
2653 const VRegister& vn,
2654 const VRegister& vm,
2655 int vm_index);
2656
2657 // Signed saturating doubling long multiply-add by element (second part).
2658 void sqdmlal2(const VRegister& vd,
2659 const VRegister& vn,
2660 const VRegister& vm,
2661 int vm_index);
2662
2663 // Signed saturating doubling long multiply-sub by element.
2664 void sqdmlsl(const VRegister& vd,
2665 const VRegister& vn,
2666 const VRegister& vm,
2667 int vm_index);
2668
2669 // Signed saturating doubling long multiply-sub by element (second part).
2670 void sqdmlsl2(const VRegister& vd,
2671 const VRegister& vn,
2672 const VRegister& vm,
2673 int vm_index);
2674
2675 // Compare equal.
2676 void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2677
2678 // Compare signed greater than or equal.
2679 void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2680
2681 // Compare signed greater than.
2682 void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2683
2684 // Compare unsigned higher.
2685 void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2686
2687 // Compare unsigned higher or same.
2688 void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2689
2690 // Compare bitwise test bits nonzero.
2691 void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2692
2693 // Compare bitwise to zero.
2694 void cmeq(const VRegister& vd, const VRegister& vn, int value);
2695
2696 // Compare signed greater than or equal to zero.
2697 void cmge(const VRegister& vd, const VRegister& vn, int value);
2698
2699 // Compare signed greater than zero.
2700 void cmgt(const VRegister& vd, const VRegister& vn, int value);
2701
2702 // Compare signed less than or equal to zero.
2703 void cmle(const VRegister& vd, const VRegister& vn, int value);
2704
2705 // Compare signed less than zero.
2706 void cmlt(const VRegister& vd, const VRegister& vn, int value);
2707
2708 // Signed shift left by register.
2709 void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2710
2711 // Unsigned shift left by register.
2712 void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2713
2714 // Signed saturating shift left by register.
2715 void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2716
2717 // Unsigned saturating shift left by register.
2718 void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2719
2720 // Signed rounding shift left by register.
2721 void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2722
2723 // Unsigned rounding shift left by register.
2724 void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2725
2726 // Signed saturating rounding shift left by register.
2727 void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2728
2729 // Unsigned saturating rounding shift left by register.
2730 void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2731
2732 // Bitwise and.
2733 void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2734
2735 // Bitwise or.
2736 void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2737
2738 // Bitwise or immediate.
2739 void orr(const VRegister& vd, const int imm8, const int left_shift = 0);
2740
2741 // Move register to register.
2742 void mov(const VRegister& vd, const VRegister& vn);
2743
2744 // Bitwise orn.
2745 void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2746
2747 // Bitwise eor.
2748 void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2749
2750 // Bit clear immediate.
2751 void bic(const VRegister& vd, const int imm8, const int left_shift = 0);
2752
2753 // Bit clear.
2754 void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2755
2756 // Bitwise insert if false.
2757 void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2758
2759 // Bitwise insert if true.
2760 void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2761
2762 // Bitwise select.
2763 void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2764
2765 // Polynomial multiply.
2766 void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2767
2768 // Vector move immediate.
2769 void movi(const VRegister& vd,
2770 const uint64_t imm,
2771 Shift shift = LSL,
2772 const int shift_amount = 0);
2773
2774 // Bitwise not.
2775 void mvn(const VRegister& vd, const VRegister& vn);
2776
2777 // Vector move inverted immediate.
2778 void mvni(const VRegister& vd,
2779 const int imm8,
2780 Shift shift = LSL,
2781 const int shift_amount = 0);
2782
2783 // Signed saturating accumulate of unsigned value.
2784 void suqadd(const VRegister& vd, const VRegister& vn);
2785
2786 // Unsigned saturating accumulate of signed value.
2787 void usqadd(const VRegister& vd, const VRegister& vn);
2788
2789 // Absolute value.
2790 void abs(const VRegister& vd, const VRegister& vn);
2791
2792 // Signed saturating absolute value.
2793 void sqabs(const VRegister& vd, const VRegister& vn);
2794
2795 // Negate.
2796 void neg(const VRegister& vd, const VRegister& vn);
2797
2798 // Signed saturating negate.
2799 void sqneg(const VRegister& vd, const VRegister& vn);
2800
2801 // Bitwise not.
2802 void not_(const VRegister& vd, const VRegister& vn);
2803
2804 // Extract narrow.
2805 void xtn(const VRegister& vd, const VRegister& vn);
2806
2807 // Extract narrow (second part).
2808 void xtn2(const VRegister& vd, const VRegister& vn);
2809
2810 // Signed saturating extract narrow.
2811 void sqxtn(const VRegister& vd, const VRegister& vn);
2812
2813 // Signed saturating extract narrow (second part).
2814 void sqxtn2(const VRegister& vd, const VRegister& vn);
2815
2816 // Unsigned saturating extract narrow.
2817 void uqxtn(const VRegister& vd, const VRegister& vn);
2818
2819 // Unsigned saturating extract narrow (second part).
2820 void uqxtn2(const VRegister& vd, const VRegister& vn);
2821
2822 // Signed saturating extract unsigned narrow.
2823 void sqxtun(const VRegister& vd, const VRegister& vn);
2824
2825 // Signed saturating extract unsigned narrow (second part).
2826 void sqxtun2(const VRegister& vd, const VRegister& vn);
2827
2828 // Extract vector from pair of vectors.
2829 void ext(const VRegister& vd,
2830 const VRegister& vn,
2831 const VRegister& vm,
2832 int index);
2833
2834 // Duplicate vector element to vector or scalar.
2835 void dup(const VRegister& vd, const VRegister& vn, int vn_index);
2836
2837 // Move vector element to scalar.
2838 void mov(const VRegister& vd, const VRegister& vn, int vn_index);
2839
2840 // Duplicate general-purpose register to vector.
2841 void dup(const VRegister& vd, const Register& rn);
2842
2843 // Insert vector element from another vector element.
2844 void ins(const VRegister& vd,
2845 int vd_index,
2846 const VRegister& vn,
2847 int vn_index);
2848
2849 // Move vector element to another vector element.
2850 void mov(const VRegister& vd,
2851 int vd_index,
2852 const VRegister& vn,
2853 int vn_index);
2854
2855 // Insert vector element from general-purpose register.
2856 void ins(const VRegister& vd, int vd_index, const Register& rn);
2857
2858 // Move general-purpose register to a vector element.
2859 void mov(const VRegister& vd, int vd_index, const Register& rn);
2860
2861 // Unsigned move vector element to general-purpose register.
2862 void umov(const Register& rd, const VRegister& vn, int vn_index);
2863
2864 // Move vector element to general-purpose register.
2865 void mov(const Register& rd, const VRegister& vn, int vn_index);
2866
2867 // Signed move vector element to general-purpose register.
2868 void smov(const Register& rd, const VRegister& vn, int vn_index);
2869
2870 // One-element structure load to one register.
2871 void ld1(const VRegister& vt, const MemOperand& src);
2872
2873 // One-element structure load to two registers.
2874 void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
2875
2876 // One-element structure load to three registers.
2877 void ld1(const VRegister& vt,
2878 const VRegister& vt2,
2879 const VRegister& vt3,
2880 const MemOperand& src);
2881
2882 // One-element structure load to four registers.
2883 void ld1(const VRegister& vt,
2884 const VRegister& vt2,
2885 const VRegister& vt3,
2886 const VRegister& vt4,
2887 const MemOperand& src);
2888
2889 // One-element single structure load to one lane.
2890 void ld1(const VRegister& vt, int lane, const MemOperand& src);
2891
2892 // One-element single structure load to all lanes.
2893 void ld1r(const VRegister& vt, const MemOperand& src);
2894
2895 // Two-element structure load.
2896 void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
2897
2898 // Two-element single structure load to one lane.
2899 void ld2(const VRegister& vt,
2900 const VRegister& vt2,
2901 int lane,
2902 const MemOperand& src);
2903
2904 // Two-element single structure load to all lanes.
2905 void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
2906
2907 // Three-element structure load.
2908 void ld3(const VRegister& vt,
2909 const VRegister& vt2,
2910 const VRegister& vt3,
2911 const MemOperand& src);
2912
2913 // Three-element single structure load to one lane.
2914 void ld3(const VRegister& vt,
2915 const VRegister& vt2,
2916 const VRegister& vt3,
2917 int lane,
2918 const MemOperand& src);
2919
2920 // Three-element single structure load to all lanes.
2921 void ld3r(const VRegister& vt,
2922 const VRegister& vt2,
2923 const VRegister& vt3,
2924 const MemOperand& src);
2925
2926 // Four-element structure load.
2927 void ld4(const VRegister& vt,
2928 const VRegister& vt2,
2929 const VRegister& vt3,
2930 const VRegister& vt4,
2931 const MemOperand& src);
2932
2933 // Four-element single structure load to one lane.
2934 void ld4(const VRegister& vt,
2935 const VRegister& vt2,
2936 const VRegister& vt3,
2937 const VRegister& vt4,
2938 int lane,
2939 const MemOperand& src);
2940
2941 // Four-element single structure load to all lanes.
2942 void ld4r(const VRegister& vt,
2943 const VRegister& vt2,
2944 const VRegister& vt3,
2945 const VRegister& vt4,
2946 const MemOperand& src);
2947
2948 // Count leading sign bits.
2949 void cls(const VRegister& vd, const VRegister& vn);
2950
2951 // Count leading zero bits (vector).
2952 void clz(const VRegister& vd, const VRegister& vn);
2953
2954 // Population count per byte.
2955 void cnt(const VRegister& vd, const VRegister& vn);
2956
2957 // Reverse bit order.
2958 void rbit(const VRegister& vd, const VRegister& vn);
2959
2960 // Reverse elements in 16-bit halfwords.
2961 void rev16(const VRegister& vd, const VRegister& vn);
2962
2963 // Reverse elements in 32-bit words.
2964 void rev32(const VRegister& vd, const VRegister& vn);
2965
2966 // Reverse elements in 64-bit doublewords.
2967 void rev64(const VRegister& vd, const VRegister& vn);
2968
2969 // Unsigned reciprocal square root estimate.
2970 void ursqrte(const VRegister& vd, const VRegister& vn);
2971
2972 // Unsigned reciprocal estimate.
2973 void urecpe(const VRegister& vd, const VRegister& vn);
2974
2975 // Signed pairwise long add.
2976 void saddlp(const VRegister& vd, const VRegister& vn);
2977
2978 // Unsigned pairwise long add.
2979 void uaddlp(const VRegister& vd, const VRegister& vn);
2980
2981 // Signed pairwise long add and accumulate.
2982 void sadalp(const VRegister& vd, const VRegister& vn);
2983
2984 // Unsigned pairwise long add and accumulate.
2985 void uadalp(const VRegister& vd, const VRegister& vn);
2986
2987 // Shift left by immediate.
2988 void shl(const VRegister& vd, const VRegister& vn, int shift);
2989
2990 // Signed saturating shift left by immediate.
2991 void sqshl(const VRegister& vd, const VRegister& vn, int shift);
2992
2993 // Signed saturating shift left unsigned by immediate.
2994 void sqshlu(const VRegister& vd, const VRegister& vn, int shift);
2995
2996 // Unsigned saturating shift left by immediate.
2997 void uqshl(const VRegister& vd, const VRegister& vn, int shift);
2998
2999 // Signed shift left long by immediate.
3000 void sshll(const VRegister& vd, const VRegister& vn, int shift);
3001
3002 // Signed shift left long by immediate (second part).
3003 void sshll2(const VRegister& vd, const VRegister& vn, int shift);
3004
3005 // Signed extend long.
3006 void sxtl(const VRegister& vd, const VRegister& vn);
3007
3008 // Signed extend long (second part).
3009 void sxtl2(const VRegister& vd, const VRegister& vn);
3010
3011 // Unsigned shift left long by immediate.
3012 void ushll(const VRegister& vd, const VRegister& vn, int shift);
3013
3014 // Unsigned shift left long by immediate (second part).
3015 void ushll2(const VRegister& vd, const VRegister& vn, int shift);
3016
3017 // Shift left long by element size.
3018 void shll(const VRegister& vd, const VRegister& vn, int shift);
3019
3020 // Shift left long by element size (second part).
3021 void shll2(const VRegister& vd, const VRegister& vn, int shift);
3022
3023 // Unsigned extend long.
3024 void uxtl(const VRegister& vd, const VRegister& vn);
3025
3026 // Unsigned extend long (second part).
3027 void uxtl2(const VRegister& vd, const VRegister& vn);
3028
3029 // Shift left by immediate and insert.
3030 void sli(const VRegister& vd, const VRegister& vn, int shift);
3031
3032 // Shift right by immediate and insert.
3033 void sri(const VRegister& vd, const VRegister& vn, int shift);
3034
3035 // Signed maximum.
3036 void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3037
3038 // Signed pairwise maximum.
3039 void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3040
3041 // Add across vector.
3042 void addv(const VRegister& vd, const VRegister& vn);
3043
3044 // Signed add long across vector.
3045 void saddlv(const VRegister& vd, const VRegister& vn);
3046
3047 // Unsigned add long across vector.
3048 void uaddlv(const VRegister& vd, const VRegister& vn);
3049
3050 // FP maximum number across vector.
3051 void fmaxnmv(const VRegister& vd, const VRegister& vn);
3052
3053 // FP maximum across vector.
3054 void fmaxv(const VRegister& vd, const VRegister& vn);
3055
3056 // FP minimum number across vector.
3057 void fminnmv(const VRegister& vd, const VRegister& vn);
3058
3059 // FP minimum across vector.
3060 void fminv(const VRegister& vd, const VRegister& vn);
3061
3062 // Signed maximum across vector.
3063 void smaxv(const VRegister& vd, const VRegister& vn);
3064
3065 // Signed minimum.
3066 void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3067
3068 // Signed pairwise minimum.
3069 void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3070
3071 // Signed minimum across vector.
3072 void sminv(const VRegister& vd, const VRegister& vn);
3073
3074 // One-element structure store from one register.
3075 void st1(const VRegister& vt, const MemOperand& src);
3076
3077 // One-element structure store from two registers.
3078 void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3079
3080 // One-element structure store from three registers.
3081 void st1(const VRegister& vt,
3082 const VRegister& vt2,
3083 const VRegister& vt3,
3084 const MemOperand& src);
3085
3086 // One-element structure store from four registers.
3087 void st1(const VRegister& vt,
3088 const VRegister& vt2,
3089 const VRegister& vt3,
3090 const VRegister& vt4,
3091 const MemOperand& src);
3092
3093 // One-element single structure store from one lane.
3094 void st1(const VRegister& vt, int lane, const MemOperand& src);
3095
3096 // Two-element structure store from two registers.
3097 void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3098
3099 // Two-element single structure store from two lanes.
3100 void st2(const VRegister& vt,
3101 const VRegister& vt2,
3102 int lane,
3103 const MemOperand& src);
3104
3105 // Three-element structure store from three registers.
3106 void st3(const VRegister& vt,
3107 const VRegister& vt2,
3108 const VRegister& vt3,
3109 const MemOperand& src);
3110
3111 // Three-element single structure store from three lanes.
3112 void st3(const VRegister& vt,
3113 const VRegister& vt2,
3114 const VRegister& vt3,
3115 int lane,
3116 const MemOperand& src);
3117
3118 // Four-element structure store from four registers.
3119 void st4(const VRegister& vt,
3120 const VRegister& vt2,
3121 const VRegister& vt3,
3122 const VRegister& vt4,
3123 const MemOperand& src);
3124
3125 // Four-element single structure store from four lanes.
3126 void st4(const VRegister& vt,
3127 const VRegister& vt2,
3128 const VRegister& vt3,
3129 const VRegister& vt4,
3130 int lane,
3131 const MemOperand& src);
3132
3133 // Unsigned add long.
3134 void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3135
3136 // Unsigned add long (second part).
3137 void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3138
3139 // Unsigned add wide.
3140 void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3141
3142 // Unsigned add wide (second part).
3143 void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3144
3145 // Signed add long.
3146 void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3147
3148 // Signed add long (second part).
3149 void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3150
3151 // Signed add wide.
3152 void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3153
3154 // Signed add wide (second part).
3155 void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3156
3157 // Unsigned subtract long.
3158 void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3159
3160 // Unsigned subtract long (second part).
3161 void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3162
3163 // Unsigned subtract wide.
3164 void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3165
3166 // Unsigned subtract wide (second part).
3167 void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3168
3169 // Signed subtract long.
3170 void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3171
3172 // Signed subtract long (second part).
3173 void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3174
3175 // Signed subtract wide.
3176 void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3177
3178 // Signed subtract wide (second part).
3179 void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3180
3181 // Unsigned maximum.
3182 void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3183
3184 // Unsigned pairwise maximum.
3185 void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3186
3187 // Unsigned maximum across vector.
3188 void umaxv(const VRegister& vd, const VRegister& vn);
3189
3190 // Unsigned minimum.
3191 void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3192
3193 // Unsigned pairwise minimum.
3194 void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3195
3196 // Unsigned minimum across vector.
3197 void uminv(const VRegister& vd, const VRegister& vn);
3198
3199 // Transpose vectors (primary).
3200 void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3201
3202 // Transpose vectors (secondary).
3203 void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3204
3205 // Unzip vectors (primary).
3206 void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3207
3208 // Unzip vectors (secondary).
3209 void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3210
3211 // Zip vectors (primary).
3212 void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3213
3214 // Zip vectors (secondary).
3215 void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3216
3217 // Signed shift right by immediate.
3218 void sshr(const VRegister& vd, const VRegister& vn, int shift);
3219
3220 // Unsigned shift right by immediate.
3221 void ushr(const VRegister& vd, const VRegister& vn, int shift);
3222
3223 // Signed rounding shift right by immediate.
3224 void srshr(const VRegister& vd, const VRegister& vn, int shift);
3225
3226 // Unsigned rounding shift right by immediate.
3227 void urshr(const VRegister& vd, const VRegister& vn, int shift);
3228
3229 // Signed shift right by immediate and accumulate.
3230 void ssra(const VRegister& vd, const VRegister& vn, int shift);
3231
3232 // Unsigned shift right by immediate and accumulate.
3233 void usra(const VRegister& vd, const VRegister& vn, int shift);
3234
3235 // Signed rounding shift right by immediate and accumulate.
3236 void srsra(const VRegister& vd, const VRegister& vn, int shift);
3237
3238 // Unsigned rounding shift right by immediate and accumulate.
3239 void ursra(const VRegister& vd, const VRegister& vn, int shift);
3240
3241 // Shift right narrow by immediate.
3242 void shrn(const VRegister& vd, const VRegister& vn, int shift);
3243
3244 // Shift right narrow by immediate (second part).
3245 void shrn2(const VRegister& vd, const VRegister& vn, int shift);
3246
3247 // Rounding shift right narrow by immediate.
3248 void rshrn(const VRegister& vd, const VRegister& vn, int shift);
3249
3250 // Rounding shift right narrow by immediate (second part).
3251 void rshrn2(const VRegister& vd, const VRegister& vn, int shift);
3252
3253 // Unsigned saturating shift right narrow by immediate.
3254 void uqshrn(const VRegister& vd, const VRegister& vn, int shift);
3255
3256 // Unsigned saturating shift right narrow by immediate (second part).
3257 void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3258
3259 // Unsigned saturating rounding shift right narrow by immediate.
3260 void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3261
3262 // Unsigned saturating rounding shift right narrow by immediate (second part).
3263 void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3264
3265 // Signed saturating shift right narrow by immediate.
3266 void sqshrn(const VRegister& vd, const VRegister& vn, int shift);
3267
3268 // Signed saturating shift right narrow by immediate (second part).
3269 void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3270
3271 // Signed saturating rounded shift right narrow by immediate.
3272 void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3273
3274 // Signed saturating rounded shift right narrow by immediate (second part).
3275 void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3276
3277 // Signed saturating shift right unsigned narrow by immediate.
3278 void sqshrun(const VRegister& vd, const VRegister& vn, int shift);
3279
3280 // Signed saturating shift right unsigned narrow by immediate (second part).
3281 void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);
3282
3283 // Signed sat rounded shift right unsigned narrow by immediate.
3284 void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);
3285
3286 // Signed sat rounded shift right unsigned narrow by immediate (second part).
3287 void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
3288
3289 // FP reciprocal step.
3290 void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3291
3292 // FP reciprocal estimate.
3293 void frecpe(const VRegister& vd, const VRegister& vn);
3294
3295 // FP reciprocal square root estimate.
3296 void frsqrte(const VRegister& vd, const VRegister& vn);
3297
3298 // FP reciprocal square root step.
3299 void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3300
3301 // Signed absolute difference and accumulate long.
3302 void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3303
3304 // Signed absolute difference and accumulate long (second part).
3305 void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3306
3307 // Unsigned absolute difference and accumulate long.
3308 void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3309
3310 // Unsigned absolute difference and accumulate long (second part).
3311 void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3312
3313 // Signed absolute difference long.
3314 void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3315
3316 // Signed absolute difference long (second part).
3317 void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3318
3319 // Unsigned absolute difference long.
3320 void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3321
3322 // Unsigned absolute difference long (second part).
3323 void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3324
3325 // Polynomial multiply long.
3326 void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3327
3328 // Polynomial multiply long (second part).
3329 void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3330
3331 // Signed long multiply-add.
3332 void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3333
3334 // Signed long multiply-add (second part).
3335 void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3336
3337 // Unsigned long multiply-add.
3338 void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3339
3340 // Unsigned long multiply-add (second part).
3341 void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3342
3343 // Signed long multiply-sub.
3344 void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3345
3346 // Signed long multiply-sub (second part).
3347 void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3348
3349 // Unsigned long multiply-sub.
3350 void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3351
3352 // Unsigned long multiply-sub (second part).
3353 void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3354
3355 // Signed long multiply.
3356 void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3357
3358 // Signed long multiply (second part).
3359 void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3360
3361 // Signed saturating doubling long multiply-add.
3362 void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3363
3364 // Signed saturating doubling long multiply-add (second part).
3365 void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3366
3367 // Signed saturating doubling long multiply-subtract.
3368 void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3369
3370 // Signed saturating doubling long multiply-subtract (second part).
3371 void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3372
3373 // Signed saturating doubling long multiply.
3374 void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3375
3376 // Signed saturating doubling long multiply (second part).
3377 void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3378
3379 // Signed saturating doubling multiply returning high half.
3380 void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3381
3382 // Signed saturating rounding doubling multiply returning high half.
3383 void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3384
3385 // Signed dot product [Armv8.2].
3386 void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3387
3388 // Signed saturating rounding doubling multiply accumulate returning high
3389 // half [Armv8.1].
3390 void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3391
3392 // Unsigned dot product [Armv8.2].
3393 void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3394
3395 // Dot Product with unsigned and signed integers (vector).
3396 void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3397
3398 // Dot product with signed and unsigned integers (vector, by element).
3399 void sudot(const VRegister& vd,
3400 const VRegister& vn,
3401 const VRegister& vm,
3402 int vm_index);
3403
3404 // Dot product with unsigned and signed integers (vector, by element).
3405 void usdot(const VRegister& vd,
3406 const VRegister& vn,
3407 const VRegister& vm,
3408 int vm_index);
3409
3410 // Signed saturating rounding doubling multiply subtract returning high half
3411 // [Armv8.1].
3412 void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3413
3414 // Signed saturating doubling multiply element returning high half.
3415 void sqdmulh(const VRegister& vd,
3416 const VRegister& vn,
3417 const VRegister& vm,
3418 int vm_index);
3419
3420 // Signed saturating rounding doubling multiply element returning high half.
3421 void sqrdmulh(const VRegister& vd,
3422 const VRegister& vn,
3423 const VRegister& vm,
3424 int vm_index);
3425
3426 // Signed dot product by element [Armv8.2].
3427 void sdot(const VRegister& vd,
3428 const VRegister& vn,
3429 const VRegister& vm,
3430 int vm_index);
3431
3432 // Signed saturating rounding doubling multiply accumulate element returning
3433 // high half [Armv8.1].
3434 void sqrdmlah(const VRegister& vd,
3435 const VRegister& vn,
3436 const VRegister& vm,
3437 int vm_index);
3438
3439 // Unsigned dot product by element [Armv8.2].
3440 void udot(const VRegister& vd,
3441 const VRegister& vn,
3442 const VRegister& vm,
3443 int vm_index);
3444
3445 // Signed saturating rounding doubling multiply subtract element returning
3446 // high half [Armv8.1].
3447 void sqrdmlsh(const VRegister& vd,
3448 const VRegister& vn,
3449 const VRegister& vm,
3450 int vm_index);
3451
3452 // Unsigned long multiply.
3453 void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3454
3455 // Unsigned long multiply (second part).
3456 void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3457
3458 // Add narrow returning high half.
3459 void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3460
3461 // Add narrow returning high half (second part).
3462 void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3463
3464 // Rounding add narrow returning high half.
3465 void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3466
3467 // Rounding add narrow returning high half (second part).
3468 void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3469
3470 // Subtract narrow returning high half.
3471 void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3472
3473 // Subtract narrow returning high half (second part).
3474 void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3475
3476 // Rounding subtract narrow returning high half.
3477 void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3478
3479 // Rounding subtract narrow returning high half (second part).
3480 void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3481
3482 // FP vector multiply accumulate.
3483 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3484
3485 // FP fused multiply-add long to accumulator.
3486 void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3487
3488 // FP fused multiply-add long to accumulator (second part).
3489 void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3490
3491 // FP fused multiply-add long to accumulator by element.
3492 void fmlal(const VRegister& vd,
3493 const VRegister& vn,
3494 const VRegister& vm,
3495 int vm_index);
3496
3497 // FP fused multiply-add long to accumulator by element (second part).
3498 void fmlal2(const VRegister& vd,
3499 const VRegister& vn,
3500 const VRegister& vm,
3501 int vm_index);
3502
3503 // FP vector multiply subtract.
3504 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3505
3506 // FP fused multiply-subtract long to accumulator.
3507 void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3508
3509 // FP fused multiply-subtract long to accumulator (second part).
3510 void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3511
3512 // FP fused multiply-subtract long to accumulator by element.
3513 void fmlsl(const VRegister& vd,
3514 const VRegister& vn,
3515 const VRegister& vm,
3516 int vm_index);
3517
3518 // FP fused multiply-subtract long to accumulator by element (second part).
3519 void fmlsl2(const VRegister& vd,
3520 const VRegister& vn,
3521 const VRegister& vm,
3522 int vm_index);
3523
3524 // FP vector multiply extended.
3525 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3526
3527 // FP absolute greater than or equal.
3528 void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3529
3530 // FP absolute greater than.
3531 void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3532
3533 // FP multiply by element.
3534 void fmul(const VRegister& vd,
3535 const VRegister& vn,
3536 const VRegister& vm,
3537 int vm_index);
3538
3539 // FP fused multiply-add to accumulator by element.
3540 void fmla(const VRegister& vd,
3541 const VRegister& vn,
3542 const VRegister& vm,
3543 int vm_index);
3544
3545 // FP fused multiply-sub from accumulator by element.
3546 void fmls(const VRegister& vd,
3547 const VRegister& vn,
3548 const VRegister& vm,
3549 int vm_index);
3550
3551 // FP multiply extended by element.
3552 void fmulx(const VRegister& vd,
3553 const VRegister& vn,
3554 const VRegister& vm,
3555 int vm_index);
3556
3557 // FP compare equal.
3558 void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3559
3560 // FP greater than.
3561 void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3562
3563 // FP greater than or equal.
3564 void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3565
3566 // FP compare equal to zero.
3567 void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
3568
3569 // FP greater than zero.
3570 void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
3571
3572 // FP greater than or equal to zero.
3573 void fcmge(const VRegister& vd, const VRegister& vn, double imm);
3574
3575 // FP less than or equal to zero.
3576 void fcmle(const VRegister& vd, const VRegister& vn, double imm);
3577
3578 // FP less than zero.
3579 void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
3580
3581 // FP absolute difference.
3582 void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3583
3584 // FP pairwise add vector.
3585 void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3586
3587 // FP pairwise add scalar.
3588 void faddp(const VRegister& vd, const VRegister& vn);
3589
3590 // FP pairwise maximum vector.
3591 void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3592
3593 // FP pairwise maximum scalar.
3594 void fmaxp(const VRegister& vd, const VRegister& vn);
3595
3596 // FP pairwise minimum vector.
3597 void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3598
3599 // FP pairwise minimum scalar.
3600 void fminp(const VRegister& vd, const VRegister& vn);
3601
3602 // FP pairwise maximum number vector.
3603 void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3604
3605 // FP pairwise maximum number scalar.
3606 void fmaxnmp(const VRegister& vd, const VRegister& vn);
3607
3608 // FP pairwise minimum number vector.
3609 void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3610
3611 // FP pairwise minimum number scalar.
3612 void fminnmp(const VRegister& vd, const VRegister& vn);
3613
3614 // v8.3 complex numbers - note that these are only partial/helper functions
3615 // and must be used in series in order to perform full CN operations.
3616
3617 // FP complex multiply accumulate (by element) [Armv8.3].
3618 void fcmla(const VRegister& vd,
3619 const VRegister& vn,
3620 const VRegister& vm,
3621 int vm_index,
3622 int rot);
3623
3624 // FP complex multiply accumulate [Armv8.3].
3625 void fcmla(const VRegister& vd,
3626 const VRegister& vn,
3627 const VRegister& vm,
3628 int rot);
3629
3630 // FP complex add [Armv8.3].
3631 void fcadd(const VRegister& vd,
3632 const VRegister& vn,
3633 const VRegister& vm,
3634 int rot);
3635
3636 // Signed 8-bit integer matrix multiply-accumulate (vector).
3637 void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3638
3639 // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
3640 void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3641
3642 // Unsigned 8-bit integer matrix multiply-accumulate (vector).
3643 void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3644
3645 // Scalable Vector Extensions.
3646
3647 // Absolute value (predicated).
3648 void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3649
3650 // Add vectors (predicated).
3651 void add(const ZRegister& zd,
3652 const PRegisterM& pg,
3653 const ZRegister& zn,
3654 const ZRegister& zm);
3655
3656 // Add vectors (unpredicated).
3657 void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3658
3659 // Add immediate (unpredicated).
3660 void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
3661
3662 // Add multiple of predicate register size to scalar register.
3663 void addpl(const Register& xd, const Register& xn, int imm6);
3664
3665 // Add multiple of vector register size to scalar register.
3666 void addvl(const Register& xd, const Register& xn, int imm6);
3667
3668 // Compute vector address.
3669 void adr(const ZRegister& zd, const SVEMemOperand& addr);
3670
3671 // Bitwise AND predicates.
3672 void and_(const PRegisterWithLaneSize& pd,
3673 const PRegisterZ& pg,
3674 const PRegisterWithLaneSize& pn,
3675 const PRegisterWithLaneSize& pm);
3676
3677 // Bitwise AND vectors (predicated).
3678 void and_(const ZRegister& zd,
3679 const PRegisterM& pg,
3680 const ZRegister& zn,
3681 const ZRegister& zm);
3682
3683 // Bitwise AND with immediate (unpredicated).
3684 void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3685
3686 // Bitwise AND vectors (unpredicated).
3687 void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3688
3689 // Bitwise AND predicates.
3690 void ands(const PRegisterWithLaneSize& pd,
3691 const PRegisterZ& pg,
3692 const PRegisterWithLaneSize& pn,
3693 const PRegisterWithLaneSize& pm);
3694
3695 // Bitwise AND reduction to scalar.
3696 void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
3697
3698 // Arithmetic shift right by immediate (predicated).
3699 void asr(const ZRegister& zd,
3700 const PRegisterM& pg,
3701 const ZRegister& zn,
3702 int shift);
3703
3704 // Arithmetic shift right by 64-bit wide elements (predicated).
3705 void asr(const ZRegister& zd,
3706 const PRegisterM& pg,
3707 const ZRegister& zn,
3708 const ZRegister& zm);
3709
3710 // Arithmetic shift right by immediate (unpredicated).
3711 void asr(const ZRegister& zd, const ZRegister& zn, int shift);
3712
3713 // Arithmetic shift right by 64-bit wide elements (unpredicated).
3714 void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3715
3716 // Arithmetic shift right for divide by immediate (predicated).
3717 void asrd(const ZRegister& zd,
3718 const PRegisterM& pg,
3719 const ZRegister& zn,
3720 int shift);
3721
3722 // Reversed arithmetic shift right by vector (predicated).
3723 void asrr(const ZRegister& zd,
3724 const PRegisterM& pg,
3725 const ZRegister& zn,
3726 const ZRegister& zm);
3727
3728 // Bitwise clear predicates.
3729 void bic(const PRegisterWithLaneSize& pd,
3730 const PRegisterZ& pg,
3731 const PRegisterWithLaneSize& pn,
3732 const PRegisterWithLaneSize& pm);
3733
3734 // Bitwise clear vectors (predicated).
3735 void bic(const ZRegister& zd,
3736 const PRegisterM& pg,
3737 const ZRegister& zn,
3738 const ZRegister& zm);
3739
3740 // Bitwise clear bits using immediate (unpredicated).
3741 void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3742
3743 // Bitwise clear vectors (unpredicated).
3744 void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3745
3746 // Bitwise clear predicates.
3747 void bics(const PRegisterWithLaneSize& pd,
3748 const PRegisterZ& pg,
3749 const PRegisterWithLaneSize& pn,
3750 const PRegisterWithLaneSize& pm);
3751
3752 // Break after first true condition.
3753 void brka(const PRegisterWithLaneSize& pd,
3754 const PRegister& pg,
3755 const PRegisterWithLaneSize& pn);
3756
3757 // Break after first true condition.
3758 void brkas(const PRegisterWithLaneSize& pd,
3759 const PRegisterZ& pg,
3760 const PRegisterWithLaneSize& pn);
3761
3762 // Break before first true condition.
3763 void brkb(const PRegisterWithLaneSize& pd,
3764 const PRegister& pg,
3765 const PRegisterWithLaneSize& pn);
3766
3767 // Break before first true condition.
3768 void brkbs(const PRegisterWithLaneSize& pd,
3769 const PRegisterZ& pg,
3770 const PRegisterWithLaneSize& pn);
3771
3772 // Propagate break to next partition.
3773 void brkn(const PRegisterWithLaneSize& pd,
3774 const PRegisterZ& pg,
3775 const PRegisterWithLaneSize& pn,
3776 const PRegisterWithLaneSize& pm);
3777
3778 // Propagate break to next partition.
3779 void brkns(const PRegisterWithLaneSize& pd,
3780 const PRegisterZ& pg,
3781 const PRegisterWithLaneSize& pn,
3782 const PRegisterWithLaneSize& pm);
3783
3784 // Break after first true condition, propagating from previous partition.
3785 void brkpa(const PRegisterWithLaneSize& pd,
3786 const PRegisterZ& pg,
3787 const PRegisterWithLaneSize& pn,
3788 const PRegisterWithLaneSize& pm);
3789
3790 // Break after first true condition, propagating from previous partition.
3791 void brkpas(const PRegisterWithLaneSize& pd,
3792 const PRegisterZ& pg,
3793 const PRegisterWithLaneSize& pn,
3794 const PRegisterWithLaneSize& pm);
3795
3796 // Break before first true condition, propagating from previous partition.
3797 void brkpb(const PRegisterWithLaneSize& pd,
3798 const PRegisterZ& pg,
3799 const PRegisterWithLaneSize& pn,
3800 const PRegisterWithLaneSize& pm);
3801
3802 // Break before first true condition, propagating from previous partition.
3803 void brkpbs(const PRegisterWithLaneSize& pd,
3804 const PRegisterZ& pg,
3805 const PRegisterWithLaneSize& pn,
3806 const PRegisterWithLaneSize& pm);
3807
3808 // Conditionally extract element after last to general-purpose register.
3809 void clasta(const Register& rd,
3810 const PRegister& pg,
3811 const Register& rn,
3812 const ZRegister& zm);
3813
3814 // Conditionally extract element after last to SIMD&FP scalar register.
3815 void clasta(const VRegister& vd,
3816 const PRegister& pg,
3817 const VRegister& vn,
3818 const ZRegister& zm);
3819
3820 // Conditionally extract element after last to vector register.
3821 void clasta(const ZRegister& zd,
3822 const PRegister& pg,
3823 const ZRegister& zn,
3824 const ZRegister& zm);
3825
3826 // Conditionally extract last element to general-purpose register.
3827 void clastb(const Register& rd,
3828 const PRegister& pg,
3829 const Register& rn,
3830 const ZRegister& zm);
3831
3832 // Conditionally extract last element to SIMD&FP scalar register.
3833 void clastb(const VRegister& vd,
3834 const PRegister& pg,
3835 const VRegister& vn,
3836 const ZRegister& zm);
3837
3838 // Conditionally extract last element to vector register.
3839 void clastb(const ZRegister& zd,
3840 const PRegister& pg,
3841 const ZRegister& zn,
3842 const ZRegister& zm);
3843
3844 // Count leading sign bits (predicated).
3845 void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3846
3847 // Count leading zero bits (predicated).
3848 void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3849
3850 // Compare vectors; the comparison performed is selected by `cond`.
3850 void cmp(Condition cond,
3851 const PRegisterWithLaneSize& pd,
3852 const PRegisterZ& pg,
3853 const ZRegister& zn,
3854 const ZRegister& zm);
3855
3856 // Compare vector to 64-bit wide elements.
3857 void cmpeq(const PRegisterWithLaneSize& pd,
3858 const PRegisterZ& pg,
3859 const ZRegister& zn,
3860 const ZRegister& zm);
3861
3862 // Compare vector to immediate.
3863 void cmpeq(const PRegisterWithLaneSize& pd,
3864 const PRegisterZ& pg,
3865 const ZRegister& zn,
3866 int imm5);
3867
3868 // Compare vector to 64-bit wide elements.
3869 void cmpge(const PRegisterWithLaneSize& pd,
3870 const PRegisterZ& pg,
3871 const ZRegister& zn,
3872 const ZRegister& zm);
3873
3874 // Compare vector to immediate.
3875 void cmpge(const PRegisterWithLaneSize& pd,
3876 const PRegisterZ& pg,
3877 const ZRegister& zn,
3878 int imm5);
3879
3880 // Compare vector to 64-bit wide elements.
3881 void cmpgt(const PRegisterWithLaneSize& pd,
3882 const PRegisterZ& pg,
3883 const ZRegister& zn,
3884 const ZRegister& zm);
3885
3886 // Compare vector to immediate.
3887 void cmpgt(const PRegisterWithLaneSize& pd,
3888 const PRegisterZ& pg,
3889 const ZRegister& zn,
3890 int imm5);
3891
3892 // Compare vector to 64-bit wide elements.
3893 void cmphi(const PRegisterWithLaneSize& pd,
3894 const PRegisterZ& pg,
3895 const ZRegister& zn,
3896 const ZRegister& zm);
3897
3898 // Compare vector to immediate.
3899 void cmphi(const PRegisterWithLaneSize& pd,
3900 const PRegisterZ& pg,
3901 const ZRegister& zn,
3902 unsigned imm7);
3903
3904 // Compare vector to 64-bit wide elements.
3905 void cmphs(const PRegisterWithLaneSize& pd,
3906 const PRegisterZ& pg,
3907 const ZRegister& zn,
3908 const ZRegister& zm);
3909
3910 // Compare vector to immediate.
3911 void cmphs(const PRegisterWithLaneSize& pd,
3912 const PRegisterZ& pg,
3913 const ZRegister& zn,
3914 unsigned imm7);
3915
3916 // Compare vector to 64-bit wide elements.
3917 void cmple(const PRegisterWithLaneSize& pd,
3918 const PRegisterZ& pg,
3919 const ZRegister& zn,
3920 const ZRegister& zm);
3921
3922 // Compare vector to immediate.
3923 void cmple(const PRegisterWithLaneSize& pd,
3924 const PRegisterZ& pg,
3925 const ZRegister& zn,
3926 int imm5);
3927
3928 // Compare vector to 64-bit wide elements.
3929 void cmplo(const PRegisterWithLaneSize& pd,
3930 const PRegisterZ& pg,
3931 const ZRegister& zn,
3932 const ZRegister& zm);
3933
3934 // Compare vector to immediate.
3935 void cmplo(const PRegisterWithLaneSize& pd,
3936 const PRegisterZ& pg,
3937 const ZRegister& zn,
3938 unsigned imm7);
3939
3940 // Compare vector to 64-bit wide elements.
3941 void cmpls(const PRegisterWithLaneSize& pd,
3942 const PRegisterZ& pg,
3943 const ZRegister& zn,
3944 const ZRegister& zm);
3945
3946 // Compare vector to immediate.
3947 void cmpls(const PRegisterWithLaneSize& pd,
3948 const PRegisterZ& pg,
3949 const ZRegister& zn,
3950 unsigned imm7);
3951
3952 // Compare vector to 64-bit wide elements.
3953 void cmplt(const PRegisterWithLaneSize& pd,
3954 const PRegisterZ& pg,
3955 const ZRegister& zn,
3956 const ZRegister& zm);
3957
3958 // Compare vector to immediate.
3959 void cmplt(const PRegisterWithLaneSize& pd,
3960 const PRegisterZ& pg,
3961 const ZRegister& zn,
3962 int imm5);
3963
3964 // Compare vector to 64-bit wide elements.
3965 void cmpne(const PRegisterWithLaneSize& pd,
3966 const PRegisterZ& pg,
3967 const ZRegister& zn,
3968 const ZRegister& zm);
3969
3970 // Compare vector to immediate.
3971 void cmpne(const PRegisterWithLaneSize& pd,
3972 const PRegisterZ& pg,
3973 const ZRegister& zn,
3974 int imm5);
3975
3976 // Logically invert boolean condition in vector (predicated).
3977 void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3978
3979 // Count non-zero bits (predicated).
3980 void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3981
3982 // Set scalar to multiple of predicate constraint element count.
3983 void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3984
3985 // Set scalar to multiple of predicate constraint element count.
3986 void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3987
3988 // Set scalar to multiple of predicate constraint element count.
3989 void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3990
3991 // Set scalar to active predicate element count.
3992 void cntp(const Register& xd,
3993 const PRegister& pg,
3994 const PRegisterWithLaneSize& pn);
3995
3996 // Set scalar to multiple of predicate constraint element count.
3997 void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3998
3999 // Shuffle active elements of vector to the right and fill with zero.
4000 void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
4001
4002 // Copy signed integer immediate to vector elements (predicated).
4003 void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
4004
4005 // Copy general-purpose register to vector elements (predicated).
4006 void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
4007
4008 // Copy SIMD&FP scalar register to vector elements (predicated).
4009 void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
4010
4011 // Compare and terminate loop.
4012 void ctermeq(const Register& rn, const Register& rm);
4013
4014 // Compare and terminate loop.
4015 void ctermne(const Register& rn, const Register& rm);
4016
4017 // Decrement scalar by multiple of predicate constraint element count.
4018 void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4019
4020 // Decrement scalar by multiple of predicate constraint element count.
4021 void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4022
4023 // Decrement vector by multiple of predicate constraint element count.
4024 void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4025
4026 // Decrement scalar by multiple of predicate constraint element count.
4027 void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4028
4029 // Decrement vector by multiple of predicate constraint element count.
4030 void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4031
4032 // Decrement scalar by active predicate element count.
4033 void decp(const Register& rdn, const PRegisterWithLaneSize& pg);
4034
4035 // Decrement vector by active predicate element count.
4036 void decp(const ZRegister& zdn, const PRegister& pg);
4037
4038 // Decrement scalar by multiple of predicate constraint element count.
4039 void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4040
4041 // Decrement vector by multiple of predicate constraint element count.
4042 void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4043
4044 // Broadcast general-purpose register to vector elements (unpredicated).
4045 void dup(const ZRegister& zd, const Register& xn);
4046
4047 // Broadcast indexed element to vector (unpredicated).
4048 void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);
4049
4050 // As for movz/movk/movn, if the default shift of -1 is specified to dup, the
4051 // assembler will pick an appropriate immediate and left shift that is
4052 // equivalent to the immediate argument. If an explicit left shift is
4053 // specified (0 or 8), the immediate must be a signed 8-bit integer.
4054
4055 // Broadcast signed immediate to vector elements (unpredicated).
4056 void dup(const ZRegister& zd, int imm8, int shift = -1);
4057
4058 // Broadcast logical bitmask immediate to vector (unpredicated).
4059 void dupm(const ZRegister& zd, uint64_t imm);
4060
4061 // Bitwise exclusive OR with inverted immediate (unpredicated).
4062 void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4063
4064 // Bitwise exclusive OR predicates.
4065 void eor(const PRegisterWithLaneSize& pd,
4066 const PRegisterZ& pg,
4067 const PRegisterWithLaneSize& pn,
4068 const PRegisterWithLaneSize& pm);
4069
4070 // Bitwise exclusive OR vectors (predicated).
4071 void eor(const ZRegister& zd,
4072 const PRegisterM& pg,
4073 const ZRegister& zn,
4074 const ZRegister& zm);
4075
4076 // Bitwise exclusive OR with immediate (unpredicated).
4077 void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4078
4079 // Bitwise exclusive OR vectors (unpredicated).
4080 void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4081
4082 // Bitwise exclusive OR predicates.
4083 void eors(const PRegisterWithLaneSize& pd,
4084 const PRegisterZ& pg,
4085 const PRegisterWithLaneSize& pn,
4086 const PRegisterWithLaneSize& pm);
4087
4088 // Bitwise XOR reduction to scalar.
4089 void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4090
4091 // Extract vector from pair of vectors.
4092 void ext(const ZRegister& zd,
4093 const ZRegister& zn,
4094 const ZRegister& zm,
4095 unsigned offset);
4096
4097 // Floating-point absolute difference (predicated).
4098 void fabd(const ZRegister& zd,
4099 const PRegisterM& pg,
4100 const ZRegister& zn,
4101 const ZRegister& zm);
4102
4103 // Floating-point absolute value (predicated).
4104 void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4105
4106 // Floating-point absolute compare vectors.
4107 void facge(const PRegisterWithLaneSize& pd,
4108 const PRegisterZ& pg,
4109 const ZRegister& zn,
4110 const ZRegister& zm);
4111
4112 // Floating-point absolute compare vectors.
4113 void facgt(const PRegisterWithLaneSize& pd,
4114 const PRegisterZ& pg,
4115 const ZRegister& zn,
4116 const ZRegister& zm);
4117
4118 // Floating-point add immediate (predicated).
4119 void fadd(const ZRegister& zd,
4120 const PRegisterM& pg,
4121 const ZRegister& zn,
4122 double imm);
4123
4124 // Floating-point add vector (predicated).
4125 void fadd(const ZRegister& zd,
4126 const PRegisterM& pg,
4127 const ZRegister& zn,
4128 const ZRegister& zm);
4129
4130 // Floating-point add vector (unpredicated).
4131 void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4132
4133 // Floating-point add strictly-ordered reduction, accumulating in scalar.
4134 void fadda(const VRegister& vd,
4135 const PRegister& pg,
4136 const VRegister& vn,
4137 const ZRegister& zm);
4138
4139 // Floating-point add recursive reduction to scalar.
4140 void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4141
4142 // Floating-point complex add with rotate (predicated).
4143 void fcadd(const ZRegister& zd,
4144 const PRegisterM& pg,
4145 const ZRegister& zn,
4146 const ZRegister& zm,
4147 int rot);
4148
4149 // Floating-point compare vector with zero.
4150 void fcmeq(const PRegisterWithLaneSize& pd,
4151 const PRegisterZ& pg,
4152 const ZRegister& zn,
4153 double zero);
4154
4155 // Floating-point compare vectors.
4156 void fcmeq(const PRegisterWithLaneSize& pd,
4157 const PRegisterZ& pg,
4158 const ZRegister& zn,
4159 const ZRegister& zm);
4160
4161 // Floating-point compare vector with zero.
4162 void fcmge(const PRegisterWithLaneSize& pd,
4163 const PRegisterZ& pg,
4164 const ZRegister& zn,
4165 double zero);
4166
4167 // Floating-point compare vectors.
4168 void fcmge(const PRegisterWithLaneSize& pd,
4169 const PRegisterZ& pg,
4170 const ZRegister& zn,
4171 const ZRegister& zm);
4172
4173 // Floating-point compare vector with zero.
4174 void fcmgt(const PRegisterWithLaneSize& pd,
4175 const PRegisterZ& pg,
4176 const ZRegister& zn,
4177 double zero);
4178
4179 // Floating-point compare vectors.
4180 void fcmgt(const PRegisterWithLaneSize& pd,
4181 const PRegisterZ& pg,
4182 const ZRegister& zn,
4183 const ZRegister& zm);
4184
4185 // Floating-point complex multiply-add with rotate (predicated).
4186 void fcmla(const ZRegister& zda,
4187 const PRegisterM& pg,
4188 const ZRegister& zn,
4189 const ZRegister& zm,
4190 int rot);
4191
4192 // Floating-point complex multiply-add by indexed values with rotate.
4193 void fcmla(const ZRegister& zda,
4194 const ZRegister& zn,
4195 const ZRegister& zm,
4196 int index,
4197 int rot);
4198
4199 // Floating-point compare vector with zero.
4200 void fcmle(const PRegisterWithLaneSize& pd,
4201 const PRegisterZ& pg,
4202 const ZRegister& zn,
4203 double zero);
4204
4205 // Floating-point compare vector with zero.
4206 void fcmlt(const PRegisterWithLaneSize& pd,
4207 const PRegisterZ& pg,
4208 const ZRegister& zn,
4209 double zero);
4210
4211 // Floating-point compare vector with zero.
4212 void fcmne(const PRegisterWithLaneSize& pd,
4213 const PRegisterZ& pg,
4214 const ZRegister& zn,
4215 double zero);
4216
4217 // Floating-point compare vectors.
4218 void fcmne(const PRegisterWithLaneSize& pd,
4219 const PRegisterZ& pg,
4220 const ZRegister& zn,
4221 const ZRegister& zm);
4222
4223 // Floating-point compare vectors.
4224 void fcmuo(const PRegisterWithLaneSize& pd,
4225 const PRegisterZ& pg,
4226 const ZRegister& zn,
4227 const ZRegister& zm);
4228
4229 // Copy floating-point immediate to vector elements (predicated).
4230 void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
4231
4232 // Copy half-precision floating-point immediate to vector elements
4233 // (predicated).
fcpy(const ZRegister & zd,const PRegisterM & pg,Float16 imm)4234 void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
4235 fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
4236 }
4237
4238 // Floating-point convert precision (predicated).
4239 void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4240
4241 // Floating-point convert to signed integer, rounding toward zero
4242 // (predicated).
4243 void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4244
4245 // Floating-point convert to unsigned integer, rounding toward zero
4246 // (predicated).
4247 void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4248
4249 // Floating-point divide by vector (predicated).
4250 void fdiv(const ZRegister& zd,
4251 const PRegisterM& pg,
4252 const ZRegister& zn,
4253 const ZRegister& zm);
4254
4255 // Floating-point reversed divide by vector (predicated).
4256 void fdivr(const ZRegister& zd,
4257 const PRegisterM& pg,
4258 const ZRegister& zn,
4259 const ZRegister& zm);
4260
4261 // Broadcast floating-point immediate to vector elements.
4262 void fdup(const ZRegister& zd, double imm);
4263
4264 // Broadcast half-precision floating-point immediate to vector elements.
fdup(const ZRegister & zd,Float16 imm)4265 void fdup(const ZRegister& zd, Float16 imm) {
4266 fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
4267 }
4268
4269 // Floating-point exponential accelerator.
4270 void fexpa(const ZRegister& zd, const ZRegister& zn);
4271
4272 // Floating-point fused multiply-add vectors (predicated), writing
4273 // multiplicand [Zdn = Za + Zdn * Zm].
4274 void fmad(const ZRegister& zdn,
4275 const PRegisterM& pg,
4276 const ZRegister& zm,
4277 const ZRegister& za);
4278
4279 // Floating-point maximum with immediate (predicated).
4280 void fmax(const ZRegister& zd,
4281 const PRegisterM& pg,
4282 const ZRegister& zn,
4283 double imm);
4284
4285 // Floating-point maximum (predicated).
4286 void fmax(const ZRegister& zd,
4287 const PRegisterM& pg,
4288 const ZRegister& zn,
4289 const ZRegister& zm);
4290
4291 // Floating-point maximum number with immediate (predicated).
4292 void fmaxnm(const ZRegister& zd,
4293 const PRegisterM& pg,
4294 const ZRegister& zn,
4295 double imm);
4296
4297 // Floating-point maximum number (predicated).
4298 void fmaxnm(const ZRegister& zd,
4299 const PRegisterM& pg,
4300 const ZRegister& zn,
4301 const ZRegister& zm);
4302
4303 // Floating-point maximum number recursive reduction to scalar.
4304 void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4305
4306 // Floating-point maximum recursive reduction to scalar.
4307 void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4308
4309 // Floating-point minimum with immediate (predicated).
4310 void fmin(const ZRegister& zd,
4311 const PRegisterM& pg,
4312 const ZRegister& zn,
4313 double imm);
4314
4315 // Floating-point minimum (predicated).
4316 void fmin(const ZRegister& zd,
4317 const PRegisterM& pg,
4318 const ZRegister& zn,
4319 const ZRegister& zm);
4320
4321 // Floating-point minimum number with immediate (predicated).
4322 void fminnm(const ZRegister& zd,
4323 const PRegisterM& pg,
4324 const ZRegister& zn,
4325 double imm);
4326
4327 // Floating-point minimum number (predicated).
4328 void fminnm(const ZRegister& zd,
4329 const PRegisterM& pg,
4330 const ZRegister& zn,
4331 const ZRegister& zm);
4332
4333 // Floating-point minimum number recursive reduction to scalar.
4334 void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4335
4336 // Floating-point minimum recursive reduction to scalar.
4337 void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4338
4339 // Floating-point fused multiply-add vectors (predicated), writing addend
4340 // [Zda = Zda + Zn * Zm].
4341 void fmla(const ZRegister& zda,
4342 const PRegisterM& pg,
4343 const ZRegister& zn,
4344 const ZRegister& zm);
4345
4346 // Floating-point fused multiply-add by indexed elements
4347 // (Zda = Zda + Zn * Zm[indexed]).
4348 void fmla(const ZRegister& zda,
4349 const ZRegister& zn,
4350 const ZRegister& zm,
4351 int index);
4352
4353 // Floating-point fused multiply-subtract vectors (predicated), writing
4354 // addend [Zda = Zda + -Zn * Zm].
4355 void fmls(const ZRegister& zda,
4356 const PRegisterM& pg,
4357 const ZRegister& zn,
4358 const ZRegister& zm);
4359
4360 // Floating-point fused multiply-subtract by indexed elements
4361 // (Zda = Zda + -Zn * Zm[indexed]).
4362 void fmls(const ZRegister& zda,
4363 const ZRegister& zn,
4364 const ZRegister& zm,
4365 int index);
4366
4367 // Move 8-bit floating-point immediate to vector elements (unpredicated).
4368 void fmov(const ZRegister& zd, double imm);
4369
4370 // Move 8-bit floating-point immediate to vector elements (predicated).
4371 void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);
4372
4373 // Floating-point fused multiply-subtract vectors (predicated), writing
4374 // multiplicand [Zdn = Za + -Zdn * Zm].
4375 void fmsb(const ZRegister& zdn,
4376 const PRegisterM& pg,
4377 const ZRegister& zm,
4378 const ZRegister& za);
4379
4380 // Floating-point multiply by immediate (predicated).
4381 void fmul(const ZRegister& zd,
4382 const PRegisterM& pg,
4383 const ZRegister& zn,
4384 double imm);
4385
4386 // Floating-point multiply vectors (predicated).
4387 void fmul(const ZRegister& zd,
4388 const PRegisterM& pg,
4389 const ZRegister& zn,
4390 const ZRegister& zm);
4391
4392 // Floating-point multiply by indexed elements.
4393 void fmul(const ZRegister& zd,
4394 const ZRegister& zn,
4395 const ZRegister& zm,
4396 unsigned index);
4397
4398 // Floating-point multiply vectors (unpredicated).
4399 void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4400
4401 // Floating-point multiply-extended vectors (predicated).
4402 void fmulx(const ZRegister& zd,
4403 const PRegisterM& pg,
4404 const ZRegister& zn,
4405 const ZRegister& zm);
4406
4407 // Floating-point negate (predicated).
4408 void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4409
4410 // Floating-point negated fused multiply-add vectors (predicated), writing
4411 // multiplicand [Zdn = -Za + -Zdn * Zm].
4412 void fnmad(const ZRegister& zdn,
4413 const PRegisterM& pg,
4414 const ZRegister& zm,
4415 const ZRegister& za);
4416
4417 // Floating-point negated fused multiply-add vectors (predicated), writing
4418 // addend [Zda = -Zda + -Zn * Zm].
4419 void fnmla(const ZRegister& zda,
4420 const PRegisterM& pg,
4421 const ZRegister& zn,
4422 const ZRegister& zm);
4423
4424 // Floating-point negated fused multiply-subtract vectors (predicated),
4425 // writing addend [Zda = -Zda + Zn * Zm].
4426 void fnmls(const ZRegister& zda,
4427 const PRegisterM& pg,
4428 const ZRegister& zn,
4429 const ZRegister& zm);
4430
4431 // Floating-point negated fused multiply-subtract vectors (predicated),
4432 // writing multiplicand [Zdn = -Za + Zdn * Zm].
4433 void fnmsb(const ZRegister& zdn,
4434 const PRegisterM& pg,
4435 const ZRegister& zm,
4436 const ZRegister& za);
4437
4438 // Floating-point reciprocal estimate (unpredicated).
4439 void frecpe(const ZRegister& zd, const ZRegister& zn);
4440
4441 // Floating-point reciprocal step (unpredicated).
4442 void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4443
4444 // Floating-point reciprocal exponent (predicated).
4445 void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4446
4447 // Floating-point round to integral value (predicated).
4448 void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4449
4450 // Floating-point round to integral value (predicated).
4451 void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4452
4453 // Floating-point round to integral value (predicated).
4454 void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4455
4456 // Floating-point round to integral value (predicated).
4457 void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4458
4459 // Floating-point round to integral value (predicated).
4460 void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4461
4462 // Floating-point round to integral value (predicated).
4463 void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4464
4465 // Floating-point round to integral value (predicated).
4466 void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4467
4468 // Floating-point reciprocal square root estimate (unpredicated).
4469 void frsqrte(const ZRegister& zd, const ZRegister& zn);
4470
4471 // Floating-point reciprocal square root step (unpredicated).
4472 void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4473
4474 // Floating-point adjust exponent by vector (predicated).
4475 void fscale(const ZRegister& zd,
4476 const PRegisterM& pg,
4477 const ZRegister& zn,
4478 const ZRegister& zm);
4479
4480 // Floating-point square root (predicated).
4481 void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4482
4483 // Floating-point subtract immediate (predicated).
4484 void fsub(const ZRegister& zd,
4485 const PRegisterM& pg,
4486 const ZRegister& zn,
4487 double imm);
4488
4489 // Floating-point subtract vectors (predicated).
4490 void fsub(const ZRegister& zd,
4491 const PRegisterM& pg,
4492 const ZRegister& zn,
4493 const ZRegister& zm);
4494
4495 // Floating-point subtract vectors (unpredicated).
4496 void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4497
4498 // Floating-point reversed subtract from immediate (predicated).
4499 void fsubr(const ZRegister& zd,
4500 const PRegisterM& pg,
4501 const ZRegister& zn,
4502 double imm);
4503
4504 // Floating-point reversed subtract vectors (predicated).
4505 void fsubr(const ZRegister& zd,
4506 const PRegisterM& pg,
4507 const ZRegister& zn,
4508 const ZRegister& zm);
4509
4510 // Floating-point trigonometric multiply-add coefficient.
4511 void ftmad(const ZRegister& zd,
4512 const ZRegister& zn,
4513 const ZRegister& zm,
4514 int imm3);
4515
4516 // Floating-point trigonometric starting value.
4517 void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4518
4519 // Floating-point trigonometric select coefficient.
4520 void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4521
4522 // Increment scalar by multiple of predicate constraint element count.
4523 void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4524
4525 // Increment scalar by multiple of predicate constraint element count.
4526 void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4527
4528 // Increment vector by multiple of predicate constraint element count.
4529 void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4530
4531 // Increment scalar by multiple of predicate constraint element count.
4532 void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4533
4534 // Increment vector by multiple of predicate constraint element count.
4535 void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4536
4537 // Increment scalar by active predicate element count.
4538 void incp(const Register& rdn, const PRegisterWithLaneSize& pg);
4539
4540 // Increment vector by active predicate element count.
4541 void incp(const ZRegister& zdn, const PRegister& pg);
4542
4543 // Increment scalar by multiple of predicate constraint element count.
4544 void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4545
4546 // Increment vector by multiple of predicate constraint element count.
4547 void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4548
4549 // Create index starting from and incremented by immediate.
4550 void index(const ZRegister& zd, int start, int step);
4551
4552 // Create index starting from and incremented by general-purpose register.
4553 void index(const ZRegister& zd, const Register& rn, const Register& rm);
4554
4555 // Create index starting from general-purpose register and incremented by
4556 // immediate.
4557 void index(const ZRegister& zd, const Register& rn, int imm5);
4558
4559 // Create index starting from immediate and incremented by general-purpose
4560 // register.
4561 void index(const ZRegister& zd, int imm5, const Register& rm);
4562
4563 // Insert general-purpose register in shifted vector.
4564 void insr(const ZRegister& zdn, const Register& rm);
4565
4566 // Insert SIMD&FP scalar register in shifted vector.
4567 void insr(const ZRegister& zdn, const VRegister& vm);
4568
4569 // Extract element after last to general-purpose register.
4570 void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);
4571
4572 // Extract element after last to SIMD&FP scalar register.
4573 void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4574
4575 // Extract last element to general-purpose register.
4576 void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);
4577
4578 // Extract last element to SIMD&FP scalar register.
4579 void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4580
4581 // Contiguous/gather load bytes to vector.
4582 void ld1b(const ZRegister& zt,
4583 const PRegisterZ& pg,
4584 const SVEMemOperand& addr);
4585
4586 // Contiguous/gather load halfwords to vector.
4587 void ld1h(const ZRegister& zt,
4588 const PRegisterZ& pg,
4589 const SVEMemOperand& addr);
4590
4591 // Contiguous/gather load words to vector.
4592 void ld1w(const ZRegister& zt,
4593 const PRegisterZ& pg,
4594 const SVEMemOperand& addr);
4595
4596 // Contiguous/gather load doublewords to vector.
4597 void ld1d(const ZRegister& zt,
4598 const PRegisterZ& pg,
4599 const SVEMemOperand& addr);
4600
4601 // TODO: Merge other loads into the SVEMemOperand versions.
4602
4603 // Load and broadcast unsigned byte to vector.
4604 void ld1rb(const ZRegister& zt,
4605 const PRegisterZ& pg,
4606 const SVEMemOperand& addr);
4607
4608 // Load and broadcast unsigned halfword to vector.
4609 void ld1rh(const ZRegister& zt,
4610 const PRegisterZ& pg,
4611 const SVEMemOperand& addr);
4612
4613 // Load and broadcast unsigned word to vector.
4614 void ld1rw(const ZRegister& zt,
4615 const PRegisterZ& pg,
4616 const SVEMemOperand& addr);
4617
4618 // Load and broadcast doubleword to vector.
4619 void ld1rd(const ZRegister& zt,
4620 const PRegisterZ& pg,
4621 const SVEMemOperand& addr);
4622
4623 // Contiguous load and replicate sixteen bytes.
4624 void ld1rqb(const ZRegister& zt,
4625 const PRegisterZ& pg,
4626 const SVEMemOperand& addr);
4627
4628 // Contiguous load and replicate eight halfwords.
4629 void ld1rqh(const ZRegister& zt,
4630 const PRegisterZ& pg,
4631 const SVEMemOperand& addr);
4632
4633 // Contiguous load and replicate four words.
4634 void ld1rqw(const ZRegister& zt,
4635 const PRegisterZ& pg,
4636 const SVEMemOperand& addr);
4637
4638 // Contiguous load and replicate two doublewords.
4639 void ld1rqd(const ZRegister& zt,
4640 const PRegisterZ& pg,
4641 const SVEMemOperand& addr);
4642
4643 // Contiguous load and replicate thirty-two bytes.
4644 void ld1rob(const ZRegister& zt,
4645 const PRegisterZ& pg,
4646 const SVEMemOperand& addr);
4647
4648 // Contiguous load and replicate sixteen halfwords.
4649 void ld1roh(const ZRegister& zt,
4650 const PRegisterZ& pg,
4651 const SVEMemOperand& addr);
4652
4653 // Contiguous load and replicate eight words.
4654 void ld1row(const ZRegister& zt,
4655 const PRegisterZ& pg,
4656 const SVEMemOperand& addr);
4657
4658 // Contiguous load and replicate four doublewords.
4659 void ld1rod(const ZRegister& zt,
4660 const PRegisterZ& pg,
4661 const SVEMemOperand& addr);
4662
4663 // Load and broadcast signed byte to vector.
4664 void ld1rsb(const ZRegister& zt,
4665 const PRegisterZ& pg,
4666 const SVEMemOperand& addr);
4667
4668 // Load and broadcast signed halfword to vector.
4669 void ld1rsh(const ZRegister& zt,
4670 const PRegisterZ& pg,
4671 const SVEMemOperand& addr);
4672
4673 // Load and broadcast signed word to vector.
4674 void ld1rsw(const ZRegister& zt,
4675 const PRegisterZ& pg,
4676 const SVEMemOperand& addr);
4677
4678 // Contiguous/gather load signed bytes to vector.
4679 void ld1sb(const ZRegister& zt,
4680 const PRegisterZ& pg,
4681 const SVEMemOperand& addr);
4682
4683 // Contiguous/gather load signed halfwords to vector.
4684 void ld1sh(const ZRegister& zt,
4685 const PRegisterZ& pg,
4686 const SVEMemOperand& addr);
4687
4688 // Contiguous/gather load signed words to vector.
4689 void ld1sw(const ZRegister& zt,
4690 const PRegisterZ& pg,
4691 const SVEMemOperand& addr);
4692
4693 // TODO: Merge other loads into the SVEMemOperand versions.
4694
4695 // Contiguous load two-byte structures to two vectors.
4696 void ld2b(const ZRegister& zt1,
4697 const ZRegister& zt2,
4698 const PRegisterZ& pg,
4699 const SVEMemOperand& addr);
4700
4701 // Contiguous load two-halfword structures to two vectors.
4702 void ld2h(const ZRegister& zt1,
4703 const ZRegister& zt2,
4704 const PRegisterZ& pg,
4705 const SVEMemOperand& addr);
4706
4707 // Contiguous load two-word structures to two vectors.
4708 void ld2w(const ZRegister& zt1,
4709 const ZRegister& zt2,
4710 const PRegisterZ& pg,
4711 const SVEMemOperand& addr);
4712
4713 // Contiguous load two-doubleword structures to two vectors.
4714 void ld2d(const ZRegister& zt1,
4715 const ZRegister& zt2,
4716 const PRegisterZ& pg,
4717 const SVEMemOperand& addr);
4718
4719 // Contiguous load three-byte structures to three vectors.
4720 void ld3b(const ZRegister& zt1,
4721 const ZRegister& zt2,
4722 const ZRegister& zt3,
4723 const PRegisterZ& pg,
4724 const SVEMemOperand& addr);
4725
4726 // Contiguous load three-halfword structures to three vectors.
4727 void ld3h(const ZRegister& zt1,
4728 const ZRegister& zt2,
4729 const ZRegister& zt3,
4730 const PRegisterZ& pg,
4731 const SVEMemOperand& addr);
4732
4733 // Contiguous load three-word structures to three vectors.
4734 void ld3w(const ZRegister& zt1,
4735 const ZRegister& zt2,
4736 const ZRegister& zt3,
4737 const PRegisterZ& pg,
4738 const SVEMemOperand& addr);
4739
4740 // Contiguous load three-doubleword structures to three vectors.
4741 void ld3d(const ZRegister& zt1,
4742 const ZRegister& zt2,
4743 const ZRegister& zt3,
4744 const PRegisterZ& pg,
4745 const SVEMemOperand& addr);
4746
4747 // Contiguous load four-byte structures to four vectors.
4748 void ld4b(const ZRegister& zt1,
4749 const ZRegister& zt2,
4750 const ZRegister& zt3,
4751 const ZRegister& zt4,
4752 const PRegisterZ& pg,
4753 const SVEMemOperand& addr);
4754
4755 // Contiguous load four-halfword structures to four vectors.
4756 void ld4h(const ZRegister& zt1,
4757 const ZRegister& zt2,
4758 const ZRegister& zt3,
4759 const ZRegister& zt4,
4760 const PRegisterZ& pg,
4761 const SVEMemOperand& addr);
4762
4763 // Contiguous load four-word structures to four vectors.
4764 void ld4w(const ZRegister& zt1,
4765 const ZRegister& zt2,
4766 const ZRegister& zt3,
4767 const ZRegister& zt4,
4768 const PRegisterZ& pg,
4769 const SVEMemOperand& addr);
4770
4771 // Contiguous load four-doubleword structures to four vectors.
4772 void ld4d(const ZRegister& zt1,
4773 const ZRegister& zt2,
4774 const ZRegister& zt3,
4775 const ZRegister& zt4,
4776 const PRegisterZ& pg,
4777 const SVEMemOperand& addr);
4778
4779 // Contiguous load first-fault unsigned bytes to vector.
4780 void ldff1b(const ZRegister& zt,
4781 const PRegisterZ& pg,
4782 const SVEMemOperand& addr);
4783
4784 // Contiguous load first-fault unsigned halfwords to vector.
4785 void ldff1h(const ZRegister& zt,
4786 const PRegisterZ& pg,
4787 const SVEMemOperand& addr);
4788
4789 // Contiguous load first-fault unsigned words to vector.
4790 void ldff1w(const ZRegister& zt,
4791 const PRegisterZ& pg,
4792 const SVEMemOperand& addr);
4793
4794 // Contiguous load first-fault doublewords to vector.
4795 void ldff1d(const ZRegister& zt,
4796 const PRegisterZ& pg,
4797 const SVEMemOperand& addr);
4798
4799 // Contiguous load first-fault signed bytes to vector.
4800 void ldff1sb(const ZRegister& zt,
4801 const PRegisterZ& pg,
4802 const SVEMemOperand& addr);
4803
4804 // Contiguous load first-fault signed halfwords to vector.
4805 void ldff1sh(const ZRegister& zt,
4806 const PRegisterZ& pg,
4807 const SVEMemOperand& addr);
4808
4809 // Contiguous load first-fault signed words to vector.
4810 void ldff1sw(const ZRegister& zt,
4811 const PRegisterZ& pg,
4812 const SVEMemOperand& addr);
4813
4814 // Gather load first-fault unsigned bytes to vector.
4815 void ldff1b(const ZRegister& zt,
4816 const PRegisterZ& pg,
4817 const Register& xn,
4818 const ZRegister& zm);
4819
4820 // Gather load first-fault unsigned bytes to vector (immediate index).
4821 void ldff1b(const ZRegister& zt,
4822 const PRegisterZ& pg,
4823 const ZRegister& zn,
4824 int imm5);
4825
4826 // Gather load first-fault doublewords to vector (vector index).
4827 void ldff1d(const ZRegister& zt,
4828 const PRegisterZ& pg,
4829 const Register& xn,
4830 const ZRegister& zm);
4831
4832 // Gather load first-fault doublewords to vector (immediate index).
4833 void ldff1d(const ZRegister& zt,
4834 const PRegisterZ& pg,
4835 const ZRegister& zn,
4836 int imm5);
4837
4838 // Gather load first-fault unsigned halfwords to vector (vector index).
4839 void ldff1h(const ZRegister& zt,
4840 const PRegisterZ& pg,
4841 const Register& xn,
4842 const ZRegister& zm);
4843
4844 // Gather load first-fault unsigned halfwords to vector (immediate index).
4845 void ldff1h(const ZRegister& zt,
4846 const PRegisterZ& pg,
4847 const ZRegister& zn,
4848 int imm5);
4849
4850 // Gather load first-fault signed bytes to vector (vector index).
4851 void ldff1sb(const ZRegister& zt,
4852 const PRegisterZ& pg,
4853 const Register& xn,
4854 const ZRegister& zm);
4855
4856 // Gather load first-fault signed bytes to vector (immediate index).
4857 void ldff1sb(const ZRegister& zt,
4858 const PRegisterZ& pg,
4859 const ZRegister& zn,
4860 int imm5);
4861
4862 // Gather load first-fault signed halfwords to vector (vector index).
4863 void ldff1sh(const ZRegister& zt,
4864 const PRegisterZ& pg,
4865 const Register& xn,
4866 const ZRegister& zm);
4867
4868 // Gather load first-fault signed halfwords to vector (immediate index).
4869 void ldff1sh(const ZRegister& zt,
4870 const PRegisterZ& pg,
4871 const ZRegister& zn,
4872 int imm5);
4873
4874 // Gather load first-fault signed words to vector (vector index).
4875 void ldff1sw(const ZRegister& zt,
4876 const PRegisterZ& pg,
4877 const Register& xn,
4878 const ZRegister& zm);
4879
4880 // Gather load first-fault signed words to vector (immediate index).
4881 void ldff1sw(const ZRegister& zt,
4882 const PRegisterZ& pg,
4883 const ZRegister& zn,
4884 int imm5);
4885
4886 // Gather load first-fault unsigned words to vector (vector index).
4887 void ldff1w(const ZRegister& zt,
4888 const PRegisterZ& pg,
4889 const Register& xn,
4890 const ZRegister& zm);
4891
4892 // Gather load first-fault unsigned words to vector (immediate index).
4893 void ldff1w(const ZRegister& zt,
4894 const PRegisterZ& pg,
4895 const ZRegister& zn,
4896 int imm5);
4897
4898 // Contiguous load non-fault unsigned bytes to vector (immediate index).
4899 void ldnf1b(const ZRegister& zt,
4900 const PRegisterZ& pg,
4901 const SVEMemOperand& addr);
4902
4903 // Contiguous load non-fault doublewords to vector (immediate index).
4904 void ldnf1d(const ZRegister& zt,
4905 const PRegisterZ& pg,
4906 const SVEMemOperand& addr);
4907
4908 // Contiguous load non-fault unsigned halfwords to vector (immediate
4909 // index).
4910 void ldnf1h(const ZRegister& zt,
4911 const PRegisterZ& pg,
4912 const SVEMemOperand& addr);
4913
4914 // Contiguous load non-fault signed bytes to vector (immediate index).
4915 void ldnf1sb(const ZRegister& zt,
4916 const PRegisterZ& pg,
4917 const SVEMemOperand& addr);
4918
4919 // Contiguous load non-fault signed halfwords to vector (immediate index).
4920 void ldnf1sh(const ZRegister& zt,
4921 const PRegisterZ& pg,
4922 const SVEMemOperand& addr);
4923
4924 // Contiguous load non-fault signed words to vector (immediate index).
4925 void ldnf1sw(const ZRegister& zt,
4926 const PRegisterZ& pg,
4927 const SVEMemOperand& addr);
4928
4929 // Contiguous load non-fault unsigned words to vector (immediate index).
4930 void ldnf1w(const ZRegister& zt,
4931 const PRegisterZ& pg,
4932 const SVEMemOperand& addr);
4933
4934 // Contiguous load non-temporal bytes to vector.
4935 void ldnt1b(const ZRegister& zt,
4936 const PRegisterZ& pg,
4937 const SVEMemOperand& addr);
4938
4939 // Contiguous load non-temporal halfwords to vector.
4940 void ldnt1h(const ZRegister& zt,
4941 const PRegisterZ& pg,
4942 const SVEMemOperand& addr);
4943
4944 // Contiguous load non-temporal words to vector.
4945 void ldnt1w(const ZRegister& zt,
4946 const PRegisterZ& pg,
4947 const SVEMemOperand& addr);
4948
4949 // Contiguous load non-temporal doublewords to vector.
4950 void ldnt1d(const ZRegister& zt,
4951 const PRegisterZ& pg,
4952 const SVEMemOperand& addr);
4953
4954 // Load SVE predicate/vector register.
4955 void ldr(const CPURegister& rt, const SVEMemOperand& addr);
4956
4957 // Logical shift left by immediate (predicated).
4958 void lsl(const ZRegister& zd,
4959 const PRegisterM& pg,
4960 const ZRegister& zn,
4961 int shift);
4962
4963 // Logical shift left by 64-bit wide elements (predicated).
4964 void lsl(const ZRegister& zd,
4965 const PRegisterM& pg,
4966 const ZRegister& zn,
4967 const ZRegister& zm);
4968
4969 // Logical shift left by immediate (unpredicated).
4970 void lsl(const ZRegister& zd, const ZRegister& zn, int shift);
4971
4972 // Logical shift left by 64-bit wide elements (unpredicated).
4973 void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4974
4975 // Reversed logical shift left by vector (predicated).
4976 void lslr(const ZRegister& zd,
4977 const PRegisterM& pg,
4978 const ZRegister& zn,
4979 const ZRegister& zm);
4980
4981 // Logical shift right by immediate (predicated).
4982 void lsr(const ZRegister& zd,
4983 const PRegisterM& pg,
4984 const ZRegister& zn,
4985 int shift);
4986
4987 // Logical shift right by 64-bit wide elements (predicated).
4988 void lsr(const ZRegister& zd,
4989 const PRegisterM& pg,
4990 const ZRegister& zn,
4991 const ZRegister& zm);
4992
4993 // Logical shift right by immediate (unpredicated).
4994 void lsr(const ZRegister& zd, const ZRegister& zn, int shift);
4995
4996 // Logical shift right by 64-bit wide elements (unpredicated).
4997 void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4998
4999 // Reversed logical shift right by vector (predicated).
5000 void lsrr(const ZRegister& zd,
5001 const PRegisterM& pg,
5002 const ZRegister& zn,
5003 const ZRegister& zm);
5004
5005 // Bitwise invert predicate.
5006 void not_(const PRegisterWithLaneSize& pd,
5007 const PRegisterZ& pg,
5008 const PRegisterWithLaneSize& pn);
5009
5010 // Bitwise invert predicate, setting the condition flags.
5011 void nots(const PRegisterWithLaneSize& pd,
5012 const PRegisterZ& pg,
5013 const PRegisterWithLaneSize& pn);
5014
5015 // Multiply-add vectors (predicated), writing multiplicand
5016 // [Zdn = Za + Zdn * Zm].
5017 void mad(const ZRegister& zdn,
5018 const PRegisterM& pg,
5019 const ZRegister& zm,
5020 const ZRegister& za);
5021
5022 // Multiply-add vectors (predicated), writing addend
5023 // [Zda = Zda + Zn * Zm].
5024 void mla(const ZRegister& zda,
5025 const PRegisterM& pg,
5026 const ZRegister& zn,
5027 const ZRegister& zm);
5028
5029 // Multiply-subtract vectors (predicated), writing addend
5030 // [Zda = Zda - Zn * Zm].
5031 void mls(const ZRegister& zda,
5032 const PRegisterM& pg,
5033 const ZRegister& zn,
5034 const ZRegister& zm);
5035
5036 // Move predicates (unpredicated)
5037 void mov(const PRegister& pd, const PRegister& pn);
5038
5039 // Move predicates (merging)
5040 void mov(const PRegisterWithLaneSize& pd,
5041 const PRegisterM& pg,
5042 const PRegisterWithLaneSize& pn);
5043
5044 // Move predicates (zeroing)
5045 void mov(const PRegisterWithLaneSize& pd,
5046 const PRegisterZ& pg,
5047 const PRegisterWithLaneSize& pn);
5048
5049 // Move general-purpose register to vector elements (unpredicated)
5050 void mov(const ZRegister& zd, const Register& xn);
5051
5052 // Move SIMD&FP scalar register to vector elements (unpredicated)
5053 void mov(const ZRegister& zd, const VRegister& vn);
5054
5055 // Move vector register (unpredicated)
5056 void mov(const ZRegister& zd, const ZRegister& zn);
5057
5058 // Move indexed element to vector elements (unpredicated)
5059 void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);
5060
5061 // Move general-purpose register to vector elements (predicated)
5062 void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
5063
5064 // Move SIMD&FP scalar register to vector elements (predicated)
5065 void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
5066
5067 // Move vector elements (predicated)
5068 void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5069
5070 // Move signed integer immediate to vector elements (predicated)
5071 void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
5072
5073 // Move signed immediate to vector elements (unpredicated).
5074 void mov(const ZRegister& zd, int imm8, int shift);
5075
5076 // Move logical bitmask immediate to vector (unpredicated).
5077 void mov(const ZRegister& zd, uint64_t imm);
5078
5079 // Move predicate (unpredicated), setting the condition flags
5080 void movs(const PRegister& pd, const PRegister& pn);
5081
5082 // Move predicates (zeroing), setting the condition flags
5083 void movs(const PRegisterWithLaneSize& pd,
5084 const PRegisterZ& pg,
5085 const PRegisterWithLaneSize& pn);
5086
5087 // Move prefix (predicated).
5088 void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
5089
5090 // Move prefix (unpredicated).
5091 void movprfx(const ZRegister& zd, const ZRegister& zn);
5092
5093 // Multiply-subtract vectors (predicated), writing multiplicand
5094 // [Zdn = Za - Zdn * Zm].
5095 void msb(const ZRegister& zdn,
5096 const PRegisterM& pg,
5097 const ZRegister& zm,
5098 const ZRegister& za);
5099
5100 // Multiply vectors (predicated).
5101 void mul(const ZRegister& zd,
5102 const PRegisterM& pg,
5103 const ZRegister& zn,
5104 const ZRegister& zm);
5105
5106 // Multiply by immediate (unpredicated).
5107 void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
5108
5109 // Bitwise NAND predicates.
5110 void nand(const PRegisterWithLaneSize& pd,
5111 const PRegisterZ& pg,
5112 const PRegisterWithLaneSize& pn,
5113 const PRegisterWithLaneSize& pm);
5114
5115 // Bitwise NAND predicates.
5116 void nands(const PRegisterWithLaneSize& pd,
5117 const PRegisterZ& pg,
5118 const PRegisterWithLaneSize& pn,
5119 const PRegisterWithLaneSize& pm);
5120
5121 // Negate (predicated).
5122 void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5123
5124 // Bitwise NOR predicates.
5125 void nor(const PRegisterWithLaneSize& pd,
5126 const PRegisterZ& pg,
5127 const PRegisterWithLaneSize& pn,
5128 const PRegisterWithLaneSize& pm);
5129
5130 // Bitwise NOR predicates.
5131 void nors(const PRegisterWithLaneSize& pd,
5132 const PRegisterZ& pg,
5133 const PRegisterWithLaneSize& pn,
5134 const PRegisterWithLaneSize& pm);
5135
5136 // Bitwise invert vector (predicated).
5137 void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5138
  // Bitwise OR inverted predicate.
  void orn(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise OR inverted predicate, setting the condition flags.
  void orns(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise OR with inverted immediate (unpredicated).
  void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise OR predicate.
  void orr(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise OR vectors (predicated).
  void orr(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Bitwise OR with immediate (unpredicated).
  void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise OR vectors (unpredicated).
  void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise OR predicate, setting the condition flags.
  void orrs(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise OR reduction to scalar.
  void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5180
  // Set all predicate elements to false.
  void pfalse(const PRegisterWithLaneSize& pd);

  // Set the first active predicate element to true.
  void pfirst(const PRegisterWithLaneSize& pd,
              const PRegister& pg,
              const PRegisterWithLaneSize& pn);

  // Find next active predicate.
  void pnext(const PRegisterWithLaneSize& pd,
             const PRegister& pg,
             const PRegisterWithLaneSize& pn);

  // Prefetch bytes.
  void prfb(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Prefetch halfwords.
  void prfh(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Prefetch words.
  void prfw(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Prefetch doublewords.
  void prfd(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Set condition flags for predicate.
  void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);

  // Initialise predicate from named constraint.
  void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);

  // Initialise predicate from named constraint, setting the condition flags.
  void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);

  // Unpack and widen the high half of a predicate.
  void punpkhi(const PRegisterWithLaneSize& pd,
               const PRegisterWithLaneSize& pn);

  // Unpack and widen the low half of a predicate.
  void punpklo(const PRegisterWithLaneSize& pd,
               const PRegisterWithLaneSize& pn);

  // Reverse bits (predicated).
  void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5233
  // Read the first-fault register.
  void rdffr(const PRegisterWithLaneSize& pd);

  // Return predicate of successfully loaded elements.
  void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);

  // Return predicate of successfully loaded elements, setting the
  // condition flags.
  void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);

  // Read multiple of vector register size to scalar register.
  void rdvl(const Register& xd, int imm6);
5245
  // Reverse all elements in a predicate.
  void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);

  // Reverse all elements in a vector (unpredicated).
  void rev(const ZRegister& zd, const ZRegister& zn);

  // Reverse bytes within elements (predicated).
  void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Reverse halfwords within elements (predicated).
  void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Reverse words within elements (predicated).
  void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed absolute difference (predicated).
  void sabd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed add reduction to scalar.
  void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);

  // Signed integer convert to floating-point (predicated).
  void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed divide (predicated).
  void sdiv(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed reversed divide (predicated).
  void sdivr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed dot product by indexed quadtuplet.
  void sdot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);

  // Signed dot product.
  void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Conditionally select elements from two predicates.
  void sel(const PRegisterWithLaneSize& pd,
           const PRegister& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Conditionally select elements from two vectors.
  void sel(const ZRegister& zd,
           const PRegister& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Initialise the first-fault register to all true.
  void setffr();
5308
  // Signed maximum vectors (predicated).
  void smax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed maximum with immediate (unpredicated).
  void smax(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Signed maximum reduction to scalar.
  void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Signed minimum vectors (predicated).
  void smin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed minimum with immediate (unpredicated).
  void smin(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Signed minimum reduction to scalar.
  void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Signed multiply returning high half (predicated).
  void smulh(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Splice two vectors under predicate control.
  void splice(const ZRegister& zd,
              const PRegister& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Splice two vectors under predicate control (constructive).
  void splice_con(const ZRegister& zd,
                  const PRegister& pg,
                  const ZRegister& zn,
                  const ZRegister& zm);

  // Signed saturating add vectors (unpredicated).
  void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating add immediate (unpredicated).
  // NOTE(review): shift = -1 appears to let the assembler choose the
  // immediate shift encoding — confirm against the operand encoding rules.
  void sqadd(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);
5359
5360 // Signed saturating decrement scalar by multiple of 8-bit predicate
5361 // constraint element count.
5362 void sqdecb(const Register& xd,
5363 const Register& wn,
5364 int pattern,
5365 int multiplier);
5366
  // Signed saturating decrement scalar by multiple of 8-bit predicate
  // constraint element count. The single-register (rdn) form is
  // destructive: rdn is both source and destination.
  void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 64-bit predicate
  // constraint element count.
  void sqdecd(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 64-bit predicate
  // constraint element count.
  void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement vector by multiple of 64-bit predicate
  // constraint element count.
  void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqdech(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement vector by multiple of 16-bit predicate
  // constraint element count.
  void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement scalar by active predicate element count.
  void sqdecp(const Register& xd,
              const PRegisterWithLaneSize& pg,
              const Register& wn);

  // Signed saturating decrement scalar by active predicate element count.
  void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);

  // Signed saturating decrement vector by active predicate element count.
  void sqdecp(const ZRegister& zdn, const PRegister& pg);

  // Signed saturating decrement scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqdecw(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement vector by multiple of 32-bit predicate
  // constraint element count.
  void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by multiple of 8-bit predicate
  // constraint element count.
  void sqincb(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating increment scalar by multiple of 8-bit predicate
  // constraint element count.
  void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5437
5438 // Signed saturating increment scalar by multiple of 64-bit predicate
5439 // constraint element count.
5440 void sqincd(const Register& xd,
5441 const Register& wn,
5442 int pattern,
5443 int multiplier);
5444
  // Signed saturating increment scalar by multiple of 64-bit predicate
  // constraint element count. The single-register (rdn) form is
  // destructive: rdn is both source and destination.
  void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment vector by multiple of 64-bit predicate
  // constraint element count.
  void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqinch(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating increment scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment vector by multiple of 16-bit predicate
  // constraint element count.
  void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by active predicate element count.
  void sqincp(const Register& xd,
              const PRegisterWithLaneSize& pg,
              const Register& wn);

  // Signed saturating increment scalar by active predicate element count.
  void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);

  // Signed saturating increment vector by active predicate element count.
  void sqincp(const ZRegister& zdn, const PRegister& pg);

  // Signed saturating increment scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqincw(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating increment scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment vector by multiple of 32-bit predicate
  // constraint element count.
  void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating subtract vectors (unpredicated).
  void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating subtract immediate (unpredicated).
  void sqsub(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);
5502
  // Contiguous/scatter store bytes from vector.
  void st1b(const ZRegister& zt,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous/scatter store halfwords from vector.
  void st1h(const ZRegister& zt,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous/scatter store words from vector.
  void st1w(const ZRegister& zt,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous/scatter store doublewords from vector.
  void st1d(const ZRegister& zt,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store two-byte structures from two vectors.
  void st2b(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store two-halfword structures from two vectors.
  void st2h(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store two-word structures from two vectors.
  void st2w(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store two-doubleword structures from two vectors.
  void st2d(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);
5546
  // Contiguous store three-byte structures from three vectors.
  void st3b(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store three-halfword structures from three vectors.
  void st3h(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store three-word structures from three vectors.
  void st3w(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store three-doubleword structures from three vectors.
  void st3d(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store four-byte structures from four vectors.
  void st4b(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store four-halfword structures from four vectors.
  void st4h(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store four-word structures from four vectors.
  void st4w(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store four-doubleword structures from four vectors.
  void st4d(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Contiguous store non-temporal bytes from vector.
  void stnt1b(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Contiguous store non-temporal halfwords from vector.
  void stnt1h(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Contiguous store non-temporal words from vector.
  void stnt1w(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Contiguous store non-temporal doublewords from vector.
  void stnt1d(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Store SVE predicate/vector register. The type of rt (predicate or
  // vector) selects between the two STR forms.
  void str(const CPURegister& rt, const SVEMemOperand& addr);
5629
  // Subtract vectors (predicated).
  void sub(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Subtract vectors (unpredicated).
  void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Subtract immediate (unpredicated).
  void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);

  // Reversed subtract vectors (predicated).
  void subr(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Reversed subtract from immediate (unpredicated).
  void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);

  // Signed unpack and extend the high half of a vector.
  void sunpkhi(const ZRegister& zd, const ZRegister& zn);

  // Signed unpack and extend the low half of a vector.
  void sunpklo(const ZRegister& zd, const ZRegister& zn);

  // Signed byte extend (predicated).
  void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed halfword extend (predicated).
  void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed word extend (predicated).
  void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Programmable table lookup/permute using vector of indices into a
  // vector.
  void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleave even-indexed elements from two predicates.
  void trn1(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave even-indexed elements from two vectors.
  void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleave odd-indexed elements from two predicates.
  void trn2(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave odd-indexed elements from two vectors.
  void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5685
  // Unsigned absolute difference (predicated).
  void uabd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned add reduction to scalar.
  void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);

  // Unsigned integer convert to floating-point (predicated).
  void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned divide (predicated).
  void udiv(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned reversed divide (predicated).
  void udivr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned dot product by indexed quadtuplet.
  void udot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);

  // Unsigned dot product.
  void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned maximum vectors (predicated).
  void umax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned maximum with immediate (unpredicated).
  void umax(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Unsigned maximum reduction to scalar.
  void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Unsigned minimum vectors (predicated).
  void umin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned minimum with immediate (unpredicated).
  void umin(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Unsigned minimum reduction to scalar.
  void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Unsigned multiply returning high half (predicated).
  void umulh(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned saturating add vectors (unpredicated).
  void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned saturating add immediate (unpredicated).
  void uqadd(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);
5757
  // Unsigned saturating decrement scalar by multiple of 8-bit predicate
  // constraint element count. All rdn/zdn forms below are destructive:
  // the register is both source and destination.
  void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement scalar by multiple of 64-bit predicate
  // constraint element count.
  void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement vector by multiple of 64-bit predicate
  // constraint element count.
  void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement scalar by multiple of 16-bit predicate
  // constraint element count.
  void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement vector by multiple of 16-bit predicate
  // constraint element count.
  void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement scalar by active predicate element count.
  void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Unsigned saturating decrement vector by active predicate element count.
  void uqdecp(const ZRegister& zdn, const PRegister& pg);

  // Unsigned saturating decrement scalar by multiple of 32-bit predicate
  // constraint element count.
  void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement vector by multiple of 32-bit predicate
  // constraint element count.
  void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by multiple of 8-bit predicate
  // constraint element count.
  void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by multiple of 64-bit predicate
  // constraint element count.
  void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment vector by multiple of 64-bit predicate
  // constraint element count.
  void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by multiple of 16-bit predicate
  // constraint element count.
  void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment vector by multiple of 16-bit predicate
  // constraint element count.
  void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by active predicate element count.
  void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Unsigned saturating increment vector by active predicate element count.
  void uqincp(const ZRegister& zdn, const PRegister& pg);

  // Unsigned saturating increment scalar by multiple of 32-bit predicate
  // constraint element count.
  void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment vector by multiple of 32-bit predicate
  // constraint element count.
  void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating subtract vectors (unpredicated).
  void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned saturating subtract immediate (unpredicated).
  void uqsub(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);
5834
  // Unsigned unpack and extend the high half of a vector.
  void uunpkhi(const ZRegister& zd, const ZRegister& zn);

  // Unsigned unpack and extend the low half of a vector.
  void uunpklo(const ZRegister& zd, const ZRegister& zn);

  // Unsigned byte extend (predicated).
  void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned halfword extend (predicated).
  void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned word extend (predicated).
  void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Concatenate even-indexed elements from two predicates.
  void uzp1(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Concatenate even-indexed elements from two vectors.
  void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Concatenate odd-indexed elements from two predicates.
  void uzp2(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Concatenate odd-indexed elements from two vectors.
  void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // While incrementing signed scalar less than or equal to scalar.
  void whilele(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While incrementing unsigned scalar lower than scalar.
  void whilelo(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While incrementing unsigned scalar lower or same as scalar.
  void whilels(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While incrementing signed scalar less than scalar.
  void whilelt(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // Write the first-fault register.
  void wrffr(const PRegisterWithLaneSize& pn);

  // Interleave elements from the low halves of two predicates.
  void zip1(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave elements from the low halves of two vectors.
  void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleave elements from the high halves of two predicates.
  void zip2(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave elements from the high halves of two vectors.
  void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5904
  // The following instructions belong to the SVE2 extension. "Bottom"
  // operates on even-numbered elements, "top" on odd-numbered elements.

  // Add with carry long (bottom).
  void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Add with carry long (top).
  void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Add narrow high part (bottom).
  void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Add narrow high part (top).
  void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Add pairwise.
  void addp(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Bitwise clear and exclusive OR.
  void bcax(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            const ZRegister& zk);

  // Scatter lower bits into positions selected by bitmask.
  void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Gather lower bits from positions selected by bitmask.
  void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Group bits to right or left as selected by bitmask.
  void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise select.
  void bsl(const ZRegister& zd,
           const ZRegister& zn,
           const ZRegister& zm,
           const ZRegister& zk);

  // Bitwise select with first input inverted.
  void bsl1n(const ZRegister& zd,
             const ZRegister& zn,
             const ZRegister& zm,
             const ZRegister& zk);

  // Bitwise select with second input inverted.
  void bsl2n(const ZRegister& zd,
             const ZRegister& zn,
             const ZRegister& zm,
             const ZRegister& zk);

  // Complex integer add with rotate.
  void cadd(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            int rot);

  // Complex integer dot product (indexed).
  void cdot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index,
            int rot);

  // Complex integer dot product.
  void cdot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int rot);

  // Complex integer multiply-add with rotate (indexed).
  void cmla(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index,
            int rot);

  // Complex integer multiply-add with rotate.
  void cmla(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int rot);

  // Bitwise exclusive OR of three vectors.
  void eor3(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            const ZRegister& zk);

  // Interleaving exclusive OR (bottom, top).
  void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleaving exclusive OR (top, bottom).
  void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point add pairwise.
  void faddp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);
6005
  // Floating-point up convert long (top, predicated).
  void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point down convert and narrow (top, predicated).
  void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point down convert, rounding to odd (predicated).
  void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point down convert, rounding to odd (top, predicated).
  void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point base 2 logarithm as integer.
  void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point maximum number pairwise.
  void fmaxnmp(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Floating-point maximum pairwise.
  void fmaxp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point minimum number pairwise.
  void fminnmp(const ZRegister& zd,
               const PRegisterM& pg,
               const ZRegister& zn,
               const ZRegister& zm);

  // Floating-point minimum pairwise.
  void fminp(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // In the fml* group below, zda accumulates: it is both addend and
  // destination. "Bottom" reads even-numbered source elements, "top"
  // odd-numbered ones.

  // Half-precision floating-point multiply-add long to single-precision
  // (bottom).
  void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Half-precision floating-point multiply-add long to single-precision
  // (top).
  void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (bottom).
  void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (top, indexed).
  void fmlslt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-add long to single-precision
  // (bottom, indexed).
  void fmlalb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-add long to single-precision
  // (top, indexed).
  void fmlalt(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (bottom, indexed).
  void fmlslb(const ZRegister& zda,
              const ZRegister& zn,
              const ZRegister& zm,
              int index);

  // Half-precision floating-point multiply-subtract long from
  // single-precision (top).
  void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6088
6089 // Count matching elements in vector.
6090 void histcnt(const ZRegister& zd,
6091 const PRegisterZ& pg,
6092 const ZRegister& zn,
6093 const ZRegister& zm);
6094
6095 // Count matching elements in vector segments.
6096 void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6097
6098 // Gather load non-temporal signed bytes.
6099 void ldnt1sb(const ZRegister& zt,
6100 const PRegisterZ& pg,
6101 const SVEMemOperand& addr);
6102
6103 // Gather load non-temporal signed halfwords.
6104 void ldnt1sh(const ZRegister& zt,
6105 const PRegisterZ& pg,
6106 const SVEMemOperand& addr);
6107
6108 // Gather load non-temporal signed words.
6109 void ldnt1sw(const ZRegister& zt,
6110 const PRegisterZ& pg,
6111 const SVEMemOperand& addr);
6112
6113 // Detect any matching elements, setting the condition flags.
6114 void match(const PRegisterWithLaneSize& pd,
6115 const PRegisterZ& pg,
6116 const ZRegister& zn,
6117 const ZRegister& zm);
6118
6119 // Multiply-add to accumulator (indexed).
6120 void mla(const ZRegister& zda,
6121 const ZRegister& zn,
6122 const ZRegister& zm,
6123 int index);
6124
6125 // Multiply-subtract from accumulator (indexed).
6126 void mls(const ZRegister& zda,
6127 const ZRegister& zn,
6128 const ZRegister& zm,
6129 int index);
6130
6131 // Multiply (indexed).
6132 void mul(const ZRegister& zd,
6133 const ZRegister& zn,
6134 const ZRegister& zm,
6135 int index);
6136
6137 // Multiply vectors (unpredicated).
6138 void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6139
6140 // Bitwise inverted select.
6141 void nbsl(const ZRegister& zd,
6142 const ZRegister& zn,
6143 const ZRegister& zm,
6144 const ZRegister& zk);
6145
6146 // Detect no matching elements, setting the condition flags.
6147 void nmatch(const PRegisterWithLaneSize& pd,
6148 const PRegisterZ& pg,
6149 const ZRegister& zn,
6150 const ZRegister& zm);
6151
6152 // Polynomial multiply vectors (unpredicated).
6153 void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6154
6155 // Polynomial multiply long (bottom).
6156 void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6157
6158 // Polynomial multiply long (top).
6159 void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6160
6161 // Rounding add narrow high part (bottom).
6162 void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6163
6164 // Rounding add narrow high part (top).
6165 void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6166
6167 // Rounding shift right narrow by immediate (bottom).
6168 void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6169
6170 // Rounding shift right narrow by immediate (top).
6171 void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6172
6173 // Rounding subtract narrow high part (bottom).
6174 void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6175
6176 // Rounding subtract narrow high part (top).
6177 void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6178
6179 // Signed absolute difference and accumulate.
6180 void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6181
6182 // Signed absolute difference and accumulate long (bottom).
6183 void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6184
6185 // Signed absolute difference and accumulate long (top).
6186 void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6187
6188 // Signed absolute difference long (bottom).
6189 void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6190
6191 // Signed absolute difference long (top).
6192 void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6193
6194 // Signed add and accumulate long pairwise.
6195 void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6196
6197 // Signed add long (bottom).
6198 void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6199
6200 // Signed add long (bottom + top).
6201 void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6202
6203 // Signed add long (top).
6204 void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6205
6206 // Signed add wide (bottom).
6207 void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6208
6209 // Signed add wide (top).
6210 void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6211
6212 // Subtract with carry long (bottom).
6213 void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6214
6215 // Subtract with carry long (top).
6216 void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6217
6218 // Signed halving addition.
6219 void shadd(const ZRegister& zd,
6220 const PRegisterM& pg,
6221 const ZRegister& zn,
6222 const ZRegister& zm);
6223
6224 // Shift right narrow by immediate (bottom).
6225 void shrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6226
6227 // Shift right narrow by immediate (top).
6228 void shrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6229
6230 // Signed halving subtract.
6231 void shsub(const ZRegister& zd,
6232 const PRegisterM& pg,
6233 const ZRegister& zn,
6234 const ZRegister& zm);
6235
6236 // Signed halving subtract reversed vectors.
6237 void shsubr(const ZRegister& zd,
6238 const PRegisterM& pg,
6239 const ZRegister& zn,
6240 const ZRegister& zm);
6241
6242 // Shift left and insert (immediate).
6243 void sli(const ZRegister& zd, const ZRegister& zn, int shift);
6244
6245 // Signed maximum pairwise.
6246 void smaxp(const ZRegister& zd,
6247 const PRegisterM& pg,
6248 const ZRegister& zn,
6249 const ZRegister& zm);
6250
6251 // Signed minimum pairwise.
6252 void sminp(const ZRegister& zd,
6253 const PRegisterM& pg,
6254 const ZRegister& zn,
6255 const ZRegister& zm);
6256
6257 // Signed multiply-add long to accumulator (bottom, indexed).
6258 void smlalb(const ZRegister& zda,
6259 const ZRegister& zn,
6260 const ZRegister& zm,
6261 int index);
6262
6263 // Signed multiply-add long to accumulator (bottom).
6264 void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6265
6266 // Signed multiply-add long to accumulator (top, indexed).
6267 void smlalt(const ZRegister& zda,
6268 const ZRegister& zn,
6269 const ZRegister& zm,
6270 int index);
6271
6272 // Signed multiply-add long to accumulator (top).
6273 void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6274
6275 // Signed multiply-subtract long from accumulator (bottom, indexed).
6276 void smlslb(const ZRegister& zda,
6277 const ZRegister& zn,
6278 const ZRegister& zm,
6279 int index);
6280
6281 // Signed multiply-subtract long from accumulator (bottom).
6282 void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6283
6284 // Signed multiply-subtract long from accumulator (top, indexed).
6285 void smlslt(const ZRegister& zda,
6286 const ZRegister& zn,
6287 const ZRegister& zm,
6288 int index);
6289
6290 // Signed multiply-subtract long from accumulator (top).
6291 void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6292
6293 // Signed multiply returning high half (unpredicated).
6294 void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6295
6296 // Signed multiply long (bottom, indexed).
6297 void smullb(const ZRegister& zd,
6298 const ZRegister& zn,
6299 const ZRegister& zm,
6300 int index);
6301
6302 // Signed multiply long (bottom).
6303 void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6304
6305 // Signed multiply long (top, indexed).
6306 void smullt(const ZRegister& zd,
6307 const ZRegister& zn,
6308 const ZRegister& zm,
6309 int index);
6310
6311 // Signed multiply long (top).
6312 void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6313
6314 // Signed saturating absolute value.
6315 void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6316
6317 // Signed saturating addition (predicated).
6318 void sqadd(const ZRegister& zd,
6319 const PRegisterM& pg,
6320 const ZRegister& zn,
6321 const ZRegister& zm);
6322
6323 // Saturating complex integer add with rotate.
6324 void sqcadd(const ZRegister& zd,
6325 const ZRegister& zn,
6326 const ZRegister& zm,
6327 int rot);
6328
6329 // Signed saturating doubling multiply-add long to accumulator (bottom,
6330 // indexed).
6331 void sqdmlalb(const ZRegister& zda,
6332 const ZRegister& zn,
6333 const ZRegister& zm,
6334 int index);
6335
6336 // Signed saturating doubling multiply-add long to accumulator (bottom).
6337 void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6338
6339 // Signed saturating doubling multiply-add long to accumulator (bottom x
6340 // top).
6341 void sqdmlalbt(const ZRegister& zda,
6342 const ZRegister& zn,
6343 const ZRegister& zm);
6344
6345 // Signed saturating doubling multiply-add long to accumulator (top,
6346 // indexed).
6347 void sqdmlalt(const ZRegister& zda,
6348 const ZRegister& zn,
6349 const ZRegister& zm,
6350 int index);
6351
6352 // Signed saturating doubling multiply-add long to accumulator (top).
6353 void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6354
6355 // Signed saturating doubling multiply-subtract long from accumulator
6356 // (bottom, indexed).
6357 void sqdmlslb(const ZRegister& zda,
6358 const ZRegister& zn,
6359 const ZRegister& zm,
6360 int index);
6361
6362 // Signed saturating doubling multiply-subtract long from accumulator
6363 // (bottom).
6364 void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6365
6366 // Signed saturating doubling multiply-subtract long from accumulator
6367 // (bottom x top).
6368 void sqdmlslbt(const ZRegister& zda,
6369 const ZRegister& zn,
6370 const ZRegister& zm);
6371
6372 // Signed saturating doubling multiply-subtract long from accumulator
6373 // (top, indexed).
6374 void sqdmlslt(const ZRegister& zda,
6375 const ZRegister& zn,
6376 const ZRegister& zm,
6377 int index);
6378
6379 // Signed saturating doubling multiply-subtract long from accumulator
6380 // (top).
6381 void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6382
6383 // Signed saturating doubling multiply high (indexed).
6384 void sqdmulh(const ZRegister& zd,
6385 const ZRegister& zn,
6386 const ZRegister& zm,
6387 int index);
6388
6389 // Signed saturating doubling multiply high (unpredicated).
6390 void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6391
6392 // Signed saturating doubling multiply long (bottom, indexed).
6393 void sqdmullb(const ZRegister& zd,
6394 const ZRegister& zn,
6395 const ZRegister& zm,
6396 int index);
6397
6398 // Signed saturating doubling multiply long (bottom).
6399 void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6400
6401 // Signed saturating doubling multiply long (top, indexed).
6402 void sqdmullt(const ZRegister& zd,
6403 const ZRegister& zn,
6404 const ZRegister& zm,
6405 int index);
6406
6407 // Signed saturating doubling multiply long (top).
6408 void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6409
6410 // Signed saturating negate.
6411 void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6412
6413 // Saturating rounding doubling complex integer multiply-add high with
6414 // rotate (indexed).
6415 void sqrdcmlah(const ZRegister& zda,
6416 const ZRegister& zn,
6417 const ZRegister& zm,
6418 int index,
6419 int rot);
6420
6421 // Saturating rounding doubling complex integer multiply-add high with
6422 // rotate.
6423 void sqrdcmlah(const ZRegister& zda,
6424 const ZRegister& zn,
6425 const ZRegister& zm,
6426 int rot);
6427
6428 // Signed saturating rounding doubling multiply-add high to accumulator
6429 // (indexed).
6430 void sqrdmlah(const ZRegister& zda,
6431 const ZRegister& zn,
6432 const ZRegister& zm,
6433 int index);
6434
6435 // Signed saturating rounding doubling multiply-add high to accumulator
6436 // (unpredicated).
6437 void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6438
6439 // Signed saturating rounding doubling multiply-subtract high from
6440 // accumulator (indexed).
6441 void sqrdmlsh(const ZRegister& zda,
6442 const ZRegister& zn,
6443 const ZRegister& zm,
6444 int index);
6445
6446 // Signed saturating rounding doubling multiply-subtract high from
6447 // accumulator (unpredicated).
6448 void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6449
6450 // Signed saturating rounding doubling multiply high (indexed).
6451 void sqrdmulh(const ZRegister& zd,
6452 const ZRegister& zn,
6453 const ZRegister& zm,
6454 int index);
6455
6456 // Signed saturating rounding doubling multiply high (unpredicated).
6457 void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6458
6459 // Signed saturating rounding shift left by vector (predicated).
6460 void sqrshl(const ZRegister& zd,
6461 const PRegisterM& pg,
6462 const ZRegister& zn,
6463 const ZRegister& zm);
6464
6465 // Signed saturating rounding shift left reversed vectors (predicated).
6466 void sqrshlr(const ZRegister& zd,
6467 const PRegisterM& pg,
6468 const ZRegister& zn,
6469 const ZRegister& zm);
6470
6471 // Signed saturating rounding shift right narrow by immediate (bottom).
6472 void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6473
6474 // Signed saturating rounding shift right narrow by immediate (top).
6475 void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6476
6477 // Signed saturating rounding shift right unsigned narrow by immediate
6478 // (bottom).
6479 void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6480
6481 // Signed saturating rounding shift right unsigned narrow by immediate
6482 // (top).
6483 void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6484
6485 // Signed saturating shift left by immediate.
6486 void sqshl(const ZRegister& zd,
6487 const PRegisterM& pg,
6488 const ZRegister& zn,
6489 int shift);
6490
6491 // Signed saturating shift left by vector (predicated).
6492 void sqshl(const ZRegister& zd,
6493 const PRegisterM& pg,
6494 const ZRegister& zn,
6495 const ZRegister& zm);
6496
6497 // Signed saturating shift left reversed vectors (predicated).
6498 void sqshlr(const ZRegister& zd,
6499 const PRegisterM& pg,
6500 const ZRegister& zn,
6501 const ZRegister& zm);
6502
6503 // Signed saturating shift left unsigned by immediate.
6504 void sqshlu(const ZRegister& zd,
6505 const PRegisterM& pg,
6506 const ZRegister& zn,
6507 int shift);
6508
6509 // Signed saturating shift right narrow by immediate (bottom).
6510 void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6511
6512 // Signed saturating shift right narrow by immediate (top).
6513 void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6514
6515 // Signed saturating shift right unsigned narrow by immediate (bottom).
6516 void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6517
6518 // Signed saturating shift right unsigned narrow by immediate (top).
6519 void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6520
6521 // Signed saturating subtraction (predicated).
6522 void sqsub(const ZRegister& zd,
6523 const PRegisterM& pg,
6524 const ZRegister& zn,
6525 const ZRegister& zm);
6526
6527 // Signed saturating subtraction reversed vectors (predicated).
6528 void sqsubr(const ZRegister& zd,
6529 const PRegisterM& pg,
6530 const ZRegister& zn,
6531 const ZRegister& zm);
6532
6533 // Signed saturating extract narrow (bottom).
6534 void sqxtnb(const ZRegister& zd, const ZRegister& zn);
6535
6536 // Signed saturating extract narrow (top).
6537 void sqxtnt(const ZRegister& zd, const ZRegister& zn);
6538
6539 // Signed saturating unsigned extract narrow (bottom).
6540 void sqxtunb(const ZRegister& zd, const ZRegister& zn);
6541
6542 // Signed saturating unsigned extract narrow (top).
6543 void sqxtunt(const ZRegister& zd, const ZRegister& zn);
6544
6545 // Signed rounding halving addition.
6546 void srhadd(const ZRegister& zd,
6547 const PRegisterM& pg,
6548 const ZRegister& zn,
6549 const ZRegister& zm);
6550
6551 // Shift right and insert (immediate).
6552 void sri(const ZRegister& zd, const ZRegister& zn, int shift);
6553
6554 // Signed rounding shift left by vector (predicated).
6555 void srshl(const ZRegister& zd,
6556 const PRegisterM& pg,
6557 const ZRegister& zn,
6558 const ZRegister& zm);
6559
6560 // Signed rounding shift left reversed vectors (predicated).
6561 void srshlr(const ZRegister& zd,
6562 const PRegisterM& pg,
6563 const ZRegister& zn,
6564 const ZRegister& zm);
6565
6566 // Signed rounding shift right by immediate.
6567 void srshr(const ZRegister& zd,
6568 const PRegisterM& pg,
6569 const ZRegister& zn,
6570 int shift);
6571
6572 // Signed rounding shift right and accumulate (immediate).
6573 void srsra(const ZRegister& zda, const ZRegister& zn, int shift);
6574
6575 // Signed shift left long by immediate (bottom).
6576 void sshllb(const ZRegister& zd, const ZRegister& zn, int shift);
6577
6578 // Signed shift left long by immediate (top).
6579 void sshllt(const ZRegister& zd, const ZRegister& zn, int shift);
6580
6581 // Signed shift right and accumulate (immediate).
6582 void ssra(const ZRegister& zda, const ZRegister& zn, int shift);
6583
6584 // Signed subtract long (bottom).
6585 void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6586
6587 // Signed subtract long (bottom - top).
6588 void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6589
6590 // Signed subtract long (top).
6591 void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6592
6593 // Signed subtract long (top - bottom).
6594 void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6595
6596 // Signed subtract wide (bottom).
6597 void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6598
6599 // Signed subtract wide (top).
6600 void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6601
6602 // Subtract narrow high part (bottom).
6603 void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6604
6605 // Subtract narrow high part (top).
6606 void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6607
6608 // Signed saturating addition of unsigned value.
6609 void suqadd(const ZRegister& zd,
6610 const PRegisterM& pg,
6611 const ZRegister& zn,
6612 const ZRegister& zm);
6613
6614 // Programmable table lookup in one or two vector table (zeroing).
6615 void tbl(const ZRegister& zd,
6616 const ZRegister& zn1,
6617 const ZRegister& zn2,
6618 const ZRegister& zm);
6619
6620 // Programmable table lookup in single vector table (merging).
6621 void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6622
6623 // Unsigned absolute difference and accumulate.
6624 void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6625
6626 // Unsigned absolute difference and accumulate long (bottom).
6627 void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6628
6629 // Unsigned absolute difference and accumulate long (top).
6630 void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6631
6632 // Unsigned absolute difference long (bottom).
6633 void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6634
6635 // Unsigned absolute difference long (top).
6636 void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6637
6638 // Unsigned add and accumulate long pairwise.
6639 void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6640
6641 // Unsigned add long (bottom).
6642 void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6643
6644 // Unsigned add long (top).
6645 void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6646
6647 // Unsigned add wide (bottom).
6648 void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6649
6650 // Unsigned add wide (top).
6651 void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6652
6653 // Unsigned halving addition.
6654 void uhadd(const ZRegister& zd,
6655 const PRegisterM& pg,
6656 const ZRegister& zn,
6657 const ZRegister& zm);
6658
6659 // Unsigned halving subtract.
6660 void uhsub(const ZRegister& zd,
6661 const PRegisterM& pg,
6662 const ZRegister& zn,
6663 const ZRegister& zm);
6664
6665 // Unsigned halving subtract reversed vectors.
6666 void uhsubr(const ZRegister& zd,
6667 const PRegisterM& pg,
6668 const ZRegister& zn,
6669 const ZRegister& zm);
6670
6671 // Unsigned maximum pairwise.
6672 void umaxp(const ZRegister& zd,
6673 const PRegisterM& pg,
6674 const ZRegister& zn,
6675 const ZRegister& zm);
6676
6677 // Unsigned minimum pairwise.
6678 void uminp(const ZRegister& zd,
6679 const PRegisterM& pg,
6680 const ZRegister& zn,
6681 const ZRegister& zm);
6682
6683 // Unsigned multiply-add long to accumulator (bottom, indexed).
6684 void umlalb(const ZRegister& zda,
6685 const ZRegister& zn,
6686 const ZRegister& zm,
6687 int index);
6688
6689 // Unsigned multiply-add long to accumulator (bottom).
6690 void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6691
6692 // Unsigned multiply-add long to accumulator (top, indexed).
6693 void umlalt(const ZRegister& zda,
6694 const ZRegister& zn,
6695 const ZRegister& zm,
6696 int index);
6697
6698 // Unsigned multiply-add long to accumulator (top).
6699 void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6700
6701 // Unsigned multiply-subtract long from accumulator (bottom, indexed).
6702 void umlslb(const ZRegister& zda,
6703 const ZRegister& zn,
6704 const ZRegister& zm,
6705 int index);
6706
6707 // Unsigned multiply-subtract long from accumulator (bottom).
6708 void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6709
6710 // Unsigned multiply-subtract long from accumulator (top, indexed).
6711 void umlslt(const ZRegister& zda,
6712 const ZRegister& zn,
6713 const ZRegister& zm,
6714 int index);
6715
6716 // Unsigned multiply-subtract long from accumulator (top).
6717 void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6718
6719 // Unsigned multiply returning high half (unpredicated).
6720 void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6721
6722 // Unsigned multiply long (bottom, indexed).
6723 void umullb(const ZRegister& zd,
6724 const ZRegister& zn,
6725 const ZRegister& zm,
6726 int index);
6727
6728 // Unsigned multiply long (bottom).
6729 void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6730
6731 // Unsigned multiply long (top, indexed).
6732 void umullt(const ZRegister& zd,
6733 const ZRegister& zn,
6734 const ZRegister& zm,
6735 int index);
6736
6737 // Unsigned multiply long (top).
6738 void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6739
6740 // Unsigned saturating addition (predicated).
6741 void uqadd(const ZRegister& zd,
6742 const PRegisterM& pg,
6743 const ZRegister& zn,
6744 const ZRegister& zm);
6745
6746 // Unsigned saturating rounding shift left by vector (predicated).
6747 void uqrshl(const ZRegister& zd,
6748 const PRegisterM& pg,
6749 const ZRegister& zn,
6750 const ZRegister& zm);
6751
6752 // Unsigned saturating rounding shift left reversed vectors (predicated).
6753 void uqrshlr(const ZRegister& zd,
6754 const PRegisterM& pg,
6755 const ZRegister& zn,
6756 const ZRegister& zm);
6757
6758 // Unsigned saturating rounding shift right narrow by immediate (bottom).
6759 void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6760
6761 // Unsigned saturating rounding shift right narrow by immediate (top).
6762 void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6763
6764 // Unsigned saturating shift left by immediate.
6765 void uqshl(const ZRegister& zd,
6766 const PRegisterM& pg,
6767 const ZRegister& zn,
6768 int shift);
6769
6770 // Unsigned saturating shift left by vector (predicated).
6771 void uqshl(const ZRegister& zd,
6772 const PRegisterM& pg,
6773 const ZRegister& zn,
6774 const ZRegister& zm);
6775
6776 // Unsigned saturating shift left reversed vectors (predicated).
6777 void uqshlr(const ZRegister& zd,
6778 const PRegisterM& pg,
6779 const ZRegister& zn,
6780 const ZRegister& zm);
6781
6782 // Unsigned saturating shift right narrow by immediate (bottom).
6783 void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6784
6785 // Unsigned saturating shift right narrow by immediate (top).
6786 void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6787
6788 // Unsigned saturating subtraction (predicated).
6789 void uqsub(const ZRegister& zd,
6790 const PRegisterM& pg,
6791 const ZRegister& zn,
6792 const ZRegister& zm);
6793
6794 // Unsigned saturating subtraction reversed vectors (predicated).
6795 void uqsubr(const ZRegister& zd,
6796 const PRegisterM& pg,
6797 const ZRegister& zn,
6798 const ZRegister& zm);
6799
6800 // Unsigned saturating extract narrow (bottom).
6801 void uqxtnb(const ZRegister& zd, const ZRegister& zn);
6802
6803 // Unsigned saturating extract narrow (top).
6804 void uqxtnt(const ZRegister& zd, const ZRegister& zn);
6805
6806 // Unsigned reciprocal estimate (predicated).
6807 void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6808
6809 // Unsigned rounding halving addition.
6810 void urhadd(const ZRegister& zd,
6811 const PRegisterM& pg,
6812 const ZRegister& zn,
6813 const ZRegister& zm);
6814
6815 // Unsigned rounding shift left by vector (predicated).
6816 void urshl(const ZRegister& zd,
6817 const PRegisterM& pg,
6818 const ZRegister& zn,
6819 const ZRegister& zm);
6820
6821 // Unsigned rounding shift left reversed vectors (predicated).
6822 void urshlr(const ZRegister& zd,
6823 const PRegisterM& pg,
6824 const ZRegister& zn,
6825 const ZRegister& zm);
6826
6827 // Unsigned rounding shift right by immediate.
6828 void urshr(const ZRegister& zd,
6829 const PRegisterM& pg,
6830 const ZRegister& zn,
6831 int shift);
6832
6833 // Unsigned reciprocal square root estimate (predicated).
6834 void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6835
6836 // Unsigned rounding shift right and accumulate (immediate).
6837 void ursra(const ZRegister& zda, const ZRegister& zn, int shift);
6838
6839 // Unsigned shift left long by immediate (bottom).
6840 void ushllb(const ZRegister& zd, const ZRegister& zn, int shift);
6841
6842 // Unsigned shift left long by immediate (top).
6843 void ushllt(const ZRegister& zd, const ZRegister& zn, int shift);
6844
6845 // Unsigned saturating addition of signed value.
6846 void usqadd(const ZRegister& zd,
6847 const PRegisterM& pg,
6848 const ZRegister& zn,
6849 const ZRegister& zm);
6850
6851 // Unsigned shift right and accumulate (immediate).
6852 void usra(const ZRegister& zda, const ZRegister& zn, int shift);
6853
6854 // Unsigned subtract long (bottom).
6855 void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6856
6857 // Unsigned subtract long (top).
6858 void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6859
6860 // Unsigned subtract wide (bottom).
6861 void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6862
6863 // Unsigned subtract wide (top).
6864 void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6865
6866 // While decrementing signed scalar greater than or equal to scalar.
6867 void whilege(const PRegisterWithLaneSize& pd,
6868 const Register& rn,
6869 const Register& rm);
6870
6871 // While decrementing signed scalar greater than scalar.
6872 void whilegt(const PRegisterWithLaneSize& pd,
6873 const Register& rn,
6874 const Register& rm);
6875
6876 // While decrementing unsigned scalar higher than scalar.
6877 void whilehi(const PRegisterWithLaneSize& pd,
6878 const Register& rn,
6879 const Register& rm);
6880
6881 // While decrementing unsigned scalar higher or same as scalar.
6882 void whilehs(const PRegisterWithLaneSize& pd,
6883 const Register& rn,
6884 const Register& rm);
6885
6886 // While free of read-after-write conflicts.
6887 void whilerw(const PRegisterWithLaneSize& pd,
6888 const Register& rn,
6889 const Register& rm);
6890
6891 // While free of write-after-read/write conflicts.
6892 void whilewr(const PRegisterWithLaneSize& pd,
6893 const Register& rn,
6894 const Register& rm);
6895
6896 // Bitwise exclusive OR and rotate right by immediate.
6897 void xar(const ZRegister& zd,
6898 const ZRegister& zn,
6899 const ZRegister& zm,
6900 int shift);
6901
6902 // Floating-point matrix multiply-accumulate.
6903 void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6904
6905 // Signed integer matrix multiply-accumulate.
6906 void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6907
6908 // Unsigned by signed integer matrix multiply-accumulate.
6909 void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6910
6911 // Unsigned integer matrix multiply-accumulate.
6912 void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6913
6914 // Unsigned by signed integer dot product.
6915 void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6916
6917 // Unsigned by signed integer indexed dot product.
6918 void usdot(const ZRegister& zda,
6919 const ZRegister& zn,
6920 const ZRegister& zm,
6921 int index);
6922
6923 // Signed by unsigned integer indexed dot product.
6924 void sudot(const ZRegister& zda,
6925 const ZRegister& zn,
6926 const ZRegister& zm,
6927 int index);
6928
6929 // Emit generic instructions.
6930
6931 // Emit raw instructions into the instruction stream.
dci(Instr raw_inst)6932 void dci(Instr raw_inst) { Emit(raw_inst); }
6933
6934 // Emit 32 bits of data into the instruction stream.
dc32(uint32_t data)6935 void dc32(uint32_t data) { dc(data); }
6936
6937 // Emit 64 bits of data into the instruction stream.
dc64(uint64_t data)6938 void dc64(uint64_t data) { dc(data); }
6939
6940 // Emit data in the instruction stream.
6941 template <typename T>
dc(T data)6942 void dc(T data) {
6943 VIXL_ASSERT(AllowAssembler());
6944 GetBuffer()->Emit<T>(data);
6945 }
6946
6947 // Copy a string into the instruction stream, including the terminating NULL
6948 // character. The instruction pointer is then aligned correctly for
6949 // subsequent instructions.
EmitString(const char * string)6950 void EmitString(const char* string) {
6951 VIXL_ASSERT(string != NULL);
6952 VIXL_ASSERT(AllowAssembler());
6953
6954 GetBuffer()->EmitString(string);
6955 GetBuffer()->Align();
6956 }
6957
6958 // Code generation helpers.
6959 static bool OneInstrMoveImmediateHelper(Assembler* assm,
6960 const Register& dst,
6961 uint64_t imm);
6962
6963 // Register encoding.
6964 template <int hibit, int lobit>
Rx(CPURegister rx)6965 static Instr Rx(CPURegister rx) {
6966 VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
6967 return ImmUnsignedField<hibit, lobit>(rx.GetCode());
6968 }
6969
6970 #define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
6971 #define REGISTER_ENCODER(N) \
6972 static Instr R##N(CPURegister r##N) { \
6973 return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
6974 }
CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)6975 CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
6976 #undef REGISTER_ENCODER
6977 #undef CPU_REGISTER_FIELD_NAMES
6978
6979 static Instr RmNot31(CPURegister rm) {
6980 VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
6981 VIXL_ASSERT(!rm.IsZero());
6982 return Rm(rm);
6983 }
6984
6985 // These encoding functions allow the stack pointer to be encoded, and
6986 // disallow the zero register.
RdSP(Register rd)6987 static Instr RdSP(Register rd) {
6988 VIXL_ASSERT(!rd.IsZero());
6989 return (rd.GetCode() & kRegCodeMask) << Rd_offset;
6990 }
6991
RnSP(Register rn)6992 static Instr RnSP(Register rn) {
6993 VIXL_ASSERT(!rn.IsZero());
6994 return (rn.GetCode() & kRegCodeMask) << Rn_offset;
6995 }
6996
RmSP(Register rm)6997 static Instr RmSP(Register rm) {
6998 VIXL_ASSERT(!rm.IsZero());
6999 return (rm.GetCode() & kRegCodeMask) << Rm_offset;
7000 }
7001
Pd(PRegister pd)7002 static Instr Pd(PRegister pd) {
7003 return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
7004 }
7005
Pm(PRegister pm)7006 static Instr Pm(PRegister pm) {
7007 return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
7008 }
7009
Pn(PRegister pn)7010 static Instr Pn(PRegister pn) {
7011 return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
7012 }
7013
PgLow8(PRegister pg)7014 static Instr PgLow8(PRegister pg) {
7015 // Governing predicates can be merging, zeroing, or unqualified. They should
7016 // never have a lane size.
7017 VIXL_ASSERT(!pg.HasLaneSize());
7018 return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
7019 }
7020
7021 template <int hibit, int lobit>
Pg(PRegister pg)7022 static Instr Pg(PRegister pg) {
7023 // Governing predicates can be merging, zeroing, or unqualified. They should
7024 // never have a lane size.
7025 VIXL_ASSERT(!pg.HasLaneSize());
7026 return Rx<hibit, lobit>(pg);
7027 }
7028
7029 // Flags encoding.
Flags(FlagsUpdate S)7030 static Instr Flags(FlagsUpdate S) {
7031 if (S == SetFlags) {
7032 return 1 << FlagsUpdate_offset;
7033 } else if (S == LeaveFlags) {
7034 return 0 << FlagsUpdate_offset;
7035 }
7036 VIXL_UNREACHABLE();
7037 return 0;
7038 }
7039
Cond(Condition cond)7040 static Instr Cond(Condition cond) { return cond << Condition_offset; }
7041
7042 // Generic immediate encoding.
7043 template <int hibit, int lobit>
ImmField(int64_t imm)7044 static Instr ImmField(int64_t imm) {
7045 VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
7046 VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
7047 int fieldsize = hibit - lobit + 1;
7048 VIXL_ASSERT(IsIntN(fieldsize, imm));
7049 return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
7050 }
7051
7052 // For unsigned immediate encoding.
7053 // TODO: Handle signed and unsigned immediate in satisfactory way.
7054 template <int hibit, int lobit>
ImmUnsignedField(uint64_t imm)7055 static Instr ImmUnsignedField(uint64_t imm) {
7056 VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
7057 VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
7058 VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
7059 return static_cast<Instr>(imm << lobit);
7060 }
7061
7062 // PC-relative address encoding.
ImmPCRelAddress(int64_t imm21)7063 static Instr ImmPCRelAddress(int64_t imm21) {
7064 VIXL_ASSERT(IsInt21(imm21));
7065 Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
7066 Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
7067 Instr immlo = imm << ImmPCRelLo_offset;
7068 return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
7069 }
7070
7071 // Branch encoding.
ImmUncondBranch(int64_t imm26)7072 static Instr ImmUncondBranch(int64_t imm26) {
7073 VIXL_ASSERT(IsInt26(imm26));
7074 return TruncateToUint26(imm26) << ImmUncondBranch_offset;
7075 }
7076
ImmCondBranch(int64_t imm19)7077 static Instr ImmCondBranch(int64_t imm19) {
7078 VIXL_ASSERT(IsInt19(imm19));
7079 return TruncateToUint19(imm19) << ImmCondBranch_offset;
7080 }
7081
ImmCmpBranch(int64_t imm19)7082 static Instr ImmCmpBranch(int64_t imm19) {
7083 VIXL_ASSERT(IsInt19(imm19));
7084 return TruncateToUint19(imm19) << ImmCmpBranch_offset;
7085 }
7086
ImmTestBranch(int64_t imm14)7087 static Instr ImmTestBranch(int64_t imm14) {
7088 VIXL_ASSERT(IsInt14(imm14));
7089 return TruncateToUint14(imm14) << ImmTestBranch_offset;
7090 }
7091
ImmTestBranchBit(unsigned bit_pos)7092 static Instr ImmTestBranchBit(unsigned bit_pos) {
7093 VIXL_ASSERT(IsUint6(bit_pos));
7094 // Subtract five from the shift offset, as we need bit 5 from bit_pos.
7095 unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5);
7096 unsigned bit40 = bit_pos << ImmTestBranchBit40_offset;
7097 bit5 &= ImmTestBranchBit5_mask;
7098 bit40 &= ImmTestBranchBit40_mask;
7099 return bit5 | bit40;
7100 }
7101
7102 // Data Processing encoding.
SF(Register rd)7103 static Instr SF(Register rd) {
7104 return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
7105 }
7106
ImmAddSub(int imm)7107 static Instr ImmAddSub(int imm) {
7108 VIXL_ASSERT(IsImmAddSub(imm));
7109 if (IsUint12(imm)) { // No shift required.
7110 imm <<= ImmAddSub_offset;
7111 } else {
7112 imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
7113 }
7114 return imm;
7115 }
7116
SVEImmSetBits(unsigned imms,unsigned lane_size)7117 static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
7118 VIXL_ASSERT(IsUint6(imms));
7119 VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
7120 USE(lane_size);
7121 return imms << SVEImmSetBits_offset;
7122 }
7123
SVEImmRotate(unsigned immr,unsigned lane_size)7124 static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
7125 VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
7126 USE(lane_size);
7127 return immr << SVEImmRotate_offset;
7128 }
7129
SVEBitN(unsigned bitn)7130 static Instr SVEBitN(unsigned bitn) {
7131 VIXL_ASSERT(IsUint1(bitn));
7132 return bitn << SVEBitN_offset;
7133 }
7134
7135 static Instr SVEDtype(unsigned msize_in_bytes_log2,
7136 unsigned esize_in_bytes_log2,
7137 bool is_signed,
7138 int dtype_h_lsb = 23,
7139 int dtype_l_lsb = 21) {
7140 VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
7141 VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
7142 Instr dtype_h = msize_in_bytes_log2;
7143 Instr dtype_l = esize_in_bytes_log2;
7144 // Signed forms use the encodings where msize would be greater than esize.
7145 if (is_signed) {
7146 dtype_h = dtype_h ^ 0x3;
7147 dtype_l = dtype_l ^ 0x3;
7148 }
7149 VIXL_ASSERT(IsUint2(dtype_h));
7150 VIXL_ASSERT(IsUint2(dtype_l));
7151 VIXL_ASSERT((dtype_h > dtype_l) == is_signed);
7152
7153 return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
7154 }
7155
SVEDtypeSplit(unsigned msize_in_bytes_log2,unsigned esize_in_bytes_log2,bool is_signed)7156 static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
7157 unsigned esize_in_bytes_log2,
7158 bool is_signed) {
7159 return SVEDtype(msize_in_bytes_log2,
7160 esize_in_bytes_log2,
7161 is_signed,
7162 23,
7163 13);
7164 }
7165
ImmS(unsigned imms,unsigned reg_size)7166 static Instr ImmS(unsigned imms, unsigned reg_size) {
7167 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
7168 ((reg_size == kWRegSize) && IsUint5(imms)));
7169 USE(reg_size);
7170 return imms << ImmS_offset;
7171 }
7172
ImmR(unsigned immr,unsigned reg_size)7173 static Instr ImmR(unsigned immr, unsigned reg_size) {
7174 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7175 ((reg_size == kWRegSize) && IsUint5(immr)));
7176 USE(reg_size);
7177 VIXL_ASSERT(IsUint6(immr));
7178 return immr << ImmR_offset;
7179 }
7180
ImmSetBits(unsigned imms,unsigned reg_size)7181 static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
7182 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7183 VIXL_ASSERT(IsUint6(imms));
7184 VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
7185 USE(reg_size);
7186 return imms << ImmSetBits_offset;
7187 }
7188
ImmRotate(unsigned immr,unsigned reg_size)7189 static Instr ImmRotate(unsigned immr, unsigned reg_size) {
7190 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7191 VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7192 ((reg_size == kWRegSize) && IsUint5(immr)));
7193 USE(reg_size);
7194 return immr << ImmRotate_offset;
7195 }
7196
ImmLLiteral(int64_t imm19)7197 static Instr ImmLLiteral(int64_t imm19) {
7198 VIXL_ASSERT(IsInt19(imm19));
7199 return TruncateToUint19(imm19) << ImmLLiteral_offset;
7200 }
7201
BitN(unsigned bitn,unsigned reg_size)7202 static Instr BitN(unsigned bitn, unsigned reg_size) {
7203 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7204 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
7205 USE(reg_size);
7206 return bitn << BitN_offset;
7207 }
7208
ShiftDP(Shift shift)7209 static Instr ShiftDP(Shift shift) {
7210 VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
7211 return shift << ShiftDP_offset;
7212 }
7213
ImmDPShift(unsigned amount)7214 static Instr ImmDPShift(unsigned amount) {
7215 VIXL_ASSERT(IsUint6(amount));
7216 return amount << ImmDPShift_offset;
7217 }
7218
ExtendMode(Extend extend)7219 static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }
7220
ImmExtendShift(unsigned left_shift)7221 static Instr ImmExtendShift(unsigned left_shift) {
7222 VIXL_ASSERT(left_shift <= 4);
7223 return left_shift << ImmExtendShift_offset;
7224 }
7225
ImmCondCmp(unsigned imm)7226 static Instr ImmCondCmp(unsigned imm) {
7227 VIXL_ASSERT(IsUint5(imm));
7228 return imm << ImmCondCmp_offset;
7229 }
7230
Nzcv(StatusFlags nzcv)7231 static Instr Nzcv(StatusFlags nzcv) {
7232 return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
7233 }
7234
7235 // MemOperand offset encoding.
ImmLSUnsigned(int64_t imm12)7236 static Instr ImmLSUnsigned(int64_t imm12) {
7237 VIXL_ASSERT(IsUint12(imm12));
7238 return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
7239 }
7240
ImmLS(int64_t imm9)7241 static Instr ImmLS(int64_t imm9) {
7242 VIXL_ASSERT(IsInt9(imm9));
7243 return TruncateToUint9(imm9) << ImmLS_offset;
7244 }
7245
ImmLSPair(int64_t imm7,unsigned access_size_in_bytes_log2)7246 static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
7247 VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2));
7248 int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2);
7249 VIXL_ASSERT(IsInt7(scaled_imm7));
7250 return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
7251 }
7252
ImmShiftLS(unsigned shift_amount)7253 static Instr ImmShiftLS(unsigned shift_amount) {
7254 VIXL_ASSERT(IsUint1(shift_amount));
7255 return shift_amount << ImmShiftLS_offset;
7256 }
7257
ImmLSPAC(int64_t imm10)7258 static Instr ImmLSPAC(int64_t imm10) {
7259 VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
7260 int64_t scaled_imm10 = imm10 / (1 << 3);
7261 VIXL_ASSERT(IsInt10(scaled_imm10));
7262 uint32_t s_bit = (scaled_imm10 >> 9) & 1;
7263 return (s_bit << ImmLSPACHi_offset) |
7264 (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
7265 }
7266
ImmPrefetchOperation(int imm5)7267 static Instr ImmPrefetchOperation(int imm5) {
7268 VIXL_ASSERT(IsUint5(imm5));
7269 return imm5 << ImmPrefetchOperation_offset;
7270 }
7271
ImmException(int imm16)7272 static Instr ImmException(int imm16) {
7273 VIXL_ASSERT(IsUint16(imm16));
7274 return imm16 << ImmException_offset;
7275 }
7276
ImmUdf(int imm16)7277 static Instr ImmUdf(int imm16) {
7278 VIXL_ASSERT(IsUint16(imm16));
7279 return imm16 << ImmUdf_offset;
7280 }
7281
ImmSystemRegister(int imm16)7282 static Instr ImmSystemRegister(int imm16) {
7283 VIXL_ASSERT(IsUint16(imm16));
7284 return imm16 << ImmSystemRegister_offset;
7285 }
7286
ImmRMIFRotation(int imm6)7287 static Instr ImmRMIFRotation(int imm6) {
7288 VIXL_ASSERT(IsUint6(imm6));
7289 return imm6 << ImmRMIFRotation_offset;
7290 }
7291
ImmHint(int imm7)7292 static Instr ImmHint(int imm7) {
7293 VIXL_ASSERT(IsUint7(imm7));
7294 return imm7 << ImmHint_offset;
7295 }
7296
CRm(int imm4)7297 static Instr CRm(int imm4) {
7298 VIXL_ASSERT(IsUint4(imm4));
7299 return imm4 << CRm_offset;
7300 }
7301
CRn(int imm4)7302 static Instr CRn(int imm4) {
7303 VIXL_ASSERT(IsUint4(imm4));
7304 return imm4 << CRn_offset;
7305 }
7306
SysOp(int imm14)7307 static Instr SysOp(int imm14) {
7308 VIXL_ASSERT(IsUint14(imm14));
7309 return imm14 << SysOp_offset;
7310 }
7311
ImmSysOp1(int imm3)7312 static Instr ImmSysOp1(int imm3) {
7313 VIXL_ASSERT(IsUint3(imm3));
7314 return imm3 << SysOp1_offset;
7315 }
7316
ImmSysOp2(int imm3)7317 static Instr ImmSysOp2(int imm3) {
7318 VIXL_ASSERT(IsUint3(imm3));
7319 return imm3 << SysOp2_offset;
7320 }
7321
ImmBarrierDomain(int imm2)7322 static Instr ImmBarrierDomain(int imm2) {
7323 VIXL_ASSERT(IsUint2(imm2));
7324 return imm2 << ImmBarrierDomain_offset;
7325 }
7326
ImmBarrierType(int imm2)7327 static Instr ImmBarrierType(int imm2) {
7328 VIXL_ASSERT(IsUint2(imm2));
7329 return imm2 << ImmBarrierType_offset;
7330 }
7331
7332 // Move immediates encoding.
ImmMoveWide(uint64_t imm)7333 static Instr ImmMoveWide(uint64_t imm) {
7334 VIXL_ASSERT(IsUint16(imm));
7335 return static_cast<Instr>(imm << ImmMoveWide_offset);
7336 }
7337
ShiftMoveWide(int64_t shift)7338 static Instr ShiftMoveWide(int64_t shift) {
7339 VIXL_ASSERT(IsUint2(shift));
7340 return static_cast<Instr>(shift << ShiftMoveWide_offset);
7341 }
7342
7343 // FP Immediates.
7344 static Instr ImmFP16(Float16 imm);
7345 static Instr ImmFP32(float imm);
7346 static Instr ImmFP64(double imm);
7347
7348 // FP register type.
FPType(VRegister fd)7349 static Instr FPType(VRegister fd) {
7350 VIXL_ASSERT(fd.IsScalar());
7351 switch (fd.GetSizeInBits()) {
7352 case 16:
7353 return FP16;
7354 case 32:
7355 return FP32;
7356 case 64:
7357 return FP64;
7358 default:
7359 VIXL_UNREACHABLE();
7360 return 0;
7361 }
7362 }
7363
FPScale(unsigned scale)7364 static Instr FPScale(unsigned scale) {
7365 VIXL_ASSERT(IsUint6(scale));
7366 return scale << FPScale_offset;
7367 }
7368
7369 // Immediate field checking helpers.
7370 static bool IsImmAddSub(int64_t immediate);
7371 static bool IsImmConditionalCompare(int64_t immediate);
7372 static bool IsImmFP16(Float16 imm);
7373 static bool IsImmFP32(float imm);
7374 static bool IsImmFP64(double imm);
7375 static bool IsImmLogical(uint64_t value,
7376 unsigned width,
7377 unsigned* n = NULL,
7378 unsigned* imm_s = NULL,
7379 unsigned* imm_r = NULL);
7380 static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
7381 static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
7382 static bool IsImmLSUnscaled(int64_t offset);
7383 static bool IsImmMovn(uint64_t imm, unsigned reg_size);
7384 static bool IsImmMovz(uint64_t imm, unsigned reg_size);
7385
7386 // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)7387 static Instr VFormat(VRegister vd) {
7388 if (vd.Is64Bits()) {
7389 switch (vd.GetLanes()) {
7390 case 2:
7391 return NEON_2S;
7392 case 4:
7393 return NEON_4H;
7394 case 8:
7395 return NEON_8B;
7396 default:
7397 return 0xffffffff;
7398 }
7399 } else {
7400 VIXL_ASSERT(vd.Is128Bits());
7401 switch (vd.GetLanes()) {
7402 case 2:
7403 return NEON_2D;
7404 case 4:
7405 return NEON_4S;
7406 case 8:
7407 return NEON_8H;
7408 case 16:
7409 return NEON_16B;
7410 default:
7411 return 0xffffffff;
7412 }
7413 }
7414 }
7415
7416 // Instruction bits for vector format in floating point data processing
7417 // operations.
FPFormat(VRegister vd)7418 static Instr FPFormat(VRegister vd) {
7419 switch (vd.GetLanes()) {
7420 case 1:
7421 // Floating point scalar formats.
7422 switch (vd.GetSizeInBits()) {
7423 case 16:
7424 return FP16;
7425 case 32:
7426 return FP32;
7427 case 64:
7428 return FP64;
7429 default:
7430 VIXL_UNREACHABLE();
7431 }
7432 break;
7433 case 2:
7434 // Two lane floating point vector formats.
7435 switch (vd.GetSizeInBits()) {
7436 case 64:
7437 return NEON_FP_2S;
7438 case 128:
7439 return NEON_FP_2D;
7440 default:
7441 VIXL_UNREACHABLE();
7442 }
7443 break;
7444 case 4:
7445 // Four lane floating point vector formats.
7446 switch (vd.GetSizeInBits()) {
7447 case 64:
7448 return NEON_FP_4H;
7449 case 128:
7450 return NEON_FP_4S;
7451 default:
7452 VIXL_UNREACHABLE();
7453 }
7454 break;
7455 case 8:
7456 // Eight lane floating point vector format.
7457 VIXL_ASSERT(vd.Is128Bits());
7458 return NEON_FP_8H;
7459 default:
7460 VIXL_UNREACHABLE();
7461 return 0;
7462 }
7463 VIXL_UNREACHABLE();
7464 return 0;
7465 }
7466
7467 // Instruction bits for vector format in load and store operations.
LSVFormat(VRegister vd)7468 static Instr LSVFormat(VRegister vd) {
7469 if (vd.Is64Bits()) {
7470 switch (vd.GetLanes()) {
7471 case 1:
7472 return LS_NEON_1D;
7473 case 2:
7474 return LS_NEON_2S;
7475 case 4:
7476 return LS_NEON_4H;
7477 case 8:
7478 return LS_NEON_8B;
7479 default:
7480 return 0xffffffff;
7481 }
7482 } else {
7483 VIXL_ASSERT(vd.Is128Bits());
7484 switch (vd.GetLanes()) {
7485 case 2:
7486 return LS_NEON_2D;
7487 case 4:
7488 return LS_NEON_4S;
7489 case 8:
7490 return LS_NEON_8H;
7491 case 16:
7492 return LS_NEON_16B;
7493 default:
7494 return 0xffffffff;
7495 }
7496 }
7497 }
7498
7499 // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)7500 static Instr SFormat(VRegister vd) {
7501 VIXL_ASSERT(vd.GetLanes() == 1);
7502 switch (vd.GetSizeInBytes()) {
7503 case 1:
7504 return NEON_B;
7505 case 2:
7506 return NEON_H;
7507 case 4:
7508 return NEON_S;
7509 case 8:
7510 return NEON_D;
7511 default:
7512 return 0xffffffff;
7513 }
7514 }
7515
7516 template <typename T>
SVESize(const T & rd)7517 static Instr SVESize(const T& rd) {
7518 VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
7519 VIXL_ASSERT(rd.HasLaneSize());
7520 switch (rd.GetLaneSizeInBytes()) {
7521 case 1:
7522 return SVE_B;
7523 case 2:
7524 return SVE_H;
7525 case 4:
7526 return SVE_S;
7527 case 8:
7528 return SVE_D;
7529 default:
7530 return 0xffffffff;
7531 }
7532 }
7533
ImmSVEPredicateConstraint(int pattern)7534 static Instr ImmSVEPredicateConstraint(int pattern) {
7535 VIXL_ASSERT(IsUint5(pattern));
7536 return (pattern << ImmSVEPredicateConstraint_offset) &
7537 ImmSVEPredicateConstraint_mask;
7538 }
7539
ImmNEONHLM(int index,int num_bits)7540 static Instr ImmNEONHLM(int index, int num_bits) {
7541 int h, l, m;
7542 if (num_bits == 3) {
7543 VIXL_ASSERT(IsUint3(index));
7544 h = (index >> 2) & 1;
7545 l = (index >> 1) & 1;
7546 m = (index >> 0) & 1;
7547 } else if (num_bits == 2) {
7548 VIXL_ASSERT(IsUint2(index));
7549 h = (index >> 1) & 1;
7550 l = (index >> 0) & 1;
7551 m = 0;
7552 } else {
7553 VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
7554 h = (index >> 0) & 1;
7555 l = 0;
7556 m = 0;
7557 }
7558 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
7559 }
7560
ImmRotFcadd(int rot)7561 static Instr ImmRotFcadd(int rot) {
7562 VIXL_ASSERT(rot == 90 || rot == 270);
7563 return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
7564 }
7565
ImmRotFcmlaSca(int rot)7566 static Instr ImmRotFcmlaSca(int rot) {
7567 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7568 return (rot / 90) << ImmRotFcmlaSca_offset;
7569 }
7570
ImmRotFcmlaVec(int rot)7571 static Instr ImmRotFcmlaVec(int rot) {
7572 VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7573 return (rot / 90) << ImmRotFcmlaVec_offset;
7574 }
7575
ImmNEONExt(int imm4)7576 static Instr ImmNEONExt(int imm4) {
7577 VIXL_ASSERT(IsUint4(imm4));
7578 return imm4 << ImmNEONExt_offset;
7579 }
7580
ImmNEON5(Instr format,int index)7581 static Instr ImmNEON5(Instr format, int index) {
7582 VIXL_ASSERT(IsUint4(index));
7583 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7584 int imm5 = (index << (s + 1)) | (1 << s);
7585 return imm5 << ImmNEON5_offset;
7586 }
7587
ImmNEON4(Instr format,int index)7588 static Instr ImmNEON4(Instr format, int index) {
7589 VIXL_ASSERT(IsUint4(index));
7590 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7591 int imm4 = index << s;
7592 return imm4 << ImmNEON4_offset;
7593 }
7594
ImmNEONabcdefgh(int imm8)7595 static Instr ImmNEONabcdefgh(int imm8) {
7596 VIXL_ASSERT(IsUint8(imm8));
7597 Instr instr;
7598 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
7599 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
7600 return instr;
7601 }
7602
NEONCmode(int cmode)7603 static Instr NEONCmode(int cmode) {
7604 VIXL_ASSERT(IsUint4(cmode));
7605 return cmode << NEONCmode_offset;
7606 }
7607
NEONModImmOp(int op)7608 static Instr NEONModImmOp(int op) {
7609 VIXL_ASSERT(IsUint1(op));
7610 return op << NEONModImmOp_offset;
7611 }
7612
7613 // Size of the code generated since label to the current position.
GetSizeOfCodeGeneratedSince(Label * label)7614 size_t GetSizeOfCodeGeneratedSince(Label* label) const {
7615 VIXL_ASSERT(label->IsBound());
7616 return GetBuffer().GetOffsetFrom(label->GetLocation());
7617 }
7618 VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
7619 size_t SizeOfCodeGeneratedSince(Label* label) const) {
7620 return GetSizeOfCodeGeneratedSince(label);
7621 }
7622
7623 VIXL_DEPRECATED("GetBuffer().GetCapacity()",
7624 size_t GetBufferCapacity() const) {
7625 return GetBuffer().GetCapacity();
7626 }
7627 VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
7628 return GetBuffer().GetCapacity();
7629 }
7630
7631 VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
7632 size_t GetRemainingBufferSpace() const) {
7633 return GetBuffer().GetRemainingBytes();
7634 }
7635 VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
7636 size_t RemainingBufferSpace() const) {
7637 return GetBuffer().GetRemainingBytes();
7638 }
7639
GetPic()7640 PositionIndependentCodeOption GetPic() const { return pic_; }
7641 VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
7642 return GetPic();
7643 }
7644
GetCPUFeatures()7645 CPUFeatures* GetCPUFeatures() { return &cpu_features_; }
7646
SetCPUFeatures(const CPUFeatures & cpu_features)7647 void SetCPUFeatures(const CPUFeatures& cpu_features) {
7648 cpu_features_ = cpu_features;
7649 }
7650
AllowPageOffsetDependentCode()7651 bool AllowPageOffsetDependentCode() const {
7652 return (GetPic() == PageOffsetDependentCode) ||
7653 (GetPic() == PositionDependentCode);
7654 }
7655
AppropriateZeroRegFor(const CPURegister & reg)7656 static Register AppropriateZeroRegFor(const CPURegister& reg) {
7657 return reg.Is64Bits() ? Register(xzr) : Register(wzr);
7658 }
7659
7660 protected:
7661 void LoadStore(const CPURegister& rt,
7662 const MemOperand& addr,
7663 LoadStoreOp op,
7664 LoadStoreScalingOption option = PreferScaledOffset);
7665
7666 void LoadStorePAC(const Register& xt,
7667 const MemOperand& addr,
7668 LoadStorePACOp op);
7669
7670 void LoadStorePair(const CPURegister& rt,
7671 const CPURegister& rt2,
7672 const MemOperand& addr,
7673 LoadStorePairOp op);
7674 void LoadStoreStruct(const VRegister& vt,
7675 const MemOperand& addr,
7676 NEONLoadStoreMultiStructOp op);
7677 void LoadStoreStruct1(const VRegister& vt,
7678 int reg_count,
7679 const MemOperand& addr);
7680 void LoadStoreStructSingle(const VRegister& vt,
7681 uint32_t lane,
7682 const MemOperand& addr,
7683 NEONLoadStoreSingleStructOp op);
7684 void LoadStoreStructSingleAllLanes(const VRegister& vt,
7685 const MemOperand& addr,
7686 NEONLoadStoreSingleStructOp op);
7687 void LoadStoreStructVerify(const VRegister& vt,
7688 const MemOperand& addr,
7689 Instr op);
7690
7691 // Set `is_load` to false in default as it's only used in the
7692 // scalar-plus-vector form.
7693 Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
7694 int num_regs,
7695 const SVEMemOperand& addr,
7696 bool is_load = false);
7697
7698 // E.g. st1b, st1h, ...
7699 // This supports both contiguous and scatter stores.
7700 void SVESt1Helper(unsigned msize_in_bytes_log2,
7701 const ZRegister& zt,
7702 const PRegister& pg,
7703 const SVEMemOperand& addr);
7704
7705 // E.g. ld1b, ld1h, ...
7706 // This supports both contiguous and gather loads.
7707 void SVELd1Helper(unsigned msize_in_bytes_log2,
7708 const ZRegister& zt,
7709 const PRegisterZ& pg,
7710 const SVEMemOperand& addr,
7711 bool is_signed);
7712
7713 // E.g. ld1rb, ld1rh, ...
7714 void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
7715 const ZRegister& zt,
7716 const PRegisterZ& pg,
7717 const SVEMemOperand& addr,
7718 bool is_signed);
7719
7720 // E.g. ldff1b, ldff1h, ...
7721 // This supports both contiguous and gather loads.
7722 void SVELdff1Helper(unsigned msize_in_bytes_log2,
7723 const ZRegister& zt,
7724 const PRegisterZ& pg,
7725 const SVEMemOperand& addr,
7726 bool is_signed);
7727
7728 // Common code for the helpers above.
7729 void SVELdSt1Helper(unsigned msize_in_bytes_log2,
7730 const ZRegister& zt,
7731 const PRegister& pg,
7732 const SVEMemOperand& addr,
7733 bool is_signed,
7734 Instr op);
7735
7736 // Common code for the helpers above.
7737 void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
7738 const ZRegister& zt,
7739 const PRegister& pg,
7740 const SVEMemOperand& addr,
7741 bool is_load,
7742 bool is_signed,
7743 bool is_first_fault);
7744
7745 // E.g. st2b, st3h, ...
7746 void SVESt234Helper(int num_regs,
7747 const ZRegister& zt1,
7748 const PRegister& pg,
7749 const SVEMemOperand& addr);
7750
7751 // E.g. ld2b, ld3h, ...
7752 void SVELd234Helper(int num_regs,
7753 const ZRegister& zt1,
7754 const PRegisterZ& pg,
7755 const SVEMemOperand& addr);
7756
7757 // Common code for the helpers above.
7758 void SVELdSt234Helper(int num_regs,
7759 const ZRegister& zt1,
7760 const PRegister& pg,
7761 const SVEMemOperand& addr,
7762 Instr op);
7763
7764 // E.g. ld1qb, ld1qh, ldnt1b, ...
7765 void SVELd1St1ScaImmHelper(const ZRegister& zt,
7766 const PRegister& pg,
7767 const SVEMemOperand& addr,
7768 Instr regoffset_op,
7769 Instr immoffset_op,
7770 int imm_divisor = 1);
7771
7772 void SVELd1VecScaHelper(const ZRegister& zt,
7773 const PRegister& pg,
7774 const SVEMemOperand& addr,
7775 uint32_t msize,
7776 bool is_signed);
7777 void SVESt1VecScaHelper(const ZRegister& zt,
7778 const PRegister& pg,
7779 const SVEMemOperand& addr,
7780 uint32_t msize);
7781
7782 void Prefetch(PrefetchOperation op,
7783 const MemOperand& addr,
7784 LoadStoreScalingOption option = PreferScaledOffset);
7785 void Prefetch(int op,
7786 const MemOperand& addr,
7787 LoadStoreScalingOption option = PreferScaledOffset);
7788
7789 // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
7790 // reports a bogus uninitialised warning then.
7791 void Logical(const Register& rd,
7792 const Register& rn,
7793 const Operand operand,
7794 LogicalOp op);
7795
7796 void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);
7797
7798 void LogicalImmediate(const Register& rd,
7799 const Register& rn,
7800 unsigned n,
7801 unsigned imm_s,
7802 unsigned imm_r,
7803 LogicalOp op);
7804
7805 void ConditionalCompare(const Register& rn,
7806 const Operand& operand,
7807 StatusFlags nzcv,
7808 Condition cond,
7809 ConditionalCompareOp op);
7810
7811 void AddSubWithCarry(const Register& rd,
7812 const Register& rn,
7813 const Operand& operand,
7814 FlagsUpdate S,
7815 AddSubWithCarryOp op);
7816
7817 void CompareVectors(const PRegisterWithLaneSize& pd,
7818 const PRegisterZ& pg,
7819 const ZRegister& zn,
7820 const ZRegister& zm,
7821 SVEIntCompareVectorsOp op);
7822
7823 void CompareVectors(const PRegisterWithLaneSize& pd,
7824 const PRegisterZ& pg,
7825 const ZRegister& zn,
7826 int imm,
7827 SVEIntCompareSignedImmOp op);
7828
7829 void CompareVectors(const PRegisterWithLaneSize& pd,
7830 const PRegisterZ& pg,
7831 const ZRegister& zn,
7832 unsigned imm,
7833 SVEIntCompareUnsignedImmOp op);
7834
7835 void SVEIntAddSubtractImmUnpredicatedHelper(
7836 SVEIntAddSubtractImm_UnpredicatedOp op,
7837 const ZRegister& zd,
7838 int imm8,
7839 int shift);
7840
7841 void SVEElementCountToRegisterHelper(Instr op,
7842 const Register& rd,
7843 int pattern,
7844 int multiplier);
7845
7846 Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits);
7847
7848 Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits);
7849
7850 void SVEBitwiseShiftImmediate(const ZRegister& zd,
7851 const ZRegister& zn,
7852 Instr encoded_imm,
7853 Instr op);
7854
7855 void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
7856 const PRegisterM& pg,
7857 Instr encoded_imm,
7858 Instr op);
7859
7860 Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
7861 const ZRegister& zm,
7862 int index,
7863 Instr op_h,
7864 Instr op_s,
7865 Instr op_d);
7866
7867 Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);
7868
7869 Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index);
7870
7871 void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
7872 const PRegister& pg,
7873 const SVEMemOperand& addr,
7874 int prefetch_size);
7875
7876 void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
7877 const PRegister& pg,
7878 const SVEMemOperand& addr,
7879 int prefetch_size);
7880
7881 void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
7882 const PRegister& pg,
7883 const SVEMemOperand& addr,
7884 int prefetch_size);
7885
7886 void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
7887 const PRegister& pg,
7888 const SVEMemOperand& addr,
7889 int prefetch_size);
7890
7891 void SVEPrefetchHelper(PrefetchOperation prfop,
7892 const PRegister& pg,
7893 const SVEMemOperand& addr,
7894 int prefetch_size);
7895
SVEImmPrefetchOperation(PrefetchOperation prfop)7896 static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
7897 // SVE only supports PLD and PST, not PLI.
7898 VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
7899 ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
7900 // Check that we can simply map bits.
7901 VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
7902 VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
7903 // Remaining operations map directly.
7904 return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
7905 }
7906
  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  // Shared emission path for add/sub forms; `S` selects whether the flags are
  // updated.
  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // Shared emission path for the NEON table-lookup forms selected by `op`.
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
7943
  // Convenience pass-through for CPU feature checks; defers to
  // CPUFeatures::Has. Unused feature slots default to CPUFeatures::kNone.
  bool CPUHas(CPUFeatures::Feature feature0,
              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
    return cpu_features_.Has(feature0, feature1, feature2, feature3);
  }
7951
  // Determine whether the target CPU has the specified registers, based on the
  // currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the same
  // entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  // As above, but for accesses to the specified system register.
  bool CPUHas(SystemRegister sysreg) const;
7963
 private:
  // Convert a floating-point immediate to the 8-bit imm8 encoding used by
  // FP-immediate instruction fields. Presumably the value must be encodable --
  // TODO(review): confirm asserting behaviour in the implementation.
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);
7968
  // Instruction helpers. The `op` / `vop` arguments carry pre-encoded opcode
  // bits that select the specific instruction form.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON across-lanes forms; the "L" variant takes no half-precision
  // alternative encoding, unlike NEONAcrossLanes with its `op_half`.
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  // NEON modified-immediate forms, shifting the 8-bit immediate with either
  // LSL or MSL.
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  // NEON "three same" (same-sized operand) forms; the FP16/FP variants take a
  // raw encoding.
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  // NEON "three different" forms; the L/W/HN suffixes name the long, wide and
  // high-narrow variants respectively.
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  // NEON two-register-miscellaneous forms. `value` is an extra immediate
  // operand used by some forms (defaults to zero).
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  // NEON by-indexed-element forms; `vm_index` selects the element of vm.
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  // NEON shift-by-immediate forms. `immh_immb` is a raw, pre-encoded
  // immh:immb field; the `shift` variants take a shift amount to encode.
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
8106
  // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
  // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
  void ResolveSVEImm8Shift(int* imm8, int* shift);

  // Encode the addressing-mode-dependent field for load/store-structure
  // instructions from `addr`.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size_in_bytes_log2,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  // `element_shift` is presumably the log2 scale from bytes to the offset
  // unit -- TODO(review): confirm against the implementation.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
8132
8133 // Emit the instruction in buffer_.
Emit(Instr instruction)8134 void Emit(Instr instruction) {
8135 VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
8136 VIXL_ASSERT(AllowAssembler());
8137 GetBuffer()->Emit32(instruction);
8138 }
8139
  // Position-independent-code mode for this assembler; see
  // PositionIndependentCodeOption.
  PositionIndependentCodeOption pic_;

  // The set of CPU features tracked for this assembler; queried via Has() by
  // the CPUHas() convenience wrappers.
  CPUFeatures cpu_features_;
8143 };
8144
8145
8146 template <typename T>
UpdateValue(T new_value,const Assembler * assembler)8147 void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
8148 return UpdateValue(new_value,
8149 assembler->GetBuffer().GetStartAddress<uint8_t*>());
8150 }
8151
8152
8153 template <typename T>
UpdateValue(T high64,T low64,const Assembler * assembler)8154 void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
8155 return UpdateValue(high64,
8156 low64,
8157 assembler->GetBuffer().GetStartAddress<uint8_t*>());
8158 }
8159
8160
8161 } // namespace aarch64
8162
8163 // Required InvalSet template specialisations.
8164 // TODO: These template specialisations should not live in this file. Move
8165 // Label out of the aarch64 namespace in order to share its implementation
8166 // later.
8167 #define INVAL_SET_TEMPLATE_PARAMETERS \
8168 ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t, \
8169 aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
8170 aarch64::Label::kReclaimFactor
// In the set of label links, each element (a link offset) is its own key.
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
// Setting the key overwrites the element itself, since element and key
// coincide for label links.
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
8181 #undef INVAL_SET_TEMPLATE_PARAMETERS
8182
8183 } // namespace vixl
8184
8185 #endif // VIXL_AARCH64_ASSEMBLER_AARCH64_H_
8186