1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include "macro-assembler-aarch64.h"
28 
29 #include <cctype>
30 
31 namespace vixl {
32 namespace aarch64 {
33 
34 
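// `monitor_` counts outstanding blocks on the pool; emission is deferred while
// it is non-zero. When the last block is released, verify that the deferral
// did not run past the emission checkpoint.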
35 void Pool::Release() {
36   if (--monitor_ == 0) {
37     // Ensure the pool has not been blocked for too long.
38     VIXL_ASSERT(masm_->GetCursorOffset() < checkpoint_);
39   }
40 }
41 
42 
43 void Pool::SetNextCheckpoint(ptrdiff_t checkpoint) {
44   masm_->checkpoint_ = std::min(masm_->checkpoint_, checkpoint);
45   checkpoint_ = checkpoint;
46 }
47 
48 
49 #ifndef PANDA_BUILD
50 LiteralPool::LiteralPool(MacroAssembler* masm)
51     : Pool(masm),
52       size_(0),
53       first_use_(-1),
54       recommended_checkpoint_(kNoCheckpointRequired) {}
55 #else
56 LiteralPool::LiteralPool(AllocatorWrapper allocator, MacroAssembler* masm)
57     : Pool(masm),
58       entries_(allocator.Adapter()),
59       size_(0),
60       first_use_(-1),
61       recommended_checkpoint_(kNoCheckpointRequired),
62       deleted_on_destruction_(allocator.Adapter()),
63       allocator_(allocator) {}
64 #endif
65 
66 LiteralPool::~LiteralPool() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
67   VIXL_ASSERT(!IsBlocked());
68 #ifndef VIXL_USE_PANDA_ALLOC
69   VIXL_ASSERT(IsEmpty());
70   for (std::vector<RawLiteral*>::iterator it = deleted_on_destruction_.begin();
71        it != deleted_on_destruction_.end();
72        it++) {
73     delete *it;
74   }
75 #endif
76 }
77 
78 
79 void LiteralPool::Reset() {
80 #ifndef VIXL_USE_PANDA_ALLOC
81   std::vector<RawLiteral *>::iterator it, end;
82   for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
83     RawLiteral* literal = *it;
84     if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) {
85       delete literal;
86     }
87   }
88 #endif
89   entries_.clear();
90   size_ = 0;
91   first_use_ = -1;
92   Pool::Reset();
93   recommended_checkpoint_ = kNoCheckpointRequired;
94 }
95 
96 
97 void LiteralPool::CheckEmitFor(size_t amount, EmitOption option) {
98   if (IsEmpty() || IsBlocked()) return;
99 
100   ptrdiff_t distance = masm_->GetCursorOffset() + amount - first_use_;
101   if (distance >= kRecommendedLiteralPoolRange) {
102     Emit(option);
103   }
104 }
105 
106 
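// Emit the literal pool now if, at its maximum size, it could push a branch
// with the given range out of reach once the pool is eventually placed.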
107 void LiteralPool::CheckEmitForBranch(size_t range) {
108   if (IsEmpty() || IsBlocked()) return;
109   if (GetMaxSize() >= range) Emit();
110 }
111 
112 // We use a subclass to access the protected `ExactAssemblyScope` constructor
113 // giving us control over the pools. This allows us to use this scope within
114 // pool-emitting code without creating a circular dependency.
115 // We keep the constructor private to restrict usage of this helper class.
116 class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
117  private:
118   ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm, size_t size)
119       : ExactAssemblyScope(masm,
120                            size,
121                            ExactAssemblyScope::kExactSize,
122                            ExactAssemblyScope::kIgnorePools) {}
123 
124   friend void LiteralPool::Emit(LiteralPool::EmitOption);
125   friend void VeneerPool::Emit(VeneerPool::EmitOption, size_t);
126 };
127 
128 
129 void LiteralPool::Emit(EmitOption option) {
130   // There is an issue if we are asked to emit a blocked or empty pool.
131   VIXL_ASSERT(!IsBlocked());
132   VIXL_ASSERT(!IsEmpty());
133 
134   size_t pool_size = GetSize();
135   size_t emit_size = pool_size;
136   if (option == kBranchRequired) emit_size += kInstructionSize;
137 #ifndef PANDA_BUILD
138   Label end_of_pool;
139 #else
140   Label end_of_pool(allocator_);
141 #endif
142 
143   VIXL_ASSERT(emit_size % kInstructionSize == 0);
144   {
145     CodeBufferCheckScope guard(masm_,
146                                emit_size,
147                                CodeBufferCheckScope::kCheck,
148                                CodeBufferCheckScope::kExactSize);
149 #ifdef VIXL_DEBUG
150     // Also explicitly disallow usage of the `MacroAssembler` here.
151     masm_->SetAllowMacroInstructions(false);
152 #endif
153     if (option == kBranchRequired) {
154       ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize);
155       masm_->b(&end_of_pool);
156     }
157 
158     {
159       // Marker indicating the size of the literal pool in 32-bit words.
160       VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0);
161       ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize);
162       masm_->ldr(xzr, static_cast<int>(pool_size / kWRegSizeInBytes));
163     }
164 
165     // Now populate the literal pool.
166 #ifndef PANDA_BUILD
167     std::vector<RawLiteral *>::iterator it, end;
168 #else
169     Vector<RawLiteral*>::iterator it, end;
170 #endif
171     for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
172       VIXL_ASSERT((*it)->IsUsed());
173       masm_->place(*it);
174     }
175 
176     if (option == kBranchRequired) masm_->bind(&end_of_pool);
177 #ifdef VIXL_DEBUG
178     masm_->SetAllowMacroInstructions(true);
179 #endif
180   }
181 
182   Reset();
183 }
184 
185 
186 void LiteralPool::AddEntry(RawLiteral* literal) {
187   // A literal must be registered immediately before its first use. Here we
188   // cannot control that it is its first use, but we check no code has been
189   // emitted since its last use.
190   VIXL_ASSERT(masm_->GetCursorOffset() == literal->GetLastUse());
191 
192   UpdateFirstUse(masm_->GetCursorOffset());
193   VIXL_ASSERT(masm_->GetCursorOffset() >= first_use_);
194   entries_.push_back(literal);
195   size_ += literal->GetSize();
196 }
197 
198 
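// The first use of a literal bounds how far code generation may proceed before
// the pool must be emitted: a pc-relative load-literal can only reach
// kLoadLiteralRange bytes, so the checkpoint is set relative to `first_use_`.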
199 void LiteralPool::UpdateFirstUse(ptrdiff_t use_position) {
200   first_use_ = std::min(first_use_, use_position);
201   if (first_use_ == -1) {
202     first_use_ = use_position;
203     SetNextRecommendedCheckpoint(GetNextRecommendedCheckpoint());
204     SetNextCheckpoint(first_use_ + Instruction::kLoadLiteralRange);
205   } else {
206     VIXL_ASSERT(use_position > first_use_);
207   }
208 }
209 
210 
211 void VeneerPool::Reset() {
212   Pool::Reset();
213   unresolved_branches_.Reset();
214 }
215 
216 
217 void VeneerPool::Release() {
218   --monitor_;
219 #ifndef PANDA_BUILD
220   if (monitor_ == 0) {
221     VIXL_ASSERT(IsEmpty() || masm_->GetCursorOffset() <
222                                  unresolved_branches_.GetFirstLimit());
223   }
224 #else
225   // Assert disabled, because we use our own allocator.
226 #endif
227 }
228 
229 
230 void VeneerPool::RegisterUnresolvedBranch(ptrdiff_t branch_pos,
231                                           Label* label,
232                                           ImmBranchType branch_type) {
233   VIXL_ASSERT(!label->IsBound());
234   BranchInfo branch_info = BranchInfo(branch_pos, label, branch_type);
235   unresolved_branches_.insert(branch_info);
236   UpdateNextCheckPoint();
237   // TODO: In debug mode register the label with the assembler to make sure it
238   // is bound with masm Bind and not asm bind.
239 }
240 
241 
242 void VeneerPool::DeleteUnresolvedBranchInfoForLabel(Label* label) {
243   if (IsEmpty()) {
244     VIXL_ASSERT(checkpoint_ == kNoCheckpointRequired);
245     return;
246   }
247 
248   if (label->IsLinked()) {
249     Label::LabelLinksIterator links_it(label);
250     for (; !links_it.Done(); links_it.Advance()) {
251       ptrdiff_t link_offset = *links_it.Current();
252       Instruction* link = masm_->GetInstructionAt(link_offset);
253 
254       // ADR instructions are not handled.
255       if (BranchTypeUsesVeneers(link->GetBranchType())) {
256         BranchInfo branch_info(link_offset, label, link->GetBranchType());
257         unresolved_branches_.erase(branch_info);
258       }
259     }
260   }
261 
262   UpdateNextCheckPoint();
263 }
264 
265 
266 bool VeneerPool::ShouldEmitVeneer(int64_t first_unreacheable_pc,
267                                   size_t amount) {
268   ptrdiff_t offset =
269       kPoolNonVeneerCodeSize + amount + GetMaxSize() + GetOtherPoolsMaxSize();
270   return (masm_->GetCursorOffset() + offset) > first_unreacheable_pc;
271 }
272 
273 
274 void VeneerPool::CheckEmitFor(size_t amount, EmitOption option) {
275   if (IsEmpty()) return;
276 
277 #ifndef PANDA_BUILD
278   VIXL_ASSERT(masm_->GetCursorOffset() + kPoolNonVeneerCodeSize <
279               unresolved_branches_.GetFirstLimit());
280 #else
281   // Assert disabled: codegen may create unused Labels allocated in one chunk.
282 #endif
283 
284   if (IsBlocked()) return;
285 
286   if (ShouldEmitVeneers(amount)) {
287     Emit(option, amount);
288   } else {
289     UpdateNextCheckPoint();
290   }
291 }
292 
293 
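// A veneer is an unconditional branch placed close to the original
// short-range branch: the original branch is re-targeted to the veneer, and
// the veneer branches on to the distant label.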
294 void VeneerPool::Emit(EmitOption option, size_t amount) {
295   // There is an issue if we are asked to emit a blocked or empty pool.
296   VIXL_ASSERT(!IsBlocked());
297   VIXL_ASSERT(!IsEmpty());
298 
299 #ifndef PANDA_BUILD
300   Label end;
301 #else
302   Label end(allocator_);
303 #endif
304   if (option == kBranchRequired) {
305     ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
306     masm_->b(&end);
307   }
308 
309   // We want to avoid generating veneer pools too often, so generate veneers for
310   // branches that don't immediately require a veneer but will soon go out of
311   // range.
312   static const size_t kVeneerEmissionMargin = 1 * KBytes;
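  // For example, with a 1KB margin a branch that would only go out of range
  // 800 bytes from here still gets its veneer in this pass, rather than
  // forcing another pool emission shortly afterwards.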
313 
314 #ifndef PANDA_BUILD
315   for (BranchInfoSetIterator it(&unresolved_branches_); !it.Done();) {
316 #else
317   for (BranchInfoSetIterator it(allocator_, &unresolved_branches_); !it.Done();) {
318 #endif
319     BranchInfo* branch_info = it.Current();
320     if (branch_info && ShouldEmitVeneer(branch_info->first_unreacheable_pc_,
321                          amount + kVeneerEmissionMargin)) {
322       CodeBufferCheckScope scope(masm_,
323                                  kVeneerCodeSize,
324                                  CodeBufferCheckScope::kCheck,
325                                  CodeBufferCheckScope::kExactSize);
326       ptrdiff_t branch_pos = branch_info->pc_offset_;
327       Instruction* branch = masm_->GetInstructionAt(branch_pos);
328       Label* label = branch_info->label_;
329 
330       // Patch the branch to point to the current position, and emit a branch
331       // to the label.
332       Instruction* veneer = masm_->GetCursorAddress<Instruction*>();
333       branch->SetImmPCOffsetTarget(veneer);
334       {
335         ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
336         masm_->b(label);
337       }
338 
339       // Update the label. The patched branch no longer points to it.
340       label->DeleteLink(branch_pos);
341 
342       it.DeleteCurrentAndAdvance();
343     } else {
344       it.AdvanceToNextType();
345     }
346   }
347 
348   UpdateNextCheckPoint();
349 
350   masm_->bind(&end);
351 }
352 
353 #ifndef PANDA_BUILD
354 MacroAssembler::MacroAssembler(PositionIndependentCodeOption pic)
355 #else
356 MacroAssembler::MacroAssembler(PandaAllocator* allocator,
357                                PositionIndependentCodeOption pic)
358 #endif
359     : Assembler(pic),
360 #ifdef VIXL_DEBUG
361       allow_macro_instructions_(true),
362 #endif
363       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
364       sp_(sp),
365       tmp_list_(ip0, ip1),
366       v_tmp_list_(d30, d31),
367       p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
368       current_scratch_scope_(NULL),
369 #ifndef PANDA_BUILD
370       literal_pool_(this),
371       veneer_pool_(this),
372       recommended_checkpoint_(Pool::kNoCheckpointRequired),
373       fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
374 #else
375       literal_pool_(allocator, this),
376       veneer_pool_(allocator, this),
377       recommended_checkpoint_(Pool::kNoCheckpointRequired),
378       fp_nan_propagation_(NoFPMacroNaNPropagationSelected),
379       allocator_(allocator) {
380 #endif
381   checkpoint_ = GetNextCheckPoint();
382 #ifndef VIXL_DEBUG
383   USE(allow_macro_instructions_);
384 #endif
385 }
386 
387 #ifndef PANDA_BUILD
388 MacroAssembler::MacroAssembler(size_t capacity,
389                                PositionIndependentCodeOption pic)
390     : Assembler(capacity, pic),
391 #ifdef VIXL_DEBUG
392       allow_macro_instructions_(true),
393 #endif
394       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
395       sp_(sp),
396       tmp_list_(ip0, ip1),
397       v_tmp_list_(d30, d31),
398       p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
399       current_scratch_scope_(NULL),
400       literal_pool_(this),
401       veneer_pool_(this),
402       recommended_checkpoint_(Pool::kNoCheckpointRequired),
403       fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
404   checkpoint_ = GetNextCheckPoint();
405 }
406 #endif
407 
408 #ifndef PANDA_BUILD
409 MacroAssembler::MacroAssembler(byte* buffer,
410                                size_t capacity,
411                                PositionIndependentCodeOption pic)
412     : Assembler(buffer, capacity, pic),
413 #ifdef VIXL_DEBUG
414       allow_macro_instructions_(true),
415 #endif
416       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
417       sp_(sp),
418       tmp_list_(ip0, ip1),
419       v_tmp_list_(d30, d31),
420       p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
421       current_scratch_scope_(NULL),
422       literal_pool_(this),
423       veneer_pool_(this),
424       recommended_checkpoint_(Pool::kNoCheckpointRequired),
425       fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
426   checkpoint_ = GetNextCheckPoint();
427 }
428 #else
429 MacroAssembler::MacroAssembler(PandaAllocator* allocator, byte* buffer,
430                                size_t capacity,
431                                PositionIndependentCodeOption pic)
432     : Assembler(buffer, capacity, pic),
433 #ifdef VIXL_DEBUG
434       allow_macro_instructions_(true),
435 #endif
436       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
437       sp_(sp),
438       tmp_list_(ip0, ip1),
439       v_tmp_list_(d30, d31),
440       p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
441       current_scratch_scope_(NULL),
442       literal_pool_(allocator, this),
443       veneer_pool_(allocator, this),
444       recommended_checkpoint_(Pool::kNoCheckpointRequired), allocator_(allocator) {
445   checkpoint_ = GetNextCheckPoint();
446 }
447 #endif
448 
449 MacroAssembler::~MacroAssembler() {}
450 
451 
452 void MacroAssembler::Reset() {
453   Assembler::Reset();
454 
455   VIXL_ASSERT(!literal_pool_.IsBlocked());
456   literal_pool_.Reset();
457   veneer_pool_.Reset();
458 
459   checkpoint_ = GetNextCheckPoint();
460 }
461 
462 
463 void MacroAssembler::FinalizeCode(FinalizeOption option) {
464   if (!literal_pool_.IsEmpty()) {
465     // The user may decide to emit more code after Finalize; emit a branch if
466     // that's the case.
467     literal_pool_.Emit(option == kUnreachable ? Pool::kNoBranchRequired
468                                               : Pool::kBranchRequired);
469   }
470   VIXL_ASSERT(veneer_pool_.IsEmpty());
471 
472   Assembler::FinalizeCode();
473 }
474 
475 
476 void MacroAssembler::CheckEmitFor(size_t amount) {
477   CheckEmitPoolsFor(amount);
478   GetBuffer()->EnsureSpaceFor(amount);
479 }
480 
481 
482 void MacroAssembler::CheckEmitPoolsFor(size_t amount) {
483   literal_pool_.CheckEmitFor(amount);
484   veneer_pool_.CheckEmitFor(amount);
485   checkpoint_ = GetNextCheckPoint();
486 }
487 
488 
489 int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm,
490                                         const Register& rd,
491                                         uint64_t imm) {
492   bool emit_code = (masm != NULL);
493   VIXL_ASSERT(IsUint32(imm) || IsInt32(imm) || rd.Is64Bits());
494   // The worst case for size is mov 64-bit immediate to sp:
495   //  * up to 4 instructions to materialise the constant
496   //  * 1 instruction to move to sp
497   MacroEmissionCheckScope guard(masm);
498 
499   // Immediates on AArch64 can be produced using an initial value, and zero to
500   // three move-keep operations.
501   //
502   // Initial values can be generated with:
503   //  1. 64-bit move zero (movz).
504   //  2. 32-bit move inverted (movn).
505   //  3. 64-bit move inverted.
506   //  4. 32-bit orr immediate.
507   //  5. 64-bit orr immediate.
508   // Move-keep may then be used to modify each of the 16-bit half words.
509   //
510   // The code below supports all five initial value generators, and
511   // applying move-keep operations to move-zero and move-inverted initial
512   // values.
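  // For example, 0x0000cafe00001234 can be materialised with
  //   movz xd, #0x1234
  //   movk xd, #0xcafe, lsl #32
  // since the two remaining halfwords are zero and are simply skipped.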
513 
514   // Try to move the immediate in one instruction, and if that fails, switch to
515   // using multiple instructions.
516   if (OneInstrMoveImmediateHelper(masm, rd, imm)) {
517     return 1;
518   } else {
519     int instruction_count = 0;
520     unsigned reg_size = rd.GetSizeInBits();
521 
522     // Generic immediate case. Imm will be represented by
523     //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
524     // A move-zero or move-inverted is generated for the first non-zero or
525     // non-0xffff immX, and a move-keep for subsequent non-zero immX.
526 
527     uint64_t ignored_halfword = 0;
528     bool invert_move = false;
529     // If the number of 0xffff halfwords is greater than the number of 0x0000
530     // halfwords, it's more efficient to use move-inverted.
531     if (CountClearHalfWords(~imm, reg_size) >
532         CountClearHalfWords(imm, reg_size)) {
533       ignored_halfword = 0xffff;
534       invert_move = true;
535     }
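    // For example, 0xffffffffffff1234 has three 0xffff halfwords, so it is
    // materialised with a single move-inverted: movn xd, #0xedcb.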
536 
537     // Mov instructions can't move values into the stack pointer, so set up a
538     // temporary register, if needed.
539     UseScratchRegisterScope temps;
540     Register temp;
541     if (emit_code) {
542       temps.Open(masm);
543       temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;
544     }
545 
546     // Iterate through the halfwords. Use movn/movz for the first non-ignored
547     // halfword, and movk for subsequent halfwords.
548     VIXL_ASSERT((reg_size % 16) == 0);
549     bool first_mov_done = false;
550     for (unsigned i = 0; i < (reg_size / 16); i++) {
551       uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
552       if (imm16 != ignored_halfword) {
553         if (!first_mov_done) {
554           if (invert_move) {
555             if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i);
556             instruction_count++;
557           } else {
558             if (emit_code) masm->movz(temp, imm16, 16 * i);
559             instruction_count++;
560           }
561           first_mov_done = true;
562         } else {
563           // Construct a wider constant.
564           if (emit_code) masm->movk(temp, imm16, 16 * i);
565           instruction_count++;
566         }
567       }
568     }
569 
570     VIXL_ASSERT(first_mov_done);
571 
572     // Move the temporary if the original destination register was the stack
573     // pointer.
574     if (rd.IsSP()) {
575       if (emit_code) masm->mov(rd, temp);
576       instruction_count++;
577     }
578     return instruction_count;
579   }
580 }
581 
582 
583 void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
584   VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
585               ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
586   if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
587     B(static_cast<Condition>(type), label);
588   } else {
589     switch (type) {
590       case always:
591         B(label);
592         break;
593       case never:
594         break;
595       case reg_zero:
596         Cbz(reg, label);
597         break;
598       case reg_not_zero:
599         Cbnz(reg, label);
600         break;
601       case reg_bit_clear:
602         Tbz(reg, bit, label);
603         break;
604       case reg_bit_set:
605         Tbnz(reg, bit, label);
606         break;
607       default:
608         VIXL_UNREACHABLE();
609     }
610   }
611 }
612 
613 
614 void MacroAssembler::B(Label* label) {
615   // We don't need to check the size of the literal pool, because the size of
616   // the literal pool is already bounded by the literal range, which is smaller
617   // than the range of this branch.
618   VIXL_ASSERT(Instruction::GetImmBranchForwardRange(UncondBranchType) >
619               Instruction::kLoadLiteralRange);
620   SingleEmissionCheckScope guard(this);
621   b(label);
622 }
623 
624 
625 void MacroAssembler::B(Label* label, Condition cond) {
626   // We don't need to check the size of the literal pool, because the size of
627   // the literal pool is already bounded by the literal range, which is smaller
628   // than the range of this branch.
629   VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CondBranchType) >
630               Instruction::kLoadLiteralRange);
631   VIXL_ASSERT(allow_macro_instructions_);
632   VIXL_ASSERT((cond != al) && (cond != nv));
633   EmissionCheckScope guard(this, 2 * kInstructionSize);
634 
635   if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
636 #ifndef PANDA_BUILD
637     Label done;
638 #else
639     Label done(allocator_);
640 #endif
641     b(&done, InvertCondition(cond));
642     b(label);
643     bind(&done);
644   } else {
645     if (!label->IsBound()) {
646       veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
647                                             label,
648                                             CondBranchType);
649     }
650     b(label, cond);
651   }
652 }
653 
654 
655 void MacroAssembler::Cbnz(const Register& rt, Label* label) {
656   // We don't need to check the size of the literal pool, because the size of
657   // the literal pool is already bounded by the literal range, which is smaller
658   // than the range of this branch.
659   VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) >
660               Instruction::kLoadLiteralRange);
661   VIXL_ASSERT(allow_macro_instructions_);
662   VIXL_ASSERT(!rt.IsZero());
663   EmissionCheckScope guard(this, 2 * kInstructionSize);
664 
665   if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
666 #ifndef PANDA_BUILD
667     Label done;
668 #else
669     Label done(allocator_);
670 #endif
671     cbz(rt, &done);
672     b(label);
673     bind(&done);
674   } else {
675     if (!label->IsBound()) {
676       veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
677                                             label,
678                                             CompareBranchType);
679     }
680     cbnz(rt, label);
681   }
682 }
683 
684 
685 void MacroAssembler::Cbz(const Register& rt, Label* label) {
686   // We don't need to check the size of the literal pool, because the size of
687   // the literal pool is already bounded by the literal range, which is smaller
688   // than the range of this branch.
689   VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) >
690               Instruction::kLoadLiteralRange);
691   VIXL_ASSERT(allow_macro_instructions_);
692   VIXL_ASSERT(!rt.IsZero());
693   EmissionCheckScope guard(this, 2 * kInstructionSize);
694 
695   if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
696 #ifndef PANDA_BUILD
697     Label done;
698 #else
699     Label done(allocator_);
700 #endif
701     cbnz(rt, &done);
702     b(label);
703     bind(&done);
704   } else {
705     if (!label->IsBound()) {
706       veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
707                                             label,
708                                             CompareBranchType);
709     }
710     cbz(rt, label);
711   }
712 }
713 
714 
715 void MacroAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) {
716   // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch
717   // can become impossible because we emit the literal pool first.
718   literal_pool_.CheckEmitForBranch(
719       Instruction::GetImmBranchForwardRange(TestBranchType));
720   VIXL_ASSERT(allow_macro_instructions_);
721   VIXL_ASSERT(!rt.IsZero());
722   EmissionCheckScope guard(this, 2 * kInstructionSize);
723 
724   if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
725 #ifndef PANDA_BUILD
726     Label done;
727 #else
728     Label done(allocator_);
729 #endif
730     tbz(rt, bit_pos, &done);
731     b(label);
732     bind(&done);
733   } else {
734     if (!label->IsBound()) {
735       veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
736                                             label,
737                                             TestBranchType);
738     }
739     tbnz(rt, bit_pos, label);
740   }
741 }
742 
743 
744 void MacroAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) {
745   // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch
746   // can become impossible because we emit the literal pool first.
747   literal_pool_.CheckEmitForBranch(
748       Instruction::GetImmBranchForwardRange(TestBranchType));
749   VIXL_ASSERT(allow_macro_instructions_);
750   VIXL_ASSERT(!rt.IsZero());
751   EmissionCheckScope guard(this, 2 * kInstructionSize);
752 
753   if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
754 #ifndef PANDA_BUILD
755     Label done;
756 #else
757     Label done(allocator_);
758 #endif
759     tbnz(rt, bit_pos, &done);
760     b(label);
761     bind(&done);
762   } else {
763     if (!label->IsBound()) {
764       veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
765                                             label,
766                                             TestBranchType);
767     }
768     tbz(rt, bit_pos, label);
769   }
770 }
771 
772 void MacroAssembler::Bind(Label* label, BranchTargetIdentifier id) {
773   VIXL_ASSERT(allow_macro_instructions_);
774   veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
775   if (id == EmitBTI_none) {
776     bind(label);
777   } else {
778     // Emit this inside an ExactAssemblyScope to ensure there are no extra
779     // instructions between the bind and the target identifier instruction.
780     ExactAssemblyScope scope(this, kInstructionSize);
781     bind(label);
782     if (id == EmitPACIASP) {
783       paciasp();
784     } else if (id == EmitPACIBSP) {
785       pacibsp();
786     } else {
787       bti(id);
788     }
789   }
790 }
791 
792 // Bind a label to a specified offset from the start of the buffer.
793 void MacroAssembler::BindToOffset(Label* label, ptrdiff_t offset) {
794   VIXL_ASSERT(allow_macro_instructions_);
795   veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
796   Assembler::BindToOffset(label, offset);
797 }
798 
799 
800 void MacroAssembler::And(const Register& rd,
801                          const Register& rn,
802                          const Operand& operand) {
803   VIXL_ASSERT(allow_macro_instructions_);
804   LogicalMacro(rd, rn, operand, AND);
805 }
806 
807 
808 void MacroAssembler::Ands(const Register& rd,
809                           const Register& rn,
810                           const Operand& operand) {
811   VIXL_ASSERT(allow_macro_instructions_);
812   LogicalMacro(rd, rn, operand, ANDS);
813 }
814 
815 
816 void MacroAssembler::Tst(const Register& rn, const Operand& operand) {
817   VIXL_ASSERT(allow_macro_instructions_);
818   Ands(AppropriateZeroRegFor(rn), rn, operand);
819 }
820 
821 
822 void MacroAssembler::Bic(const Register& rd,
823                          const Register& rn,
824                          const Operand& operand) {
825   VIXL_ASSERT(allow_macro_instructions_);
826   LogicalMacro(rd, rn, operand, BIC);
827 }
828 
829 
830 void MacroAssembler::Bics(const Register& rd,
831                           const Register& rn,
832                           const Operand& operand) {
833   VIXL_ASSERT(allow_macro_instructions_);
834   LogicalMacro(rd, rn, operand, BICS);
835 }
836 
837 
838 void MacroAssembler::Orr(const Register& rd,
839                          const Register& rn,
840                          const Operand& operand) {
841   VIXL_ASSERT(allow_macro_instructions_);
842   LogicalMacro(rd, rn, operand, ORR);
843 }
844 
845 
846 void MacroAssembler::Orn(const Register& rd,
847                          const Register& rn,
848                          const Operand& operand) {
849   VIXL_ASSERT(allow_macro_instructions_);
850   LogicalMacro(rd, rn, operand, ORN);
851 }
852 
853 
854 void MacroAssembler::Eor(const Register& rd,
855                          const Register& rn,
856                          const Operand& operand) {
857   VIXL_ASSERT(allow_macro_instructions_);
858   LogicalMacro(rd, rn, operand, EOR);
859 }
860 
861 
862 void MacroAssembler::Eon(const Register& rd,
863                          const Register& rn,
864                          const Operand& operand) {
865   VIXL_ASSERT(allow_macro_instructions_);
866   LogicalMacro(rd, rn, operand, EON);
867 }
868 
869 
870 void MacroAssembler::LogicalMacro(const Register& rd,
871                                   const Register& rn,
872                                   const Operand& operand,
873                                   LogicalOp op) {
874   // The worst case for size is logical immediate to sp:
875   //  * up to 4 instructions to materialise the constant
876   //  * 1 instruction to do the operation
877   //  * 1 instruction to move to sp
878   MacroEmissionCheckScope guard(this);
879   UseScratchRegisterScope temps(this);
880   // Use `rd` as a temp, if we can.
881   temps.Include(rd);
882   // We read `rn` after evaluating `operand`.
883   temps.Exclude(rn);
884   // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
885   // because we don't need it after it is evaluated.
886 
887   if (operand.IsImmediate()) {
888     uint64_t immediate = operand.GetImmediate();
889     unsigned reg_size = rd.GetSizeInBits();
890 
891     // If the operation is NOT, invert the operation and immediate.
892     if ((op & NOT) == NOT) {
893       op = static_cast<LogicalOp>(op & ~NOT);
894       immediate = ~immediate;
895     }
896 
897     // Ignore the top 32 bits of an immediate if we're moving to a W register.
898     if (rd.Is32Bits()) {
899       // Check that the top 32 bits are consistent.
900       VIXL_ASSERT(((immediate >> kWRegSize) == 0) ||
901                   ((immediate >> kWRegSize) == 0xffffffff));
902       immediate &= kWRegMask;
903     }
904 
905     VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate));
906 
907     // Special cases for all set or all clear immediates.
908     if (immediate == 0) {
909       switch (op) {
910         case AND:
911           Mov(rd, 0);
912           return;
913         case ORR:
914           VIXL_FALLTHROUGH();
915         case EOR:
916           Mov(rd, rn);
917           return;
918         case ANDS:
919           VIXL_FALLTHROUGH();
920         case BICS:
921           break;
922         default:
923           VIXL_UNREACHABLE();
924       }
925     } else if ((rd.Is64Bits() && (immediate == UINT64_C(0xffffffffffffffff))) ||
926                (rd.Is32Bits() && (immediate == UINT64_C(0x00000000ffffffff)))) {
927       switch (op) {
928         case AND:
929           Mov(rd, rn);
930           return;
931         case ORR:
932           Mov(rd, immediate);
933           return;
934         case EOR:
935           Mvn(rd, rn);
936           return;
937         case ANDS:
938           VIXL_FALLTHROUGH();
939         case BICS:
940           break;
941         default:
942           VIXL_UNREACHABLE();
943       }
944     }
945 
946     unsigned n, imm_s, imm_r;
947     if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
948       // Immediate can be encoded in the instruction.
949       LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
950     } else {
951       // Immediate can't be encoded: synthesize using move immediate.
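      // For example, `And(x0, x1, 0x12345678)` cannot use the bitmask-immediate
      // encoding, so the constant is first moved into a scratch register and
      // the operation is emitted in register form.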
952       Register temp = temps.AcquireSameSizeAs(rn);
953       VIXL_ASSERT(!temp.Aliases(rn));
954 
955       // If the left-hand input is the stack pointer, we can't pre-shift the
956       // immediate, as the encoding won't allow the subsequent post shift.
957       PreShiftImmMode mode = rn.IsSP() ? kNoShift : kAnyShift;
958       Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate, mode);
959 
960       if (rd.Is(sp) || rd.Is(wsp)) {
961         // If rd is the stack pointer we cannot use it as the destination
962         // register so we use the temp register as an intermediate again.
963         Logical(temp, rn, imm_operand, op);
964         Mov(rd, temp);
965       } else {
966         Logical(rd, rn, imm_operand, op);
967       }
968     }
969   } else if (operand.IsExtendedRegister()) {
970     VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
971     // Add/sub extended supports shift <= 4. We want to support exactly the
972     // same modes here.
973     VIXL_ASSERT(operand.GetShiftAmount() <= 4);
974     VIXL_ASSERT(
975         operand.GetRegister().Is64Bits() ||
976         ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
977 
978     Register temp = temps.AcquireSameSizeAs(rn);
979     VIXL_ASSERT(!temp.Aliases(rn));
980     EmitExtendShift(temp,
981                     operand.GetRegister(),
982                     operand.GetExtend(),
983                     operand.GetShiftAmount());
984     Logical(rd, rn, Operand(temp), op);
985   } else {
986     // The operand can be encoded in the instruction.
987     VIXL_ASSERT(operand.IsShiftedRegister());
988     Logical(rd, rn, operand, op);
989   }
990 }
991 
992 
993 void MacroAssembler::Mov(const Register& rd,
994                          const Operand& operand,
995                          DiscardMoveMode discard_mode) {
996   VIXL_ASSERT(allow_macro_instructions_);
997   // The worst case for size is mov immediate with up to 4 instructions.
998   MacroEmissionCheckScope guard(this);
999 
1000   if (operand.IsImmediate()) {
1001     // Call the macro assembler for generic immediates.
1002     Mov(rd, operand.GetImmediate());
1003   } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
1004     // Emit a shift instruction if moving a shifted register. This operation
1005     // could also be achieved using an orr instruction (like orn used by Mvn),
1006     // but using a shift instruction makes the disassembly clearer.
1007     EmitShift(rd,
1008               operand.GetRegister(),
1009               operand.GetShift(),
1010               operand.GetShiftAmount());
1011   } else if (operand.IsExtendedRegister()) {
1012     // Emit an extend instruction if moving an extended register. This handles
1013     // extend with post-shift operations, too.
1014     EmitExtendShift(rd,
1015                     operand.GetRegister(),
1016                     operand.GetExtend(),
1017                     operand.GetShiftAmount());
1018   } else {
1019     Mov(rd, operand.GetRegister(), discard_mode);
1020   }
1021 }
1022 
1023 
1024 void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) {
1025   VIXL_ASSERT(IsUint16(imm));
1026   int byte1 = (imm & 0xff);
1027   int byte2 = ((imm >> 8) & 0xff);
1028   if (byte1 == byte2) {
1029     movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1);
1030   } else if (byte1 == 0) {
1031     movi(vd, byte2, LSL, 8);
1032   } else if (byte2 == 0) {
1033     movi(vd, byte1);
1034   } else if (byte1 == 0xff) {
1035     mvni(vd, ~byte2 & 0xff, LSL, 8);
1036   } else if (byte2 == 0xff) {
1037     mvni(vd, ~byte1 & 0xff);
1038   } else {
1039     UseScratchRegisterScope temps(this);
1040     Register temp = temps.AcquireW();
1041     movz(temp, imm);
1042     dup(vd, temp);
1043   }
1044 }
1045 
1046 
1047 void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) {
1048   VIXL_ASSERT(IsUint32(imm));
1049 
1050   uint8_t bytes[sizeof(imm)];
1051   memcpy(bytes, &imm, sizeof(imm));
1052 
1053   // All bytes are either 0x00 or 0xff.
1054   {
1055     bool all0orff = true;
1056     for (int i = 0; i < 4; ++i) {
1057       if ((bytes[i] != 0) && (bytes[i] != 0xff)) {
1058         all0orff = false;
1059         break;
1060       }
1061     }
1062 
1063     if (all0orff == true) {
1064       movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm));
1065       return;
1066     }
1067   }
1068 
1069   // Of the 4 bytes, only one byte is non-zero.
1070   for (int i = 0; i < 4; i++) {
1071     if ((imm & (0xff << (i * 8))) == imm) {
1072       movi(vd, bytes[i], LSL, i * 8);
1073       return;
1074     }
1075   }
1076 
1077   // Of the 4 bytes, only one byte is not 0xff.
1078   for (int i = 0; i < 4; i++) {
1079     uint32_t mask = ~(0xff << (i * 8));
1080     if ((imm & mask) == mask) {
1081       mvni(vd, ~bytes[i] & 0xff, LSL, i * 8);
1082       return;
1083     }
1084   }
1085 
1086   // Immediate is of the form 0x00MMFFFF.
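  // For example, 0x0012ffff is encoded as movi vd, #0x12, MSL #16; the MSL
  // shift fills the low bits with ones.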
1087   if ((imm & 0xff00ffff) == 0x0000ffff) {
1088     movi(vd, bytes[2], MSL, 16);
1089     return;
1090   }
1091 
1092   // Immediate is of the form 0x0000MMFF.
1093   if ((imm & 0xffff00ff) == 0x000000ff) {
1094     movi(vd, bytes[1], MSL, 8);
1095     return;
1096   }
1097 
1098   // Immediate is of the form 0xFFMM0000.
1099   if ((imm & 0xff00ffff) == 0xff000000) {
1100     mvni(vd, ~bytes[2] & 0xff, MSL, 16);
1101     return;
1102   }
1103   // Immediate is of the form 0xFFFFMM00.
1104   if ((imm & 0xffff00ff) == 0xffff0000) {
1105     mvni(vd, ~bytes[1] & 0xff, MSL, 8);
1106     return;
1107   }
1108 
1109   // Top and bottom 16-bits are equal.
1110   if (((imm >> 16) & 0xffff) == (imm & 0xffff)) {
1111     Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff);
1112     return;
1113   }
1114 
1115   // Default case.
1116   {
1117     UseScratchRegisterScope temps(this);
1118     Register temp = temps.AcquireW();
1119     Mov(temp, imm);
1120     dup(vd, temp);
1121   }
1122 }
1123 
1124 
1125 void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) {
1126   // All bytes are either 0x00 or 0xff.
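  // Such values can be encoded directly by the byte-mask form of the 64-bit
  // movi, e.g. 0x00ff00ff00ff00ff.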
1127   {
1128     bool all0orff = true;
1129     for (int i = 0; i < 8; ++i) {
1130       int byteval = (imm >> (i * 8)) & 0xff;
1131       if (byteval != 0 && byteval != 0xff) {
1132         all0orff = false;
1133         break;
1134       }
1135     }
1136     if (all0orff == true) {
1137       movi(vd, imm);
1138       return;
1139     }
1140   }
1141 
1142   // Top and bottom 32-bits are equal.
1143   if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) {
1144     Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff);
1145     return;
1146   }
1147 
1148   // Default case.
1149   {
1150     UseScratchRegisterScope temps(this);
1151     Register temp = temps.AcquireX();
1152     Mov(temp, imm);
1153     if (vd.Is1D()) {
1154       fmov(vd.D(), temp);
1155     } else {
1156       dup(vd.V2D(), temp);
1157     }
1158   }
1159 }
1160 
1161 
1162 void MacroAssembler::Movi(const VRegister& vd,
1163                           uint64_t imm,
1164                           Shift shift,
1165                           int shift_amount) {
1166   VIXL_ASSERT(allow_macro_instructions_);
1167   MacroEmissionCheckScope guard(this);
1168   if (shift_amount != 0 || shift != LSL) {
1169     movi(vd, imm, shift, shift_amount);
1170   } else if (vd.Is8B() || vd.Is16B()) {
1171     // 8-bit immediate.
1172     VIXL_ASSERT(IsUint8(imm));
1173     movi(vd, imm);
1174   } else if (vd.Is4H() || vd.Is8H()) {
1175     // 16-bit immediate.
1176     Movi16bitHelper(vd, imm);
1177   } else if (vd.Is2S() || vd.Is4S()) {
1178     // 32-bit immediate.
1179     Movi32bitHelper(vd, imm);
1180   } else {
1181     // 64-bit immediate.
1182     Movi64bitHelper(vd, imm);
1183   }
1184 }
1185 
1186 
1187 void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
1188   // TODO: Move 128-bit values in a more efficient way.
1189   VIXL_ASSERT(vd.Is128Bits());
1190   if (hi == lo) {
1191     Movi(vd.V2D(), lo);
1192     return;
1193   }
1194 
1195   Movi(vd.V1D(), lo);
1196 
1197   if (hi != 0) {
1198     UseScratchRegisterScope temps(this);
1199     // TODO: Figure out if using a temporary V register to materialise the
1200     // immediate is better.
1201     Register temp = temps.AcquireX();
1202     Mov(temp, hi);
1203     Ins(vd.V2D(), 1, temp);
1204   }
1205 }
1206 
1207 
1208 void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
1209   VIXL_ASSERT(allow_macro_instructions_);
1210   // The worst case for size is mvn immediate with up to 4 instructions.
1211   MacroEmissionCheckScope guard(this);
1212 
1213   if (operand.IsImmediate()) {
1214     // Call the macro assembler for generic immediates.
1215     Mvn(rd, operand.GetImmediate());
1216   } else if (operand.IsExtendedRegister()) {
1217     // Emit two instructions for the extend case. This differs from Mov, as
1218     // the extend and invert can't be achieved in one instruction.
1219     EmitExtendShift(rd,
1220                     operand.GetRegister(),
1221                     operand.GetExtend(),
1222                     operand.GetShiftAmount());
1223     mvn(rd, rd);
1224   } else {
1225     // Otherwise, register and shifted register cases can be handled by the
1226     // assembler directly, using orn.
1227     mvn(rd, operand);
1228   }
1229 }
1230 
1231 
1232 void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
1233   VIXL_ASSERT(allow_macro_instructions_);
1234   MoveImmediateHelper(this, rd, imm);
1235 }
1236 
1237 
1238 void MacroAssembler::Ccmp(const Register& rn,
1239                           const Operand& operand,
1240                           StatusFlags nzcv,
1241                           Condition cond) {
1242   VIXL_ASSERT(allow_macro_instructions_);
1243   if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
1244     ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMN);
1245   } else {
1246     ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
1247   }
1248 }
1249 
1250 
1251 void MacroAssembler::Ccmn(const Register& rn,
1252                           const Operand& operand,
1253                           StatusFlags nzcv,
1254                           Condition cond) {
1255   VIXL_ASSERT(allow_macro_instructions_);
1256   if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
1257     ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMP);
1258   } else {
1259     ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
1260   }
1261 }
1262 
1263 
1264 void MacroAssembler::ConditionalCompareMacro(const Register& rn,
1265                                              const Operand& operand,
1266                                              StatusFlags nzcv,
1267                                              Condition cond,
1268                                              ConditionalCompareOp op) {
1269   VIXL_ASSERT((cond != al) && (cond != nv));
1270   // The worst case for size is ccmp immediate:
1271   //  * up to 4 instructions to materialise the constant
1272   //  * 1 instruction for ccmp
1273   MacroEmissionCheckScope guard(this);
1274 
1275   if ((operand.IsShiftedRegister() && (operand.GetShiftAmount() == 0)) ||
1276       (operand.IsImmediate() &&
1277        IsImmConditionalCompare(operand.GetImmediate()))) {
1278     // The immediate can be encoded in the instruction, or the operand is an
1279     // unshifted register: call the assembler.
1280     ConditionalCompare(rn, operand, nzcv, cond, op);
1281   } else {
1282     UseScratchRegisterScope temps(this);
1283     // The operand isn't directly supported by the instruction: perform the
1284     // operation on a temporary register.
1285     Register temp = temps.AcquireSameSizeAs(rn);
1286     Mov(temp, operand);
1287     ConditionalCompare(rn, temp, nzcv, cond, op);
1288   }
1289 }
1290 
1291 
1292 void MacroAssembler::CselHelper(MacroAssembler* masm,
1293                                 const Register& rd,
1294                                 Operand left,
1295                                 Operand right,
1296                                 Condition cond,
1297                                 bool* should_synthesise_left,
1298                                 bool* should_synthesise_right) {
1299   bool emit_code = (masm != NULL);
1300 
1301   VIXL_ASSERT(!emit_code || masm->allow_macro_instructions_);
1302   VIXL_ASSERT((cond != al) && (cond != nv));
1303   VIXL_ASSERT(!rd.IsZero() && !rd.IsSP());
1304   VIXL_ASSERT(left.IsImmediate() || !left.GetRegister().IsSP());
1305   VIXL_ASSERT(right.IsImmediate() || !right.GetRegister().IsSP());
1306 
1307   if (should_synthesise_left != NULL) *should_synthesise_left = false;
1308   if (should_synthesise_right != NULL) *should_synthesise_right = false;
1309 
1310   // The worst case for size occurs when the inputs are two non encodable
1311   // constants:
1312   //  * up to 4 instructions to materialise the left constant
1313   //  * up to 4 instructions to materialise the right constant
1314   //  * 1 instruction for csel
1315   EmissionCheckScope guard(masm, 9 * kInstructionSize);
1316   UseScratchRegisterScope temps;
1317   if (masm != NULL) {
1318     temps.Open(masm);
1319   }
1320 
1321   // Try to handle cases where both inputs are immediates.
1322   bool left_is_immediate = left.IsImmediate() || left.IsZero();
1323   bool right_is_immediate = right.IsImmediate() || right.IsZero();
1324   if (left_is_immediate && right_is_immediate &&
1325       CselSubHelperTwoImmediates(masm,
1326                                  rd,
1327                                  left.GetEquivalentImmediate(),
1328                                  right.GetEquivalentImmediate(),
1329                                  cond,
1330                                  should_synthesise_left,
1331                                  should_synthesise_right)) {
1332     return;
1333   }
1334 
1335   // Handle cases where one of the two inputs is -1, 0, or 1.
1336   bool left_is_small_immediate =
1337       left_is_immediate && ((-1 <= left.GetEquivalentImmediate()) &&
1338                             (left.GetEquivalentImmediate() <= 1));
1339   bool right_is_small_immediate =
1340       right_is_immediate && ((-1 <= right.GetEquivalentImmediate()) &&
1341                              (right.GetEquivalentImmediate() <= 1));
1342   if (right_is_small_immediate || left_is_small_immediate) {
1343     bool swapped_inputs = false;
1344     if (!right_is_small_immediate) {
1345       std::swap(left, right);
1346       cond = InvertCondition(cond);
1347       swapped_inputs = true;
1348     }
1349     CselSubHelperRightSmallImmediate(masm,
1350                                      &temps,
1351                                      rd,
1352                                      left,
1353                                      right,
1354                                      cond,
1355                                      swapped_inputs ? should_synthesise_right
1356                                                     : should_synthesise_left);
1357     return;
1358   }
1359 
1360   // Otherwise both inputs need to be available in registers. Synthesise them
1361   // if necessary and emit the `csel`.
1362   if (!left.IsPlainRegister()) {
1363     if (emit_code) {
1364       Register temp = temps.AcquireSameSizeAs(rd);
1365       masm->Mov(temp, left);
1366       left = temp;
1367     }
1368     if (should_synthesise_left != NULL) *should_synthesise_left = true;
1369   }
1370   if (!right.IsPlainRegister()) {
1371     if (emit_code) {
1372       Register temp = temps.AcquireSameSizeAs(rd);
1373       masm->Mov(temp, right);
1374       right = temp;
1375     }
1376     if (should_synthesise_right != NULL) *should_synthesise_right = true;
1377   }
1378   if (emit_code) {
1379     VIXL_ASSERT(left.IsPlainRegister() && right.IsPlainRegister());
1380     if (left.GetRegister().Is(right.GetRegister())) {
1381       masm->Mov(rd, left.GetRegister());
1382     } else {
1383       masm->csel(rd, left.GetRegister(), right.GetRegister(), cond);
1384     }
1385   }
1386 }
1387 
1388 
1389 bool MacroAssembler::CselSubHelperTwoImmediates(MacroAssembler* masm,
1390                                                 const Register& rd,
1391                                                 int64_t left,
1392                                                 int64_t right,
1393                                                 Condition cond,
1394                                                 bool* should_synthesise_left,
1395                                                 bool* should_synthesise_right) {
1396   bool emit_code = (masm != NULL);
1397   if (should_synthesise_left != NULL) *should_synthesise_left = false;
1398   if (should_synthesise_right != NULL) *should_synthesise_right = false;
1399 
1400   if (left == right) {
1401     if (emit_code) masm->Mov(rd, left);
1402     return true;
1403   } else if (left == -right) {
1404     if (should_synthesise_right != NULL) *should_synthesise_right = true;
1405     if (emit_code) {
1406       masm->Mov(rd, right);
1407       masm->Cneg(rd, rd, cond);
1408     }
1409     return true;
1410   }
1411 
1412   if (CselSubHelperTwoOrderedImmediates(masm, rd, left, right, cond)) {
1413     return true;
1414   } else {
1415     std::swap(left, right);
1416     if (CselSubHelperTwoOrderedImmediates(masm,
1417                                           rd,
1418                                           left,
1419                                           right,
1420                                           InvertCondition(cond))) {
1421       return true;
1422     }
1423   }
1424 
1425   // TODO: Handle more situations. For example handle `csel rd, #5, #6, cond`
1426   // with `cinc`.
1427   return false;
1428 }
1429 
1430 
1431 bool MacroAssembler::CselSubHelperTwoOrderedImmediates(MacroAssembler* masm,
1432                                                        const Register& rd,
1433                                                        int64_t left,
1434                                                        int64_t right,
1435                                                        Condition cond) {
1436   bool emit_code = (masm != NULL);
1437 
1438   if ((left == 1) && (right == 0)) {
1439     if (emit_code) masm->cset(rd, cond);
1440     return true;
1441   } else if ((left == -1) && (right == 0)) {
1442     if (emit_code) masm->csetm(rd, cond);
1443     return true;
1444   }
1445   return false;
1446 }
1447 
1448 
1449 void MacroAssembler::CselSubHelperRightSmallImmediate(
1450     MacroAssembler* masm,
1451     UseScratchRegisterScope* temps,
1452     const Register& rd,
1453     const Operand& left,
1454     const Operand& right,
1455     Condition cond,
1456     bool* should_synthesise_left) {
1457   bool emit_code = (masm != NULL);
1458   VIXL_ASSERT((right.IsImmediate() || right.IsZero()) &&
1459               (-1 <= right.GetEquivalentImmediate()) &&
1460               (right.GetEquivalentImmediate() <= 1));
1461   Register left_register;
1462 
1463   if (left.IsPlainRegister()) {
1464     left_register = left.GetRegister();
1465   } else {
1466     if (emit_code) {
1467       left_register = temps->AcquireSameSizeAs(rd);
1468       masm->Mov(left_register, left);
1469     }
1470     if (should_synthesise_left != NULL) *should_synthesise_left = true;
1471   }
1472   if (emit_code) {
1473     int64_t imm = right.GetEquivalentImmediate();
1474     Register zr = AppropriateZeroRegFor(rd);
1475     if (imm == 0) {
1476       masm->csel(rd, left_register, zr, cond);
1477     } else if (imm == 1) {
1478       masm->csinc(rd, left_register, zr, cond);
1479     } else {
1480       VIXL_ASSERT(imm == -1);
1481       masm->csinv(rd, left_register, zr, cond);
1482     }
1483   }
1484 }
1485 
1486 
1487 void MacroAssembler::Add(const Register& rd,
1488                          const Register& rn,
1489                          const Operand& operand,
1490                          FlagsUpdate S) {
1491   VIXL_ASSERT(allow_macro_instructions_);
1492   if (operand.IsImmediate()) {
1493     int64_t imm = operand.GetImmediate();
1494     if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
1495         IsImmAddSub(-imm)) {
1496       AddSubMacro(rd, rn, -imm, S, SUB);
1497       return;
1498     }
1499   }
1500   AddSubMacro(rd, rn, operand, S, ADD);
1501 }
1502 
1503 
1504 void MacroAssembler::Adds(const Register& rd,
1505                           const Register& rn,
1506                           const Operand& operand) {
1507   Add(rd, rn, operand, SetFlags);
1508 }
1509 
1510 #define MINMAX(V)        \
1511   V(Smax, smax, IsInt8)  \
1512   V(Smin, smin, IsInt8)  \
1513   V(Umax, umax, IsUint8) \
1514   V(Umin, umin, IsUint8)
1515 
1516 #define VIXL_DEFINE_MASM_FUNC(MASM, ASM, RANGE)      \
1517   void MacroAssembler::MASM(const Register& rd,      \
1518                             const Register& rn,      \
1519                             const Operand& op) {     \
1520     VIXL_ASSERT(allow_macro_instructions_);          \
1521     if (op.IsImmediate()) {                          \
1522       int64_t imm = op.GetImmediate();               \
1523       if (!RANGE(imm)) {                             \
1524         UseScratchRegisterScope temps(this);         \
1525         Register temp = temps.AcquireSameSizeAs(rd); \
1526         Mov(temp, imm);                              \
1527         MASM(rd, rn, temp);                          \
1528         return;                                      \
1529       }                                              \
1530     }                                                \
1531     SingleEmissionCheckScope guard(this);            \
1532     ASM(rd, rn, op);                                 \
1533   }
1534 MINMAX(VIXL_DEFINE_MASM_FUNC)
1535 #undef VIXL_DEFINE_MASM_FUNC
1536 
1537 void MacroAssembler::St2g(const Register& rt, const MemOperand& addr) {
1538   VIXL_ASSERT(allow_macro_instructions_);
1539   SingleEmissionCheckScope guard(this);
1540   st2g(rt, addr);
1541 }
1542 
1543 void MacroAssembler::Stg(const Register& rt, const MemOperand& addr) {
1544   VIXL_ASSERT(allow_macro_instructions_);
1545   SingleEmissionCheckScope guard(this);
1546   stg(rt, addr);
1547 }
1548 
1549 void MacroAssembler::Stgp(const Register& rt1,
1550                           const Register& rt2,
1551                           const MemOperand& addr) {
1552   VIXL_ASSERT(allow_macro_instructions_);
1553   SingleEmissionCheckScope guard(this);
1554   stgp(rt1, rt2, addr);
1555 }
1556 
1557 void MacroAssembler::Stz2g(const Register& rt, const MemOperand& addr) {
1558   VIXL_ASSERT(allow_macro_instructions_);
1559   SingleEmissionCheckScope guard(this);
1560   stz2g(rt, addr);
1561 }
1562 
1563 void MacroAssembler::Stzg(const Register& rt, const MemOperand& addr) {
1564   VIXL_ASSERT(allow_macro_instructions_);
1565   SingleEmissionCheckScope guard(this);
1566   stzg(rt, addr);
1567 }
1568 
1569 void MacroAssembler::Ldg(const Register& rt, const MemOperand& addr) {
1570   VIXL_ASSERT(allow_macro_instructions_);
1571   SingleEmissionCheckScope guard(this);
1572   ldg(rt, addr);
1573 }
1574 
1575 void MacroAssembler::Sub(const Register& rd,
1576                          const Register& rn,
1577                          const Operand& operand,
1578                          FlagsUpdate S) {
1579   VIXL_ASSERT(allow_macro_instructions_);
1580   if (operand.IsImmediate()) {
1581     int64_t imm = operand.GetImmediate();
1582     if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
1583         IsImmAddSub(-imm)) {
1584       AddSubMacro(rd, rn, -imm, S, ADD);
1585       return;
1586     }
1587   }
1588   AddSubMacro(rd, rn, operand, S, SUB);
1589 }
1590 
1591 
1592 void MacroAssembler::Subs(const Register& rd,
1593                           const Register& rn,
1594                           const Operand& operand) {
1595   Sub(rd, rn, operand, SetFlags);
1596 }
1597 
1598 
1599 void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
1600   VIXL_ASSERT(allow_macro_instructions_);
1601   Adds(AppropriateZeroRegFor(rn), rn, operand);
1602 }
1603 
1604 
1605 void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
1606   VIXL_ASSERT(allow_macro_instructions_);
1607   Subs(AppropriateZeroRegFor(rn), rn, operand);
1608 }
1609 
1610 
1611 void MacroAssembler::Fcmp(const VRegister& fn, double value, FPTrapFlags trap) {
1612   VIXL_ASSERT(allow_macro_instructions_);
1613   // The worst case for size is:
1614   //  * 1 instruction to materialise the constant, using the literal pool if necessary
1615   //  * 1 instruction for fcmp{e}
1616   MacroEmissionCheckScope guard(this);
1617   if (value != 0.0) {
1618     UseScratchRegisterScope temps(this);
1619     VRegister tmp = temps.AcquireSameSizeAs(fn);
1620     Fmov(tmp, value);
1621     FPCompareMacro(fn, tmp, trap);
1622   } else {
1623     FPCompareMacro(fn, value, trap);
1624   }
1625 }
1626 
1627 
1628 void MacroAssembler::Fcmpe(const VRegister& fn, double value) {
1629   Fcmp(fn, value, EnableTrap);
1630 }
1631 
1632 
1633 void MacroAssembler::Fmov(VRegister vd, double imm) {
1634   VIXL_ASSERT(allow_macro_instructions_);
1635   // Floating point immediates are loaded through the literal pool.
1636   MacroEmissionCheckScope guard(this);
1637   uint64_t rawbits = DoubleToRawbits(imm);
1638 
1639   if (rawbits == 0) {
1640     fmov(vd.D(), xzr);
1641     return;
1642   }
1643 
1644   if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
1645     Fmov(vd, Float16(imm));
1646     return;
1647   }
1648 
1649   if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
1650     Fmov(vd, static_cast<float>(imm));
1651     return;
1652   }
1653 
1654   VIXL_ASSERT(vd.Is1D() || vd.Is2D());
1655   if (IsImmFP64(rawbits)) {
1656     fmov(vd, imm);
1657   } else if (vd.IsScalar()) {
1658     ldr(vd,
1659 #ifndef PANDA_BUILD
1660         new Literal<double>(imm,
1661 #else
1662         allocator_.New<Literal<double>>(imm,
1663 #endif
1664                             &literal_pool_,
1665                             RawLiteral::kDeletedOnPlacementByPool));
1666   } else {
1667     // TODO: consider NEON support for load literal.
1668     Movi(vd, rawbits);
1669   }
1670 }
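
// Illustration (sketch, not part of the original source): which path the
// Fmov(VRegister, double) above takes depends on whether the value is an
// encodable FP immediate; the literal-pool placement itself is handled by the
// pool machinery elsewhere in this file.
//
//   masm.Fmov(d0, 1.0);    // encodable: single "fmov d0, #1.0"
//   masm.Fmov(d1, 0.0);    // zero: "fmov d1, xzr"
//   masm.Fmov(d2, 1.3e5);  // not encodable: "ldr d2, <literal pool entry>"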
1671 
1672 
1673 void MacroAssembler::Fmov(VRegister vd, float imm) {
1674   VIXL_ASSERT(allow_macro_instructions_);
1675   // Floating point immediates are loaded through the literal pool.
1676   MacroEmissionCheckScope guard(this);
1677   uint32_t rawbits = FloatToRawbits(imm);
1678 
1679   if (rawbits == 0) {
1680     fmov(vd.S(), wzr);
1681     return;
1682   }
1683 
1684   if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
1685     Fmov(vd, Float16(imm));
1686     return;
1687   }
1688 
1689   if (vd.Is1D() || vd.Is2D()) {
1690     Fmov(vd, static_cast<double>(imm));
1691     return;
1692   }
1693 
1694   VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S());
1695   if (IsImmFP32(rawbits)) {
1696     fmov(vd, imm);
1697   } else if (vd.IsScalar()) {
1698     ldr(vd,
1699 #ifndef PANDA_BUILD
1700         new Literal<float>(imm,
1701 #else
1702         allocator_.New<Literal<float>>(imm,
1703 #endif
1704                            &literal_pool_,
1705                            RawLiteral::kDeletedOnPlacementByPool));
1706   } else {
1707     // TODO: consider NEON support for load literal.
1708     Movi(vd, rawbits);
1709   }
1710 }
1711 
1712 
1713 void MacroAssembler::Fmov(VRegister vd, Float16 imm) {
1714   VIXL_ASSERT(allow_macro_instructions_);
1715   MacroEmissionCheckScope guard(this);
1716 
1717   if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
1718     Fmov(vd, FPToFloat(imm, kIgnoreDefaultNaN));
1719     return;
1720   }
1721 
1722   if (vd.Is1D() || vd.Is2D()) {
1723     Fmov(vd, FPToDouble(imm, kIgnoreDefaultNaN));
1724     return;
1725   }
1726 
1727   VIXL_ASSERT(vd.Is1H() || vd.Is4H() || vd.Is8H());
1728   uint16_t rawbits = Float16ToRawbits(imm);
1729   if (IsImmFP16(imm)) {
1730     fmov(vd, imm);
1731   } else {
1732     if (vd.IsScalar()) {
1733       if (rawbits == 0x0) {
1734         fmov(vd, wzr);
1735       } else {
1736         // We can use movz instead of the literal pool.
1737         UseScratchRegisterScope temps(this);
1738         Register temp = temps.AcquireW();
1739         Mov(temp, rawbits);
1740         Fmov(vd, temp);
1741       }
1742     } else {
1743       // TODO: consider NEON support for load literal.
1744       Movi(vd, static_cast<uint64_t>(rawbits));
1745     }
1746   }
1747 }
1748 
1749 
1750 void MacroAssembler::Neg(const Register& rd, const Operand& operand) {
1751   VIXL_ASSERT(allow_macro_instructions_);
1752   if (operand.IsImmediate()) {
1753     Mov(rd, -operand.GetImmediate());
1754   } else {
1755     Sub(rd, AppropriateZeroRegFor(rd), operand);
1756   }
1757 }
1758 
1759 
1760 void MacroAssembler::Negs(const Register& rd, const Operand& operand) {
1761   VIXL_ASSERT(allow_macro_instructions_);
1762   Subs(rd, AppropriateZeroRegFor(rd), operand);
1763 }
1764 
1765 
1766 bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst,
1767                                               uint64_t imm) {
1768   return OneInstrMoveImmediateHelper(this, dst, imm);
1769 }
1770 
1771 
1772 Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst,
1773                                                   uint64_t imm,
1774                                                   PreShiftImmMode mode) {
1775   int reg_size = dst.GetSizeInBits();
1776 
1777   // Encode the immediate in a single move instruction, if possible.
1778   if (TryOneInstrMoveImmediate(dst, imm)) {
1779     // The move was successful; nothing to do here.
1780   } else {
1781     // Pre-shift the immediate to the least-significant bits of the register.
1782     int shift_low = CountTrailingZeros(imm, reg_size);
1783     if (mode == kLimitShiftForSP) {
1784       // When applied to the stack pointer, the subsequent arithmetic operation
1785       // can use the extend form to shift left by a maximum of four bits. Right
1786       // shifts are not allowed, so we filter them out later before the new
1787       // immediate is tested.
1788       shift_low = std::min(shift_low, 4);
1789     }
1790     // TryOneInstrMoveImmediate handles `imm` with a value of zero, so shift_low
1791     // must lie in the range [0, 63], and the shifts below are well-defined.
1792     VIXL_ASSERT((shift_low >= 0) && (shift_low < 64));
1793     // imm_low = imm >> shift_low (with sign extension)
1794     uint64_t imm_low = ExtractSignedBitfield64(63, shift_low, imm);
1795 
1796     // Pre-shift the immediate to the most-significant bits of the register,
1797     // inserting set bits in the least-significant bits.
1798     int shift_high = CountLeadingZeros(imm, reg_size);
1799     VIXL_ASSERT((shift_high >= 0) && (shift_high < 64));
1800     uint64_t imm_high = (imm << shift_high) | GetUintMask(shift_high);
1801 
1802     if ((mode != kNoShift) && TryOneInstrMoveImmediate(dst, imm_low)) {
1803       // The new immediate has been moved into the destination's low bits:
1804       // return a new leftward-shifting operand.
1805       return Operand(dst, LSL, shift_low);
1806     } else if ((mode == kAnyShift) && TryOneInstrMoveImmediate(dst, imm_high)) {
1807       // The new immediate has been moved into the destination's high bits:
1808       // return a new rightward-shifting operand.
1809       return Operand(dst, LSR, shift_high);
1810     } else {
1811       Mov(dst, imm);
1812     }
1813   }
1814   return Operand(dst);
1815 }
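
// Worked sketch (illustrative, not from the original source): for an immediate
// such as 0xabcd000, which cannot be encoded in one move, shift_low is 12 and
// imm_low is 0xabcd, which a single MOVZ can materialise. The caller then
// applies the shift through the returned operand, e.g. in an ADD:
//
//   movz temp, #0xabcd
//   add  rd, rn, temp, lsl #12
//
// rather than spending two instructions (MOVZ + MOVK) on the full constant.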
1816 
1817 
1818 void MacroAssembler::Move(const GenericOperand& dst,
1819                           const GenericOperand& src) {
1820   if (dst.Equals(src)) {
1821     return;
1822   }
1823 
1824   VIXL_ASSERT(dst.IsValid() && src.IsValid());
1825 
1826   // The sizes of the operands must match exactly.
1827   VIXL_ASSERT(dst.GetSizeInBits() == src.GetSizeInBits());
1828   VIXL_ASSERT(dst.GetSizeInBits() <= kXRegSize);
1829   int operand_size = static_cast<int>(dst.GetSizeInBits());
1830 
1831   if (dst.IsCPURegister() && src.IsCPURegister()) {
1832     CPURegister dst_reg = dst.GetCPURegister();
1833     CPURegister src_reg = src.GetCPURegister();
1834     if (dst_reg.IsRegister() && src_reg.IsRegister()) {
1835       Mov(Register(dst_reg), Register(src_reg));
1836     } else if (dst_reg.IsVRegister() && src_reg.IsVRegister()) {
1837       Fmov(VRegister(dst_reg), VRegister(src_reg));
1838     } else {
1839       if (dst_reg.IsRegister()) {
1840         Fmov(Register(dst_reg), VRegister(src_reg));
1841       } else {
1842         Fmov(VRegister(dst_reg), Register(src_reg));
1843       }
1844     }
1845     return;
1846   }
1847 
1848   if (dst.IsMemOperand() && src.IsMemOperand()) {
1849     UseScratchRegisterScope temps(this);
1850     CPURegister temp = temps.AcquireCPURegisterOfSize(operand_size);
1851     Ldr(temp, src.GetMemOperand());
1852     Str(temp, dst.GetMemOperand());
1853     return;
1854   }
1855 
1856   if (dst.IsCPURegister()) {
1857     Ldr(dst.GetCPURegister(), src.GetMemOperand());
1858   } else {
1859     Str(src.GetCPURegister(), dst.GetMemOperand());
1860   }
1861 }
1862 
1863 
1864 void MacroAssembler::ComputeAddress(const Register& dst,
1865                                     const MemOperand& mem_op) {
1866   // We cannot handle pre-indexing or post-indexing.
1867   VIXL_ASSERT(mem_op.GetAddrMode() == Offset);
1868   Register base = mem_op.GetBaseRegister();
1869   if (mem_op.IsImmediateOffset()) {
1870     Add(dst, base, mem_op.GetOffset());
1871   } else {
1872     VIXL_ASSERT(mem_op.IsRegisterOffset());
1873     Register reg_offset = mem_op.GetRegisterOffset();
1874     Shift shift = mem_op.GetShift();
1875     Extend extend = mem_op.GetExtend();
1876     if (shift == NO_SHIFT) {
1877       VIXL_ASSERT(extend != NO_EXTEND);
1878       Add(dst, base, Operand(reg_offset, extend, mem_op.GetShiftAmount()));
1879     } else {
1880       VIXL_ASSERT(extend == NO_EXTEND);
1881       Add(dst, base, Operand(reg_offset, shift, mem_op.GetShiftAmount()));
1882     }
1883   }
1884 }
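
// Illustration (sketch, not part of the original source): ComputeAddress turns
// an offset-mode MemOperand into a plain ADD on the base register. Register
// choices are arbitrary examples.
//
//   masm.ComputeAddress(x0, MemOperand(x1, 16));          // add x0, x1, #16
//   masm.ComputeAddress(x0, MemOperand(x1, x2, LSL, 3));  // add x0, x1, x2, lsl #3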
1885 
1886 
1887 void MacroAssembler::AddSubMacro(const Register& rd,
1888                                  const Register& rn,
1889                                  const Operand& operand,
1890                                  FlagsUpdate S,
1891                                  AddSubOp op) {
1892   // Worst case is add/sub immediate:
1893   //  * up to 4 instructions to materialise the constant
1894   //  * 1 instruction for add/sub
1895   MacroEmissionCheckScope guard(this);
1896 
1897   if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() &&
1898       (S == LeaveFlags)) {
1899     // The instruction would be a nop. Avoid generating useless code.
1900     return;
1901   }
1902 
1903   if ((operand.IsImmediate() && !IsImmAddSub(operand.GetImmediate())) ||
1904       (rn.IsZero() && !operand.IsShiftedRegister()) ||
1905       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
1906     UseScratchRegisterScope temps(this);
1907     // Use `rd` as a temp, if we can.
1908     temps.Include(rd);
1909     // We read `rn` after evaluating `operand`.
1910     temps.Exclude(rn);
1911     // It doesn't matter if `operand` is in `temps` (e.g. because it aliases
1912     // `rd`) because we don't need it after it is evaluated.
1913     Register temp = temps.AcquireSameSizeAs(rn);
1914     if (operand.IsImmediate()) {
1915       PreShiftImmMode mode = kAnyShift;
1916 
1917       // If the destination or source register is the stack pointer, we can
1918       // only pre-shift the immediate right by values supported in the add/sub
1919       // extend encoding.
1920       if (rd.IsSP()) {
1921         // If the destination is SP and flags will be set, we can't pre-shift
1922         // the immediate at all.
1923         mode = (S == SetFlags) ? kNoShift : kLimitShiftForSP;
1924       } else if (rn.IsSP()) {
1925         mode = kLimitShiftForSP;
1926       }
1927 
1928       Operand imm_operand =
1929           MoveImmediateForShiftedOp(temp, operand.GetImmediate(), mode);
1930       AddSub(rd, rn, imm_operand, S, op);
1931     } else {
1932       Mov(temp, operand);
1933       AddSub(rd, rn, temp, S, op);
1934     }
1935   } else {
1936     AddSub(rd, rn, operand, S, op);
1937   }
1938 }
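
// Note (illustrative, not from the original source): the pre-shift mode chosen
// above matters when sp is involved, because the final ADD/SUB must then use
// the extend form, which only allows a left shift of up to four bits. In the
// sketch below, `large_imm` is a placeholder for a constant that is not
// directly encodable as an add/sub immediate.
//
//   masm.Add(sp, x1, large_imm);  // destination is sp: pre-shift limited to
//                                 // LSL #0..#4 (kLimitShiftForSP)
//   masm.Add(x0, x1, large_imm);  // no sp involved: any pre-shift (kAnyShift)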
1939 
1940 
1941 void MacroAssembler::Adc(const Register& rd,
1942                          const Register& rn,
1943                          const Operand& operand) {
1944   VIXL_ASSERT(allow_macro_instructions_);
1945   AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC);
1946 }
1947 
1948 
1949 void MacroAssembler::Adcs(const Register& rd,
1950                           const Register& rn,
1951                           const Operand& operand) {
1952   VIXL_ASSERT(allow_macro_instructions_);
1953   AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC);
1954 }
1955 
1956 
1957 void MacroAssembler::Sbc(const Register& rd,
1958                          const Register& rn,
1959                          const Operand& operand) {
1960   VIXL_ASSERT(allow_macro_instructions_);
1961   AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC);
1962 }
1963 
1964 
1965 void MacroAssembler::Sbcs(const Register& rd,
1966                           const Register& rn,
1967                           const Operand& operand) {
1968   VIXL_ASSERT(allow_macro_instructions_);
1969   AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC);
1970 }
1971 
1972 
1973 void MacroAssembler::Ngc(const Register& rd, const Operand& operand) {
1974   VIXL_ASSERT(allow_macro_instructions_);
1975   Register zr = AppropriateZeroRegFor(rd);
1976   Sbc(rd, zr, operand);
1977 }
1978 
1979 
1980 void MacroAssembler::Ngcs(const Register& rd, const Operand& operand) {
1981   VIXL_ASSERT(allow_macro_instructions_);
1982   Register zr = AppropriateZeroRegFor(rd);
1983   Sbcs(rd, zr, operand);
1984 }
1985 
1986 
1987 void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
1988                                           const Register& rn,
1989                                           const Operand& operand,
1990                                           FlagsUpdate S,
1991                                           AddSubWithCarryOp op) {
1992   VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits());
1993   // Worst case is addc/subc immediate:
1994   //  * up to 4 instructions to materialise the constant
1995   //  * 1 instruction for add/sub
1996   MacroEmissionCheckScope guard(this);
1997   UseScratchRegisterScope temps(this);
1998   // Use `rd` as a temp, if we can.
1999   temps.Include(rd);
2000   // We read `rn` after evaluating `operand`.
2001   temps.Exclude(rn);
2002   // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
2003   // because we don't need it after it is evaluated.
2004 
2005   if (operand.IsImmediate() ||
2006       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
2007     // Add/sub with carry (immediate or ROR shifted register).
2008     Register temp = temps.AcquireSameSizeAs(rn);
2009     Mov(temp, operand);
2010     AddSubWithCarry(rd, rn, Operand(temp), S, op);
2011   } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
2012     // Add/sub with carry (shifted register).
2013     VIXL_ASSERT(operand.GetRegister().GetSizeInBits() == rd.GetSizeInBits());
2014     VIXL_ASSERT(operand.GetShift() != ROR);
2015     VIXL_ASSERT(
2016         IsUintN(rd.GetSizeInBits() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
2017                 operand.GetShiftAmount()));
2018     Register temp = temps.AcquireSameSizeAs(rn);
2019     EmitShift(temp,
2020               operand.GetRegister(),
2021               operand.GetShift(),
2022               operand.GetShiftAmount());
2023     AddSubWithCarry(rd, rn, Operand(temp), S, op);
2024   } else if (operand.IsExtendedRegister()) {
2025     // Add/sub with carry (extended register).
2026     VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
2027     // Add/sub extended supports a shift <= 4. We want to support exactly the
2028     // same modes.
2029     VIXL_ASSERT(operand.GetShiftAmount() <= 4);
2030     VIXL_ASSERT(
2031         operand.GetRegister().Is64Bits() ||
2032         ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
2033     Register temp = temps.AcquireSameSizeAs(rn);
2034     EmitExtendShift(temp,
2035                     operand.GetRegister(),
2036                     operand.GetExtend(),
2037                     operand.GetShiftAmount());
2038     AddSubWithCarry(rd, rn, Operand(temp), S, op);
2039   } else {
2040     // The addressing mode is directly supported by the instruction.
2041     AddSubWithCarry(rd, rn, operand, S, op);
2042   }
2043 }
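
// Illustration (sketch, not from the original source): ADC/SBC have no
// immediate, shifted-register or extended-register encodings, so the macro
// above synthesises such operands in a scratch register first. Registers and
// values are arbitrary examples.
//
//   masm.Adc(x0, x1, 4);                    // mov scratch, #4
//                                           // adc x0, x1, scratch
//   masm.Adc(x0, x1, Operand(x2, LSL, 4));  // lsl scratch, x2, #4
//                                           // adc x0, x1, scratch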
2044 
2045 
2046 void MacroAssembler::Rmif(const Register& xn,
2047                           unsigned shift,
2048                           StatusFlags flags) {
2049   VIXL_ASSERT(allow_macro_instructions_);
2050   SingleEmissionCheckScope guard(this);
2051   rmif(xn, shift, flags);
2052 }
2053 
2054 
2055 void MacroAssembler::Setf8(const Register& wn) {
2056   VIXL_ASSERT(allow_macro_instructions_);
2057   SingleEmissionCheckScope guard(this);
2058   setf8(wn);
2059 }
2060 
2061 
2062 void MacroAssembler::Setf16(const Register& wn) {
2063   VIXL_ASSERT(allow_macro_instructions_);
2064   SingleEmissionCheckScope guard(this);
2065   setf16(wn);
2066 }
2067 
2068 
2069 #define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                          \
2070   void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
2071     VIXL_ASSERT(allow_macro_instructions_);                            \
2072     LoadStoreMacro(REG, addr, OP);                                     \
2073   }
2074 LS_MACRO_LIST(DEFINE_FUNCTION)
2075 #undef DEFINE_FUNCTION
2076 
2077 
2078 void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
2079                                     const MemOperand& addr,
2080                                     LoadStoreOp op) {
2081   VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsImmediatePostIndex() ||
2082               addr.IsImmediatePreIndex() || addr.IsRegisterOffset());
2083 
2084   // Worst case is ldr/str pre/post index:
2085   //  * 1 instruction for ldr/str
2086   //  * up to 4 instructions to materialise the constant
2087   //  * 1 instruction to update the base
2088   MacroEmissionCheckScope guard(this);
2089 
2090   int64_t offset = addr.GetOffset();
2091   unsigned access_size = CalcLSDataSize(op);
2092 
2093   // Check if an immediate offset fits in the immediate field of the
2094   // appropriate instruction. If not, emit two instructions to perform
2095   // the operation.
2096   if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, access_size) &&
2097       !IsImmLSUnscaled(offset)) {
2098     // Immediate offset that can't be encoded using unsigned or unscaled
2099     // addressing modes.
2100     UseScratchRegisterScope temps(this);
2101     Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
2102     Mov(temp, addr.GetOffset());
2103     LoadStore(rt, MemOperand(addr.GetBaseRegister(), temp), op);
2104   } else if (addr.IsImmediatePostIndex() && !IsImmLSUnscaled(offset)) {
2105     // Post-index beyond unscaled addressing range.
2106     LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
2107     Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
2108   } else if (addr.IsImmediatePreIndex() && !IsImmLSUnscaled(offset)) {
2109     // Pre-index beyond unscaled addressing range.
2110     Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
2111     LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
2112   } else {
2113     // Encodable in one load/store instruction.
2114     LoadStore(rt, addr, op);
2115   }
2116 }
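
// Illustration (sketch, not part of the original source): an offset that fits
// neither the scaled-unsigned nor the unscaled-signed immediate field makes
// the macro above fall back to a register offset. Values are arbitrary.
//
//   masm.Ldr(x0, MemOperand(x1, 8));         // encodable: single ldr
//   masm.Ldr(x0, MemOperand(x1, 0x123457));  // not encodable:
//                                            //   mov scratch, #0x123457
//                                            //   ldr x0, [x1, scratch]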
2117 
2118 
2119 #define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
2120   void MacroAssembler::FN(const REGTYPE REG,        \
2121                           const REGTYPE REG2,       \
2122                           const MemOperand& addr) { \
2123     VIXL_ASSERT(allow_macro_instructions_);         \
2124     LoadStorePairMacro(REG, REG2, addr, OP);        \
2125   }
2126 LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
2127 #undef DEFINE_FUNCTION
2128 
2129 void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
2130                                         const CPURegister& rt2,
2131                                         const MemOperand& addr,
2132                                         LoadStorePairOp op) {
2133   // TODO(all): Should we support register offset for load-store-pair?
2134   VIXL_ASSERT(!addr.IsRegisterOffset());
2135   // Worst case is ldp/stp immediate:
2136   //  * 1 instruction for ldp/stp
2137   //  * up to 4 instructions to materialise the constant
2138   //  * 1 instruction to update the base
2139   MacroEmissionCheckScope guard(this);
2140 
2141   int64_t offset = addr.GetOffset();
2142   unsigned access_size = CalcLSPairDataSize(op);
2143 
2144   // Check if the offset fits in the immediate field of the appropriate
2145   // instruction. If not, emit two instructions to perform the operation.
2146   if (IsImmLSPair(offset, access_size)) {
2147     // Encodable in one load/store pair instruction.
2148     LoadStorePair(rt, rt2, addr, op);
2149   } else {
2150     Register base = addr.GetBaseRegister();
2151     if (addr.IsImmediateOffset()) {
2152       UseScratchRegisterScope temps(this);
2153       Register temp = temps.AcquireSameSizeAs(base);
2154       Add(temp, base, offset);
2155       LoadStorePair(rt, rt2, MemOperand(temp), op);
2156     } else if (addr.IsImmediatePostIndex()) {
2157       LoadStorePair(rt, rt2, MemOperand(base), op);
2158       Add(base, base, offset);
2159     } else {
2160       VIXL_ASSERT(addr.IsImmediatePreIndex());
2161       Add(base, base, offset);
2162       LoadStorePair(rt, rt2, MemOperand(base), op);
2163     }
2164   }
2165 }
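
// Illustration (sketch, not part of the original source): the ldp/stp
// immediate is a scaled 7-bit signed field (-512 to +504 bytes for X
// registers), so larger offsets take the Add-then-access path above.
//
//   masm.Ldp(x0, x1, MemOperand(x2, 16));    // encodable: single ldp
//   masm.Ldp(x0, x1, MemOperand(x2, 1024));  // not encodable:
//                                            //   add scratch, x2, #1024
//                                            //   ldp x0, x1, [scratch]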
2166 
2167 
2168 void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) {
2169   MacroEmissionCheckScope guard(this);
2170 
2171   // There are no pre- or post-index modes for prfm.
2172   VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset());
2173 
2174   // The access size is implicitly 8 bytes for all prefetch operations.
2175   unsigned size = kXRegSizeInBytesLog2;
2176 
2177   // Check if an immediate offset fits in the immediate field of the
2178   // appropriate instruction. If not, emit two instructions to perform
2179   // the operation.
2180   if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.GetOffset(), size) &&
2181       !IsImmLSUnscaled(addr.GetOffset())) {
2182     // Immediate offset that can't be encoded using unsigned or unscaled
2183     // addressing modes.
2184     UseScratchRegisterScope temps(this);
2185     Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
2186     Mov(temp, addr.GetOffset());
2187     Prefetch(op, MemOperand(addr.GetBaseRegister(), temp));
2188   } else {
2189     // Simple register-offsets are encodable in one instruction.
2190     Prefetch(op, addr);
2191   }
2192 }
2193 
2194 
2195 void MacroAssembler::Push(const CPURegister& src0,
2196                           const CPURegister& src1,
2197                           const CPURegister& src2,
2198                           const CPURegister& src3) {
2199   VIXL_ASSERT(allow_macro_instructions_);
2200   VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
2201   VIXL_ASSERT(src0.IsValid());
2202 
2203   int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
2204   int size = src0.GetSizeInBytes();
2205 
2206   PrepareForPush(count, size);
2207   PushHelper(count, size, src0, src1, src2, src3);
2208 }
2209 
2210 
2211 void MacroAssembler::Pop(const CPURegister& dst0,
2212                          const CPURegister& dst1,
2213                          const CPURegister& dst2,
2214                          const CPURegister& dst3) {
2215   // It is not valid to pop into the same register more than once in one
2216   // instruction, not even into the zero register.
2217   VIXL_ASSERT(allow_macro_instructions_);
2218   VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
2219   VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
2220   VIXL_ASSERT(dst0.IsValid());
2221 
2222   int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
2223   int size = dst0.GetSizeInBytes();
2224 
2225   PrepareForPop(count, size);
2226   PopHelper(count, size, dst0, dst1, dst2, dst3);
2227 }
2228 
2229 
2230 void MacroAssembler::PushCPURegList(CPURegList registers) {
2231   VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
2232   VIXL_ASSERT(!registers.Overlaps(*GetScratchVRegisterList()));
2233   VIXL_ASSERT(allow_macro_instructions_);
2234 
2235   int reg_size = registers.GetRegisterSizeInBytes();
2236   PrepareForPush(registers.GetCount(), reg_size);
2237 
2238   // Bump the stack pointer and store two registers at the bottom.
2239   int size = registers.GetTotalSizeInBytes();
2240   const CPURegister& bottom_0 = registers.PopLowestIndex();
2241   const CPURegister& bottom_1 = registers.PopLowestIndex();
2242   if (bottom_0.IsValid() && bottom_1.IsValid()) {
2243     Stp(bottom_0, bottom_1, MemOperand(StackPointer(), -size, PreIndex));
2244   } else if (bottom_0.IsValid()) {
2245     Str(bottom_0, MemOperand(StackPointer(), -size, PreIndex));
2246   }
2247 
2248   int offset = 2 * reg_size;
2249   while (!registers.IsEmpty()) {
2250     const CPURegister& src0 = registers.PopLowestIndex();
2251     const CPURegister& src1 = registers.PopLowestIndex();
2252     if (src1.IsValid()) {
2253       Stp(src0, src1, MemOperand(StackPointer(), offset));
2254     } else {
2255       Str(src0, MemOperand(StackPointer(), offset));
2256     }
2257     offset += 2 * reg_size;
2258   }
2259 }
2260 
2261 
2262 void MacroAssembler::PopCPURegList(CPURegList registers) {
2263   VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
2264   VIXL_ASSERT(!registers.Overlaps(*GetScratchVRegisterList()));
2265   VIXL_ASSERT(allow_macro_instructions_);
2266 
2267   int reg_size = registers.GetRegisterSizeInBytes();
2268   PrepareForPop(registers.GetCount(), reg_size);
2269 
2270 
2271   int size = registers.GetTotalSizeInBytes();
2272   const CPURegister& bottom_0 = registers.PopLowestIndex();
2273   const CPURegister& bottom_1 = registers.PopLowestIndex();
2274 
2275   int offset = 2 * reg_size;
2276   while (!registers.IsEmpty()) {
2277     const CPURegister& dst0 = registers.PopLowestIndex();
2278     const CPURegister& dst1 = registers.PopLowestIndex();
2279     if (dst1.IsValid()) {
2280       Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
2281     } else {
2282       Ldr(dst0, MemOperand(StackPointer(), offset));
2283     }
2284     offset += 2 * reg_size;
2285   }
2286 
2287   // Load the two registers at the bottom and drop the stack pointer.
2288   if (bottom_0.IsValid() && bottom_1.IsValid()) {
2289     Ldp(bottom_0, bottom_1, MemOperand(StackPointer(), size, PostIndex));
2290   } else if (bottom_0.IsValid()) {
2291     Ldr(bottom_0, MemOperand(StackPointer(), size, PostIndex));
2292   }
2293 }
2294 
2295 
2296 void MacroAssembler::PushMultipleTimes(int count, Register src) {
2297   VIXL_ASSERT(allow_macro_instructions_);
2298   int size = src.GetSizeInBytes();
2299 
2300   PrepareForPush(count, size);
2301   // Push up to four registers at a time if possible: when the current stack
2302   // pointer is sp and the register size is 32 bits, registers must be pushed
2303   // in blocks of four in order to maintain the 16-byte alignment for sp.
2304   while (count >= 4) {
2305     PushHelper(4, size, src, src, src, src);
2306     count -= 4;
2307   }
2308   if (count >= 2) {
2309     PushHelper(2, size, src, src, NoReg, NoReg);
2310     count -= 2;
2311   }
2312   if (count == 1) {
2313     PushHelper(1, size, src, NoReg, NoReg, NoReg);
2314     count -= 1;
2315   }
2316   VIXL_ASSERT(count == 0);
2317 }
2318 
2319 
2320 void MacroAssembler::PushHelper(int count,
2321                                 int size,
2322                                 const CPURegister& src0,
2323                                 const CPURegister& src1,
2324                                 const CPURegister& src2,
2325                                 const CPURegister& src3) {
2326   // Ensure that we don't unintentionally modify scratch or debug registers.
2327   // Worst case for size is 2 stp.
2328   ExactAssemblyScope scope(this,
2329                            2 * kInstructionSize,
2330                            ExactAssemblyScope::kMaximumSize);
2331 
2332   VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
2333   VIXL_ASSERT(size == src0.GetSizeInBytes());
2334 
2335   // When pushing multiple registers, the store order is chosen such that
2336   // Push(a, b) is equivalent to Push(a) followed by Push(b).
2337   switch (count) {
2338     case 1:
2339       VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
2340       str(src0, MemOperand(StackPointer(), -1 * size, PreIndex));
2341       break;
2342     case 2:
2343       VIXL_ASSERT(src2.IsNone() && src3.IsNone());
2344       stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex));
2345       break;
2346     case 3:
2347       VIXL_ASSERT(src3.IsNone());
2348       stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex));
2349       str(src0, MemOperand(StackPointer(), 2 * size));
2350       break;
2351     case 4:
2352       // Skip over 4 * size, then fill in the gap. This allows four W registers
2353       // to be pushed using sp, whilst maintaining 16-byte alignment for sp at
2354       // all times.
2355       stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex));
2356       stp(src1, src0, MemOperand(StackPointer(), 2 * size));
2357       break;
2358     default:
2359       VIXL_UNREACHABLE();
2360   }
2361 }
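
// Illustration (sketch, not from the original source): the operand ordering in
// the stp above keeps the "first pushed ends up at the higher address"
// convention, so a two-register push lays out memory the same way two single
// pushes would. With sp as the current stack pointer:
//
//   masm.Push(x0, x1);  // stp x1, x0, [sp, #-16]!
//                       //   -> x0 at [sp, #8], x1 at [sp]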
2362 
2363 
2364 void MacroAssembler::PopHelper(int count,
2365                                int size,
2366                                const CPURegister& dst0,
2367                                const CPURegister& dst1,
2368                                const CPURegister& dst2,
2369                                const CPURegister& dst3) {
2370   // Ensure that we don't unintentionally modify scratch or debug registers.
2371   // Worst case for size is 2 ldp.
2372   ExactAssemblyScope scope(this,
2373                            2 * kInstructionSize,
2374                            ExactAssemblyScope::kMaximumSize);
2375 
2376   VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
2377   VIXL_ASSERT(size == dst0.GetSizeInBytes());
2378 
2379   // When popping multiple registers, the load order is chosen such that
2380   // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
2381   switch (count) {
2382     case 1:
2383       VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
2384       ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex));
2385       break;
2386     case 2:
2387       VIXL_ASSERT(dst2.IsNone() && dst3.IsNone());
2388       ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex));
2389       break;
2390     case 3:
2391       VIXL_ASSERT(dst3.IsNone());
2392       ldr(dst2, MemOperand(StackPointer(), 2 * size));
2393       ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex));
2394       break;
2395     case 4:
2396       // Load the higher addresses first, then load the lower addresses and skip
2397       // the whole block in the second instruction. This allows four W registers
2398       // to be popped using sp, whilst maintaining 16-byte alignment for sp at
2399       // all times.
2400       ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size));
2401       ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex));
2402       break;
2403     default:
2404       VIXL_UNREACHABLE();
2405   }
2406 }
2407 
2408 
2409 void MacroAssembler::PrepareForPush(int count, int size) {
2410   if (sp.Is(StackPointer())) {
2411     // If the current stack pointer is sp, then it must be aligned to 16 bytes
2412     // on entry and the total size of the specified registers must also be a
2413     // multiple of 16 bytes.
2414     VIXL_ASSERT((count * size) % 16 == 0);
2415   } else {
2416     // Even if the current stack pointer is not the system stack pointer (sp),
2417     // the system stack pointer will still be modified in order to comply with
2418     // ABI rules about accessing memory below the system stack pointer.
2419     BumpSystemStackPointer(count * size);
2420   }
2421 }
2422 
2423 
2424 void MacroAssembler::PrepareForPop(int count, int size) {
2425   USE(count, size);
2426   if (sp.Is(StackPointer())) {
2427     // If the current stack pointer is sp, then it must be aligned to 16 bytes
2428     // on entry and the total size of the specified registers must also be a
2429     // multiple of 16 bytes.
2430     VIXL_ASSERT((count * size) % 16 == 0);
2431   }
2432 }
2433 
2434 void MacroAssembler::Poke(const Register& src, const Operand& offset) {
2435   VIXL_ASSERT(allow_macro_instructions_);
2436   if (offset.IsImmediate()) {
2437     VIXL_ASSERT(offset.GetImmediate() >= 0);
2438   }
2439 
2440   Str(src, MemOperand(StackPointer(), offset));
2441 }
2442 
2443 
2444 void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
2445   VIXL_ASSERT(allow_macro_instructions_);
2446   if (offset.IsImmediate()) {
2447     VIXL_ASSERT(offset.GetImmediate() >= 0);
2448   }
2449 
2450   Ldr(dst, MemOperand(StackPointer(), offset));
2451 }
2452 
2453 
2454 void MacroAssembler::Claim(const Operand& size) {
2455   VIXL_ASSERT(allow_macro_instructions_);
2456 
2457   if (size.IsZero()) {
2458     return;
2459   }
2460 
2461   if (size.IsImmediate()) {
2462     VIXL_ASSERT(size.GetImmediate() > 0);
2463     if (sp.Is(StackPointer())) {
2464       VIXL_ASSERT((size.GetImmediate() % 16) == 0);
2465     }
2466   }
2467 
2468   if (!sp.Is(StackPointer())) {
2469     BumpSystemStackPointer(size);
2470   }
2471 
2472   Sub(StackPointer(), StackPointer(), size);
2473 }
2474 
2475 
2476 void MacroAssembler::Drop(const Operand& size) {
2477   VIXL_ASSERT(allow_macro_instructions_);
2478 
2479   if (size.IsZero()) {
2480     return;
2481   }
2482 
2483   if (size.IsImmediate()) {
2484     VIXL_ASSERT(size.GetImmediate() > 0);
2485     if (sp.Is(StackPointer())) {
2486       VIXL_ASSERT((size.GetImmediate() % 16) == 0);
2487     }
2488   }
2489 
2490   Add(StackPointer(), StackPointer(), size);
2491 }
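
// Illustration (sketch, not part of the original source): Claim and Drop are
// thin wrappers that adjust the current stack pointer, with the 16-byte
// multiple asserted only when that pointer is sp.
//
//   masm.Claim(32);  // sub sp, sp, #32
//   masm.Drop(32);   // add sp, sp, #32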
2492 
2493 
2494 void MacroAssembler::PushCalleeSavedRegisters() {
2495   // Ensure that the macro-assembler doesn't use any scratch registers.
2496   // 10 stp will be emitted.
2497   // TODO(all): Should we use GetCalleeSaved and SavedFP?
2498   ExactAssemblyScope scope(this, 10 * kInstructionSize);
2499 
2500   // This method must not be called unless the current stack pointer is sp.
2501   VIXL_ASSERT(sp.Is(StackPointer()));
2502 
2503   MemOperand tos(sp, -2 * static_cast<int>(kXRegSizeInBytes), PreIndex);
2504 
2505   stp(x29, x30, tos);
2506   stp(x27, x28, tos);
2507   stp(x25, x26, tos);
2508   stp(x23, x24, tos);
2509   stp(x21, x22, tos);
2510   stp(x19, x20, tos);
2511 
2512   stp(d14, d15, tos);
2513   stp(d12, d13, tos);
2514   stp(d10, d11, tos);
2515   stp(d8, d9, tos);
2516 }
2517 
2518 
2519 void MacroAssembler::PopCalleeSavedRegisters() {
2520   // Ensure that the macro-assembler doesn't use any scratch registers.
2521   // 10 ldp will be emitted.
2522   // TODO(all): Should we use GetCalleeSaved and SavedFP?
2523   ExactAssemblyScope scope(this, 10 * kInstructionSize);
2524 
2525   // This method must not be called unless the current stack pointer is sp.
2526   VIXL_ASSERT(sp.Is(StackPointer()));
2527 
2528   MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);
2529 
2530   ldp(d8, d9, tos);
2531   ldp(d10, d11, tos);
2532   ldp(d12, d13, tos);
2533   ldp(d14, d15, tos);
2534 
2535   ldp(x19, x20, tos);
2536   ldp(x21, x22, tos);
2537   ldp(x23, x24, tos);
2538   ldp(x25, x26, tos);
2539   ldp(x27, x28, tos);
2540   ldp(x29, x30, tos);
2541 }
2542 
2543 void MacroAssembler::LoadCPURegList(CPURegList registers,
2544                                     const MemOperand& src) {
2545   LoadStoreCPURegListHelper(kLoad, registers, src);
2546 }
2547 
2548 void MacroAssembler::StoreCPURegList(CPURegList registers,
2549                                      const MemOperand& dst) {
2550   LoadStoreCPURegListHelper(kStore, registers, dst);
2551 }
2552 
2553 
2554 void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
2555                                                CPURegList registers,
2556                                                const MemOperand& mem) {
2557   // We do not handle pre-indexing or post-indexing.
2558   VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
2559 #ifndef PANDA_BUILD
2560   VIXL_ASSERT(!registers.Overlaps(tmp_list_));
2561 #endif
2562   VIXL_ASSERT(!registers.Overlaps(v_tmp_list_));
2563   VIXL_ASSERT(!registers.Overlaps(p_tmp_list_));
2564   VIXL_ASSERT(!registers.IncludesAliasOf(sp));
2565 
2566   UseScratchRegisterScope temps(this);
2567 
2568   MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers, mem, &temps);
2569   const int reg_size = registers.GetRegisterSizeInBytes();
2570 
2571   VIXL_ASSERT(IsPowerOf2(reg_size));
2572 
2573   // Since we are operating on register pairs, we would like to align on double
2574   // the standard size; on the other hand, we don't want to insert an extra
2575   // operation, which will happen if the number of registers is even. Note that
2576   // the alignment of the base pointer is unknown here, but we assume that it
2577   // is more likely to be aligned.
2578   if (((loc.GetOffset() & (2 * reg_size - 1)) != 0) &&
2579       ((registers.GetCount() % 2) != 0)) {
2580     if (op == kStore) {
2581       Str(registers.PopLowestIndex(), loc);
2582     } else {
2583       VIXL_ASSERT(op == kLoad);
2584       Ldr(registers.PopLowestIndex(), loc);
2585     }
2586     loc.AddOffset(reg_size);
2587   }
2588   while (registers.GetCount() >= 2) {
2589     const CPURegister& dst0 = registers.PopLowestIndex();
2590     const CPURegister& dst1 = registers.PopLowestIndex();
2591     if (op == kStore) {
2592       Stp(dst0, dst1, loc);
2593     } else {
2594       VIXL_ASSERT(op == kLoad);
2595       Ldp(dst0, dst1, loc);
2596     }
2597     loc.AddOffset(2 * reg_size);
2598   }
2599   if (!registers.IsEmpty()) {
2600     if (op == kStore) {
2601       Str(registers.PopLowestIndex(), loc);
2602     } else {
2603       VIXL_ASSERT(op == kLoad);
2604       Ldr(registers.PopLowestIndex(), loc);
2605     }
2606   }
2607 }
2608 
2609 MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList(
2610     const CPURegList& registers,
2611     const MemOperand& mem,
2612     UseScratchRegisterScope* scratch_scope) {
2613   // If necessary, pre-compute the base address for the accesses.
2614   if (mem.IsRegisterOffset()) {
2615     Register reg_base = scratch_scope->AcquireX();
2616     ComputeAddress(reg_base, mem);
2617     return MemOperand(reg_base);
2618 
2619   } else if (mem.IsImmediateOffset()) {
2620     int reg_size = registers.GetRegisterSizeInBytes();
2621     int total_size = registers.GetTotalSizeInBytes();
2622     int64_t min_offset = mem.GetOffset();
2623     int64_t max_offset =
2624         mem.GetOffset() + std::max(0, total_size - 2 * reg_size);
2625     if ((registers.GetCount() >= 2) &&
2626         (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) ||
2627          !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) {
2628       Register reg_base = scratch_scope->AcquireX();
2629       ComputeAddress(reg_base, mem);
2630       return MemOperand(reg_base);
2631     }
2632   }
2633 
2634   return mem;
2635 }
2636 
2637 void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
2638   VIXL_ASSERT(!sp.Is(StackPointer()));
2639   // TODO: Several callers rely on this not using scratch registers, so we use
2640   // the assembler directly here. However, this means that large immediate
2641   // values of 'space' cannot be handled.
2642   ExactAssemblyScope scope(this, kInstructionSize);
2643   sub(sp, StackPointer(), space);
2644 }
2645 
2646 
2647 // TODO(all): Fix printf for NEON and SVE registers.
2648 
2649 // This is the main Printf implementation. All callee-saved registers are
2650 // preserved, but NZCV and the caller-saved registers may be clobbered.
2651 void MacroAssembler::PrintfNoPreserve(const char* format,
2652                                       const CPURegister& arg0,
2653                                       const CPURegister& arg1,
2654                                       const CPURegister& arg2,
2655                                       const CPURegister& arg3) {
2656   // We cannot handle a caller-saved stack pointer. It doesn't make much sense
2657   // in most cases anyway, so this restriction shouldn't be too serious.
2658   VIXL_ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer()));
2659 
2660   // The provided arguments, and their proper PCS registers.
2661   CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3};
2662   CPURegister pcs[kPrintfMaxArgCount];
2663 
2664   int arg_count = kPrintfMaxArgCount;
2665 
2666   // The PCS varargs registers for printf. Note that x0 is used for the printf
2667   // format string.
2668   static const CPURegList kPCSVarargs =
2669       CPURegList(CPURegister::kRegister, kXRegSize, 1, arg_count);
2670   static const CPURegList kPCSVarargsV =
2671       CPURegList(CPURegister::kVRegister, kDRegSize, 0, arg_count - 1);
2672 
2673   // We can use caller-saved registers as scratch values, except for the
2674   // arguments and the PCS registers where they might need to go.
2675   UseScratchRegisterScope temps(this);
2676   temps.Include(kCallerSaved);
2677   temps.Include(kCallerSavedV);
2678   temps.Exclude(kPCSVarargs);
2679   temps.Exclude(kPCSVarargsV);
2680   temps.Exclude(arg0, arg1, arg2, arg3);
2681 
2682   // Copies of the arg lists that we can iterate through.
2683   CPURegList pcs_varargs = kPCSVarargs;
2684   CPURegList pcs_varargs_fp = kPCSVarargsV;
2685 
2686   // Place the arguments. There are lots of clever tricks and optimizations we
2687   // could use here, but Printf is a debug tool so instead we just try to keep
2688   // it simple: Move each input that isn't already in the right place to a
2689   // scratch register, then move everything into place in a second pass.
2690   for (unsigned i = 0; i < kPrintfMaxArgCount; i++) {
2691     // Work out the proper PCS register for this argument.
2692     if (args[i].IsRegister()) {
2693       pcs[i] = pcs_varargs.PopLowestIndex().X();
2694       // We might only need a W register here. We need to know the size of the
2695       // argument so we can properly encode it for the simulator call.
2696       if (args[i].Is32Bits()) pcs[i] = pcs[i].W();
2697     } else if (args[i].IsVRegister()) {
2698       // In C, floats are always cast to doubles for varargs calls.
2699       pcs[i] = pcs_varargs_fp.PopLowestIndex().D();
2700     } else {
2701       VIXL_ASSERT(args[i].IsNone());
2702       arg_count = i;
2703       break;
2704     }
2705 
2706     // If the argument is already in the right place, leave it where it is.
2707     if (args[i].Aliases(pcs[i])) continue;
2708 
2709     // Otherwise, if the argument is in a PCS argument register, allocate an
2710     // appropriate scratch register and then move it out of the way.
2711     if (kPCSVarargs.IncludesAliasOf(args[i]) ||
2712         kPCSVarargsV.IncludesAliasOf(args[i])) {
2713       if (args[i].IsRegister()) {
2714         Register old_arg = Register(args[i]);
2715         Register new_arg = temps.AcquireSameSizeAs(old_arg);
2716         Mov(new_arg, old_arg);
2717         args[i] = new_arg;
2718       } else {
2719         VRegister old_arg(args[i]);
2720         VRegister new_arg = temps.AcquireSameSizeAs(old_arg);
2721         Fmov(new_arg, old_arg);
2722         args[i] = new_arg;
2723       }
2724     }
2725   }
2726 
2727   // Do a second pass to move values into their final positions and perform any
2728   // conversions that may be required.
2729   for (int i = 0; i < arg_count; i++) {
2730     VIXL_ASSERT(pcs[i].GetType() == args[i].GetType());
2731     if (pcs[i].IsRegister()) {
2732       Mov(Register(pcs[i]), Register(args[i]), kDiscardForSameWReg);
2733     } else {
2734       VIXL_ASSERT(pcs[i].IsVRegister());
2735       if (pcs[i].GetSizeInBits() == args[i].GetSizeInBits()) {
2736         Fmov(VRegister(pcs[i]), VRegister(args[i]));
2737       } else {
2738         Fcvt(VRegister(pcs[i]), VRegister(args[i]));
2739       }
2740     }
2741   }
2742 
2743   // Load the format string into x0, as per the procedure-call standard.
2744   //
2745   // To make the code as portable as possible, the format string is encoded
2746   // directly in the instruction stream. It might be cleaner to encode it in a
2747   // literal pool, but since Printf is usually used for debugging, it is
2748   // beneficial for it to be minimally dependent on other features.
2749   temps.Exclude(x0);
2750 #ifndef PANDA_BUILD
2751   Label format_address;
2752 #else
2753   Label format_address(allocator_);
2754 #endif
2755   Adr(x0, &format_address);
2756 
2757   // Emit the format string directly in the instruction stream.
2758   {
2759     BlockPoolsScope scope(this);
2760     // Data emitted:
2761     //   branch
2762     //   strlen(format) + 1 (includes null termination)
2763     //   padding to next instruction
2764     //   unreachable
2765     EmissionCheckScope guard(this,
2766                              AlignUp(strlen(format) + 1, kInstructionSize) +
2767                                  2 * kInstructionSize);
2768 #ifndef PANDA_BUILD
2769     Label after_data;
2770 #else
2771     Label after_data(allocator_);
2772 #endif
2773     B(&after_data);
2774     Bind(&format_address);
2775     EmitString(format);
2776     Unreachable();
2777     Bind(&after_data);
2778   }
2779 
2780   // We don't pass any arguments on the stack, but we still need to align the C
2781   // stack pointer to a 16-byte boundary for PCS compliance.
2782   if (!sp.Is(StackPointer())) {
2783     Bic(sp, StackPointer(), 0xf);
2784   }
2785 
2786   // Actually call printf. This part needs special handling for the simulator,
2787   // since the system printf function will use a different instruction set and
2788   // the procedure-call standard will not be compatible.
2789   if (generate_simulator_code_) {
2790     ExactAssemblyScope scope(this, kPrintfLength);
2791     hlt(kPrintfOpcode);
2792     dc32(arg_count);  // kPrintfArgCountOffset
2793 
2794     // Determine the argument pattern.
2795     uint32_t arg_pattern_list = 0;
2796     for (int i = 0; i < arg_count; i++) {
2797       uint32_t arg_pattern;
2798       if (pcs[i].IsRegister()) {
2799         arg_pattern = pcs[i].Is32Bits() ? kPrintfArgW : kPrintfArgX;
2800       } else {
2801         VIXL_ASSERT(pcs[i].Is64Bits());
2802         arg_pattern = kPrintfArgD;
2803       }
2804       VIXL_ASSERT(arg_pattern < (1 << kPrintfArgPatternBits));
2805       arg_pattern_list |= (arg_pattern << (kPrintfArgPatternBits * i));
2806     }
2807     dc32(arg_pattern_list);  // kPrintfArgPatternListOffset
2808   } else {
2809     Register tmp = temps.AcquireX();
2810     Mov(tmp, reinterpret_cast<uintptr_t>(printf));
2811     Blr(tmp);
2812   }
2813 }
2814 
2815 
2816 void MacroAssembler::Printf(const char* format,
2817                             CPURegister arg0,
2818                             CPURegister arg1,
2819                             CPURegister arg2,
2820                             CPURegister arg3) {
2821   // We can only print sp if it is the current stack pointer.
2822   if (!sp.Is(StackPointer())) {
2823     VIXL_ASSERT(!sp.Aliases(arg0));
2824     VIXL_ASSERT(!sp.Aliases(arg1));
2825     VIXL_ASSERT(!sp.Aliases(arg2));
2826     VIXL_ASSERT(!sp.Aliases(arg3));
2827   }
2828 
2829   // Make sure that the macro assembler doesn't try to use any of our arguments
2830   // as scratch registers.
2831   UseScratchRegisterScope exclude_all(this);
2832   exclude_all.ExcludeAll();
2833 
2834   // Preserve all caller-saved registers as well as NZCV.
2835   // If sp is the stack pointer, PushCPURegList asserts that the size of each
2836   // list is a multiple of 16 bytes.
2837   PushCPURegList(kCallerSaved);
2838   PushCPURegList(kCallerSavedV);
2839 
2840   {
2841     UseScratchRegisterScope temps(this);
2842     // We can use caller-saved registers as scratch values (except for argN).
2843     temps.Include(kCallerSaved);
2844     temps.Include(kCallerSavedV);
2845     temps.Exclude(arg0, arg1, arg2, arg3);
2846 
2847     // If any of the arguments are the current stack pointer, allocate a new
2848     // register for them, and adjust the value to compensate for pushing the
2849     // caller-saved registers.
2850     bool arg0_sp = StackPointer().Aliases(arg0);
2851     bool arg1_sp = StackPointer().Aliases(arg1);
2852     bool arg2_sp = StackPointer().Aliases(arg2);
2853     bool arg3_sp = StackPointer().Aliases(arg3);
2854     if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) {
2855       // Allocate a register to hold the original stack pointer value, to pass
2856       // to PrintfNoPreserve as an argument.
2857       Register arg_sp = temps.AcquireX();
2858       Add(arg_sp,
2859           StackPointer(),
2860           kCallerSaved.GetTotalSizeInBytes() +
2861               kCallerSavedV.GetTotalSizeInBytes());
2862       if (arg0_sp) arg0 = Register(arg_sp.GetCode(), arg0.GetSizeInBits());
2863       if (arg1_sp) arg1 = Register(arg_sp.GetCode(), arg1.GetSizeInBits());
2864       if (arg2_sp) arg2 = Register(arg_sp.GetCode(), arg2.GetSizeInBits());
2865       if (arg3_sp) arg3 = Register(arg_sp.GetCode(), arg3.GetSizeInBits());
2866     }
2867 
2868     // Preserve NZCV.
2869     Register tmp = temps.AcquireX();
2870     Mrs(tmp, NZCV);
2871     Push(tmp, xzr);
2872     temps.Release(tmp);
2873 
2874     PrintfNoPreserve(format, arg0, arg1, arg2, arg3);
2875 
2876     // Restore NZCV.
2877     tmp = temps.AcquireX();
2878     Pop(xzr, tmp);
2879     Msr(NZCV, tmp);
2880     temps.Release(tmp);
2881   }
2882 
2883   PopCPURegList(kCallerSavedV);
2884   PopCPURegList(kCallerSaved);
2885 }
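
// Usage sketch (illustrative, not from the original source): Printf preserves
// the caller-saved registers and NZCV, so it can be dropped into generated
// code for debugging. The format string and register arguments below are
// arbitrary examples; PRIx64 comes from <inttypes.h>.
//
//   masm.Printf("x0 = 0x%" PRIx64 ", d0 = %g\n", x0, d0);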
2886 
2887 void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
2888   VIXL_ASSERT(allow_macro_instructions_);
2889 
2890   if (generate_simulator_code_) {
2891     // The arguments to the trace pseudo instruction need to be contiguous in
2892     // memory, so make sure we don't try to emit a literal pool.
2893     ExactAssemblyScope scope(this, kTraceLength);
2894 
2895 #ifndef PANDA_BUILD
2896     Label start;
2897 #else
2898     Label start(allocator_);
2899 #endif
2900     bind(&start);
2901 
2902     // Refer to simulator-aarch64.h for a description of the marker and its
2903     // arguments.
2904     hlt(kTraceOpcode);
2905 
2906     VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
2907     dc32(parameters);
2908 
2909     VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
2910     dc32(command);
2911   } else {
2912     // Emit nothing on real hardware.
2913     USE(parameters, command);
2914   }
2915 }
2916 
2917 
2918 void MacroAssembler::Log(TraceParameters parameters) {
2919   VIXL_ASSERT(allow_macro_instructions_);
2920 
2921   if (generate_simulator_code_) {
2922     // The arguments to the log pseudo instruction need to be contiguous in
2923     // memory, so make sure we don't try to emit a literal pool.
2924     ExactAssemblyScope scope(this, kLogLength);
2925 
2926 #ifndef PANDA_BUILD
2927     Label start;
2928 #else
2929     Label start(allocator_);
2930 #endif
2931     bind(&start);
2932 
2933     // Refer to simulator-aarch64.h for a description of the marker and its
2934     // arguments.
2935     hlt(kLogOpcode);
2936 
2937     VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
2938     dc32(parameters);
2939   } else {
2940     // Emit nothing on real hardware.
2941     USE(parameters);
2942   }
2943 }
2944 
2945 
2946 void MacroAssembler::SetSimulatorCPUFeatures(const CPUFeatures& features) {
2947   ConfigureSimulatorCPUFeaturesHelper(features, kSetCPUFeaturesOpcode);
2948 }
2949 
2950 
2951 void MacroAssembler::EnableSimulatorCPUFeatures(const CPUFeatures& features) {
2952   ConfigureSimulatorCPUFeaturesHelper(features, kEnableCPUFeaturesOpcode);
2953 }
2954 
2955 
2956 void MacroAssembler::DisableSimulatorCPUFeatures(const CPUFeatures& features) {
2957   ConfigureSimulatorCPUFeaturesHelper(features, kDisableCPUFeaturesOpcode);
2958 }
2959 
2960 
2961 void MacroAssembler::ConfigureSimulatorCPUFeaturesHelper(
2962     const CPUFeatures& features, DebugHltOpcode action) {
2963   VIXL_ASSERT(allow_macro_instructions_);
2964   VIXL_ASSERT(generate_simulator_code_);
2965 
2966   typedef ConfigureCPUFeaturesElementType ElementType;
2967   VIXL_ASSERT(CPUFeatures::kNumberOfFeatures <=
2968               std::numeric_limits<ElementType>::max());
2969 
2970   size_t count = features.Count();
2971 
2972   size_t preamble_length = kConfigureCPUFeaturesListOffset;
2973   size_t list_length = (count + 1) * sizeof(ElementType);
2974   size_t padding_length = AlignUp(list_length, kInstructionSize) - list_length;
2975 
2976   size_t total_length = preamble_length + list_length + padding_length;
2977 
2978   // Check the overall code size as well as the size of each component.
2979   ExactAssemblyScope guard_total(this, total_length);
2980 
2981   {  // Preamble: the opcode itself.
2982     ExactAssemblyScope guard_preamble(this, preamble_length);
2983     hlt(action);
2984   }
2985   {  // A kNone-terminated list of features.
2986     ExactAssemblyScope guard_list(this, list_length);
2987     for (CPUFeatures::const_iterator it = features.begin();
2988          it != features.end();
2989          ++it) {
2990       dc(static_cast<ElementType>(*it));
2991     }
2992     dc(static_cast<ElementType>(CPUFeatures::kNone));
2993   }
2994   {  // Padding for instruction alignment.
2995     ExactAssemblyScope guard_padding(this, padding_length);
2996     for (size_t size = 0; size < padding_length; size += sizeof(ElementType)) {
2997       // The exact value is arbitrary.
2998       dc(static_cast<ElementType>(CPUFeatures::kNone));
2999     }
3000   }
3001 }
3002 
3003 void MacroAssembler::SaveSimulatorCPUFeatures() {
3004   VIXL_ASSERT(allow_macro_instructions_);
3005   VIXL_ASSERT(generate_simulator_code_);
3006   SingleEmissionCheckScope guard(this);
3007   hlt(kSaveCPUFeaturesOpcode);
3008 }
3009 
3010 
3011 void MacroAssembler::RestoreSimulatorCPUFeatures() {
3012   VIXL_ASSERT(allow_macro_instructions_);
3013   VIXL_ASSERT(generate_simulator_code_);
3014   SingleEmissionCheckScope guard(this);
3015   hlt(kRestoreCPUFeaturesOpcode);
3016 }
3017 
3018 
void UseScratchRegisterScope::Open(MacroAssembler* masm) {
  VIXL_ASSERT(masm_ == NULL);
  VIXL_ASSERT(masm != NULL);
  masm_ = masm;

  CPURegList* available = masm->GetScratchRegisterList();
  CPURegList* available_v = masm->GetScratchVRegisterList();
  CPURegList* available_p = masm->GetScratchPRegisterList();
  old_available_ = available->GetList();
  old_available_v_ = available_v->GetList();
  old_available_p_ = available_p->GetList();
  VIXL_ASSERT(available->GetType() == CPURegister::kRegister);
  VIXL_ASSERT(available_v->GetType() == CPURegister::kVRegister);
  VIXL_ASSERT(available_p->GetType() == CPURegister::kPRegister);

  parent_ = masm->GetCurrentScratchRegisterScope();
  masm->SetCurrentScratchRegisterScope(this);
}


void UseScratchRegisterScope::Close() {
  if (masm_ != NULL) {
    // Ensure that scopes nest perfectly, and do not outlive their parents.
    // This is a run-time check because the order of destruction of objects in
    // the _same_ scope is implementation-defined, and is likely to change in
    // optimised builds.
    VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
    masm_->SetCurrentScratchRegisterScope(parent_);

    masm_->GetScratchRegisterList()->SetList(old_available_);
    masm_->GetScratchVRegisterList()->SetList(old_available_v_);
    masm_->GetScratchPRegisterList()->SetList(old_available_p_);

    masm_ = NULL;
  }
}


bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
  return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) ||
         masm_->GetScratchVRegisterList()->IncludesAliasOf(reg) ||
         masm_->GetScratchPRegisterList()->IncludesAliasOf(reg);
}

Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) {
  int code = AcquireFrom(masm_->GetScratchRegisterList()).GetCode();
  return Register(code, size_in_bits);
}


VRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) {
  int code = AcquireFrom(masm_->GetScratchVRegisterList()).GetCode();
  return VRegister(code, size_in_bits);
}


void UseScratchRegisterScope::Release(const CPURegister& reg) {
  VIXL_ASSERT(masm_ != NULL);

  // Release(NoReg) has no effect.
  if (reg.IsNone()) return;

  ReleaseByCode(GetAvailableListFor(reg.GetBank()), reg.GetCode());
}


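// The Include overloads below add registers to the scratch lists for the
// lifetime of the scope (sp and xzr are always filtered out of the core
// register bank); the snapshot taken in Open() guarantees the lists revert
// when the scope closes.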
void UseScratchRegisterScope::Include(const CPURegList& list) {
  VIXL_ASSERT(masm_ != NULL);

  // Including an empty list has no effect.
  if (list.IsEmpty()) return;
  VIXL_ASSERT(list.GetType() != CPURegister::kNoRegister);

  RegList reg_list = list.GetList();
  if (list.GetType() == CPURegister::kRegister) {
    // Make sure that neither sp nor xzr are included in the list.
    reg_list &= ~(xzr.GetBit() | sp.GetBit());
  }

  IncludeByRegList(GetAvailableListFor(list.GetBank()), reg_list);
}


void UseScratchRegisterScope::Include(const Register& reg1,
                                      const Register& reg2,
                                      const Register& reg3,
                                      const Register& reg4) {
  VIXL_ASSERT(masm_ != NULL);
  RegList include =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  // Make sure that neither sp nor xzr are included in the list.
  include &= ~(xzr.GetBit() | sp.GetBit());

  IncludeByRegList(masm_->GetScratchRegisterList(), include);
}


void UseScratchRegisterScope::Include(const VRegister& reg1,
                                      const VRegister& reg2,
                                      const VRegister& reg3,
                                      const VRegister& reg4) {
  RegList include =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  IncludeByRegList(masm_->GetScratchVRegisterList(), include);
}


void UseScratchRegisterScope::Include(const CPURegister& reg1,
                                      const CPURegister& reg2,
                                      const CPURegister& reg3,
                                      const CPURegister& reg4) {
  RegList include = 0;
  RegList include_v = 0;
  RegList include_p = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4};

  for (size_t i = 0; i < ArrayLength(regs); i++) {
    RegList bit = regs[i].GetBit();
    switch (regs[i].GetBank()) {
      case CPURegister::kNoRegisterBank:
        // Include(NoReg) has no effect.
        VIXL_ASSERT(regs[i].IsNone());
        break;
      case CPURegister::kRRegisterBank:
        include |= bit;
        break;
      case CPURegister::kVRegisterBank:
        include_v |= bit;
        break;
      case CPURegister::kPRegisterBank:
        include_p |= bit;
        break;
    }
  }

  IncludeByRegList(masm_->GetScratchRegisterList(), include);
  IncludeByRegList(masm_->GetScratchVRegisterList(), include_v);
  IncludeByRegList(masm_->GetScratchPRegisterList(), include_p);
}


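// The Exclude overloads mirror Include: they remove registers from the
// scratch lists so that macro-instructions emitted inside the scope cannot
// allocate them as temporaries.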
void UseScratchRegisterScope::Exclude(const CPURegList& list) {
  ExcludeByRegList(GetAvailableListFor(list.GetBank()), list.GetList());
}


void UseScratchRegisterScope::Exclude(const Register& reg1,
                                      const Register& reg2,
                                      const Register& reg3,
                                      const Register& reg4) {
  RegList exclude =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
}


void UseScratchRegisterScope::Exclude(const VRegister& reg1,
                                      const VRegister& reg2,
                                      const VRegister& reg3,
                                      const VRegister& reg4) {
  RegList exclude_v =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
}


void UseScratchRegisterScope::Exclude(const CPURegister& reg1,
                                      const CPURegister& reg2,
                                      const CPURegister& reg3,
                                      const CPURegister& reg4) {
  RegList exclude = 0;
  RegList exclude_v = 0;
  RegList exclude_p = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4};

  for (size_t i = 0; i < ArrayLength(regs); i++) {
    RegList bit = regs[i].GetBit();
    switch (regs[i].GetBank()) {
      case CPURegister::kNoRegisterBank:
        // Exclude(NoReg) has no effect.
        VIXL_ASSERT(regs[i].IsNone());
        break;
      case CPURegister::kRRegisterBank:
        exclude |= bit;
        break;
      case CPURegister::kVRegisterBank:
        exclude_v |= bit;
        break;
      case CPURegister::kPRegisterBank:
        exclude_p |= bit;
        break;
    }
  }

  ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
  ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
  ExcludeByRegList(masm_->GetScratchPRegisterList(), exclude_p);
}


void UseScratchRegisterScope::ExcludeAll() {
  ExcludeByRegList(masm_->GetScratchRegisterList(),
                   masm_->GetScratchRegisterList()->GetList());
  ExcludeByRegList(masm_->GetScratchVRegisterList(),
                   masm_->GetScratchVRegisterList()->GetList());
  ExcludeByRegList(masm_->GetScratchPRegisterList(),
                   masm_->GetScratchPRegisterList()->GetList());
}


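// The remaining helpers manipulate the underlying RegList bitmasks directly:
// AcquireFrom pops the lowest-indexed register matching `mask` from the
// available list (and checks that one exists), while the ByCode/ByRegList
// helpers simply set or clear bits in the relevant list.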
CPURegister UseScratchRegisterScope::AcquireFrom(CPURegList* available,
                                                 RegList mask) {
  VIXL_CHECK((available->GetList() & mask) != 0);
  CPURegister result = available->PopLowestIndex(mask);
  VIXL_ASSERT(!AreAliased(result, xzr, sp));
  return result;
}


void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) {
  ReleaseByRegList(available, static_cast<RegList>(1) << code);
}


void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available,
                                               RegList regs) {
  available->SetList(available->GetList() | regs);
}


void UseScratchRegisterScope::IncludeByRegList(CPURegList* available,
                                               RegList regs) {
  available->SetList(available->GetList() | regs);
}


void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
                                               RegList exclude) {
  available->SetList(available->GetList() & ~exclude);
}

CPURegList* UseScratchRegisterScope::GetAvailableListFor(
    CPURegister::RegisterBank bank) {
  switch (bank) {
    case CPURegister::kNoRegisterBank:
      return NULL;
    case CPURegister::kRRegisterBank:
      return masm_->GetScratchRegisterList();
    case CPURegister::kVRegisterBank:
      return masm_->GetScratchVRegisterList();
    case CPURegister::kPRegisterBank:
      return masm_->GetScratchPRegisterList();
  }
  VIXL_UNREACHABLE();
  return NULL;
}

}  // namespace aarch64
}  // namespace vixl