// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <cctype>

#include "macro-assembler-aarch64.h"

namespace vixl {
namespace aarch64 {

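// Pools are blocked by incrementing `monitor_` and unblocked by `Release()`.
// `checkpoint_` records the buffer offset by which the pool must have been
// emitted; releasing the last monitor checks that emission was not delayed
// past it.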
void Pool::Release() {
  if (--monitor_ == 0) {
    // Ensure the pool has not been blocked for too long.
    VIXL_ASSERT(masm_->GetCursorOffset() < checkpoint_);
  }
}


void Pool::SetNextCheckpoint(ptrdiff_t checkpoint) {
  masm_->checkpoint_ = std::min(masm_->checkpoint_, checkpoint);
  checkpoint_ = checkpoint;
}


#ifndef PANDA_BUILD
LiteralPool::LiteralPool(MacroAssembler* masm)
    : Pool(masm),
      size_(0),
      first_use_(-1),
      recommended_checkpoint_(kNoCheckpointRequired) {}
#else
LiteralPool::LiteralPool(AllocatorWrapper allocator, MacroAssembler* masm)
    : Pool(masm),
      entries_(allocator.Adapter()),
      size_(0),
      first_use_(-1),
      recommended_checkpoint_(kNoCheckpointRequired),
      deleted_on_destruction_(allocator.Adapter()),
      allocator_(allocator) {}
#endif

LiteralPool::~LiteralPool() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
  VIXL_ASSERT(!IsBlocked());
#ifndef VIXL_USE_PANDA_ALLOC
  VIXL_ASSERT(IsEmpty());
  for (std::vector<RawLiteral*>::iterator it = deleted_on_destruction_.begin();
       it != deleted_on_destruction_.end();
       it++) {
    delete *it;
  }
#endif
}


void LiteralPool::Reset() {
#ifndef VIXL_USE_PANDA_ALLOC
  std::vector<RawLiteral *>::iterator it, end;
  for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
    RawLiteral* literal = *it;
    if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) {
      delete literal;
    }
  }
#endif
  entries_.clear();
  size_ = 0;
  first_use_ = -1;
  Pool::Reset();
  recommended_checkpoint_ = kNoCheckpointRequired;
}


void LiteralPool::CheckEmitFor(size_t amount, EmitOption option) {
  if (IsEmpty() || IsBlocked()) return;

  ptrdiff_t distance = masm_->GetCursorOffset() + amount - first_use_;
  if (distance >= kRecommendedLiteralPoolRange) {
    Emit(option);
  }
}


void LiteralPool::CheckEmitForBranch(size_t range) {
  if (IsEmpty() || IsBlocked()) return;
  if (GetMaxSize() >= range) Emit();
}

// We use a subclass to access the protected `ExactAssemblyScope` constructor
// giving us control over the pools. This allows us to use this scope within
// code emitting pools without creating a circular dependency.
// We keep the constructor private to restrict usage of this helper class.
class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
 private:
  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm, size_t size)
      : ExactAssemblyScope(masm,
                           size,
                           ExactAssemblyScope::kExactSize,
                           ExactAssemblyScope::kIgnorePools) {}

  friend void LiteralPool::Emit(LiteralPool::EmitOption);
  friend void VeneerPool::Emit(VeneerPool::EmitOption, size_t);
};

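// An emitted literal pool is laid out as: an optional branch over the pool
// (when the code that follows is reachable), one marker instruction encoding
// the pool size in 32-bit words, and then the literal data itself.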
void LiteralPool::Emit(EmitOption option) {
  // There is an issue if we are asked to emit a blocked or empty pool.
  VIXL_ASSERT(!IsBlocked());
  VIXL_ASSERT(!IsEmpty());

  size_t pool_size = GetSize();
  size_t emit_size = pool_size;
  if (option == kBranchRequired) emit_size += kInstructionSize;
#ifndef PANDA_BUILD
  Label end_of_pool;
#else
  Label end_of_pool(allocator_);
#endif

  VIXL_ASSERT(emit_size % kInstructionSize == 0);
  {
    CodeBufferCheckScope guard(masm_,
                               emit_size,
                               CodeBufferCheckScope::kCheck,
                               CodeBufferCheckScope::kExactSize);
#ifdef VIXL_DEBUG
    // Also explicitly disallow usage of the `MacroAssembler` here.
    masm_->SetAllowMacroInstructions(false);
#endif
    if (option == kBranchRequired) {
      ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize);
      masm_->b(&end_of_pool);
    }

    {
      // Marker indicating the size of the literal pool in 32-bit words.
      VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0);
      ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize);
      masm_->ldr(xzr, static_cast<int>(pool_size / kWRegSizeInBytes));
    }

    // Now populate the literal pool.
#ifndef PANDA_BUILD
    std::vector<RawLiteral *>::iterator it, end;
#else
    Vector<RawLiteral*>::iterator it, end;
#endif
    for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
      VIXL_ASSERT((*it)->IsUsed());
      masm_->place(*it);
    }

    if (option == kBranchRequired) masm_->bind(&end_of_pool);
#ifdef VIXL_DEBUG
    masm_->SetAllowMacroInstructions(true);
#endif
  }

  Reset();
}

void LiteralPool::AddEntry(RawLiteral* literal) {
  // A literal must be registered immediately before its first use. We cannot
  // verify that this is its first use here, but we can check that no code has
  // been emitted since its last use.
  VIXL_ASSERT(masm_->GetCursorOffset() == literal->GetLastUse());

  UpdateFirstUse(masm_->GetCursorOffset());
  VIXL_ASSERT(masm_->GetCursorOffset() >= first_use_);
  entries_.push_back(literal);
  size_ += literal->GetSize();
}


void LiteralPool::UpdateFirstUse(ptrdiff_t use_position) {
  first_use_ = std::min(first_use_, use_position);
  if (first_use_ == -1) {
    first_use_ = use_position;
    SetNextRecommendedCheckpoint(GetNextRecommendedCheckpoint());
    SetNextCheckpoint(first_use_ + Instruction::kLoadLiteralRange);
  } else {
    VIXL_ASSERT(use_position > first_use_);
  }
}


void VeneerPool::Reset() {
  Pool::Reset();
  unresolved_branches_.Reset();
}

void VeneerPool::Release() {
  --monitor_;
#ifndef PANDA_BUILD
  if (monitor_ == 0) {
    VIXL_ASSERT(IsEmpty() ||
                masm_->GetCursorOffset() <
                    unresolved_branches_.GetFirstLimit());
  }
#else
  // Assertion disabled because we use our own allocator.
#endif
}


void VeneerPool::RegisterUnresolvedBranch(ptrdiff_t branch_pos,
                                          Label* label,
                                          ImmBranchType branch_type) {
  VIXL_ASSERT(!label->IsBound());
  BranchInfo branch_info = BranchInfo(branch_pos, label, branch_type);
  unresolved_branches_.insert(branch_info);
  UpdateNextCheckPoint();
  // TODO: In debug mode register the label with the assembler to make sure it
  // is bound with masm Bind and not asm bind.
}


void VeneerPool::DeleteUnresolvedBranchInfoForLabel(Label* label) {
  if (IsEmpty()) {
    VIXL_ASSERT(checkpoint_ == kNoCheckpointRequired);
    return;
  }

  if (label->IsLinked()) {
    Label::LabelLinksIterator links_it(label);
    for (; !links_it.Done(); links_it.Advance()) {
      ptrdiff_t link_offset = *links_it.Current();
      Instruction* link = masm_->GetInstructionAt(link_offset);

      // ADR instructions are not handled.
      if (BranchTypeUsesVeneers(link->GetBranchType())) {
        BranchInfo branch_info(link_offset, label, link->GetBranchType());
        unresolved_branches_.erase(branch_info);
      }
    }
  }

  UpdateNextCheckPoint();
}


bool VeneerPool::ShouldEmitVeneer(int64_t first_unreacheable_pc,
                                  size_t amount) {
  ptrdiff_t offset =
      kPoolNonVeneerCodeSize + amount + GetMaxSize() + GetOtherPoolsMaxSize();
  return (masm_->GetCursorOffset() + offset) > first_unreacheable_pc;
}

void VeneerPool::CheckEmitFor(size_t amount, EmitOption option) {
  if (IsEmpty()) return;

#ifndef PANDA_BUILD
  VIXL_ASSERT(masm_->GetCursorOffset() + kPoolNonVeneerCodeSize <
              unresolved_branches_.GetFirstLimit());
#else
  // Codegen may create unused labels (so that they can be allocated in one
  // chunk), which would break this assertion.
#endif

  if (IsBlocked()) return;

  if (ShouldEmitVeneers(amount)) {
    Emit(option, amount);
  } else {
    UpdateNextCheckPoint();
  }
}

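// Emitting the veneer pool walks the unresolved branches. Each branch that is
// (or will soon be) out of range is patched to target a freshly emitted
// unconditional branch (the veneer), which in turn jumps to the label.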
void VeneerPool::Emit(EmitOption option, size_t amount) {
  // There is an issue if we are asked to emit a blocked or empty pool.
  VIXL_ASSERT(!IsBlocked());
  VIXL_ASSERT(!IsEmpty());

#ifndef PANDA_BUILD
  Label end;
#else
  Label end(allocator_);
#endif
  if (option == kBranchRequired) {
    ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
    masm_->b(&end);
  }

  // We want to avoid generating veneer pools too often, so generate veneers
  // for branches that don't immediately require a veneer but will soon go out
  // of range.
  static const size_t kVeneerEmissionMargin = 1 * KBytes;

#ifndef PANDA_BUILD
  for (BranchInfoSetIterator it(&unresolved_branches_); !it.Done();) {
#else
  for (BranchInfoSetIterator it(allocator_, &unresolved_branches_);
       !it.Done();) {
#endif
    BranchInfo* branch_info = it.Current();
    if (branch_info && ShouldEmitVeneer(branch_info->first_unreacheable_pc_,
                                        amount + kVeneerEmissionMargin)) {
      CodeBufferCheckScope scope(masm_,
                                 kVeneerCodeSize,
                                 CodeBufferCheckScope::kCheck,
                                 CodeBufferCheckScope::kExactSize);
      ptrdiff_t branch_pos = branch_info->pc_offset_;
      Instruction* branch = masm_->GetInstructionAt(branch_pos);
      Label* label = branch_info->label_;

      // Patch the branch to point to the current position, and emit a branch
      // to the label.
      Instruction* veneer = masm_->GetCursorAddress<Instruction*>();
      branch->SetImmPCOffsetTarget(veneer);
      {
        ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
        masm_->b(label);
      }

      // Update the label. The patched branch no longer points to it.
      label->DeleteLink(branch_pos);

      it.DeleteCurrentAndAdvance();
    } else {
      it.AdvanceToNextType();
    }
  }

  UpdateNextCheckPoint();

  masm_->bind(&end);
}

#ifndef PANDA_BUILD
MacroAssembler::MacroAssembler(PositionIndependentCodeOption pic)
#else
MacroAssembler::MacroAssembler(PandaAllocator* allocator,
                               PositionIndependentCodeOption pic)
#endif
    : Assembler(pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      v_tmp_list_(d30, d31),
      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
      current_scratch_scope_(NULL),
#ifndef PANDA_BUILD
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired),
      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
#else
      literal_pool_(allocator, this),
      veneer_pool_(allocator, this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired),
      fp_nan_propagation_(NoFPMacroNaNPropagationSelected),
      allocator_(allocator) {
#endif
  checkpoint_ = GetNextCheckPoint();
#ifndef VIXL_DEBUG
  USE(allow_macro_instructions_);
#endif
}

#ifndef PANDA_BUILD
MacroAssembler::MacroAssembler(size_t capacity,
                               PositionIndependentCodeOption pic)
    : Assembler(capacity, pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      v_tmp_list_(d30, d31),
      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
      current_scratch_scope_(NULL),
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired),
      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
  checkpoint_ = GetNextCheckPoint();
}
#endif

#ifndef PANDA_BUILD
MacroAssembler::MacroAssembler(byte* buffer,
                               size_t capacity,
                               PositionIndependentCodeOption pic)
    : Assembler(buffer, capacity, pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      v_tmp_list_(d30, d31),
      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
      current_scratch_scope_(NULL),
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired),
      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
  checkpoint_ = GetNextCheckPoint();
}
#else
MacroAssembler::MacroAssembler(PandaAllocator* allocator,
                               byte* buffer,
                               size_t capacity,
                               PositionIndependentCodeOption pic)
    : Assembler(buffer, capacity, pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      v_tmp_list_(d30, d31),
      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
      current_scratch_scope_(NULL),
      literal_pool_(allocator, this),
      veneer_pool_(allocator, this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired),
      allocator_(allocator) {
  checkpoint_ = GetNextCheckPoint();
}
#endif

MacroAssembler::~MacroAssembler() {}


void MacroAssembler::Reset() {
  Assembler::Reset();

  VIXL_ASSERT(!literal_pool_.IsBlocked());
  literal_pool_.Reset();
  veneer_pool_.Reset();

  checkpoint_ = GetNextCheckPoint();
}

void MacroAssembler::FinalizeCode(FinalizeOption option) {
  if (!literal_pool_.IsEmpty()) {
    // The user may decide to emit more code after Finalize; emit a branch if
    // that's the case.
    literal_pool_.Emit(option == kUnreachable ? Pool::kNoBranchRequired
                                              : Pool::kBranchRequired);
  }
  VIXL_ASSERT(veneer_pool_.IsEmpty());

  Assembler::FinalizeCode();
}


void MacroAssembler::CheckEmitFor(size_t amount) {
  CheckEmitPoolsFor(amount);
  GetBuffer()->EnsureSpaceFor(amount);
}


void MacroAssembler::CheckEmitPoolsFor(size_t amount) {
  literal_pool_.CheckEmitFor(amount);
  veneer_pool_.CheckEmitFor(amount);
  checkpoint_ = GetNextCheckPoint();
}

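// For example (an illustrative sketch, not part of the original source):
// moving 0x0000cafe00000000 takes a single `movz x0, #0xcafe, lsl #32`,
// while 0x0123456789abcdef takes one movz plus three movk instructions, and
// a move to sp costs one extra `mov` at the end.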
int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm,
                                        const Register& rd,
                                        uint64_t imm) {
  bool emit_code = (masm != NULL);
  VIXL_ASSERT(IsUint32(imm) || IsInt32(imm) || rd.Is64Bits());
  // The worst case for size is mov 64-bit immediate to sp:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to move to sp
  MacroEmissionCheckScope guard(masm);

  // Immediates on AArch64 can be produced using an initial value, and zero to
  // three move-keep operations.
  //
  // Initial values can be generated with:
  //  1. 64-bit move zero (movz).
  //  2. 32-bit move inverted (movn).
  //  3. 64-bit move inverted.
  //  4. 32-bit orr immediate.
  //  5. 64-bit orr immediate.
  // Move-keep may then be used to modify each of the 16-bit half words.
  //
  // The code below supports all five initial value generators, and
  // applying move-keep operations to move-zero and move-inverted initial
  // values.

  // Try to move the immediate in one instruction, and if that fails, switch to
  // using multiple instructions.
  if (OneInstrMoveImmediateHelper(masm, rd, imm)) {
    return 1;
  } else {
    int instruction_count = 0;
    unsigned reg_size = rd.GetSizeInBits();

    // Generic immediate case. Imm will be represented by
    //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
    // A move-zero or move-inverted is generated for the first non-zero or
    // non-0xffff immX, and a move-keep for subsequent non-zero immX.

    uint64_t ignored_halfword = 0;
    bool invert_move = false;
    // If the number of 0xffff halfwords is greater than the number of 0x0000
    // halfwords, it's more efficient to use move-inverted.
    if (CountClearHalfWords(~imm, reg_size) >
        CountClearHalfWords(imm, reg_size)) {
      ignored_halfword = 0xffff;
      invert_move = true;
    }

    // Mov instructions can't move values into the stack pointer, so set up a
    // temporary register, if needed.
    UseScratchRegisterScope temps;
    Register temp;
    if (emit_code) {
      temps.Open(masm);
      temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;
    }

    // Iterate through the halfwords. Use movn/movz for the first non-ignored
    // halfword, and movk for subsequent halfwords.
    VIXL_ASSERT((reg_size % 16) == 0);
    bool first_mov_done = false;
    for (unsigned i = 0; i < (reg_size / 16); i++) {
      uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
      if (imm16 != ignored_halfword) {
        if (!first_mov_done) {
          if (invert_move) {
            if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i);
            instruction_count++;
          } else {
            if (emit_code) masm->movz(temp, imm16, 16 * i);
            instruction_count++;
          }
          first_mov_done = true;
        } else {
          // Construct a wider constant.
          if (emit_code) masm->movk(temp, imm16, 16 * i);
          instruction_count++;
        }
      }
    }

    VIXL_ASSERT(first_mov_done);

    // Move the temporary if the original destination register was the stack
    // pointer.
    if (rd.IsSP()) {
      if (emit_code) masm->mov(rd, temp);
      instruction_count++;
    }
    return instruction_count;
  }
}


void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
  VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
              ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
  if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
    B(static_cast<Condition>(type), label);
  } else {
    switch (type) {
      case always:
        B(label);
        break;
      case never:
        break;
      case reg_zero:
        Cbz(reg, label);
        break;
      case reg_not_zero:
        Cbnz(reg, label);
        break;
      case reg_bit_clear:
        Tbz(reg, bit, label);
        break;
      case reg_bit_set:
        Tbnz(reg, bit, label);
        break;
      default:
        VIXL_UNREACHABLE();
    }
  }
}


void MacroAssembler::B(Label* label) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(UncondBranchType) >
              Instruction::kLoadLiteralRange);
  SingleEmissionCheckScope guard(this);
  b(label);
}

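// The conditional and compare-and-branch macros below share one strategy for
// bound labels that are out of range: invert the condition and branch over an
// unconditional branch, which has a much larger range. Unbound labels are
// registered with the veneer pool instead, so the branch can be rewritten
// later if it ends up out of range.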
void MacroAssembler::B(Label* label, Condition cond) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CondBranchType) >
              Instruction::kLoadLiteralRange);
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT((cond != al) && (cond != nv));
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
#ifndef PANDA_BUILD
    Label done;
#else
    Label done(allocator_);
#endif
    b(&done, InvertCondition(cond));
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CondBranchType);
    }
    b(label, cond);
  }
}


void MacroAssembler::Cbnz(const Register& rt, Label* label) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) >
              Instruction::kLoadLiteralRange);
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
#ifndef PANDA_BUILD
    Label done;
#else
    Label done(allocator_);
#endif
    cbz(rt, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CompareBranchType);
    }
    cbnz(rt, label);
  }
}


void MacroAssembler::Cbz(const Register& rt, Label* label) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) >
              Instruction::kLoadLiteralRange);
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
#ifndef PANDA_BUILD
    Label done;
#else
    Label done(allocator_);
#endif
    cbnz(rt, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CompareBranchType);
    }
    cbz(rt, label);
  }
}


void MacroAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) {
  // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch
  // can become impossible because we emit the literal pool first.
  literal_pool_.CheckEmitForBranch(
      Instruction::GetImmBranchForwardRange(TestBranchType));
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
#ifndef PANDA_BUILD
    Label done;
#else
    Label done(allocator_);
#endif
    tbz(rt, bit_pos, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            TestBranchType);
    }
    tbnz(rt, bit_pos, label);
  }
}


void MacroAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) {
  // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch
  // can become impossible because we emit the literal pool first.
  literal_pool_.CheckEmitForBranch(
      Instruction::GetImmBranchForwardRange(TestBranchType));
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
#ifndef PANDA_BUILD
    Label done;
#else
    Label done(allocator_);
#endif
    tbnz(rt, bit_pos, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            TestBranchType);
    }
    tbz(rt, bit_pos, label);
  }
}

void MacroAssembler::Bind(Label* label, BranchTargetIdentifier id) {
  VIXL_ASSERT(allow_macro_instructions_);
  veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
  if (id == EmitBTI_none) {
    bind(label);
  } else {
    // Emit this inside an ExactAssemblyScope to ensure there are no extra
    // instructions between the bind and the target identifier instruction.
    ExactAssemblyScope scope(this, kInstructionSize);
    bind(label);
    if (id == EmitPACIASP) {
      paciasp();
    } else if (id == EmitPACIBSP) {
      pacibsp();
    } else {
      bti(id);
    }
  }
}

// Bind a label to a specified offset from the start of the buffer.
void MacroAssembler::BindToOffset(Label* label, ptrdiff_t offset) {
  VIXL_ASSERT(allow_macro_instructions_);
  veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
  Assembler::BindToOffset(label, offset);
}

void MacroAssembler::And(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, AND);
}


void MacroAssembler::Ands(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ANDS);
}


void MacroAssembler::Tst(const Register& rn, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Ands(AppropriateZeroRegFor(rn), rn, operand);
}


void MacroAssembler::Bic(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, BIC);
}


void MacroAssembler::Bics(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, BICS);
}


void MacroAssembler::Orr(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORR);
}


void MacroAssembler::Orn(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORN);
}


void MacroAssembler::Eor(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EOR);
}


void MacroAssembler::Eon(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EON);
}

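// For example (illustrative): `And(x0, x1, 0xfff0fff0fff0fff0)` fits the
// bitmask-immediate encoding and is emitted directly, whereas
// `And(x0, x1, 0x12345678)` does not, so the constant is first synthesised
// into a scratch register.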
void MacroAssembler::LogicalMacro(const Register& rd,
                                  const Register& rn,
                                  const Operand& operand,
                                  LogicalOp op) {
  // The worst case for size is logical immediate to sp:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to do the operation
  //  * 1 instruction to move to sp
  MacroEmissionCheckScope guard(this);
  UseScratchRegisterScope temps(this);
  // Use `rd` as a temp, if we can.
  temps.Include(rd);
  // We read `rn` after evaluating `operand`.
  temps.Exclude(rn);
  // It doesn't matter if `operand` is in `temps` (e.g. because it aliases
  // `rd`) because we don't need it after it is evaluated.

  if (operand.IsImmediate()) {
    uint64_t immediate = operand.GetImmediate();
    unsigned reg_size = rd.GetSizeInBits();

    // If the operation is NOT, invert the operation and immediate.
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = ~immediate;
    }

    // Ignore the top 32 bits of an immediate if we're moving to a W register.
    if (rd.Is32Bits()) {
      // Check that the top 32 bits are consistent.
      VIXL_ASSERT(((immediate >> kWRegSize) == 0) ||
                  ((immediate >> kWRegSize) == 0xffffffff));
      immediate &= kWRegMask;
    }

    VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate));

    // Special cases for all set or all clear immediates.
    if (immediate == 0) {
      switch (op) {
        case AND:
          Mov(rd, 0);
          return;
        case ORR:
          VIXL_FALLTHROUGH();
        case EOR:
          Mov(rd, rn);
          return;
        case ANDS:
          VIXL_FALLTHROUGH();
        case BICS:
          break;
        default:
          VIXL_UNREACHABLE();
      }
    } else if ((rd.Is64Bits() && (immediate == UINT64_C(0xffffffffffffffff))) ||
               (rd.Is32Bits() && (immediate == UINT64_C(0x00000000ffffffff)))) {
      switch (op) {
        case AND:
          Mov(rd, rn);
          return;
        case ORR:
          Mov(rd, immediate);
          return;
        case EOR:
          Mvn(rd, rn);
          return;
        case ANDS:
          VIXL_FALLTHROUGH();
        case BICS:
          break;
        default:
          VIXL_UNREACHABLE();
      }
    }

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // Immediate can't be encoded: synthesize using move immediate.
      Register temp = temps.AcquireSameSizeAs(rn);
      VIXL_ASSERT(!temp.Aliases(rn));

      // If the left-hand input is the stack pointer, we can't pre-shift the
      // immediate, as the encoding won't allow the subsequent post shift.
      PreShiftImmMode mode = rn.IsSP() ? kNoShift : kAnyShift;
      Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate, mode);

      if (rd.Is(sp) || rd.Is(wsp)) {
        // If rd is the stack pointer we cannot use it as the destination
        // register so we use the temp register as an intermediate again.
        Logical(temp, rn, imm_operand, op);
        Mov(rd, temp);
      } else {
        Logical(rd, rn, imm_operand, op);
      }
    }
  } else if (operand.IsExtendedRegister()) {
    VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
    // Add/sub extended supports shift <= 4. We want to support exactly the
    // same modes here.
    VIXL_ASSERT(operand.GetShiftAmount() <= 4);
    VIXL_ASSERT(
        operand.GetRegister().Is64Bits() ||
        ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));

    Register temp = temps.AcquireSameSizeAs(rn);
    VIXL_ASSERT(!temp.Aliases(rn));
    EmitExtendShift(temp,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
    Logical(rd, rn, Operand(temp), op);
  } else {
    // The operand can be encoded in the instruction.
    VIXL_ASSERT(operand.IsShiftedRegister());
    Logical(rd, rn, operand, op);
  }
}


void MacroAssembler::Mov(const Register& rd,
                         const Operand& operand,
                         DiscardMoveMode discard_mode) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is mov immediate with up to 4 instructions.
  MacroEmissionCheckScope guard(this);

  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mov(rd, operand.GetImmediate());
  } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
    // Emit a shift instruction if moving a shifted register. This operation
    // could also be achieved using an orr instruction (like orn used by Mvn),
    // but using a shift instruction makes the disassembly clearer.
    EmitShift(rd,
              operand.GetRegister(),
              operand.GetShift(),
              operand.GetShiftAmount());
  } else if (operand.IsExtendedRegister()) {
    // Emit an extend instruction if moving an extended register. This handles
    // extend with post-shift operations, too.
    EmitExtendShift(rd,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
  } else {
    Mov(rd, operand.GetRegister(), discard_mode);
  }
}

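// The Movi*bitHelper functions below try successively more general encodings
// for vector immediates: a single movi/mvni (possibly shifted or in MSL
// form), halving the problem when the top and bottom halves are equal, and
// finally a scalar move followed by dup.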
void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) {
  VIXL_ASSERT(IsUint16(imm));
  int byte1 = (imm & 0xff);
  int byte2 = ((imm >> 8) & 0xff);
  if (byte1 == byte2) {
    movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1);
  } else if (byte1 == 0) {
    movi(vd, byte2, LSL, 8);
  } else if (byte2 == 0) {
    movi(vd, byte1);
  } else if (byte1 == 0xff) {
    mvni(vd, ~byte2 & 0xff, LSL, 8);
  } else if (byte2 == 0xff) {
    mvni(vd, ~byte1 & 0xff);
  } else {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    movz(temp, imm);
    dup(vd, temp);
  }
}


void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) {
  VIXL_ASSERT(IsUint32(imm));

  uint8_t bytes[sizeof(imm)];
  memcpy(bytes, &imm, sizeof(imm));

  // All bytes are either 0x00 or 0xff.
  {
    bool all0orff = true;
    for (int i = 0; i < 4; ++i) {
      if ((bytes[i] != 0) && (bytes[i] != 0xff)) {
        all0orff = false;
        break;
      }
    }

    if (all0orff == true) {
      movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm));
      return;
    }
  }

  // Of the 4 bytes, only one byte is non-zero.
  for (int i = 0; i < 4; i++) {
    if ((imm & (0xff << (i * 8))) == imm) {
      movi(vd, bytes[i], LSL, i * 8);
      return;
    }
  }

  // Of the 4 bytes, only one byte is not 0xff.
  for (int i = 0; i < 4; i++) {
    uint32_t mask = ~(0xff << (i * 8));
    if ((imm & mask) == mask) {
      mvni(vd, ~bytes[i] & 0xff, LSL, i * 8);
      return;
    }
  }

  // Immediate is of the form 0x00MMFFFF.
  if ((imm & 0xff00ffff) == 0x0000ffff) {
    movi(vd, bytes[2], MSL, 16);
    return;
  }

  // Immediate is of the form 0x0000MMFF.
  if ((imm & 0xffff00ff) == 0x000000ff) {
    movi(vd, bytes[1], MSL, 8);
    return;
  }

  // Immediate is of the form 0xFFMM0000.
  if ((imm & 0xff00ffff) == 0xff000000) {
    mvni(vd, ~bytes[2] & 0xff, MSL, 16);
    return;
  }
  // Immediate is of the form 0xFFFFMM00.
  if ((imm & 0xffff00ff) == 0xffff0000) {
    mvni(vd, ~bytes[1] & 0xff, MSL, 8);
    return;
  }

  // Top and bottom 16-bits are equal.
  if (((imm >> 16) & 0xffff) == (imm & 0xffff)) {
    Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    Mov(temp, imm);
    dup(vd, temp);
  }
}


void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) {
  // All bytes are either 0x00 or 0xff.
  {
    bool all0orff = true;
    for (int i = 0; i < 8; ++i) {
      int byteval = (imm >> (i * 8)) & 0xff;
      if (byteval != 0 && byteval != 0xff) {
        all0orff = false;
        break;
      }
    }
    if (all0orff == true) {
      movi(vd, imm);
      return;
    }
  }

  // Top and bottom 32-bits are equal.
  if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) {
    Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
    Mov(temp, imm);
    if (vd.Is1D()) {
      mov(vd.D(), 0, temp);
    } else {
      dup(vd.V2D(), temp);
    }
  }
}


void MacroAssembler::Movi(const VRegister& vd,
                          uint64_t imm,
                          Shift shift,
                          int shift_amount) {
  VIXL_ASSERT(allow_macro_instructions_);
  MacroEmissionCheckScope guard(this);
  if (shift_amount != 0 || shift != LSL) {
    movi(vd, imm, shift, shift_amount);
  } else if (vd.Is8B() || vd.Is16B()) {
    // 8-bit immediate.
    VIXL_ASSERT(IsUint8(imm));
    movi(vd, imm);
  } else if (vd.Is4H() || vd.Is8H()) {
    // 16-bit immediate.
    Movi16bitHelper(vd, imm);
  } else if (vd.Is2S() || vd.Is4S()) {
    // 32-bit immediate.
    Movi32bitHelper(vd, imm);
  } else {
    // 64-bit immediate.
    Movi64bitHelper(vd, imm);
  }
}


void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
  // TODO: Move 128-bit values in a more efficient way.
  VIXL_ASSERT(vd.Is128Bits());
  Movi(vd.V2D(), lo);
  if (hi != lo) {
    UseScratchRegisterScope temps(this);
    // TODO: Figure out if using a temporary V register to materialise the
    // immediate is better.
    Register temp = temps.AcquireX();
    Mov(temp, hi);
    Ins(vd.V2D(), 1, temp);
  }
}


void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is mvn immediate with up to 4 instructions.
  MacroEmissionCheckScope guard(this);

  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mvn(rd, operand.GetImmediate());
  } else if (operand.IsExtendedRegister()) {
    // Emit two instructions for the extend case. This differs from Mov, as
    // the extend and invert can't be achieved in one instruction.
    EmitExtendShift(rd,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
    mvn(rd, rd);
  } else {
    // Otherwise, register and shifted register cases can be handled by the
    // assembler directly, using orn.
    mvn(rd, operand);
  }
}


void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  MoveImmediateHelper(this, rd, imm);
}

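// Ccmp and Ccmn swap to the complementary operation for negative immediates,
// giving the magnitude a chance to fit the 5-bit unsigned immediate field of
// ccmp/ccmn.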
void MacroAssembler::Ccmp(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMN);
  } else {
    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
  }
}


void MacroAssembler::Ccmn(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMP);
  } else {
    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
  }
}


void MacroAssembler::ConditionalCompareMacro(const Register& rn,
                                             const Operand& operand,
                                             StatusFlags nzcv,
                                             Condition cond,
                                             ConditionalCompareOp op) {
  VIXL_ASSERT((cond != al) && (cond != nv));
  // The worst case for size is ccmp immediate:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction for ccmp
  MacroEmissionCheckScope guard(this);

  if ((operand.IsShiftedRegister() && (operand.GetShiftAmount() == 0)) ||
      (operand.IsImmediate() &&
       IsImmConditionalCompare(operand.GetImmediate()))) {
    // The immediate can be encoded in the instruction, or the operand is an
    // unshifted register: call the assembler.
    ConditionalCompare(rn, operand, nzcv, cond, op);
  } else {
    UseScratchRegisterScope temps(this);
    // The operand isn't directly supported by the instruction: perform the
    // operation on a temporary register.
    Register temp = temps.AcquireSameSizeAs(rn);
    Mov(temp, operand);
    ConditionalCompare(rn, temp, nzcv, cond, op);
  }
}

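// CselHelper tries, in order: both operands immediate (often no csel is
// needed at all), a right operand in {-1, 0, 1} (csel/csinc/csinv against
// the zero register), and finally a plain csel with both operands
// synthesised into registers. With a NULL `masm`, it only analyses the
// operands without emitting code.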
void MacroAssembler::CselHelper(MacroAssembler* masm,
                                const Register& rd,
                                Operand left,
                                Operand right,
                                Condition cond,
                                bool* should_synthesise_left,
                                bool* should_synthesise_right) {
  bool emit_code = (masm != NULL);

  VIXL_ASSERT(!emit_code || masm->allow_macro_instructions_);
  VIXL_ASSERT((cond != al) && (cond != nv));
  VIXL_ASSERT(!rd.IsZero() && !rd.IsSP());
  VIXL_ASSERT(left.IsImmediate() || !left.GetRegister().IsSP());
  VIXL_ASSERT(right.IsImmediate() || !right.GetRegister().IsSP());

  if (should_synthesise_left != NULL) *should_synthesise_left = false;
  if (should_synthesise_right != NULL) *should_synthesise_right = false;

  // The worst case for size occurs when the inputs are two non encodable
  // constants:
  //  * up to 4 instructions to materialise the left constant
  //  * up to 4 instructions to materialise the right constant
  //  * 1 instruction for csel
  EmissionCheckScope guard(masm, 9 * kInstructionSize);
  UseScratchRegisterScope temps;
  if (masm != NULL) {
    temps.Open(masm);
  }

  // Try to handle cases where both inputs are immediates.
  bool left_is_immediate = left.IsImmediate() || left.IsZero();
  bool right_is_immediate = right.IsImmediate() || right.IsZero();
  if (left_is_immediate && right_is_immediate &&
      CselSubHelperTwoImmediates(masm,
                                 rd,
                                 left.GetEquivalentImmediate(),
                                 right.GetEquivalentImmediate(),
                                 cond,
                                 should_synthesise_left,
                                 should_synthesise_right)) {
    return;
  }

  // Handle cases where one of the two inputs is -1, 0, or 1.
  bool left_is_small_immediate =
      left_is_immediate && ((-1 <= left.GetEquivalentImmediate()) &&
                            (left.GetEquivalentImmediate() <= 1));
  bool right_is_small_immediate =
      right_is_immediate && ((-1 <= right.GetEquivalentImmediate()) &&
                             (right.GetEquivalentImmediate() <= 1));
  if (right_is_small_immediate || left_is_small_immediate) {
    bool swapped_inputs = false;
    if (!right_is_small_immediate) {
      std::swap(left, right);
      cond = InvertCondition(cond);
      swapped_inputs = true;
    }
    CselSubHelperRightSmallImmediate(masm,
                                     &temps,
                                     rd,
                                     left,
                                     right,
                                     cond,
                                     swapped_inputs ? should_synthesise_right
                                                    : should_synthesise_left);
    return;
  }

  // Otherwise both inputs need to be available in registers. Synthesise them
  // if necessary and emit the `csel`.
  if (!left.IsPlainRegister()) {
    if (emit_code) {
      Register temp = temps.AcquireSameSizeAs(rd);
      masm->Mov(temp, left);
      left = temp;
    }
    if (should_synthesise_left != NULL) *should_synthesise_left = true;
  }
  if (!right.IsPlainRegister()) {
    if (emit_code) {
      Register temp = temps.AcquireSameSizeAs(rd);
      masm->Mov(temp, right);
      right = temp;
    }
    if (should_synthesise_right != NULL) *should_synthesise_right = true;
  }
  if (emit_code) {
    VIXL_ASSERT(left.IsPlainRegister() && right.IsPlainRegister());
    if (left.GetRegister().Is(right.GetRegister())) {
      masm->Mov(rd, left.GetRegister());
    } else {
      masm->csel(rd, left.GetRegister(), right.GetRegister(), cond);
    }
  }
}


bool MacroAssembler::CselSubHelperTwoImmediates(MacroAssembler* masm,
                                                const Register& rd,
                                                int64_t left,
                                                int64_t right,
                                                Condition cond,
                                                bool* should_synthesise_left,
                                                bool* should_synthesise_right) {
  bool emit_code = (masm != NULL);
  if (should_synthesise_left != NULL) *should_synthesise_left = false;
  if (should_synthesise_right != NULL) *should_synthesise_right = false;

  if (left == right) {
    if (emit_code) masm->Mov(rd, left);
    return true;
  } else if (left == -right) {
    if (should_synthesise_right != NULL) *should_synthesise_right = true;
    if (emit_code) {
      masm->Mov(rd, right);
      masm->Cneg(rd, rd, cond);
    }
    return true;
  }

  if (CselSubHelperTwoOrderedImmediates(masm, rd, left, right, cond)) {
    return true;
  } else {
    std::swap(left, right);
    if (CselSubHelperTwoOrderedImmediates(masm,
                                          rd,
                                          left,
                                          right,
                                          InvertCondition(cond))) {
      return true;
    }
  }

  // TODO: Handle more situations. For example handle `csel rd, #5, #6, cond`
  // with `cinc`.
  return false;
}


bool MacroAssembler::CselSubHelperTwoOrderedImmediates(MacroAssembler* masm,
                                                       const Register& rd,
                                                       int64_t left,
                                                       int64_t right,
                                                       Condition cond) {
  bool emit_code = (masm != NULL);

  if ((left == 1) && (right == 0)) {
    if (emit_code) masm->cset(rd, cond);
    return true;
  } else if ((left == -1) && (right == 0)) {
    if (emit_code) masm->csetm(rd, cond);
    return true;
  }
  return false;
}


void MacroAssembler::CselSubHelperRightSmallImmediate(
    MacroAssembler* masm,
    UseScratchRegisterScope* temps,
    const Register& rd,
    const Operand& left,
    const Operand& right,
    Condition cond,
    bool* should_synthesise_left) {
  bool emit_code = (masm != NULL);
  VIXL_ASSERT((right.IsImmediate() || right.IsZero()) &&
              (-1 <= right.GetEquivalentImmediate()) &&
              (right.GetEquivalentImmediate() <= 1));
  Register left_register;

  if (left.IsPlainRegister()) {
    left_register = left.GetRegister();
  } else {
    if (emit_code) {
      left_register = temps->AcquireSameSizeAs(rd);
      masm->Mov(left_register, left);
    }
    if (should_synthesise_left != NULL) *should_synthesise_left = true;
  }
  if (emit_code) {
    int64_t imm = right.GetEquivalentImmediate();
    Register zr = AppropriateZeroRegFor(rd);
    if (imm == 0) {
      masm->csel(rd, left_register, zr, cond);
    } else if (imm == 1) {
      masm->csinc(rd, left_register, zr, cond);
    } else {
      VIXL_ASSERT(imm == -1);
      masm->csinv(rd, left_register, zr, cond);
    }
  }
}

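// Add and Sub fold a negative immediate into the opposite operation when its
// negation fits the 12-bit (optionally shifted) add/sub immediate, e.g.
// `Add(x0, x1, -4)` is emitted as `sub x0, x1, #4`.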
1482 void MacroAssembler::Add(const Register& rd,
1483                          const Register& rn,
1484                          const Operand& operand,
1485                          FlagsUpdate S) {
1486   VIXL_ASSERT(allow_macro_instructions_);
1487   if (operand.IsImmediate()) {
1488     int64_t imm = operand.GetImmediate();
1489     if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
1490         IsImmAddSub(-imm)) {
1491       AddSubMacro(rd, rn, -imm, S, SUB);
1492       return;
1493     }
1494   }
1495   AddSubMacro(rd, rn, operand, S, ADD);
1496 }
1497 
1498 
1499 void MacroAssembler::Adds(const Register& rd,
1500                           const Register& rn,
1501                           const Operand& operand) {
1502   Add(rd, rn, operand, SetFlags);
1503 }
1504 
1505 
1506 void MacroAssembler::Sub(const Register& rd,
1507                          const Register& rn,
1508                          const Operand& operand,
1509                          FlagsUpdate S) {
1510   VIXL_ASSERT(allow_macro_instructions_);
1511   if (operand.IsImmediate()) {
1512     int64_t imm = operand.GetImmediate();
1513     if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
1514         IsImmAddSub(-imm)) {
1515       AddSubMacro(rd, rn, -imm, S, ADD);
1516       return;
1517     }
1518   }
1519   AddSubMacro(rd, rn, operand, S, SUB);
1520 }
1521 
1522 
1523 void MacroAssembler::Subs(const Register& rd,
1524                           const Register& rn,
1525                           const Operand& operand) {
1526   Sub(rd, rn, operand, SetFlags);
1527 }
1528 
1529 
1530 void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
1531   VIXL_ASSERT(allow_macro_instructions_);
1532   Adds(AppropriateZeroRegFor(rn), rn, operand);
1533 }
1534 
1535 
1536 void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
1537   VIXL_ASSERT(allow_macro_instructions_);
1538   Subs(AppropriateZeroRegFor(rn), rn, operand);
1539 }
1540 
1541 
1542 void MacroAssembler::Fcmp(const VRegister& fn, double value, FPTrapFlags trap) {
1543   VIXL_ASSERT(allow_macro_instructions_);
1544   // The worst case for size is:
1545   //  * 1 to materialise the constant, using literal pool if necessary
1546   //  * 1 instruction for fcmp{e}
1547   MacroEmissionCheckScope guard(this);
1548   if (value != 0.0) {
1549     UseScratchRegisterScope temps(this);
1550     VRegister tmp = temps.AcquireSameSizeAs(fn);
1551     Fmov(tmp, value);
1552     FPCompareMacro(fn, tmp, trap);
1553   } else {
1554     FPCompareMacro(fn, value, trap);
1555   }
1556 }
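
// A minimal usage sketch (an assumption, not from the original source):
//
//   masm.Fcmp(d0, 0.0);  // uses the fcmp-with-zero form directly
//   masm.Fcmp(d0, 1.5);  // materialises 1.5 in a scratch V register first,
//                        // then compares register against register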
1557 
1558 
1559 void MacroAssembler::Fcmpe(const VRegister& fn, double value) {
1560   Fcmp(fn, value, EnableTrap);
1561 }
1562 
1563 
1564 void MacroAssembler::Fmov(VRegister vd, double imm) {
1565   VIXL_ASSERT(allow_macro_instructions_);
1566   // Floating point immediates are loaded through the literal pool.
1567   MacroEmissionCheckScope guard(this);
1568 
1569   if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
1570     Fmov(vd, Float16(imm));
1571     return;
1572   }
1573 
1574   if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
1575     Fmov(vd, static_cast<float>(imm));
1576     return;
1577   }
1578 
1579   VIXL_ASSERT(vd.Is1D() || vd.Is2D());
1580   if (IsImmFP64(imm)) {
1581     fmov(vd, imm);
1582   } else {
1583     uint64_t rawbits = DoubleToRawbits(imm);
1584     if (vd.IsScalar()) {
1585       if (rawbits == 0) {
1586         fmov(vd, xzr);
1587       } else {
1588         ldr(vd,
1589 #ifndef PANDA_BUILD
1590             new Literal<double>(imm,
1591 #else
1592             allocator_.New<Literal<double>> (imm,
1593 #endif
1594                                 &literal_pool_,
1595                                 RawLiteral::kDeletedOnPlacementByPool));
1596       }
1597     } else {
1598       // TODO: consider NEON support for load literal.
1599       Movi(vd, rawbits);
1600     }
1601   }
1602 }
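
// Illustrative sketch (an assumption, not from the original source): only a
// small set of doubles fits the fmov immediate encoding, so for example:
//
//   masm.Fmov(d0, 1.0);  // encodable: fmov d0, #1.0
//   masm.Fmov(d0, 0.0);  // zero is special-cased: fmov d0, xzr
//   masm.Fmov(d0, 1.1);  // neither: ldr d0 from a literal pool entry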
1603 
1604 
1605 void MacroAssembler::Fmov(VRegister vd, float imm) {
1606   VIXL_ASSERT(allow_macro_instructions_);
1607   // Floating point immediates are loaded through the literal pool.
1608   MacroEmissionCheckScope guard(this);
1609 
1610   if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
1611     Fmov(vd, Float16(imm));
1612     return;
1613   }
1614 
1615   if (vd.Is1D() || vd.Is2D()) {
1616     Fmov(vd, static_cast<double>(imm));
1617     return;
1618   }
1619 
1620   VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S());
1621   if (IsImmFP32(imm)) {
1622     fmov(vd, imm);
1623   } else {
1624     uint32_t rawbits = FloatToRawbits(imm);
1625     if (vd.IsScalar()) {
1626       if (rawbits == 0) {
1627         fmov(vd, wzr);
1628       } else {
1629         ldr(vd,
1630 #ifndef PANDA_BUILD
1631             new Literal<float>(imm,
1632 #else
1633             allocator_.New<Literal<float>>(imm,
1634 #endif
1635                                &literal_pool_,
1636                                RawLiteral::kDeletedOnPlacementByPool));
1637       }
1638     } else {
1639       // TODO: consider NEON support for load literal.
1640       Movi(vd, rawbits);
1641     }
1642   }
1643 }
1644 
1645 
1646 void MacroAssembler::Fmov(VRegister vd, Float16 imm) {
1647   VIXL_ASSERT(allow_macro_instructions_);
1648   MacroEmissionCheckScope guard(this);
1649 
1650   if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
1651     Fmov(vd, FPToFloat(imm, kIgnoreDefaultNaN));
1652     return;
1653   }
1654 
1655   if (vd.Is1D() || vd.Is2D()) {
1656     Fmov(vd, FPToDouble(imm, kIgnoreDefaultNaN));
1657     return;
1658   }
1659 
1660   VIXL_ASSERT(vd.Is1H() || vd.Is4H() || vd.Is8H());
1661   uint16_t rawbits = Float16ToRawbits(imm);
1662   if (IsImmFP16(imm)) {
1663     fmov(vd, imm);
1664   } else {
1665     if (vd.IsScalar()) {
1666       if (rawbits == 0x0) {
1667         fmov(vd, wzr);
1668       } else {
1669         // We can use movz instead of the literal pool.
1670         UseScratchRegisterScope temps(this);
1671         Register temp = temps.AcquireW();
1672         Mov(temp, rawbits);
1673         Fmov(vd, temp);
1674       }
1675     } else {
1676       // TODO: consider NEON support for load literal.
1677       Movi(vd, static_cast<uint64_t>(rawbits));
1678     }
1679   }
1680 }
1681 
1682 
1683 void MacroAssembler::Neg(const Register& rd, const Operand& operand) {
1684   VIXL_ASSERT(allow_macro_instructions_);
1685   if (operand.IsImmediate()) {
1686     Mov(rd, -operand.GetImmediate());
1687   } else {
1688     Sub(rd, AppropriateZeroRegFor(rd), operand);
1689   }
1690 }
1691 
1692 
1693 void MacroAssembler::Negs(const Register& rd, const Operand& operand) {
1694   VIXL_ASSERT(allow_macro_instructions_);
1695   Subs(rd, AppropriateZeroRegFor(rd), operand);
1696 }
1697 
1698 
1699 bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst,
1700                                               uint64_t imm) {
1701   return OneInstrMoveImmediateHelper(this, dst, imm);
1702 }
1703 
1704 
1705 Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst,
1706                                                   uint64_t imm,
1707                                                   PreShiftImmMode mode) {
1708   int reg_size = dst.GetSizeInBits();
1709 
1710   // Encode the immediate in a single move instruction, if possible.
1711   if (TryOneInstrMoveImmediate(dst, imm)) {
1712     // The move was successful; nothing to do here.
1713   } else {
1714     // Pre-shift the immediate to the least-significant bits of the register.
1715     int shift_low = CountTrailingZeros(imm, reg_size);
1716     if (mode == kLimitShiftForSP) {
1717       // When applied to the stack pointer, the subsequent arithmetic operation
1718       // can use the extend form to shift left by a maximum of four bits. Right
1719       // shifts are not allowed, so we filter them out later before the new
1720       // immediate is tested.
1721       shift_low = std::min(shift_low, 4);
1722     }
1723     // TryOneInstrMoveImmediate handles the case where `imm` is zero, so `imm` is
1724     // non-zero here, shift_low lies in [0, 63], and the shifts below are well-defined.
1725     VIXL_ASSERT((shift_low >= 0) && (shift_low < 64));
1726     // imm_low = imm >> shift_low (with sign extension)
1727     uint64_t imm_low = ExtractSignedBitfield64(63, shift_low, imm);
1728 
1729     // Pre-shift the immediate to the most-significant bits of the register,
1730     // inserting set bits in the least-significant bits.
1731     int shift_high = CountLeadingZeros(imm, reg_size);
1732     VIXL_ASSERT((shift_high >= 0) && (shift_high < 64));
1733     uint64_t imm_high = (imm << shift_high) | GetUintMask(shift_high);
1734 
1735     if ((mode != kNoShift) && TryOneInstrMoveImmediate(dst, imm_low)) {
1736       // The new immediate has been moved into the destination's low bits:
1737       // return a new leftward-shifting operand.
1738       return Operand(dst, LSL, shift_low);
1739     } else if ((mode == kAnyShift) && TryOneInstrMoveImmediate(dst, imm_high)) {
1740       // The new immediate has been moved into the destination's high bits:
1741       // return a new rightward-shifting operand.
1742       return Operand(dst, LSR, shift_high);
1743     } else {
1744       Mov(dst, imm);
1745     }
1746   }
1747   return Operand(dst);
1748 }
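
// Worked example (an assumption, not from the original source): for
// imm = 0xabcd0 there is no single movz/movn/orr encoding, but the
// pre-shifted value 0xabcd is a single movz. Assuming the mode permits a
// left shift, the helper emits `movz dst, #0xabcd` and returns
// Operand(dst, LSL, 4) for the caller's add/sub to consume.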
1749 
1750 
1751 void MacroAssembler::Move(const GenericOperand& dst,
1752                           const GenericOperand& src) {
1753   if (dst.Equals(src)) {
1754     return;
1755   }
1756 
1757   VIXL_ASSERT(dst.IsValid() && src.IsValid());
1758 
1759   // The sizes of the operands must match exactly.
1760   VIXL_ASSERT(dst.GetSizeInBits() == src.GetSizeInBits());
1761   VIXL_ASSERT(dst.GetSizeInBits() <= kXRegSize);
1762   int operand_size = static_cast<int>(dst.GetSizeInBits());
1763 
1764   if (dst.IsCPURegister() && src.IsCPURegister()) {
1765     CPURegister dst_reg = dst.GetCPURegister();
1766     CPURegister src_reg = src.GetCPURegister();
1767     if (dst_reg.IsRegister() && src_reg.IsRegister()) {
1768       Mov(Register(dst_reg), Register(src_reg));
1769     } else if (dst_reg.IsVRegister() && src_reg.IsVRegister()) {
1770       Fmov(VRegister(dst_reg), VRegister(src_reg));
1771     } else {
1772       if (dst_reg.IsRegister()) {
1773         Fmov(Register(dst_reg), VRegister(src_reg));
1774       } else {
1775         Fmov(VRegister(dst_reg), Register(src_reg));
1776       }
1777     }
1778     return;
1779   }
1780 
1781   if (dst.IsMemOperand() && src.IsMemOperand()) {
1782     UseScratchRegisterScope temps(this);
1783     CPURegister temp = temps.AcquireCPURegisterOfSize(operand_size);
1784     Ldr(temp, src.GetMemOperand());
1785     Str(temp, dst.GetMemOperand());
1786     return;
1787   }
1788 
1789   if (dst.IsCPURegister()) {
1790     Ldr(dst.GetCPURegister(), src.GetMemOperand());
1791   } else {
1792     Str(src.GetCPURegister(), dst.GetMemOperand());
1793   }
1794 }
1795 
1796 
1797 void MacroAssembler::ComputeAddress(const Register& dst,
1798                                     const MemOperand& mem_op) {
1799   // We cannot handle pre-indexing or post-indexing.
1800   VIXL_ASSERT(mem_op.GetAddrMode() == Offset);
1801   Register base = mem_op.GetBaseRegister();
1802   if (mem_op.IsImmediateOffset()) {
1803     Add(dst, base, mem_op.GetOffset());
1804   } else {
1805     VIXL_ASSERT(mem_op.IsRegisterOffset());
1806     Register reg_offset = mem_op.GetRegisterOffset();
1807     Shift shift = mem_op.GetShift();
1808     Extend extend = mem_op.GetExtend();
1809     if (shift == NO_SHIFT) {
1810       VIXL_ASSERT(extend != NO_EXTEND);
1811       Add(dst, base, Operand(reg_offset, extend, mem_op.GetShiftAmount()));
1812     } else {
1813       VIXL_ASSERT(extend == NO_EXTEND);
1814       Add(dst, base, Operand(reg_offset, shift, mem_op.GetShiftAmount()));
1815     }
1816   }
1817 }
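
// A minimal usage sketch (an assumption, not from the original source):
//
//   masm.ComputeAddress(x0, MemOperand(x1, 16));
//     // add x0, x1, #16
//   masm.ComputeAddress(x0, MemOperand(x1, w2, SXTW, 2));
//     // add x0, x1, w2, sxtw #2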
1818 
1819 
1820 void MacroAssembler::AddSubMacro(const Register& rd,
1821                                  const Register& rn,
1822                                  const Operand& operand,
1823                                  FlagsUpdate S,
1824                                  AddSubOp op) {
1825   // Worst case is add/sub immediate:
1826   //  * up to 4 instructions to materialise the constant
1827   //  * 1 instruction for add/sub
1828   MacroEmissionCheckScope guard(this);
1829 
1830   if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() &&
1831       (S == LeaveFlags)) {
1832     // The instruction would be a nop. Avoid generating useless code.
1833     return;
1834   }
1835 
1836   if ((operand.IsImmediate() && !IsImmAddSub(operand.GetImmediate())) ||
1837       (rn.IsZero() && !operand.IsShiftedRegister()) ||
1838       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
1839     UseScratchRegisterScope temps(this);
1840     // Use `rd` as a temp, if we can.
1841     temps.Include(rd);
1842     // We read `rn` after evaluating `operand`.
1843     temps.Exclude(rn);
1844     // It doesn't matter if `operand` is in `temps` (e.g. because it
1845     // aliases `rd`) because we don't need it after it is evaluated.
1846     Register temp = temps.AcquireSameSizeAs(rn);
1847     if (operand.IsImmediate()) {
1848       PreShiftImmMode mode = kAnyShift;
1849 
1850       // If the destination or source register is the stack pointer, we can
1851       // only pre-shift the immediate right by values supported in the add/sub
1852       // extend encoding.
1853       if (rd.IsSP()) {
1854         // If the destination is SP and flags will be set, we can't pre-shift
1855         // the immediate at all.
1856         mode = (S == SetFlags) ? kNoShift : kLimitShiftForSP;
1857       } else if (rn.IsSP()) {
1858         mode = kLimitShiftForSP;
1859       }
1860 
1861       Operand imm_operand =
1862           MoveImmediateForShiftedOp(temp, operand.GetImmediate(), mode);
1863       AddSub(rd, rn, imm_operand, S, op);
1864     } else {
1865       Mov(temp, operand);
1866       AddSub(rd, rn, temp, S, op);
1867     }
1868   } else {
1869     AddSub(rd, rn, operand, S, op);
1870   }
1871 }
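
// Illustrative sketch (an assumption, not from the original source):
//
//   masm.Add(x0, x0, 0);           // elided entirely: it would be a nop
//   masm.Add(x0, x1, 0x12345678);  // unencodable immediate: materialised
//                                  // into a scratch register (possibly
//                                  // pre-shifted) before the add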
1872 
1873 
1874 void MacroAssembler::Adc(const Register& rd,
1875                          const Register& rn,
1876                          const Operand& operand) {
1877   VIXL_ASSERT(allow_macro_instructions_);
1878   AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC);
1879 }
1880 
1881 
1882 void MacroAssembler::Adcs(const Register& rd,
1883                           const Register& rn,
1884                           const Operand& operand) {
1885   VIXL_ASSERT(allow_macro_instructions_);
1886   AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC);
1887 }
1888 
1889 
1890 void MacroAssembler::Sbc(const Register& rd,
1891                          const Register& rn,
1892                          const Operand& operand) {
1893   VIXL_ASSERT(allow_macro_instructions_);
1894   AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC);
1895 }
1896 
1897 
1898 void MacroAssembler::Sbcs(const Register& rd,
1899                           const Register& rn,
1900                           const Operand& operand) {
1901   VIXL_ASSERT(allow_macro_instructions_);
1902   AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC);
1903 }
1904 
1905 
1906 void MacroAssembler::Ngc(const Register& rd, const Operand& operand) {
1907   VIXL_ASSERT(allow_macro_instructions_);
1908   Register zr = AppropriateZeroRegFor(rd);
1909   Sbc(rd, zr, operand);
1910 }
1911 
1912 
1913 void MacroAssembler::Ngcs(const Register& rd, const Operand& operand) {
1914   VIXL_ASSERT(allow_macro_instructions_);
1915   Register zr = AppropriateZeroRegFor(rd);
1916   Sbcs(rd, zr, operand);
1917 }
1918 
1919 
1920 void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
1921                                           const Register& rn,
1922                                           const Operand& operand,
1923                                           FlagsUpdate S,
1924                                           AddSubWithCarryOp op) {
1925   VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits());
1926   // Worst case is addc/subc immediate:
1927   //  * up to 4 instructions to materialise the constant
1928   //  * 1 instruction for add/sub
1929   MacroEmissionCheckScope guard(this);
1930   UseScratchRegisterScope temps(this);
1931   // Use `rd` as a temp, if we can.
1932   temps.Include(rd);
1933   // We read `rn` after evaluating `operand`.
1934   temps.Exclude(rn);
1935   // It doesn't matter if `operand` is in `temps` (e.g. because it aliases
1936   // `rd`) because we don't need it after it is evaluated.
1937 
1938   if (operand.IsImmediate() ||
1939       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
1940     // Add/sub with carry (immediate or ROR shifted register).
1941     Register temp = temps.AcquireSameSizeAs(rn);
1942     Mov(temp, operand);
1943     AddSubWithCarry(rd, rn, Operand(temp), S, op);
1944   } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
1945     // Add/sub with carry (shifted register).
1946     VIXL_ASSERT(operand.GetRegister().GetSizeInBits() == rd.GetSizeInBits());
1947     VIXL_ASSERT(operand.GetShift() != ROR);
1948     VIXL_ASSERT(
1949         IsUintN(rd.GetSizeInBits() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
1950                 operand.GetShiftAmount()));
1951     Register temp = temps.AcquireSameSizeAs(rn);
1952     EmitShift(temp,
1953               operand.GetRegister(),
1954               operand.GetShift(),
1955               operand.GetShiftAmount());
1956     AddSubWithCarry(rd, rn, Operand(temp), S, op);
1957   } else if (operand.IsExtendedRegister()) {
1958     // Add/sub with carry (extended register).
1959     VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
1960     // Add/sub extended supports a shift <= 4. We want to support exactly the
1961     // same modes.
1962     VIXL_ASSERT(operand.GetShiftAmount() <= 4);
1963     VIXL_ASSERT(
1964         operand.GetRegister().Is64Bits() ||
1965         ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
1966     Register temp = temps.AcquireSameSizeAs(rn);
1967     EmitExtendShift(temp,
1968                     operand.GetRegister(),
1969                     operand.GetExtend(),
1970                     operand.GetShiftAmount());
1971     AddSubWithCarry(rd, rn, Operand(temp), S, op);
1972   } else {
1973     // The addressing mode is directly supported by the instruction.
1974     AddSubWithCarry(rd, rn, operand, S, op);
1975   }
1976 }
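
// Illustrative sketch (an assumption, not from the original source): adc/sbc
// accept only plain register operands, so immediates and shifted or extended
// registers are first evaluated into a scratch register:
//
//   masm.Adc(x0, x1, 42);                   // mov temp, #42
//                                           // adc x0, x1, temp
//   masm.Adc(x0, x1, Operand(x2, LSL, 3));  // lsl temp, x2, #3
//                                           // adc x0, x1, temp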
1977 
1978 
1979 void MacroAssembler::Rmif(const Register& xn,
1980                           unsigned shift,
1981                           StatusFlags flags) {
1982   VIXL_ASSERT(allow_macro_instructions_);
1983   SingleEmissionCheckScope guard(this);
1984   rmif(xn, shift, flags);
1985 }
1986 
1987 
1988 void MacroAssembler::Setf8(const Register& wn) {
1989   VIXL_ASSERT(allow_macro_instructions_);
1990   SingleEmissionCheckScope guard(this);
1991   setf8(wn);
1992 }
1993 
1994 
1995 void MacroAssembler::Setf16(const Register& wn) {
1996   VIXL_ASSERT(allow_macro_instructions_);
1997   SingleEmissionCheckScope guard(this);
1998   setf16(wn);
1999 }
2000 
2001 
2002 #define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                          \
2003   void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
2004     VIXL_ASSERT(allow_macro_instructions_);                            \
2005     LoadStoreMacro(REG, addr, OP);                                     \
2006   }
2007 LS_MACRO_LIST(DEFINE_FUNCTION)
2008 #undef DEFINE_FUNCTION
2009 
2010 
2011 void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
2012                                     const MemOperand& addr,
2013                                     LoadStoreOp op) {
2014   VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsImmediatePostIndex() ||
2015               addr.IsImmediatePreIndex() || addr.IsRegisterOffset());
2016 
2017   // Worst case is ldr/str pre/post index:
2018   //  * 1 instruction for ldr/str
2019   //  * up to 4 instructions to materialise the constant
2020   //  * 1 instruction to update the base
2021   MacroEmissionCheckScope guard(this);
2022 
2023   int64_t offset = addr.GetOffset();
2024   unsigned access_size = CalcLSDataSize(op);
2025 
2026   // Check if an immediate offset fits in the immediate field of the
2027   // appropriate instruction. If not, emit two instructions to perform
2028   // the operation.
2029   if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, access_size) &&
2030       !IsImmLSUnscaled(offset)) {
2031     // Immediate offset that can't be encoded using unsigned or unscaled
2032     // addressing modes.
2033     UseScratchRegisterScope temps(this);
2034     Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
2035     Mov(temp, addr.GetOffset());
2036     LoadStore(rt, MemOperand(addr.GetBaseRegister(), temp), op);
2037   } else if (addr.IsImmediatePostIndex() && !IsImmLSUnscaled(offset)) {
2038     // Post-index beyond unscaled addressing range.
2039     LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
2040     Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
2041   } else if (addr.IsImmediatePreIndex() && !IsImmLSUnscaled(offset)) {
2042     // Pre-index beyond unscaled addressing range.
2043     Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
2044     LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
2045   } else {
2046     // Encodable in one load/store instruction.
2047     LoadStore(rt, addr, op);
2048   }
2049 }
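
// Illustrative sketch (an assumption, not from the original source), with the
// expected expansions shown approximately:
//
//   masm.Ldr(x0, MemOperand(x1, 0x123456));
//     // mov temp, #0x123456; ldr x0, [x1, temp]
//   masm.Ldr(x0, MemOperand(x1, 0x10000, PostIndex));
//     // ldr x0, [x1]; add x1, x1, #0x10000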
2050 
2051 
2052 #define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
2053   void MacroAssembler::FN(const REGTYPE REG,        \
2054                           const REGTYPE REG2,       \
2055                           const MemOperand& addr) { \
2056     VIXL_ASSERT(allow_macro_instructions_);         \
2057     LoadStorePairMacro(REG, REG2, addr, OP);        \
2058   }
2059 LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
2060 #undef DEFINE_FUNCTION
2061 
2062 void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
2063                                         const CPURegister& rt2,
2064                                         const MemOperand& addr,
2065                                         LoadStorePairOp op) {
2066   // TODO(all): Should we support register offset for load-store-pair?
2067   VIXL_ASSERT(!addr.IsRegisterOffset());
2068   // Worst case is ldp/stp immediate:
2069   //  * 1 instruction for ldp/stp
2070   //  * up to 4 instructions to materialise the constant
2071   //  * 1 instruction to update the base
2072   MacroEmissionCheckScope guard(this);
2073 
2074   int64_t offset = addr.GetOffset();
2075   unsigned access_size = CalcLSPairDataSize(op);
2076 
2077   // Check if the offset fits in the immediate field of the appropriate
2078   // instruction. If not, emit two instructions to perform the operation.
2079   if (IsImmLSPair(offset, access_size)) {
2080     // Encodable in one load/store pair instruction.
2081     LoadStorePair(rt, rt2, addr, op);
2082   } else {
2083     Register base = addr.GetBaseRegister();
2084     if (addr.IsImmediateOffset()) {
2085       UseScratchRegisterScope temps(this);
2086       Register temp = temps.AcquireSameSizeAs(base);
2087       Add(temp, base, offset);
2088       LoadStorePair(rt, rt2, MemOperand(temp), op);
2089     } else if (addr.IsImmediatePostIndex()) {
2090       LoadStorePair(rt, rt2, MemOperand(base), op);
2091       Add(base, base, offset);
2092     } else {
2093       VIXL_ASSERT(addr.IsImmediatePreIndex());
2094       Add(base, base, offset);
2095       LoadStorePair(rt, rt2, MemOperand(base), op);
2096     }
2097   }
2098 }
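
// Illustrative sketch (an assumption, not from the original source): the
// ldp/stp immediate is a signed 7-bit multiple of the access size (e.g.
// [-512, +504] bytes for X-register pairs), so an out-of-range offset goes
// through a scratch base:
//
//   masm.Ldp(x0, x1, MemOperand(x2, 1024));
//     // add temp, x2, #1024; ldp x0, x1, [temp]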
2099 
2100 
2101 void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) {
2102   MacroEmissionCheckScope guard(this);
2103 
2104   // There are no pre- or post-index modes for prfm.
2105   VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset());
2106 
2107   // The access size is implicitly 8 bytes for all prefetch operations.
2108   unsigned size = kXRegSizeInBytesLog2;
2109 
2110   // Check if an immediate offset fits in the immediate field of the
2111   // appropriate instruction. If not, emit two instructions to perform
2112   // the operation.
2113   if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.GetOffset(), size) &&
2114       !IsImmLSUnscaled(addr.GetOffset())) {
2115     // Immediate offset that can't be encoded using unsigned or unscaled
2116     // addressing modes.
2117     UseScratchRegisterScope temps(this);
2118     Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
2119     Mov(temp, addr.GetOffset());
2120     Prefetch(op, MemOperand(addr.GetBaseRegister(), temp));
2121   } else {
2122     // Simple register-offsets are encodable in one instruction.
2123     Prefetch(op, addr);
2124   }
2125 }
2126 
2127 
2128 void MacroAssembler::Push(const CPURegister& src0,
2129                           const CPURegister& src1,
2130                           const CPURegister& src2,
2131                           const CPURegister& src3) {
2132   VIXL_ASSERT(allow_macro_instructions_);
2133   VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
2134   VIXL_ASSERT(src0.IsValid());
2135 
2136   int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
2137   int size = src0.GetSizeInBytes();
2138 
2139   PrepareForPush(count, size);
2140   PushHelper(count, size, src0, src1, src2, src3);
2141 }
2142 
2143 
2144 void MacroAssembler::Pop(const CPURegister& dst0,
2145                          const CPURegister& dst1,
2146                          const CPURegister& dst2,
2147                          const CPURegister& dst3) {
2148   // It is not valid to pop into the same register more than once in one
2149   // instruction, not even into the zero register.
2150   VIXL_ASSERT(allow_macro_instructions_);
2151   VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
2152   VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
2153   VIXL_ASSERT(dst0.IsValid());
2154 
2155   int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
2156   int size = dst0.GetSizeInBytes();
2157 
2158   PrepareForPop(count, size);
2159   PopHelper(count, size, dst0, dst1, dst2, dst3);
2160 }
2161 
2162 
2163 void MacroAssembler::PushCPURegList(CPURegList registers) {
2164   VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
2165   VIXL_ASSERT(!registers.Overlaps(*GetScratchVRegisterList()));
2166   VIXL_ASSERT(allow_macro_instructions_);
2167 
2168   int reg_size = registers.GetRegisterSizeInBytes();
2169   PrepareForPush(registers.GetCount(), reg_size);
2170 
2171   // Bump the stack pointer and store two registers at the bottom.
2172   int size = registers.GetTotalSizeInBytes();
2173   const CPURegister& bottom_0 = registers.PopLowestIndex();
2174   const CPURegister& bottom_1 = registers.PopLowestIndex();
2175   if (bottom_0.IsValid() && bottom_1.IsValid()) {
2176     Stp(bottom_0, bottom_1, MemOperand(StackPointer(), -size, PreIndex));
2177   } else if (bottom_0.IsValid()) {
2178     Str(bottom_0, MemOperand(StackPointer(), -size, PreIndex));
2179   }
2180 
2181   int offset = 2 * reg_size;
2182   while (!registers.IsEmpty()) {
2183     const CPURegister& src0 = registers.PopLowestIndex();
2184     const CPURegister& src1 = registers.PopLowestIndex();
2185     if (src1.IsValid()) {
2186       Stp(src0, src1, MemOperand(StackPointer(), offset));
2187     } else {
2188       Str(src0, MemOperand(StackPointer(), offset));
2189     }
2190     offset += 2 * reg_size;
2191   }
2192 }
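
// Illustrative sketch (an assumption, not from the original source), assuming
// sp is the current stack pointer: the first, pre-indexed store claims all of
// the space, and the rest is filled in at positive offsets:
//
//   masm.PushCPURegList(CPURegList(x0, x1, x2, x3));
//     // stp x0, x1, [sp, #-32]!
//     // stp x2, x3, [sp, #16]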
2193 
2194 
2195 void MacroAssembler::PopCPURegList(CPURegList registers) {
2196   VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
2197   VIXL_ASSERT(!registers.Overlaps(*GetScratchVRegisterList()));
2198   VIXL_ASSERT(allow_macro_instructions_);
2199 
2200   int reg_size = registers.GetRegisterSizeInBytes();
2201   PrepareForPop(registers.GetCount(), reg_size);
2202 
2204   int size = registers.GetTotalSizeInBytes();
2205   const CPURegister& bottom_0 = registers.PopLowestIndex();
2206   const CPURegister& bottom_1 = registers.PopLowestIndex();
2207 
2208   int offset = 2 * reg_size;
2209   while (!registers.IsEmpty()) {
2210     const CPURegister& dst0 = registers.PopLowestIndex();
2211     const CPURegister& dst1 = registers.PopLowestIndex();
2212     if (dst1.IsValid()) {
2213       Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
2214     } else {
2215       Ldr(dst0, MemOperand(StackPointer(), offset));
2216     }
2217     offset += 2 * reg_size;
2218   }
2219 
2220   // Load the two registers at the bottom and drop the stack pointer.
2221   if (bottom_0.IsValid() && bottom_1.IsValid()) {
2222     Ldp(bottom_0, bottom_1, MemOperand(StackPointer(), size, PostIndex));
2223   } else if (bottom_0.IsValid()) {
2224     Ldr(bottom_0, MemOperand(StackPointer(), size, PostIndex));
2225   }
2226 }
2227 
2228 
2229 void MacroAssembler::PushMultipleTimes(int count, Register src) {
2230   VIXL_ASSERT(allow_macro_instructions_);
2231   int size = src.GetSizeInBytes();
2232 
2233   PrepareForPush(count, size);
2234   // Push up to four registers at a time if possible: if the current stack
2235   // pointer is sp and the register size is 32 bits, registers must be pushed
2236   // in blocks of four in order to maintain the 16-byte alignment for sp.
2237   while (count >= 4) {
2238     PushHelper(4, size, src, src, src, src);
2239     count -= 4;
2240   }
2241   if (count >= 2) {
2242     PushHelper(2, size, src, src, NoReg, NoReg);
2243     count -= 2;
2244   }
2245   if (count == 1) {
2246     PushHelper(1, size, src, NoReg, NoReg, NoReg);
2247     count -= 1;
2248   }
2249   VIXL_ASSERT(count == 0);
2250 }
2251 
2252 
2253 void MacroAssembler::PushHelper(int count,
2254                                 int size,
2255                                 const CPURegister& src0,
2256                                 const CPURegister& src1,
2257                                 const CPURegister& src2,
2258                                 const CPURegister& src3) {
2259   // Ensure that we don't unintentionally modify scratch or debug registers.
2260   // Worst case for size is 2 stp.
2261   ExactAssemblyScope scope(this,
2262                            2 * kInstructionSize,
2263                            ExactAssemblyScope::kMaximumSize);
2264 
2265   VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
2266   VIXL_ASSERT(size == src0.GetSizeInBytes());
2267 
2268   // When pushing multiple registers, the store order is chosen such that
2269   // Push(a, b) is equivalent to Push(a) followed by Push(b).
2270   switch (count) {
2271     case 1:
2272       VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
2273       str(src0, MemOperand(StackPointer(), -1 * size, PreIndex));
2274       break;
2275     case 2:
2276       VIXL_ASSERT(src2.IsNone() && src3.IsNone());
2277       stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex));
2278       break;
2279     case 3:
2280       VIXL_ASSERT(src3.IsNone());
2281       stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex));
2282       str(src0, MemOperand(StackPointer(), 2 * size));
2283       break;
2284     case 4:
2285       // Skip over 4 * size, then fill in the gap. This allows four W registers
2286       // to be pushed using sp, whilst maintaining 16-byte alignment for sp at
2287       // all times.
2288       stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex));
2289       stp(src1, src0, MemOperand(StackPointer(), 2 * size));
2290       break;
2291     default:
2292       VIXL_UNREACHABLE();
2293   }
2294 }
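
// Note (an illustration, not part of the original source): the operand order
// inside each stp preserves the Push(a, b) == Push(a); Push(b) ordering. For
// example, Push(x0, x1) emits `stp x1, x0, [sp, #-16]!`, leaving x0 at the
// higher address, as if it had been pushed first.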
2295 
2296 
2297 void MacroAssembler::PopHelper(int count,
2298                                int size,
2299                                const CPURegister& dst0,
2300                                const CPURegister& dst1,
2301                                const CPURegister& dst2,
2302                                const CPURegister& dst3) {
2303   // Ensure that we don't unintentionally modify scratch or debug registers.
2304   // Worst case for size is 2 ldp.
2305   ExactAssemblyScope scope(this,
2306                            2 * kInstructionSize,
2307                            ExactAssemblyScope::kMaximumSize);
2308 
2309   VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
2310   VIXL_ASSERT(size == dst0.GetSizeInBytes());
2311 
2312   // When popping multiple registers, the load order is chosen such that
2313   // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
2314   switch (count) {
2315     case 1:
2316       VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
2317       ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex));
2318       break;
2319     case 2:
2320       VIXL_ASSERT(dst2.IsNone() && dst3.IsNone());
2321       ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex));
2322       break;
2323     case 3:
2324       VIXL_ASSERT(dst3.IsNone());
2325       ldr(dst2, MemOperand(StackPointer(), 2 * size));
2326       ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex));
2327       break;
2328     case 4:
2329       // Load the higher addresses first, then load the lower addresses and skip
2330       // the whole block in the second instruction. This allows four W registers
2331       // to be popped using sp, whilst maintaining 16-byte alignment for sp at
2332       // all times.
2333       ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size));
2334       ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex));
2335       break;
2336     default:
2337       VIXL_UNREACHABLE();
2338   }
2339 }
2340 
2341 
2342 void MacroAssembler::PrepareForPush(int count, int size) {
2343   if (sp.Is(StackPointer())) {
2344     // If the current stack pointer is sp, then it must be aligned to 16 bytes
2345     // on entry and the total size of the specified registers must also be a
2346     // multiple of 16 bytes.
2347     VIXL_ASSERT((count * size) % 16 == 0);
2348   } else {
2349     // Even if the current stack pointer is not the system stack pointer (sp),
2350     // the system stack pointer will still be modified in order to comply with
2351     // ABI rules about accessing memory below the system stack pointer.
2352     BumpSystemStackPointer(count * size);
2353   }
2354 }
2355 
2356 
2357 void MacroAssembler::PrepareForPop(int count, int size) {
2358   USE(count, size);
2359   if (sp.Is(StackPointer())) {
2360     // If the current stack pointer is sp, then it must be aligned to 16 bytes
2361     // on entry and the total size of the specified registers must also be a
2362     // multiple of 16 bytes.
2363     VIXL_ASSERT((count * size) % 16 == 0);
2364   }
2365 }
2366 
2367 void MacroAssembler::Poke(const Register& src, const Operand& offset) {
2368   VIXL_ASSERT(allow_macro_instructions_);
2369   if (offset.IsImmediate()) {
2370     VIXL_ASSERT(offset.GetImmediate() >= 0);
2371   }
2372 
2373   Str(src, MemOperand(StackPointer(), offset));
2374 }
2375 
2376 
2377 void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
2378   VIXL_ASSERT(allow_macro_instructions_);
2379   if (offset.IsImmediate()) {
2380     VIXL_ASSERT(offset.GetImmediate() >= 0);
2381   }
2382 
2383   Ldr(dst, MemOperand(StackPointer(), offset));
2384 }
2385 
2386 
2387 void MacroAssembler::Claim(const Operand& size) {
2388   VIXL_ASSERT(allow_macro_instructions_);
2389 
2390   if (size.IsZero()) {
2391     return;
2392   }
2393 
2394   if (size.IsImmediate()) {
2395     VIXL_ASSERT(size.GetImmediate() > 0);
2396     if (sp.Is(StackPointer())) {
2397       VIXL_ASSERT((size.GetImmediate() % 16) == 0);
2398     }
2399   }
2400 
2401   if (!sp.Is(StackPointer())) {
2402     BumpSystemStackPointer(size);
2403   }
2404 
2405   Sub(StackPointer(), StackPointer(), size);
2406 }
2407 
2408 
2409 void MacroAssembler::Drop(const Operand& size) {
2410   VIXL_ASSERT(allow_macro_instructions_);
2411 
2412   if (size.IsZero()) {
2413     return;
2414   }
2415 
2416   if (size.IsImmediate()) {
2417     VIXL_ASSERT(size.GetImmediate() > 0);
2418     if (sp.Is(StackPointer())) {
2419       VIXL_ASSERT((size.GetImmediate() % 16) == 0);
2420     }
2421   }
2422 
2423   Add(StackPointer(), StackPointer(), size);
2424 }
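
// A minimal usage sketch (an assumption, not from the original source), with
// sp as the current stack pointer:
//
//   masm.Claim(32);  // sub sp, sp, #32
//   // ... use the claimed stack space ...
//   masm.Drop(32);   // add sp, sp, #32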
2425 
2426 
2427 void MacroAssembler::PushCalleeSavedRegisters() {
2428   // Ensure that the macro-assembler doesn't use any scratch registers.
2429   // 10 stp will be emitted.
2430   // TODO(all): Should we use GetCalleeSaved and SavedFP?
2431   ExactAssemblyScope scope(this, 10 * kInstructionSize);
2432 
2433   // This method must not be called unless the current stack pointer is sp.
2434   VIXL_ASSERT(sp.Is(StackPointer()));
2435 
2436   MemOperand tos(sp, -2 * static_cast<int>(kXRegSizeInBytes), PreIndex);
2437 
2438   stp(x29, x30, tos);
2439   stp(x27, x28, tos);
2440   stp(x25, x26, tos);
2441   stp(x23, x24, tos);
2442   stp(x21, x22, tos);
2443   stp(x19, x20, tos);
2444 
2445   stp(d14, d15, tos);
2446   stp(d12, d13, tos);
2447   stp(d10, d11, tos);
2448   stp(d8, d9, tos);
2449 }
2450 
2451 
2452 void MacroAssembler::PopCalleeSavedRegisters() {
2453   // Ensure that the macro-assembler doesn't use any scratch registers.
2454   // 10 ldp will be emitted.
2455   // TODO(all): Should we use GetCalleeSaved and SavedFP?
2456   ExactAssemblyScope scope(this, 10 * kInstructionSize);
2457 
2458   // This method must not be called unless the current stack pointer is sp.
2459   VIXL_ASSERT(sp.Is(StackPointer()));
2460 
2461   MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);
2462 
2463   ldp(d8, d9, tos);
2464   ldp(d10, d11, tos);
2465   ldp(d12, d13, tos);
2466   ldp(d14, d15, tos);
2467 
2468   ldp(x19, x20, tos);
2469   ldp(x21, x22, tos);
2470   ldp(x23, x24, tos);
2471   ldp(x25, x26, tos);
2472   ldp(x27, x28, tos);
2473   ldp(x29, x30, tos);
2474 }
2475 
2476 void MacroAssembler::LoadCPURegList(CPURegList registers,
2477                                     const MemOperand& src) {
2478   LoadStoreCPURegListHelper(kLoad, registers, src);
2479 }
2480 
2481 void MacroAssembler::StoreCPURegList(CPURegList registers,
2482                                      const MemOperand& dst) {
2483   LoadStoreCPURegListHelper(kStore, registers, dst);
2484 }
2485 
2486 
2487 void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
2488                                                CPURegList registers,
2489                                                const MemOperand& mem) {
2490   // We do not handle pre-indexing or post-indexing.
2491   VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
2492 #ifndef PANDA_BUILD
2493   VIXL_ASSERT(!registers.Overlaps(tmp_list_));
2494 #endif
2495   VIXL_ASSERT(!registers.Overlaps(v_tmp_list_));
2496   VIXL_ASSERT(!registers.Overlaps(p_tmp_list_));
2497   VIXL_ASSERT(!registers.IncludesAliasOf(sp));
2498 
2499   UseScratchRegisterScope temps(this);
2500 
2501   MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers, mem, &temps);
2502   const int reg_size = registers.GetRegisterSizeInBytes();
2503 
2504   VIXL_ASSERT(IsPowerOf2(reg_size));
2505 
2506   // Since we are operating on register pairs, we would like to align the
2507   // base address to double the register size; on the other hand, we don't
2508   // want to insert an extra operation, which would happen if the number of
2509   // registers were even. Note that the alignment of the base pointer is
2510   // unknown here, but we assume that it is more likely to be aligned.
2511   if (((loc.GetOffset() & (2 * reg_size - 1)) != 0) &&
2512       ((registers.GetCount() % 2) != 0)) {
2513     if (op == kStore) {
2514       Str(registers.PopLowestIndex(), loc);
2515     } else {
2516       VIXL_ASSERT(op == kLoad);
2517       Ldr(registers.PopLowestIndex(), loc);
2518     }
2519     loc.AddOffset(reg_size);
2520   }
2521   while (registers.GetCount() >= 2) {
2522     const CPURegister& dst0 = registers.PopLowestIndex();
2523     const CPURegister& dst1 = registers.PopLowestIndex();
2524     if (op == kStore) {
2525       Stp(dst0, dst1, loc);
2526     } else {
2527       VIXL_ASSERT(op == kLoad);
2528       Ldp(dst0, dst1, loc);
2529     }
2530     loc.AddOffset(2 * reg_size);
2531   }
2532   if (!registers.IsEmpty()) {
2533     if (op == kStore) {
2534       Str(registers.PopLowestIndex(), loc);
2535     } else {
2536       VIXL_ASSERT(op == kLoad);
2537       Ldr(registers.PopLowestIndex(), loc);
2538     }
2539   }
2540 }
2541 
2542 MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList(
2543     const CPURegList& registers,
2544     const MemOperand& mem,
2545     UseScratchRegisterScope* scratch_scope) {
2546   // If necessary, pre-compute the base address for the accesses.
2547   if (mem.IsRegisterOffset()) {
2548     Register reg_base = scratch_scope->AcquireX();
2549     ComputeAddress(reg_base, mem);
2550     return MemOperand(reg_base);
2551 
2552   } else if (mem.IsImmediateOffset()) {
2553     int reg_size = registers.GetRegisterSizeInBytes();
2554     int total_size = registers.GetTotalSizeInBytes();
2555     int64_t min_offset = mem.GetOffset();
2556     int64_t max_offset =
2557         mem.GetOffset() + std::max(0, total_size - 2 * reg_size);
2558     if ((registers.GetCount() >= 2) &&
2559         (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) ||
2560          !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) {
2561       Register reg_base = scratch_scope->AcquireX();
2562       ComputeAddress(reg_base, mem);
2563       return MemOperand(reg_base);
2564     }
2565   }
2566 
2567   return mem;
2568 }
2569 
2570 void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
2571   VIXL_ASSERT(!sp.Is(StackPointer()));
2572   // TODO: Several callers rely on this not using scratch registers, so we use
2573   // the assembler directly here. However, this means that large immediate
2574   // values of 'space' cannot be handled.
2575   ExactAssemblyScope scope(this, kInstructionSize);
2576   sub(sp, StackPointer(), space);
2577 }
2578 
2579 
2580 // TODO(all): Fix printf for NEON and SVE registers.
2581 
2582 // This is the main Printf implementation. All callee-saved registers are
2583 // preserved, but NZCV and the caller-saved registers may be clobbered.
2584 void MacroAssembler::PrintfNoPreserve(const char* format,
2585                                       const CPURegister& arg0,
2586                                       const CPURegister& arg1,
2587                                       const CPURegister& arg2,
2588                                       const CPURegister& arg3) {
2589   // We cannot handle a caller-saved stack pointer. It doesn't make much sense
2590   // in most cases anyway, so this restriction shouldn't be too serious.
2591   VIXL_ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer()));
2592 
2593   // The provided arguments, and their proper PCS registers.
2594   CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3};
2595   CPURegister pcs[kPrintfMaxArgCount];
2596 
2597   int arg_count = kPrintfMaxArgCount;
2598 
2599   // The PCS varargs registers for printf. Note that x0 is used for the printf
2600   // format string.
2601   static const CPURegList kPCSVarargs =
2602       CPURegList(CPURegister::kRegister, kXRegSize, 1, arg_count);
2603   static const CPURegList kPCSVarargsV =
2604       CPURegList(CPURegister::kVRegister, kDRegSize, 0, arg_count - 1);
2605 
2606   // We can use caller-saved registers as scratch values, except for the
2607   // arguments and the PCS registers where they might need to go.
2608   UseScratchRegisterScope temps(this);
2609   temps.Include(kCallerSaved);
2610   temps.Include(kCallerSavedV);
2611   temps.Exclude(kPCSVarargs);
2612   temps.Exclude(kPCSVarargsV);
2613   temps.Exclude(arg0, arg1, arg2, arg3);
2614 
2615   // Copies of the arg lists that we can iterate through.
2616   CPURegList pcs_varargs = kPCSVarargs;
2617   CPURegList pcs_varargs_fp = kPCSVarargsV;
2618 
2619   // Place the arguments. There are lots of clever tricks and optimizations we
2620   // could use here, but Printf is a debug tool so instead we just try to keep
2621   // it simple: Move each input that isn't already in the right place to a
2622   // scratch register, then move everything back.
2623   for (unsigned i = 0; i < kPrintfMaxArgCount; i++) {
2624     // Work out the proper PCS register for this argument.
2625     if (args[i].IsRegister()) {
2626       pcs[i] = pcs_varargs.PopLowestIndex().X();
2627       // We might only need a W register here. We need to know the size of the
2628       // argument so we can properly encode it for the simulator call.
2629       if (args[i].Is32Bits()) pcs[i] = pcs[i].W();
2630     } else if (args[i].IsVRegister()) {
2631       // In C, floats are always promoted to doubles for varargs calls.
2632       pcs[i] = pcs_varargs_fp.PopLowestIndex().D();
2633     } else {
2634       VIXL_ASSERT(args[i].IsNone());
2635       arg_count = i;
2636       break;
2637     }
2638 
2639     // If the argument is already in the right place, leave it where it is.
2640     if (args[i].Aliases(pcs[i])) continue;
2641 
2642     // Otherwise, if the argument is in a PCS argument register, allocate an
2643     // appropriate scratch register and then move it out of the way.
2644     if (kPCSVarargs.IncludesAliasOf(args[i]) ||
2645         kPCSVarargsV.IncludesAliasOf(args[i])) {
2646       if (args[i].IsRegister()) {
2647         Register old_arg = Register(args[i]);
2648         Register new_arg = temps.AcquireSameSizeAs(old_arg);
2649         Mov(new_arg, old_arg);
2650         args[i] = new_arg;
2651       } else {
2652         VRegister old_arg(args[i]);
2653         VRegister new_arg = temps.AcquireSameSizeAs(old_arg);
2654         Fmov(new_arg, old_arg);
2655         args[i] = new_arg;
2656       }
2657     }
2658   }
2659 
2660   // Do a second pass to move values into their final positions and perform any
2661   // conversions that may be required.
2662   for (int i = 0; i < arg_count; i++) {
2663     VIXL_ASSERT(pcs[i].GetType() == args[i].GetType());
2664     if (pcs[i].IsRegister()) {
2665       Mov(Register(pcs[i]), Register(args[i]), kDiscardForSameWReg);
2666     } else {
2667       VIXL_ASSERT(pcs[i].IsVRegister());
2668       if (pcs[i].GetSizeInBits() == args[i].GetSizeInBits()) {
2669         Fmov(VRegister(pcs[i]), VRegister(args[i]));
2670       } else {
2671         Fcvt(VRegister(pcs[i]), VRegister(args[i]));
2672       }
2673     }
2674   }
2675 
2676   // Load the format string into x0, as per the procedure-call standard.
2677   //
2678   // To make the code as portable as possible, the format string is encoded
2679   // directly in the instruction stream. It might be cleaner to encode it in a
2680   // literal pool, but since Printf is usually used for debugging, it is
2681   // beneficial for it to be minimally dependent on other features.
2682   temps.Exclude(x0);
2683 #ifndef PANDA_BUILD
2684   Label format_address;
2685 #else
2686   Label format_address(allocator_);
2687 #endif
2688   Adr(x0, &format_address);
2689 
2690   // Emit the format string directly in the instruction stream.
2691   {
2692     BlockPoolsScope scope(this);
2693     // Data emitted:
2694     //   branch
2695     //   strlen(format) + 1 (includes null termination)
2696     //   padding to next instruction
2697     //   unreachable
2698     EmissionCheckScope guard(this,
2699                              AlignUp(strlen(format) + 1, kInstructionSize) +
2700                                  2 * kInstructionSize);
2701 #ifndef PANDA_BUILD
2702     Label after_data;
2703 #else
2704     Label after_data(allocator_);
2705 #endif
2706     B(&after_data);
2707     Bind(&format_address);
2708     EmitString(format);
2709     Unreachable();
2710     Bind(&after_data);
2711   }
2712 
2713   // We don't pass any arguments on the stack, but we still need to align the C
2714   // stack pointer to a 16-byte boundary for PCS compliance.
2715   if (!sp.Is(StackPointer())) {
2716     Bic(sp, StackPointer(), 0xf);
2717   }
2718 
2719   // Actually call printf. This part needs special handling for the simulator,
2720   // since the system printf function will use a different instruction set and
2721   // the procedure-call standard will not be compatible.
2722   if (generate_simulator_code_) {
2723     ExactAssemblyScope scope(this, kPrintfLength);
2724     hlt(kPrintfOpcode);
2725     dc32(arg_count);  // kPrintfArgCountOffset
2726 
2727     // Determine the argument pattern.
2728     uint32_t arg_pattern_list = 0;
2729     for (int i = 0; i < arg_count; i++) {
2730       uint32_t arg_pattern;
2731       if (pcs[i].IsRegister()) {
2732         arg_pattern = pcs[i].Is32Bits() ? kPrintfArgW : kPrintfArgX;
2733       } else {
2734         VIXL_ASSERT(pcs[i].Is64Bits());
2735         arg_pattern = kPrintfArgD;
2736       }
2737       VIXL_ASSERT(arg_pattern < (1 << kPrintfArgPatternBits));
2738       arg_pattern_list |= (arg_pattern << (kPrintfArgPatternBits * i));
2739     }
2740     dc32(arg_pattern_list);  // kPrintfArgPatternListOffset
2741   } else {
2742     Register tmp = temps.AcquireX();
2743     Mov(tmp, reinterpret_cast<uintptr_t>(printf));
2744     Blr(tmp);
2745   }
2746 }
2747 
2748 
2749 void MacroAssembler::Printf(const char* format,
2750                             CPURegister arg0,
2751                             CPURegister arg1,
2752                             CPURegister arg2,
2753                             CPURegister arg3) {
2754   // We can only print sp if it is the current stack pointer.
2755   if (!sp.Is(StackPointer())) {
2756     VIXL_ASSERT(!sp.Aliases(arg0));
2757     VIXL_ASSERT(!sp.Aliases(arg1));
2758     VIXL_ASSERT(!sp.Aliases(arg2));
2759     VIXL_ASSERT(!sp.Aliases(arg3));
2760   }
2761 
2762   // Make sure that the macro assembler doesn't try to use any of our arguments
2763   // as scratch registers.
2764   UseScratchRegisterScope exclude_all(this);
2765   exclude_all.ExcludeAll();
2766 
2767   // Preserve all caller-saved registers as well as NZCV.
2768   // If sp is the stack pointer, PushCPURegList asserts that the size of each
2769   // list is a multiple of 16 bytes.
2770   PushCPURegList(kCallerSaved);
2771   PushCPURegList(kCallerSavedV);
2772 
2773   {
2774     UseScratchRegisterScope temps(this);
2775     // We can use caller-saved registers as scratch values (except for argN).
2776     temps.Include(kCallerSaved);
2777     temps.Include(kCallerSavedV);
2778     temps.Exclude(arg0, arg1, arg2, arg3);
2779 
2780     // If any of the arguments are the current stack pointer, allocate a new
2781     // register for them, and adjust the value to compensate for pushing the
2782     // caller-saved registers.
2783     bool arg0_sp = StackPointer().Aliases(arg0);
2784     bool arg1_sp = StackPointer().Aliases(arg1);
2785     bool arg2_sp = StackPointer().Aliases(arg2);
2786     bool arg3_sp = StackPointer().Aliases(arg3);
2787     if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) {
2788       // Allocate a register to hold the original stack pointer value, to pass
2789       // to PrintfNoPreserve as an argument.
2790       Register arg_sp = temps.AcquireX();
2791       Add(arg_sp,
2792           StackPointer(),
2793           kCallerSaved.GetTotalSizeInBytes() +
2794               kCallerSavedV.GetTotalSizeInBytes());
2795       if (arg0_sp) arg0 = Register(arg_sp.GetCode(), arg0.GetSizeInBits());
2796       if (arg1_sp) arg1 = Register(arg_sp.GetCode(), arg1.GetSizeInBits());
2797       if (arg2_sp) arg2 = Register(arg_sp.GetCode(), arg2.GetSizeInBits());
2798       if (arg3_sp) arg3 = Register(arg_sp.GetCode(), arg3.GetSizeInBits());
2799     }
2800 
2801     // Preserve NZCV.
2802     Register tmp = temps.AcquireX();
2803     Mrs(tmp, NZCV);
2804     Push(tmp, xzr);
2805     temps.Release(tmp);
2806 
2807     PrintfNoPreserve(format, arg0, arg1, arg2, arg3);
2808 
2809     // Restore NZCV.
2810     tmp = temps.AcquireX();
2811     Pop(xzr, tmp);
2812     Msr(NZCV, tmp);
2813     temps.Release(tmp);
2814   }
2815 
2816   PopCPURegList(kCallerSavedV);
2817   PopCPURegList(kCallerSaved);
2818 }
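
// Illustrative usage sketch (an assumption, not from the original source):
//
//   masm.Printf("x0 = 0x%" PRIx64 ", d0 = %g\n", x0, d0);
//
// The format string is emitted into the instruction stream; caller-saved
// registers and NZCV are preserved, and on the simulator the call is routed
// through the hlt-based pseudo-instruction emitted by PrintfNoPreserve.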
2819 
2820 void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
2821   VIXL_ASSERT(allow_macro_instructions_);
2822 
2823   if (generate_simulator_code_) {
2824     // The arguments to the trace pseudo instruction need to be contiguous in
2825     // memory, so make sure we don't try to emit a literal pool.
2826     ExactAssemblyScope scope(this, kTraceLength);
2827 
2828 #ifndef PANDA_BUILD
2829     Label start;
2830 #else
2831     Label start(allocator_);
2832 #endif
2833     bind(&start);
2834 
2835     // Refer to simulator-aarch64.h for a description of the marker and its
2836     // arguments.
2837     hlt(kTraceOpcode);
2838 
2839     VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
2840     dc32(parameters);
2841 
2842     VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
2843     dc32(command);
2844   } else {
2845     // Emit nothing on real hardware.
2846     USE(parameters, command);
2847   }
2848 }
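
// Illustrative usage sketch (an assumption, not from the original source),
// effective in simulator builds only:
//
//   masm.Trace(LOG_ALL, TRACE_ENABLE);   // start tracing
//   // ... code to trace ...
//   masm.Trace(LOG_ALL, TRACE_DISABLE);  // stop tracing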
2849 
2850 
2851 void MacroAssembler::Log(TraceParameters parameters) {
2852   VIXL_ASSERT(allow_macro_instructions_);
2853 
2854   if (generate_simulator_code_) {
2855     // The arguments to the log pseudo instruction need to be contiguous in
2856     // memory, so make sure we don't try to emit a literal pool.
2857     ExactAssemblyScope scope(this, kLogLength);
2858 
2859 #ifndef PANDA_BUILD
2860     Label start;
2861 #else
2862     Label start(allocator_);
2863 #endif
2864     bind(&start);
2865 
2866     // Refer to simulator-aarch64.h for a description of the marker and its
2867     // arguments.
2868     hlt(kLogOpcode);
2869 
2870     VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
2871     dc32(parameters);
2872   } else {
2873     // Emit nothing on real hardware.
2874     USE(parameters);
2875   }
2876 }
2877 
2878 
2879 void MacroAssembler::SetSimulatorCPUFeatures(const CPUFeatures& features) {
2880   ConfigureSimulatorCPUFeaturesHelper(features, kSetCPUFeaturesOpcode);
2881 }
2882 
2883 
2884 void MacroAssembler::EnableSimulatorCPUFeatures(const CPUFeatures& features) {
2885   ConfigureSimulatorCPUFeaturesHelper(features, kEnableCPUFeaturesOpcode);
2886 }
2887 
2888 
2889 void MacroAssembler::DisableSimulatorCPUFeatures(const CPUFeatures& features) {
2890   ConfigureSimulatorCPUFeaturesHelper(features, kDisableCPUFeaturesOpcode);
2891 }
2892 
2893 
2894 void MacroAssembler::ConfigureSimulatorCPUFeaturesHelper(
2895     const CPUFeatures& features, DebugHltOpcode action) {
2896   VIXL_ASSERT(allow_macro_instructions_);
2897   VIXL_ASSERT(generate_simulator_code_);
2898 
2899   typedef ConfigureCPUFeaturesElementType ElementType;
2900   VIXL_ASSERT(CPUFeatures::kNumberOfFeatures <=
2901               std::numeric_limits<ElementType>::max());
2902 
2903   size_t count = features.Count();
2904 
2905   size_t preamble_length = kConfigureCPUFeaturesListOffset;
2906   size_t list_length = (count + 1) * sizeof(ElementType);
2907   size_t padding_length = AlignUp(list_length, kInstructionSize) - list_length;
2908 
2909   size_t total_length = preamble_length + list_length + padding_length;
2910 
2911   // Check the overall code size as well as the size of each component.
2912   ExactAssemblyScope guard_total(this, total_length);
2913 
2914   {  // Preamble: the opcode itself.
2915     ExactAssemblyScope guard_preamble(this, preamble_length);
2916     hlt(action);
2917   }
2918   {  // A kNone-terminated list of features.
2919     ExactAssemblyScope guard_list(this, list_length);
2920     for (CPUFeatures::const_iterator it = features.begin();
2921          it != features.end();
2922          ++it) {
2923       dc(static_cast<ElementType>(*it));
2924     }
2925     dc(static_cast<ElementType>(CPUFeatures::kNone));
2926   }
2927   {  // Padding for instruction alignment.
2928     ExactAssemblyScope guard_padding(this, padding_length);
2929     for (size_t size = 0; size < padding_length; size += sizeof(ElementType)) {
2930       // The exact value is arbitrary.
2931       dc(static_cast<ElementType>(CPUFeatures::kNone));
2932     }
2933   }
2934 }
2935 
2936 void MacroAssembler::SaveSimulatorCPUFeatures() {
2937   VIXL_ASSERT(allow_macro_instructions_);
2938   VIXL_ASSERT(generate_simulator_code_);
2939   SingleEmissionCheckScope guard(this);
2940   hlt(kSaveCPUFeaturesOpcode);
2941 }
2942 
2943 
2944 void MacroAssembler::RestoreSimulatorCPUFeatures() {
2945   VIXL_ASSERT(allow_macro_instructions_);
2946   VIXL_ASSERT(generate_simulator_code_);
2947   SingleEmissionCheckScope guard(this);
2948   hlt(kRestoreCPUFeaturesOpcode);
2949 }
2950 
2951 
2952 void UseScratchRegisterScope::Open(MacroAssembler* masm) {
2953   VIXL_ASSERT(masm_ == NULL);
2954   VIXL_ASSERT(masm != NULL);
2955   masm_ = masm;
2956 
2957   CPURegList* available = masm->GetScratchRegisterList();
2958   CPURegList* available_v = masm->GetScratchVRegisterList();
2959   CPURegList* available_p = masm->GetScratchPRegisterList();
2960   old_available_ = available->GetList();
2961   old_available_v_ = available_v->GetList();
2962   old_available_p_ = available_p->GetList();
2963   VIXL_ASSERT(available->GetType() == CPURegister::kRegister);
2964   VIXL_ASSERT(available_v->GetType() == CPURegister::kVRegister);
2965   VIXL_ASSERT(available_p->GetType() == CPURegister::kPRegister);
2966 
2967   parent_ = masm->GetCurrentScratchRegisterScope();
2968   masm->SetCurrentScratchRegisterScope(this);
2969 }
2970 
2971 
2972 void UseScratchRegisterScope::Close() {
2973   if (masm_ != NULL) {
2974     // Ensure that scopes nest perfectly, and do not outlive their parents.
2975     // This is a run-time check because the order of destruction of objects in
2976     // the _same_ scope is implementation-defined, and is likely to change in
2977     // optimised builds.
2978     VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
2979     masm_->SetCurrentScratchRegisterScope(parent_);
2980 
2981     masm_->GetScratchRegisterList()->SetList(old_available_);
2982     masm_->GetScratchVRegisterList()->SetList(old_available_v_);
2983     masm_->GetScratchPRegisterList()->SetList(old_available_p_);
2984 
2985     masm_ = NULL;
2986   }
2987 }
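
// A minimal usage sketch (an assumption, not from the original source):
//
//   {
//     UseScratchRegisterScope temps(&masm);
//     Register temp = temps.AcquireX();
//     // ... use temp as a scratch register ...
//   }  // The destructor calls Close() and restores the scratch lists.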
2988 
2989 
2990 bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
2991   return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) ||
2992          masm_->GetScratchVRegisterList()->IncludesAliasOf(reg) ||
2993          masm_->GetScratchPRegisterList()->IncludesAliasOf(reg);
2994 }
2995 
Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) {
  int code = AcquireFrom(masm_->GetScratchRegisterList()).GetCode();
  return Register(code, size_in_bits);
}


VRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) {
  int code = AcquireFrom(masm_->GetScratchVRegisterList()).GetCode();
  return VRegister(code, size_in_bits);
}


void UseScratchRegisterScope::Release(const CPURegister& reg) {
  VIXL_ASSERT(masm_ != NULL);

  // Release(NoReg) has no effect.
  if (reg.IsNone()) return;

  ReleaseByCode(GetAvailableListFor(reg.GetBank()), reg.GetCode());
}


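// The Include() overloads grow the set of registers that may be handed out
// as scratch for the remainder of this scope; Close() undoes the change by
// restoring the snapshot taken in Open().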
void UseScratchRegisterScope::Include(const CPURegList& list) {
  VIXL_ASSERT(masm_ != NULL);

  // Including an empty list has no effect.
  if (list.IsEmpty()) return;
  VIXL_ASSERT(list.GetType() != CPURegister::kNoRegister);

  RegList reg_list = list.GetList();
  if (list.GetType() == CPURegister::kRegister) {
    // Make sure that neither sp nor xzr is included in the list.
    reg_list &= ~(xzr.GetBit() | sp.GetBit());
  }

  IncludeByRegList(GetAvailableListFor(list.GetBank()), reg_list);
}


void UseScratchRegisterScope::Include(const Register& reg1,
                                      const Register& reg2,
                                      const Register& reg3,
                                      const Register& reg4) {
  VIXL_ASSERT(masm_ != NULL);
  RegList include =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  // Make sure that neither sp nor xzr is included in the list.
  include &= ~(xzr.GetBit() | sp.GetBit());

  IncludeByRegList(masm_->GetScratchRegisterList(), include);
}


void UseScratchRegisterScope::Include(const VRegister& reg1,
                                      const VRegister& reg2,
                                      const VRegister& reg3,
                                      const VRegister& reg4) {
  RegList include =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  IncludeByRegList(masm_->GetScratchVRegisterList(), include);
}


void UseScratchRegisterScope::Include(const CPURegister& reg1,
                                      const CPURegister& reg2,
                                      const CPURegister& reg3,
                                      const CPURegister& reg4) {
  RegList include = 0;
  RegList include_v = 0;
  RegList include_p = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4};

  for (size_t i = 0; i < ArrayLength(regs); i++) {
    RegList bit = regs[i].GetBit();
    switch (regs[i].GetBank()) {
      case CPURegister::kNoRegisterBank:
        // Include(NoReg) has no effect.
        VIXL_ASSERT(regs[i].IsNone());
        break;
      case CPURegister::kRRegisterBank:
        include |= bit;
        break;
      case CPURegister::kVRegisterBank:
        include_v |= bit;
        break;
      case CPURegister::kPRegisterBank:
        include_p |= bit;
        break;
    }
  }

  IncludeByRegList(masm_->GetScratchRegisterList(), include);
  IncludeByRegList(masm_->GetScratchVRegisterList(), include_v);
  IncludeByRegList(masm_->GetScratchPRegisterList(), include_p);
}


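// The Exclude() overloads mirror Include(): they remove registers from the
// available set, typically to protect values that the caller is keeping in
// registers which would otherwise be handed out as scratch.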
void UseScratchRegisterScope::Exclude(const CPURegList& list) {
  ExcludeByRegList(GetAvailableListFor(list.GetBank()), list.GetList());
}


void UseScratchRegisterScope::Exclude(const Register& reg1,
                                      const Register& reg2,
                                      const Register& reg3,
                                      const Register& reg4) {
  RegList exclude =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
}


void UseScratchRegisterScope::Exclude(const VRegister& reg1,
                                      const VRegister& reg2,
                                      const VRegister& reg3,
                                      const VRegister& reg4) {
  RegList exclude_v =
      reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
  ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
}


void UseScratchRegisterScope::Exclude(const CPURegister& reg1,
                                      const CPURegister& reg2,
                                      const CPURegister& reg3,
                                      const CPURegister& reg4) {
  RegList exclude = 0;
  RegList exclude_v = 0;
  RegList exclude_p = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4};

  for (size_t i = 0; i < ArrayLength(regs); i++) {
    RegList bit = regs[i].GetBit();
    switch (regs[i].GetBank()) {
      case CPURegister::kNoRegisterBank:
        // Exclude(NoReg) has no effect.
        VIXL_ASSERT(regs[i].IsNone());
        break;
      case CPURegister::kRRegisterBank:
        exclude |= bit;
        break;
      case CPURegister::kVRegisterBank:
        exclude_v |= bit;
        break;
      case CPURegister::kPRegisterBank:
        exclude_p |= bit;
        break;
    }
  }

  ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
  ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
  ExcludeByRegList(masm_->GetScratchPRegisterList(), exclude_p);
}


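// Empty all three scratch lists at once, so that nothing can be implicitly
// clobbered until this scope is closed or registers are explicitly
// re-included.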
void UseScratchRegisterScope::ExcludeAll() {
  ExcludeByRegList(masm_->GetScratchRegisterList(),
                   masm_->GetScratchRegisterList()->GetList());
  ExcludeByRegList(masm_->GetScratchVRegisterList(),
                   masm_->GetScratchVRegisterList()->GetList());
  ExcludeByRegList(masm_->GetScratchPRegisterList(),
                   masm_->GetScratchPRegisterList()->GetList());
}


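// `mask` restricts which registers may be considered. The VIXL_CHECK is a
// run-time check (not just a debug assertion): it fires when no masked
// register is available, which usually means the scratch pool has been
// exhausted or over-excluded.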
CPURegister UseScratchRegisterScope::AcquireFrom(CPURegList* available,
                                                 RegList mask) {
  VIXL_CHECK((available->GetList() & mask) != 0);
  CPURegister result = available->PopLowestIndex(mask);
  VIXL_ASSERT(!AreAliased(result, xzr, sp));
  return result;
}


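// The helpers below manipulate raw RegList bit masks directly: each register
// code selects one bit, so releasing and including are a bitwise OR, and
// excluding is an AND with the complement.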
void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) {
  ReleaseByRegList(available, static_cast<RegList>(1) << code);
}


void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available,
                                               RegList regs) {
  available->SetList(available->GetList() | regs);
}


void UseScratchRegisterScope::IncludeByRegList(CPURegList* available,
                                               RegList regs) {
  available->SetList(available->GetList() | regs);
}


void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
                                               RegList exclude) {
  available->SetList(available->GetList() & ~exclude);
}

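// Map a register bank to the scratch list that serves it. kNoRegisterBank
// (i.e. NoReg) has no backing list, so callers must tolerate a NULL result.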
CPURegList* UseScratchRegisterScope::GetAvailableListFor(
    CPURegister::RegisterBank bank) {
  switch (bank) {
    case CPURegister::kNoRegisterBank:
      return NULL;
    case CPURegister::kRRegisterBank:
      return masm_->GetScratchRegisterList();
    case CPURegister::kVRegisterBank:
      return masm_->GetScratchVRegisterList();
    case CPURegister::kPRegisterBank:
      return masm_->GetScratchPRegisterList();
  }
  VIXL_UNREACHABLE();
  return NULL;
}

}  // namespace aarch64
}  // namespace vixl