1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm_vixl.h"
18 
19 #include "arch/arm/asm_support_arm.h"
20 #include "arch/arm/instruction_set_features_arm.h"
21 #include "arch/arm/jni_frame_arm.h"
22 #include "art_method-inl.h"
23 #include "base/bit_utils.h"
24 #include "base/bit_utils_iterator.h"
25 #include "base/globals.h"
26 #include "class_root-inl.h"
27 #include "class_table.h"
28 #include "code_generator_utils.h"
29 #include "common_arm.h"
30 #include "entrypoints/quick/quick_entrypoints.h"
31 #include "gc/accounting/card_table.h"
32 #include "gc/space/image_space.h"
33 #include "heap_poisoning.h"
34 #include "interpreter/mterp/nterp.h"
35 #include "intrinsics.h"
36 #include "intrinsics_arm_vixl.h"
37 #include "intrinsics_list.h"
38 #include "intrinsics_utils.h"
39 #include "jit/profiling_info.h"
40 #include "linker/linker_patch.h"
41 #include "mirror/array-inl.h"
42 #include "mirror/class-inl.h"
43 #include "mirror/var_handle.h"
44 #include "profiling_info_builder.h"
45 #include "scoped_thread_state_change-inl.h"
46 #include "thread.h"
47 #include "trace.h"
48 #include "utils/arm/assembler_arm_vixl.h"
49 #include "utils/arm/managed_register_arm.h"
50 #include "utils/assembler.h"
51 #include "utils/stack_checks.h"
52 
53 namespace art HIDDEN {
54 namespace arm {
55 
56 namespace vixl32 = vixl::aarch32;
57 using namespace vixl32;  // NOLINT(build/namespaces)
58 
59 using helpers::DRegisterFrom;
60 using helpers::HighRegisterFrom;
61 using helpers::InputDRegisterAt;
62 using helpers::InputOperandAt;
63 using helpers::InputRegister;
64 using helpers::InputRegisterAt;
65 using helpers::InputSRegisterAt;
66 using helpers::InputVRegister;
67 using helpers::InputVRegisterAt;
68 using helpers::Int32ConstantFrom;
69 using helpers::Int64ConstantFrom;
70 using helpers::LocationFrom;
71 using helpers::LowRegisterFrom;
72 using helpers::LowSRegisterFrom;
73 using helpers::OperandFrom;
74 using helpers::OutputRegister;
75 using helpers::OutputSRegister;
76 using helpers::OutputVRegister;
77 using helpers::RegisterFrom;
78 using helpers::SRegisterFrom;
79 using helpers::Uint64ConstantFrom;
80 
81 using vixl::EmissionCheckScope;
82 using vixl::ExactAssemblyScope;
83 using vixl::CodeBufferCheckScope;
84 
85 using RegisterList = vixl32::RegisterList;
86 
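// For example, pairs such as (R0, R1) or (S4, S5) satisfy this layout, whereas (R1, R2) does
// not: the low half must live in an even-numbered register, immediately followed by its high
// half.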
87 static bool ExpectedPairLayout(Location location) {
88   // We expect this layout for both core and FPU register pairs.
89   return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
90 }
91 // Use a local definition to prevent copying mistakes.
92 static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
93 static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
94 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
95 
96 // Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
97 // offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
98 // For the Baker read barrier implementation using link-time generated thunks, we need to split
99 // the offset explicitly.
100 constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
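// As a rough illustration of the splitting (a sketch of the idea only, not the exact sequence
// emitted later in this file), a reference field at offset 0x1008 could be loaded as:
//   add temp, obj, #0x1000
//   ldr out, [temp, #8]
// so that the final load is still a single LDR with a small immediate offset.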
101 
102 // Using a base helps identify when we hit Marking Register check breakpoints.
103 constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
104 
105 #ifdef __
106 #error "ARM Codegen VIXL macro-assembler macro already defined."
107 #endif
108 
109 // NOLINT on the __ macro suppresses a wrong clang-tidy warning/fix (misc-macro-parentheses).
110 #define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()->  // NOLINT
111 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
112 
113 // Marker for code that is yet to be, and must be, implemented.
114 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
115 
116 static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
117   return rt.IsLow() && rn.IsLow() && offset < 32u;
118 }
119 
120 class EmitAdrCode {
121  public:
122   EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
123       : assembler_(assembler), rd_(rd), label_(label) {
124     DCHECK(!assembler->AllowMacroInstructions());  // In ExactAssemblyScope.
125     adr_location_ = assembler->GetCursorOffset();
126     assembler->adr(EncodingSize(Wide), rd, label);
127   }
128 
129   ~EmitAdrCode() {
130     DCHECK(label_->IsBound());
131     // The ADR emitted by the assembler does not set the Thumb mode bit we need.
132     // TODO: Maybe extend VIXL to allow ADR for return address?
133     uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
134     // Expecting ADR encoding T3 with `(offset & 1) == 0`.
135     DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u);           // Check bits 24-31, except 26.
136     DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu);           // Check bits 16-23.
137     DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode());   // Check bits 8-11 and 15.
138     DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u);           // Check bit 0, i.e. the `offset & 1`.
139     // Add the Thumb mode bit.
140     raw_adr[2] |= 0x01u;
141   }
142 
143  private:
144   ArmVIXLMacroAssembler* const assembler_;
145   vixl32::Register rd_;
146   vixl32::Label* const label_;
147   int32_t adr_location_;
148 };
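// A typical use of EmitAdrCode (a sketch; the register and label names are illustrative
// assumptions, not taken from this file) is, inside an ExactAssemblyScope, to write
//   EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
// so that once `return_address` is bound and the EmitAdrCode object is destroyed, LR holds the
// label address with the Thumb bit set.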
149 
150 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
151   InvokeRuntimeCallingConventionARMVIXL calling_convention;
152   RegisterSet caller_saves = RegisterSet::Empty();
153   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
154   // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
155   // that the kPrimNot result register is the same as the first argument register.
156   return caller_saves;
157 }
158 
159 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers:
160 // for each live D register they treat the two corresponding S registers as live.
161 //
162 // The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build,
163 // from a list of contiguous S registers, a list of contiguous D registers (handling the first/last
164 // S register corner cases) and save/restore this new list treating its members as D registers,
165 // - decreasing code size, and
166 // - avoiding hazards on Cortex-A57, where a pair of S registers for an actually live D register is
167 //   restored and then used in regular non-slow-path code as a D register.
168 //
169 // For the following example (v means the S register is live):
170 //   D names: |    D0   |    D1   |    D2   |    D3   | ...
171 //   S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
172 //   Live?    |    |  v |  v |  v |  v |  v |  v |    | ...
173 //
174 // S1 and S6 will be saved/restored independently, while the contiguous range S2..S5 will be
175 // processed as the D register list (D1, D2).
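//
// As a sketch of what this produces for the example above (assuming stack_offset starts at 0;
// `tmp` stands for whatever scratch register the assembler hands out),
// SaveContiguousSRegisterList(1, 6, codegen, 0) would emit roughly:
//   vstr s1, [sp, #0]    ; lone leading S register
//   add  tmp, sp, #4     ; base for the D-register store, only needed for a non-zero offset
//   vstm tmp, {d1, d2}   ; S2..S5 stored as D1, D2
//   vstr s6, [sp, #20]   ; lone trailing S register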
176 //
177 // TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
178 // for lists of floating-point registers.
179 static size_t SaveContiguousSRegisterList(size_t first,
180                                           size_t last,
181                                           CodeGenerator* codegen,
182                                           size_t stack_offset) {
183   static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
184   static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
185   DCHECK_LE(first, last);
186   if ((first == last) && (first == 0)) {
187     __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
188     return stack_offset + kSRegSizeInBytes;
189   }
190   if (first % 2 == 1) {
191     __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
192     stack_offset += kSRegSizeInBytes;
193   }
194 
195   bool save_last = false;
196   if (last % 2 == 0) {
197     save_last = true;
198     --last;
199   }
200 
201   if (first < last) {
202     vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
203     DCHECK_EQ((last - first + 1) % 2, 0u);
204     size_t number_of_d_regs = (last - first + 1) / 2;
205 
206     if (number_of_d_regs == 1) {
207       __ Vstr(d_reg, MemOperand(sp, stack_offset));
208     } else if (number_of_d_regs > 1) {
209       UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
210       vixl32::Register base = sp;
211       if (stack_offset != 0) {
212         base = temps.Acquire();
213         __ Add(base, sp, Operand::From(stack_offset));
214       }
215       __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
216     }
217     stack_offset += number_of_d_regs * kDRegSizeInBytes;
218   }
219 
220   if (save_last) {
221     __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
222     stack_offset += kSRegSizeInBytes;
223   }
224 
225   return stack_offset;
226 }
227 
228 static size_t RestoreContiguousSRegisterList(size_t first,
229                                              size_t last,
230                                              CodeGenerator* codegen,
231                                              size_t stack_offset) {
232   static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
233   static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
234   DCHECK_LE(first, last);
235   if ((first == last) && (first == 0)) {
236     __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
237     return stack_offset + kSRegSizeInBytes;
238   }
239   if (first % 2 == 1) {
240     __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
241     stack_offset += kSRegSizeInBytes;
242   }
243 
244   bool restore_last = false;
245   if (last % 2 == 0) {
246     restore_last = true;
247     --last;
248   }
249 
250   if (first < last) {
251     vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
252     DCHECK_EQ((last - first + 1) % 2, 0u);
253     size_t number_of_d_regs = (last - first + 1) / 2;
254     if (number_of_d_regs == 1) {
255       __ Vldr(d_reg, MemOperand(sp, stack_offset));
256     } else if (number_of_d_regs > 1) {
257       UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
258       vixl32::Register base = sp;
259       if (stack_offset != 0) {
260         base = temps.Acquire();
261         __ Add(base, sp, Operand::From(stack_offset));
262       }
263       __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
264     }
265     stack_offset += number_of_d_regs * kDRegSizeInBytes;
266   }
267 
268   if (restore_last) {
269     __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
270     stack_offset += kSRegSizeInBytes;
271   }
272 
273   return stack_offset;
274 }
275 
276 static LoadOperandType GetLoadOperandType(DataType::Type type) {
277   switch (type) {
278     case DataType::Type::kReference:
279       return kLoadWord;
280     case DataType::Type::kBool:
281     case DataType::Type::kUint8:
282       return kLoadUnsignedByte;
283     case DataType::Type::kInt8:
284       return kLoadSignedByte;
285     case DataType::Type::kUint16:
286       return kLoadUnsignedHalfword;
287     case DataType::Type::kInt16:
288       return kLoadSignedHalfword;
289     case DataType::Type::kInt32:
290       return kLoadWord;
291     case DataType::Type::kInt64:
292       return kLoadWordPair;
293     case DataType::Type::kFloat32:
294       return kLoadSWord;
295     case DataType::Type::kFloat64:
296       return kLoadDWord;
297     default:
298       LOG(FATAL) << "Unreachable type " << type;
299       UNREACHABLE();
300   }
301 }
302 
303 void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
304   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
305   size_t orig_offset = stack_offset;
306 
307   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
308   for (uint32_t i : LowToHighBits(core_spills)) {
309     // If the register holds an object, update the stack mask.
310     if (locations->RegisterContainsObject(i)) {
311       locations->SetStackBit(stack_offset / kVRegSize);
312     }
313     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
314     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
315     saved_core_stack_offsets_[i] = stack_offset;
316     stack_offset += kArmWordSize;
317   }
318 
319   CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
320   arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);
321 
322   uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
323   orig_offset = stack_offset;
324   for (uint32_t i : LowToHighBits(fp_spills)) {
325     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
326     saved_fpu_stack_offsets_[i] = stack_offset;
327     stack_offset += kArmWordSize;
328   }
329 
330   stack_offset = orig_offset;
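  // Each loop iteration below peels off one contiguous run of set bits from `fp_spills` and
  // saves it in a single call. As a worked example on a hypothetical mask, for
  // fp_spills = 0b0011'1100 (S2..S5 live): begin = 2, tmp = fp_spills + (1 << 2) = 0b0100'0000,
  // the run is cleared from fp_spills, end = 6, and SaveContiguousSRegisterList(2, 5, ...)
  // stores the whole range at once.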
331   while (fp_spills != 0u) {
332     uint32_t begin = CTZ(fp_spills);
333     uint32_t tmp = fp_spills + (1u << begin);
334     fp_spills &= tmp;  // Clear the contiguous range of 1s.
335     uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
336     stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
337   }
338   DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
339 }
340 
341 void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
342   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
343   size_t orig_offset = stack_offset;
344 
345   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
346   for (uint32_t i : LowToHighBits(core_spills)) {
347     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
348     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
349     stack_offset += kArmWordSize;
350   }
351 
352   // TODO(VIXL): Check the coherency of stack_offset after this with a test.
353   CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
354   arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);
355 
356   uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
357   while (fp_spills != 0u) {
358     uint32_t begin = CTZ(fp_spills);
359     uint32_t tmp = fp_spills + (1u << begin);
360     fp_spills &= tmp;  // Clear the contiguous range of 1s.
361     uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
362     stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
363   }
364   DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
365 }
366 
367 class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
368  public:
369   explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}
370 
371   void EmitNativeCode(CodeGenerator* codegen) override {
372     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
373     __ Bind(GetEntryLabel());
374     if (instruction_->CanThrowIntoCatchBlock()) {
375       // Live registers will be restored in the catch block if caught.
376       SaveLiveRegisters(codegen, instruction_->GetLocations());
377     }
378     arm_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, this);
379     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
380   }
381 
382   bool IsFatal() const override { return true; }
383 
384   const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }
385 
386  private:
387   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
388 };
389 
390 class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
391  public:
392   explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
393       : SlowPathCodeARMVIXL(instruction) {}
394 
395   void EmitNativeCode(CodeGenerator* codegen) override {
396     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
397     __ Bind(GetEntryLabel());
398     arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, this);
399     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
400   }
401 
402   bool IsFatal() const override { return true; }
403 
404   const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }
405 
406  private:
407   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
408 };
409 
410 class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
411  public:
412   SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
413       : SlowPathCodeARMVIXL(instruction), successor_(successor) {}
414 
415   void EmitNativeCode(CodeGenerator* codegen) override {
416     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
417     __ Bind(GetEntryLabel());
418     arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, this);
419     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
420     if (successor_ == nullptr) {
421       __ B(GetReturnLabel());
422     } else {
423       __ B(arm_codegen->GetLabelOf(successor_));
424     }
425   }
426 
427   vixl32::Label* GetReturnLabel() {
428     DCHECK(successor_ == nullptr);
429     return &return_label_;
430   }
431 
432   HBasicBlock* GetSuccessor() const {
433     return successor_;
434   }
435 
436   const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }
437 
438  private:
439   // If not null, the block to branch to after the suspend check.
440   HBasicBlock* const successor_;
441 
442   // If `successor_` is null, the label to branch to after the suspend check.
443   vixl32::Label return_label_;
444 
445   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
446 };
447 
448 class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
449  public:
450   explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
451       : SlowPathCodeARMVIXL(instruction) {}
452 
453   void EmitNativeCode(CodeGenerator* codegen) override {
454     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
455     LocationSummary* locations = instruction_->GetLocations();
456 
457     __ Bind(GetEntryLabel());
458     if (instruction_->CanThrowIntoCatchBlock()) {
459       // Live registers will be restored in the catch block if caught.
460       SaveLiveRegisters(codegen, instruction_->GetLocations());
461     }
462     // We're moving two locations to locations that could overlap, so we need a parallel
463     // move resolver.
464     InvokeRuntimeCallingConventionARMVIXL calling_convention;
465     codegen->EmitParallelMoves(
466         locations->InAt(0),
467         LocationFrom(calling_convention.GetRegisterAt(0)),
468         DataType::Type::kInt32,
469         locations->InAt(1),
470         LocationFrom(calling_convention.GetRegisterAt(1)),
471         DataType::Type::kInt32);
472     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
473         ? kQuickThrowStringBounds
474         : kQuickThrowArrayBounds;
475     arm_codegen->InvokeRuntime(entrypoint, instruction_, this);
476     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
477     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
478   }
479 
480   bool IsFatal() const override { return true; }
481 
482   const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }
483 
484  private:
485   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
486 };
487 
488 class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
489  public:
490   LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
491       : SlowPathCodeARMVIXL(at), cls_(cls) {
492     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
493     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
494   }
495 
496   void EmitNativeCode(CodeGenerator* codegen) override {
497     LocationSummary* locations = instruction_->GetLocations();
498     Location out = locations->Out();
499     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
500     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
501 
502     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
503     __ Bind(GetEntryLabel());
504     SaveLiveRegisters(codegen, locations);
505 
506     InvokeRuntimeCallingConventionARMVIXL calling_convention;
507     if (must_resolve_type) {
508       DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()) ||
509              arm_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
510              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
511                              &cls_->GetDexFile()));
512       dex::TypeIndex type_index = cls_->GetTypeIndex();
513       __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
514       if (cls_->NeedsAccessCheck()) {
515         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
516         arm_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, this);
517       } else {
518         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
519         arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, this);
520       }
521       // If we also must_do_clinit, the resolved type is now in the correct register.
522     } else {
523       DCHECK(must_do_clinit);
524       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
525       arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
526     }
527     if (must_do_clinit) {
528       arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, this);
529       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
530     }
531 
532     // Move the class to the desired location.
533     if (out.IsValid()) {
534       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
535       arm_codegen->Move32(locations->Out(), LocationFrom(r0));
536     }
537     RestoreLiveRegisters(codegen, locations);
538     __ B(GetExitLabel());
539   }
540 
541   const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }
542 
543  private:
544   // The class this slow path will load.
545   HLoadClass* const cls_;
546 
547   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
548 };
549 
550 class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
551  public:
552   explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
553       : SlowPathCodeARMVIXL(instruction) {}
554 
555   void EmitNativeCode(CodeGenerator* codegen) override {
556     DCHECK(instruction_->IsLoadString());
557     DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
558     LocationSummary* locations = instruction_->GetLocations();
559     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
560     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
561 
562     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
563     __ Bind(GetEntryLabel());
564     SaveLiveRegisters(codegen, locations);
565 
566     InvokeRuntimeCallingConventionARMVIXL calling_convention;
567     __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
568     arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, this);
569     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
570 
571     arm_codegen->Move32(locations->Out(), LocationFrom(r0));
572     RestoreLiveRegisters(codegen, locations);
573 
574     __ B(GetExitLabel());
575   }
576 
577   const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }
578 
579  private:
580   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
581 };
582 
583 class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
584  public:
585   TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
586       : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}
587 
588   void EmitNativeCode(CodeGenerator* codegen) override {
589     LocationSummary* locations = instruction_->GetLocations();
590     DCHECK(instruction_->IsCheckCast()
591            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
592 
593     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
594     __ Bind(GetEntryLabel());
595 
596     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
597       SaveLiveRegisters(codegen, locations);
598     }
599 
600     // We're moving two locations to locations that could overlap, so we need a parallel
601     // move resolver.
602     InvokeRuntimeCallingConventionARMVIXL calling_convention;
603 
604     codegen->EmitParallelMoves(locations->InAt(0),
605                                LocationFrom(calling_convention.GetRegisterAt(0)),
606                                DataType::Type::kReference,
607                                locations->InAt(1),
608                                LocationFrom(calling_convention.GetRegisterAt(1)),
609                                DataType::Type::kReference);
610     if (instruction_->IsInstanceOf()) {
611       arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, this);
612       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
613       arm_codegen->Move32(locations->Out(), LocationFrom(r0));
614     } else {
615       DCHECK(instruction_->IsCheckCast());
616       arm_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, this);
617       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
618     }
619 
620     if (!is_fatal_) {
621       RestoreLiveRegisters(codegen, locations);
622       __ B(GetExitLabel());
623     }
624   }
625 
626   const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }
627 
628   bool IsFatal() const override { return is_fatal_; }
629 
630  private:
631   const bool is_fatal_;
632 
633   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
634 };
635 
636 class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
637  public:
638   explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
639       : SlowPathCodeARMVIXL(instruction) {}
640 
641   void EmitNativeCode(CodeGenerator* codegen) override {
642     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
643     __ Bind(GetEntryLabel());
644     LocationSummary* locations = instruction_->GetLocations();
645     SaveLiveRegisters(codegen, locations);
646     InvokeRuntimeCallingConventionARMVIXL calling_convention;
647     __ Mov(calling_convention.GetRegisterAt(0),
648            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
649 
650     arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, this);
651     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
652   }
653 
654   const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }
655 
656  private:
657   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
658 };
659 
660 class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
661  public:
662   explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}
663 
664   void EmitNativeCode(CodeGenerator* codegen) override {
665     LocationSummary* locations = instruction_->GetLocations();
666     __ Bind(GetEntryLabel());
667     SaveLiveRegisters(codegen, locations);
668 
669     InvokeRuntimeCallingConventionARMVIXL calling_convention;
670     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
671     parallel_move.AddMove(
672         locations->InAt(0),
673         LocationFrom(calling_convention.GetRegisterAt(0)),
674         DataType::Type::kReference,
675         nullptr);
676     parallel_move.AddMove(
677         locations->InAt(1),
678         LocationFrom(calling_convention.GetRegisterAt(1)),
679         DataType::Type::kInt32,
680         nullptr);
681     parallel_move.AddMove(
682         locations->InAt(2),
683         LocationFrom(calling_convention.GetRegisterAt(2)),
684         DataType::Type::kReference,
685         nullptr);
686     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
687 
688     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
689     arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, this);
690     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
691     RestoreLiveRegisters(codegen, locations);
692     __ B(GetExitLabel());
693   }
694 
695   const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }
696 
697  private:
698   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
699 };
700 
701 // Slow path generating a read barrier for a heap reference.
702 class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
703  public:
704   ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
705                                              Location out,
706                                              Location ref,
707                                              Location obj,
708                                              uint32_t offset,
709                                              Location index)
710       : SlowPathCodeARMVIXL(instruction),
711         out_(out),
712         ref_(ref),
713         obj_(obj),
714         offset_(offset),
715         index_(index) {
716     // If `obj` is equal to `out` or `ref`, it means the initial object
717     // has been overwritten by (or after) the heap object reference load
718     // to be instrumented, e.g.:
719     //
720     //   __ LoadFromOffset(kLoadWord, out, out, offset);
721     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
722     //
723     // In that case, we have lost the information about the original
724     // object, and the emitted read barrier cannot work properly.
725     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
726     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
727   }
728 
729   void EmitNativeCode(CodeGenerator* codegen) override {
730     DCHECK(codegen->EmitReadBarrier());
731     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
732     LocationSummary* locations = instruction_->GetLocations();
733     vixl32::Register reg_out = RegisterFrom(out_);
734     DCHECK(locations->CanCall());
735     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
736     DCHECK(instruction_->IsInstanceFieldGet() ||
737            instruction_->IsStaticFieldGet() ||
738            instruction_->IsArrayGet() ||
739            instruction_->IsInstanceOf() ||
740            instruction_->IsCheckCast() ||
741            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
742         << "Unexpected instruction in read barrier for heap reference slow path: "
743         << instruction_->DebugName();
744     // The read barrier instrumentation of object ArrayGet
745     // instructions does not support the HIntermediateAddress
746     // instruction.
747     DCHECK(!(instruction_->IsArrayGet() &&
748              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
749 
750     __ Bind(GetEntryLabel());
751     SaveLiveRegisters(codegen, locations);
752 
753     // We may have to change the index's value, but as `index_` is a
754     // constant member (like the other "inputs" of this slow path),
755     // we introduce a copy of it, `index`.
756     Location index = index_;
757     if (index_.IsValid()) {
758       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
759       if (instruction_->IsArrayGet()) {
760         // Compute the actual memory offset and store it in `index`.
761         vixl32::Register index_reg = RegisterFrom(index_);
762         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
763         if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
764           // We are about to change the value of `index_reg` (see the
765           // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
766           // art::arm::ArmVIXLMacroAssembler::Add below), but it has
767           // not been saved by the previous call to
768           // art::SlowPathCode::SaveLiveRegisters, as it is a
769           // callee-save register --
770           // art::SlowPathCode::SaveLiveRegisters does not consider
771           // callee-save registers, as it has been designed with the
772           // assumption that callee-save registers are supposed to be
773           // handled by the called function.  So, as a callee-save
774           // register, `index_reg` _would_ eventually be saved onto
775           // the stack, but it would be too late: we would have
776           // changed its value earlier.  Therefore, we manually save
777           // it here into another freely available register,
778           // `free_reg`, chosen of course among the caller-save
779           // registers (as a callee-save `free_reg` register would
780           // exhibit the same problem).
781           //
782           // Note we could have requested a temporary register from
783           // the register allocator instead; but we prefer not to, as
784           // this is a slow path, and we know we can find a
785           // caller-save register that is available.
786           vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
787           __ Mov(free_reg, index_reg);
788           index_reg = free_reg;
789           index = LocationFrom(index_reg);
790         } else {
791           // The initial register stored in `index_` has already been
792           // saved in the call to art::SlowPathCode::SaveLiveRegisters
793           // (as it is not a callee-save register), so we can freely
794           // use it.
795         }
796         // Shifting the index value contained in `index_reg` by the scale
797         // factor (2) cannot overflow in practice, as the runtime is
798         // unable to allocate object arrays with a size larger than
799         // 2^26 - 1 (that is, 2^28 - 4 bytes).
800         __ Lsl(index_reg, index_reg, TIMES_4);
801         static_assert(
802             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
803             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
804         __ Add(index_reg, index_reg, offset_);
805       } else {
806         // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
807         // (as in the case of ArrayGet), as it is actually an offset to an object field within an
808         // object.
809         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
810         DCHECK(instruction_->GetLocations()->Intrinsified());
811         HInvoke* invoke = instruction_->AsInvoke();
812         DCHECK(IsUnsafeGetReference(invoke) ||
813                IsVarHandleGet(invoke) ||
814                IsVarHandleCASFamily(invoke))
815             << invoke->GetIntrinsic();
816         DCHECK_EQ(offset_, 0U);
817         // Though UnsafeGet's offset location is a register pair, we only pass the low
818         // part (high part is irrelevant for 32-bit addresses) to the slow path.
819         // For VarHandle intrinsics, the index is always just a register.
820         DCHECK(index_.IsRegister());
821         index = index_;
822       }
823     }
824 
825     // We're moving two or three locations to locations that could
826     // overlap, so we need a parallel move resolver.
827     InvokeRuntimeCallingConventionARMVIXL calling_convention;
828     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
829     parallel_move.AddMove(ref_,
830                           LocationFrom(calling_convention.GetRegisterAt(0)),
831                           DataType::Type::kReference,
832                           nullptr);
833     parallel_move.AddMove(obj_,
834                           LocationFrom(calling_convention.GetRegisterAt(1)),
835                           DataType::Type::kReference,
836                           nullptr);
837     if (index.IsValid()) {
838       parallel_move.AddMove(index,
839                             LocationFrom(calling_convention.GetRegisterAt(2)),
840                             DataType::Type::kInt32,
841                             nullptr);
842       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
843     } else {
844       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
845       __ Mov(calling_convention.GetRegisterAt(2), offset_);
846     }
847     arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, this);
848     CheckEntrypointTypes<
849         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
850     arm_codegen->Move32(out_, LocationFrom(r0));
851 
852     RestoreLiveRegisters(codegen, locations);
853     __ B(GetExitLabel());
854   }
855 
856   const char* GetDescription() const override {
857     return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
858   }
859 
860  private:
861   vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
862     uint32_t ref = RegisterFrom(ref_).GetCode();
863     uint32_t obj = RegisterFrom(obj_).GetCode();
864     for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
865       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
866         return vixl32::Register(i);
867       }
868     }
869     // We shall never fail to find a free caller-save register, as
870     // there are more than two core caller-save registers on ARM
871     // (meaning it is possible to find one which is different from
872     // `ref` and `obj`).
873     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
874     LOG(FATAL) << "Could not find a free caller-save register";
875     UNREACHABLE();
876   }
877 
878   const Location out_;
879   const Location ref_;
880   const Location obj_;
881   const uint32_t offset_;
882   // An additional location containing an index to an array.
883   // Only used for HArrayGet and the UnsafeGetObject &
884   // UnsafeGetObjectVolatile intrinsics.
885   const Location index_;
886 
887   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL);
888 };
889 
890 // Slow path generating a read barrier for a GC root.
891 class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
892  public:
893   ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
894       : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
895   }
896 
897   void EmitNativeCode(CodeGenerator* codegen) override {
898     DCHECK(codegen->EmitReadBarrier());
899     LocationSummary* locations = instruction_->GetLocations();
900     vixl32::Register reg_out = RegisterFrom(out_);
901     DCHECK(locations->CanCall());
902     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
903     DCHECK(instruction_->IsLoadClass() ||
904            instruction_->IsLoadString() ||
905            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
906         << "Unexpected instruction in read barrier for GC root slow path: "
907         << instruction_->DebugName();
908 
909     __ Bind(GetEntryLabel());
910     SaveLiveRegisters(codegen, locations);
911 
912     InvokeRuntimeCallingConventionARMVIXL calling_convention;
913     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
914     arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_);
915     arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, this);
916     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
917     arm_codegen->Move32(out_, LocationFrom(r0));
918 
919     RestoreLiveRegisters(codegen, locations);
920     __ B(GetExitLabel());
921   }
922 
923   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }
924 
925  private:
926   const Location out_;
927   const Location root_;
928 
929   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
930 };
931 
932 class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
933  public:
934   explicit MethodEntryExitHooksSlowPathARMVIXL(HInstruction* instruction)
935       : SlowPathCodeARMVIXL(instruction) {}
936 
937   void EmitNativeCode(CodeGenerator* codegen) override {
938     LocationSummary* locations = instruction_->GetLocations();
939     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
940     QuickEntrypointEnum entry_point =
941         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
942     __ Bind(GetEntryLabel());
943     SaveLiveRegisters(codegen, locations);
944     if (instruction_->IsMethodExitHook()) {
945       // Load frame size to pass to the exit hooks
946       __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
947     }
948     arm_codegen->InvokeRuntime(entry_point, instruction_, this);
949     RestoreLiveRegisters(codegen, locations);
950     __ B(GetExitLabel());
951   }
952 
953   const char* GetDescription() const override {
954     return "MethodEntryExitHooksSlowPath";
955   }
956 
957  private:
958   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL);
959 };
960 
961 class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
962  public:
963   CompileOptimizedSlowPathARMVIXL(HSuspendCheck* suspend_check,
964                                   vixl32::Register profiling_info)
965       : SlowPathCodeARMVIXL(suspend_check),
966         profiling_info_(profiling_info) {}
967 
968   void EmitNativeCode(CodeGenerator* codegen) override {
969     uint32_t entry_point_offset =
970         GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
971     __ Bind(GetEntryLabel());
972     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
973     UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
974     vixl32::Register tmp = temps.Acquire();
975     __ Mov(tmp, ProfilingInfo::GetOptimizeThreshold());
976     __ Strh(tmp,
977             MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
978     __ Ldr(lr, MemOperand(tr, entry_point_offset));
979     // Note: we don't record the call here (and therefore don't generate a stack
980     // map), as the entrypoint should never be suspended.
981     __ Blx(lr);
982     __ B(GetExitLabel());
983   }
984 
985   const char* GetDescription() const override {
986     return "CompileOptimizedSlowPath";
987   }
988 
989  private:
990   vixl32::Register profiling_info_;
991 
992   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARMVIXL);
993 };
994 
995 inline vixl32::Condition ARMCondition(IfCondition cond) {
996   switch (cond) {
997     case kCondEQ: return eq;
998     case kCondNE: return ne;
999     case kCondLT: return lt;
1000     case kCondLE: return le;
1001     case kCondGT: return gt;
1002     case kCondGE: return ge;
1003     case kCondB:  return lo;
1004     case kCondBE: return ls;
1005     case kCondA:  return hi;
1006     case kCondAE: return hs;
1007   }
1008   LOG(FATAL) << "Unreachable";
1009   UNREACHABLE();
1010 }
1011 
1012 // Maps signed condition to unsigned condition.
1013 inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
1014   switch (cond) {
1015     case kCondEQ: return eq;
1016     case kCondNE: return ne;
1017     // Signed to unsigned.
1018     case kCondLT: return lo;
1019     case kCondLE: return ls;
1020     case kCondGT: return hi;
1021     case kCondGE: return hs;
1022     // Unsigned conditions remain unchanged.
1023     case kCondB:  return lo;
1024     case kCondBE: return ls;
1025     case kCondA:  return hi;
1026     case kCondAE: return hs;
1027   }
1028   LOG(FATAL) << "Unreachable";
1029   UNREACHABLE();
1030 }
1031 
1032 inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
1033   // The ARM condition codes can express all the necessary branches, see the
1034   // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
1035   // There is no dex instruction or HIR that would need the missing conditions
1036   // "equal or unordered" or "not equal".
1037   switch (cond) {
1038     case kCondEQ: return eq;
1039     case kCondNE: return ne /* unordered */;
1040     case kCondLT: return gt_bias ? cc : lt /* unordered */;
1041     case kCondLE: return gt_bias ? ls : le /* unordered */;
1042     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
1043     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
1044     default:
1045       LOG(FATAL) << "UNREACHABLE";
1046       UNREACHABLE();
1047   }
1048 }
1049 
1050 inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
1051   switch (op_kind) {
1052     case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
1053     case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
1054     case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
1055     default:
1056       LOG(FATAL) << "Unexpected op kind " << op_kind;
1057       UNREACHABLE();
1058   }
1059 }
1060 
1061 void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
1062   stream << vixl32::Register(reg);
1063 }
1064 
1065 void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1066   stream << vixl32::SRegister(reg);
1067 }
1068 
1069 const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
1070   return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
1071 }
1072 
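// For example, a list covering S16..S31 produces the mask 0xffff0000: one bit per S register,
// indexed by the register code.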
1073 static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
1074   uint32_t mask = 0;
1075   for (uint32_t i = regs.GetFirstSRegister().GetCode();
1076        i <= regs.GetLastSRegister().GetCode();
1077        ++i) {
1078     mask |= (1 << i);
1079   }
1080   return mask;
1081 }
1082 
1083 // Saves the register on the stack. Returns the size taken on the stack.
1084 size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index,
1085                                               [[maybe_unused]] uint32_t reg_id) {
1086   TODO_VIXL32(FATAL);
1087   UNREACHABLE();
1088 }
1089 
1090 // Restores the register from the stack. Returns the size taken on the stack.
1091 size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index,
1092                                                  [[maybe_unused]] uint32_t reg_id) {
1093   TODO_VIXL32(FATAL);
1094   UNREACHABLE();
1095 }
1096 
1097 size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
1098                                                        [[maybe_unused]] uint32_t reg_id) {
1099   TODO_VIXL32(FATAL);
1100   UNREACHABLE();
1101 }
1102 
1103 size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
1104                                                           [[maybe_unused]] uint32_t reg_id) {
1105   TODO_VIXL32(FATAL);
1106   UNREACHABLE();
1107 }
1108 
1109 static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
1110                                         vixl32::Register out,
1111                                         vixl32::Register first,
1112                                         const Operand& second,
1113                                         CodeGeneratorARMVIXL* codegen) {
1114   if (second.IsImmediate() && second.GetImmediate() == 0) {
1115     const Operand in = kind == HInstruction::kAnd
1116         ? Operand(0)
1117         : Operand(first);
1118 
1119     __ Mov(out, in);
1120   } else {
1121     switch (kind) {
1122       case HInstruction::kAdd:
1123         __ Add(out, first, second);
1124         break;
1125       case HInstruction::kAnd:
1126         __ And(out, first, second);
1127         break;
1128       case HInstruction::kOr:
1129         __ Orr(out, first, second);
1130         break;
1131       case HInstruction::kSub:
1132         __ Sub(out, first, second);
1133         break;
1134       case HInstruction::kXor:
1135         __ Eor(out, first, second);
1136         break;
1137       default:
1138         LOG(FATAL) << "Unexpected instruction kind: " << kind;
1139         UNREACHABLE();
1140     }
1141   }
1142 }
1143 
1144 static void GenerateDataProc(HInstruction::InstructionKind kind,
1145                              const Location& out,
1146                              const Location& first,
1147                              const Operand& second_lo,
1148                              const Operand& second_hi,
1149                              CodeGeneratorARMVIXL* codegen) {
1150   const vixl32::Register first_hi = HighRegisterFrom(first);
1151   const vixl32::Register first_lo = LowRegisterFrom(first);
1152   const vixl32::Register out_hi = HighRegisterFrom(out);
1153   const vixl32::Register out_lo = LowRegisterFrom(out);
1154 
1155   if (kind == HInstruction::kAdd) {
1156     __ Adds(out_lo, first_lo, second_lo);
1157     __ Adc(out_hi, first_hi, second_hi);
1158   } else if (kind == HInstruction::kSub) {
1159     __ Subs(out_lo, first_lo, second_lo);
1160     __ Sbc(out_hi, first_hi, second_hi);
1161   } else {
1162     GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
1163     GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
1164   }
1165 }
1166 
1167 static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
1168   return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
1169 }
1170 
1171 static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
1172                                  CodeGeneratorARMVIXL* codegen) {
1173   DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
1174   DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
1175 
1176   const LocationSummary* const locations = instruction->GetLocations();
1177   const uint32_t shift_value = instruction->GetShiftAmount();
1178   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
1179   const Location first = locations->InAt(0);
1180   const Location second = locations->InAt(1);
1181   const Location out = locations->Out();
1182   const vixl32::Register first_hi = HighRegisterFrom(first);
1183   const vixl32::Register first_lo = LowRegisterFrom(first);
1184   const vixl32::Register out_hi = HighRegisterFrom(out);
1185   const vixl32::Register out_lo = LowRegisterFrom(out);
1186   const vixl32::Register second_hi = HighRegisterFrom(second);
1187   const vixl32::Register second_lo = LowRegisterFrom(second);
1188   const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());
1189 
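  // For shifts of 32 or more, the shifted 64-bit operand has a simple per-word form; for
  // instance (an illustrative case), an LSL by 40 leaves (second_lo << 8) in the high word and
  // 0 in the low word, so `kind` can be applied word by word against `first`.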
1190   if (shift_value >= 32) {
1191     if (shift == ShiftType::LSL) {
1192       GenerateDataProcInstruction(kind,
1193                                   out_hi,
1194                                   first_hi,
1195                                   Operand(second_lo, ShiftType::LSL, shift_value - 32),
1196                                   codegen);
1197       GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
1198     } else if (shift == ShiftType::ASR) {
1199       GenerateDataProc(kind,
1200                        out,
1201                        first,
1202                        GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
1203                        Operand(second_hi, ShiftType::ASR, 31),
1204                        codegen);
1205     } else {
1206       DCHECK_EQ(shift, ShiftType::LSR);
1207       GenerateDataProc(kind,
1208                        out,
1209                        first,
1210                        GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
1211                        0,
1212                        codegen);
1213     }
1214   } else {
1215     DCHECK_GT(shift_value, 1U);
1216     DCHECK_LT(shift_value, 32U);
1217 
1218     UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1219 
1220     if (shift == ShiftType::LSL) {
1221       // We are not doing this for HInstruction::kAdd because the output will require
1222       // Location::kOutputOverlap; not applicable to other cases.
1223       if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1224         GenerateDataProcInstruction(kind,
1225                                     out_hi,
1226                                     first_hi,
1227                                     Operand(second_hi, ShiftType::LSL, shift_value),
1228                                     codegen);
1229         GenerateDataProcInstruction(kind,
1230                                     out_hi,
1231                                     out_hi,
1232                                     Operand(second_lo, ShiftType::LSR, 32 - shift_value),
1233                                     codegen);
1234         GenerateDataProcInstruction(kind,
1235                                     out_lo,
1236                                     first_lo,
1237                                     Operand(second_lo, ShiftType::LSL, shift_value),
1238                                     codegen);
1239       } else {
1240         const vixl32::Register temp = temps.Acquire();
1241 
1242         __ Lsl(temp, second_hi, shift_value);
1243         __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
1244         GenerateDataProc(kind,
1245                          out,
1246                          first,
1247                          Operand(second_lo, ShiftType::LSL, shift_value),
1248                          temp,
1249                          codegen);
1250       }
1251     } else {
1252       DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);
1253 
1254       // We are not doing this for HInstruction::kAdd because the output will require
1255       // Location::kOutputOverlap; not applicable to other cases.
1256       if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1257         GenerateDataProcInstruction(kind,
1258                                     out_lo,
1259                                     first_lo,
1260                                     Operand(second_lo, ShiftType::LSR, shift_value),
1261                                     codegen);
1262         GenerateDataProcInstruction(kind,
1263                                     out_lo,
1264                                     out_lo,
1265                                     Operand(second_hi, ShiftType::LSL, 32 - shift_value),
1266                                     codegen);
1267         GenerateDataProcInstruction(kind,
1268                                     out_hi,
1269                                     first_hi,
1270                                     Operand(second_hi, shift, shift_value),
1271                                     codegen);
1272       } else {
1273         const vixl32::Register temp = temps.Acquire();
1274 
1275         __ Lsr(temp, second_lo, shift_value);
1276         __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
1277         GenerateDataProc(kind,
1278                          out,
1279                          first,
1280                          temp,
1281                          Operand(second_hi, shift, shift_value),
1282                          codegen);
1283       }
1284     }
1285   }
1286 }
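// Worked example for the decomposition above, with illustrative shift amounts: a 64-bit LSL by 40
// yields out_hi = first_hi OP (second_lo << 8) and out_lo = first_lo OP 0, while for an LSL amount
// n strictly between 1 and 32 the shifted operand's high half is
// (second_hi << n) | (second_lo >> (32 - n)), formed either directly in the shifter operands
// (ORR/EOR) or via a scratch register otherwise.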
1287 
1288 static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
1289   const Location rhs_loc = instruction->GetLocations()->InAt(1);
1290   if (rhs_loc.IsConstant()) {
1291     // 0.0 is the only immediate that can be encoded directly in
1292     // a VCMP instruction.
1293     //
1294     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
1295     // specify that in a floating-point comparison, positive zero
1296     // and negative zero are considered equal, so we can use the
1297     // literal 0.0 for both cases here.
1298     //
1299     // Note however that some methods (Float.equals, Float.compare,
1300     // Float.compareTo, Double.equals, Double.compare,
1301     // Double.compareTo, Math.max, Math.min, StrictMath.max,
1302     // StrictMath.min) consider 0.0 to be (strictly) greater than
1303     // -0.0. So if we ever translate calls to these methods into a
1304     // HCompare instruction, we must handle the -0.0 case with
1305     // care here.
1306     DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
1307 
1308     const DataType::Type type = instruction->InputAt(0)->GetType();
1309 
1310     if (type == DataType::Type::kFloat32) {
1311       __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
1312     } else {
1313       DCHECK_EQ(type, DataType::Type::kFloat64);
1314       __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
1315     }
1316   } else {
1317     __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
1318   }
1319 }
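// For example, a float comparison against a constant zero is emitted as VCMP.F32 s<n>, #0.0;
// GenerateTest() below then issues VMRS APSR_nzcv, FPSCR to copy the FP comparison flags into the
// core condition flags.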
1320 
1321 static int64_t AdjustConstantForCondition(int64_t value,
1322                                           IfCondition* condition,
1323                                           IfCondition* opposite) {
1324   if (value == 1) {
1325     if (*condition == kCondB) {
1326       value = 0;
1327       *condition = kCondEQ;
1328       *opposite = kCondNE;
1329     } else if (*condition == kCondAE) {
1330       value = 0;
1331       *condition = kCondNE;
1332       *opposite = kCondEQ;
1333     }
1334   } else if (value == -1) {
1335     if (*condition == kCondGT) {
1336       value = 0;
1337       *condition = kCondGE;
1338       *opposite = kCondLT;
1339     } else if (*condition == kCondLE) {
1340       value = 0;
1341       *condition = kCondLT;
1342       *opposite = kCondGE;
1343     }
1344   }
1345 
1346   return value;
1347 }
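// For example, the unsigned test `x < 1` (kCondB) is rewritten as `x == 0`, and the signed test
// `x > -1` (kCondGT) as `x >= 0`, letting the callers below take the cheaper zero-comparison paths.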
1348 
1349 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
1350     HCondition* condition,
1351     bool invert,
1352     CodeGeneratorARMVIXL* codegen) {
1353   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1354 
1355   const LocationSummary* const locations = condition->GetLocations();
1356   IfCondition cond = condition->GetCondition();
1357   IfCondition opposite = condition->GetOppositeCondition();
1358 
1359   if (invert) {
1360     std::swap(cond, opposite);
1361   }
1362 
1363   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1364   const Location left = locations->InAt(0);
1365   const Location right = locations->InAt(1);
1366 
1367   DCHECK(right.IsConstant());
1368 
1369   const vixl32::Register left_high = HighRegisterFrom(left);
1370   const vixl32::Register left_low = LowRegisterFrom(left);
1371   int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
1372   UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1373 
1374   // Comparisons against 0 are common enough to deserve special attention.
1375   if (value == 0) {
1376     switch (cond) {
1377       case kCondNE:
1378       // x > 0 iff x != 0 when the comparison is unsigned.
1379       case kCondA:
1380         ret = std::make_pair(ne, eq);
1381         FALLTHROUGH_INTENDED;
1382       case kCondEQ:
1383       // x <= 0 iff x == 0 when the comparison is unsigned.
1384       case kCondBE:
1385         __ Orrs(temps.Acquire(), left_low, left_high);
1386         return ret;
1387       case kCondLT:
1388       case kCondGE:
1389         __ Cmp(left_high, 0);
1390         return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1391       // Trivially true or false.
1392       case kCondB:
1393         ret = std::make_pair(ne, eq);
1394         FALLTHROUGH_INTENDED;
1395       case kCondAE:
1396         __ Cmp(left_low, left_low);
1397         return ret;
1398       default:
1399         break;
1400     }
1401   }
1402 
1403   switch (cond) {
1404     case kCondEQ:
1405     case kCondNE:
1406     case kCondB:
1407     case kCondBE:
1408     case kCondA:
1409     case kCondAE: {
1410       const uint32_t value_low = Low32Bits(value);
1411       Operand operand_low(value_low);
1412 
1413       __ Cmp(left_high, High32Bits(value));
1414 
1415       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
1416       // we must ensure that the operands corresponding to the least significant
1417       // halves of the inputs fit into a 16-bit CMP encoding.
1418       if (!left_low.IsLow() || !IsUint<8>(value_low)) {
1419         operand_low = Operand(temps.Acquire());
1420         __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
1421       }
1422 
1423       // We use the scope because of the IT block that follows.
1424       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1425                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1426                                CodeBufferCheckScope::kExactSize);
1427 
1428       __ it(eq);
1429       __ cmp(eq, left_low, operand_low);
1430       ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1431       break;
1432     }
1433     case kCondLE:
1434     case kCondGT:
1435       // Trivially true or false.
1436       if (value == std::numeric_limits<int64_t>::max()) {
1437         __ Cmp(left_low, left_low);
1438         ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
1439         break;
1440       }
1441 
1442       if (cond == kCondLE) {
1443         DCHECK_EQ(opposite, kCondGT);
1444         cond = kCondLT;
1445         opposite = kCondGE;
1446       } else {
1447         DCHECK_EQ(cond, kCondGT);
1448         DCHECK_EQ(opposite, kCondLE);
1449         cond = kCondGE;
1450         opposite = kCondLT;
1451       }
1452 
1453       value++;
1454       FALLTHROUGH_INTENDED;
1455     case kCondGE:
1456     case kCondLT: {
1457       __ Cmp(left_low, Low32Bits(value));
1458       __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
1459       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1460       break;
1461     }
1462     default:
1463       LOG(FATAL) << "Unreachable";
1464       UNREACHABLE();
1465   }
1466 
1467   return ret;
1468 }
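// Note on the kCondLE/kCondGT case above: unless the constant is INT64_MAX, `x <= c` is rewritten
// as `x < c + 1` (and `x > c` as `x >= c + 1`) so that it can be evaluated branchlessly with the
// CMP/SBCS pair on the 32-bit halves.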
1469 
1470 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
1471     HCondition* condition,
1472     bool invert,
1473     CodeGeneratorARMVIXL* codegen) {
1474   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1475 
1476   const LocationSummary* const locations = condition->GetLocations();
1477   IfCondition cond = condition->GetCondition();
1478   IfCondition opposite = condition->GetOppositeCondition();
1479 
1480   if (invert) {
1481     std::swap(cond, opposite);
1482   }
1483 
1484   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1485   Location left = locations->InAt(0);
1486   Location right = locations->InAt(1);
1487 
1488   DCHECK(right.IsRegisterPair());
1489 
1490   switch (cond) {
1491     case kCondEQ:
1492     case kCondNE:
1493     case kCondB:
1494     case kCondBE:
1495     case kCondA:
1496     case kCondAE: {
1497       __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
1498 
1499       // We use the scope because of the IT block that follows.
1500       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1501                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1502                                CodeBufferCheckScope::kExactSize);
1503 
1504       __ it(eq);
1505       __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
1506       ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1507       break;
1508     }
1509     case kCondLE:
1510     case kCondGT:
1511       if (cond == kCondLE) {
1512         DCHECK_EQ(opposite, kCondGT);
1513         cond = kCondGE;
1514         opposite = kCondLT;
1515       } else {
1516         DCHECK_EQ(cond, kCondGT);
1517         DCHECK_EQ(opposite, kCondLE);
1518         cond = kCondLT;
1519         opposite = kCondGE;
1520       }
1521 
1522       std::swap(left, right);
1523       FALLTHROUGH_INTENDED;
1524     case kCondGE:
1525     case kCondLT: {
1526       UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1527 
1528       __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
1529       __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
1530       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1531       break;
1532     }
1533     default:
1534       LOG(FATAL) << "Unreachable";
1535       UNREACHABLE();
1536   }
1537 
1538   return ret;
1539 }
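// Sketch of the signed 64-bit `<` / `>=` test above (tmp is a scratch register; only the flags
// matter):
//   CMP  left_lo, right_lo
//   SBCS tmp, left_hi, right_hi   @ flags now reflect the full 64-bit subtraction
// LE/GT are reduced to the same sequence by swapping the operands first.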
1540 
1541 static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
1542                                                                     bool invert,
1543                                                                     CodeGeneratorARMVIXL* codegen) {
1544   const DataType::Type type = condition->GetLeft()->GetType();
1545   IfCondition cond = condition->GetCondition();
1546   IfCondition opposite = condition->GetOppositeCondition();
1547   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1548 
1549   if (invert) {
1550     std::swap(cond, opposite);
1551   }
1552 
1553   if (type == DataType::Type::kInt64) {
1554     ret = condition->GetLocations()->InAt(1).IsConstant()
1555         ? GenerateLongTestConstant(condition, invert, codegen)
1556         : GenerateLongTest(condition, invert, codegen);
1557   } else if (DataType::IsFloatingPointType(type)) {
1558     GenerateVcmp(condition, codegen);
1559     __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
1560     ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
1561                          ARMFPCondition(opposite, condition->IsGtBias()));
1562   } else {
1563     DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1564     __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
1565     ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1566   }
1567 
1568   return ret;
1569 }
1570 
1571 static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1572   const vixl32::Register out = OutputRegister(cond);
1573   const auto condition = GenerateTest(cond, false, codegen);
1574 
1575   __ Mov(LeaveFlags, out, 0);
1576 
1577   if (out.IsLow()) {
1578     // We use the scope because of the IT block that follows.
1579     ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1580                              2 * vixl32::k16BitT32InstructionSizeInBytes,
1581                              CodeBufferCheckScope::kExactSize);
1582 
1583     __ it(condition.first);
1584     __ mov(condition.first, out, 1);
1585   } else {
1586     vixl32::Label done_label;
1587     vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
1588 
1589     __ B(condition.second, final_label, /* is_far_target= */ false);
1590     __ Mov(out, 1);
1591 
1592     if (done_label.IsReferenced()) {
1593       __ Bind(&done_label);
1594     }
1595   }
1596 }
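// For a low output register the materialization above is thus the branchless
//   MOV out, #0 ; IT <cond> ; MOV<cond> out, #1
// sequence; when the output is not a low register it falls back to branching over `MOV out, #1`
// on the opposite condition.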
1597 
1598 static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1599   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1600 
1601   const LocationSummary* const locations = cond->GetLocations();
1602   IfCondition condition = cond->GetCondition();
1603   const vixl32::Register out = OutputRegister(cond);
1604   const Location left = locations->InAt(0);
1605   const Location right = locations->InAt(1);
1606   vixl32::Register left_high = HighRegisterFrom(left);
1607   vixl32::Register left_low = LowRegisterFrom(left);
1608   vixl32::Register temp;
1609   UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1610 
1611   if (right.IsConstant()) {
1612     IfCondition opposite = cond->GetOppositeCondition();
1613     const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
1614                                                      &condition,
1615                                                      &opposite);
1616     Operand right_high = High32Bits(value);
1617     Operand right_low = Low32Bits(value);
1618 
1619     // The output uses Location::kNoOutputOverlap.
1620     if (out.Is(left_high)) {
1621       std::swap(left_low, left_high);
1622       std::swap(right_low, right_high);
1623     }
1624 
1625     __ Sub(out, left_low, right_low);
1626     temp = temps.Acquire();
1627     __ Sub(temp, left_high, right_high);
1628   } else {
1629     DCHECK(right.IsRegisterPair());
1630     temp = temps.Acquire();
1631     __ Sub(temp, left_high, HighRegisterFrom(right));
1632     __ Sub(out, left_low, LowRegisterFrom(right));
1633   }
1634 
1635   // Need to check after calling AdjustConstantForCondition().
1636   DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
1637 
1638   if (condition == kCondNE && out.IsLow()) {
1639     __ Orrs(out, out, temp);
1640 
1641     // We use the scope because of the IT block that follows.
1642     ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1643                              2 * vixl32::k16BitT32InstructionSizeInBytes,
1644                              CodeBufferCheckScope::kExactSize);
1645 
1646     __ it(ne);
1647     __ mov(ne, out, 1);
1648   } else {
1649     __ Orr(out, out, temp);
1650     codegen->GenerateConditionWithZero(condition, out, out, temp);
1651   }
1652 }
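// In other words, equality of two 64-bit values is reduced to a zero test of
// (left_lo - right_lo) | (left_hi - right_hi), which is zero if and only if both halves match.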
1653 
1654 static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1655   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1656 
1657   const LocationSummary* const locations = cond->GetLocations();
1658   IfCondition condition = cond->GetCondition();
1659   const vixl32::Register out = OutputRegister(cond);
1660   const Location left = locations->InAt(0);
1661   const Location right = locations->InAt(1);
1662 
1663   if (right.IsConstant()) {
1664     IfCondition opposite = cond->GetOppositeCondition();
1665 
1666     // Comparisons against 0 are common enough to deserve special attention.
1667     if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
1668       switch (condition) {
1669         case kCondNE:
1670         case kCondA:
1671           if (out.IsLow()) {
1672             // We only care if both input registers are 0 or not.
1673             __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
1674 
1675             // We use the scope because of the IT block that follows.
1676             ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1677                                      2 * vixl32::k16BitT32InstructionSizeInBytes,
1678                                      CodeBufferCheckScope::kExactSize);
1679 
1680             __ it(ne);
1681             __ mov(ne, out, 1);
1682             return;
1683           }
1684 
1685           FALLTHROUGH_INTENDED;
1686         case kCondEQ:
1687         case kCondBE:
1688           // We only care if both input registers are 0 or not.
1689           __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
1690           codegen->GenerateConditionWithZero(condition, out, out);
1691           return;
1692         case kCondLT:
1693         case kCondGE:
1694           // We only care about the sign bit.
1695           FALLTHROUGH_INTENDED;
1696         case kCondAE:
1697         case kCondB:
1698           codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
1699           return;
1700         case kCondLE:
1701         case kCondGT:
1702         default:
1703           break;
1704       }
1705     }
1706   }
1707 
1708   // If `out` is a low register, then the GenerateConditionGeneric()
1709   // function generates a shorter code sequence that is still branchless.
1710   if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
1711     GenerateEqualLong(cond, codegen);
1712     return;
1713   }
1714 
1715   GenerateConditionGeneric(cond, codegen);
1716 }
1717 
1718 static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
1719                                                     CodeGeneratorARMVIXL* codegen) {
1720   const DataType::Type type = cond->GetLeft()->GetType();
1721 
1722   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1723 
1724   if (type == DataType::Type::kInt64) {
1725     GenerateConditionLong(cond, codegen);
1726     return;
1727   }
1728 
1729   IfCondition condition = cond->GetCondition();
1730   vixl32::Register in = InputRegisterAt(cond, 0);
1731   const vixl32::Register out = OutputRegister(cond);
1732   const Location right = cond->GetLocations()->InAt(1);
1733   int64_t value;
1734 
1735   if (right.IsConstant()) {
1736     IfCondition opposite = cond->GetOppositeCondition();
1737 
1738     value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
1739 
1740     // Comparisons against 0 are common enough to deserve special attention.
1741     if (value == 0) {
1742       switch (condition) {
1743         case kCondNE:
1744         case kCondA:
1745           if (out.IsLow() && out.Is(in)) {
1746             __ Cmp(out, 0);
1747 
1748             // We use the scope because of the IT block that follows.
1749             ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1750                                      2 * vixl32::k16BitT32InstructionSizeInBytes,
1751                                      CodeBufferCheckScope::kExactSize);
1752 
1753             __ it(ne);
1754             __ mov(ne, out, 1);
1755             return;
1756           }
1757 
1758           FALLTHROUGH_INTENDED;
1759         case kCondEQ:
1760         case kCondBE:
1761         case kCondLT:
1762         case kCondGE:
1763         case kCondAE:
1764         case kCondB:
1765           codegen->GenerateConditionWithZero(condition, out, in);
1766           return;
1767         case kCondLE:
1768         case kCondGT:
1769         default:
1770           break;
1771       }
1772     }
1773   }
1774 
1775   if (condition == kCondEQ || condition == kCondNE) {
1776     Operand operand(0);
1777 
1778     if (right.IsConstant()) {
1779       operand = Operand::From(value);
1780     } else if (out.Is(RegisterFrom(right))) {
1781       // Avoid 32-bit instructions if possible.
1782       operand = InputOperandAt(cond, 0);
1783       in = RegisterFrom(right);
1784     } else {
1785       operand = InputOperandAt(cond, 1);
1786     }
1787 
1788     if (condition == kCondNE && out.IsLow()) {
1789       __ Subs(out, in, operand);
1790 
1791       // We use the scope because of the IT block that follows.
1792       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1793                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1794                                CodeBufferCheckScope::kExactSize);
1795 
1796       __ it(ne);
1797       __ mov(ne, out, 1);
1798     } else {
1799       __ Sub(out, in, operand);
1800       codegen->GenerateConditionWithZero(condition, out, out);
1801     }
1802 
1803     return;
1804   }
1805 
1806   GenerateConditionGeneric(cond, codegen);
1807 }
1808 
1809 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
1810   const DataType::Type type = constant->GetType();
1811   bool ret = false;
1812 
1813   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1814 
1815   if (type == DataType::Type::kInt64) {
1816     const uint64_t value = Uint64ConstantFrom(constant);
1817 
1818     ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
1819   } else {
1820     ret = IsUint<8>(Int32ConstantFrom(constant));
1821   }
1822 
1823   return ret;
1824 }
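// For example, 0x42 is encodable, and so is the 64-bit constant 0x0000007f00000001 because both of
// its 32-bit halves fit in 8 unsigned bits, whereas 0x100 is not.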
1825 
1826 static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
1827   DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
1828 
1829   if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
1830     return Location::ConstantLocation(constant);
1831   }
1832 
1833   return Location::RequiresRegister();
1834 }
1835 
1836 static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
1837   // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
1838   // we check that we are not dealing with floating-point output (there is no
1839   // 16-bit VMOV encoding).
1840   if (!out.IsRegister() && !out.IsRegisterPair()) {
1841     return false;
1842   }
1843 
1844   // For constants, we also check that the output is in one or two low registers,
1845   // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
1846   // MOV encoding can be used.
1847   if (src.IsConstant()) {
1848     if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
1849       return false;
1850     }
1851 
1852     if (out.IsRegister()) {
1853       if (!RegisterFrom(out).IsLow()) {
1854         return false;
1855       }
1856     } else {
1857       DCHECK(out.IsRegisterPair());
1858 
1859       if (!HighRegisterFrom(out).IsLow()) {
1860         return false;
1861       }
1862     }
1863   }
1864 
1865   return true;
1866 }
1867 
1868 #undef __
1869 
1870 vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
1871                                                    vixl32::Label* final_label) {
1872   DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
1873   DCHECK_IMPLIES(instruction->IsInvoke(), !instruction->GetLocations()->CanCall());
1874 
1875   const HBasicBlock* const block = instruction->GetBlock();
1876   const HLoopInformation* const info = block->GetLoopInformation();
1877   HInstruction* const next = instruction->GetNext();
1878 
1879   // Avoid a branch to a branch.
1880   if (next->IsGoto() && (info == nullptr ||
1881                          !info->IsBackEdge(*block) ||
1882                          !info->HasSuspendCheck())) {
1883     final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
1884   }
1885 
1886   return final_label;
1887 }
1888 
1889 namespace detail {
1890 
1891 // Mark which intrinsics we don't have handcrafted code for.
1892 template <Intrinsics T>
1893 struct IsUnimplemented {
1894   bool is_unimplemented = false;
1895 };
1896 
1897 #define TRUE_OVERRIDE(Name)                     \
1898   template <>                                   \
1899   struct IsUnimplemented<Intrinsics::k##Name> { \
1900     bool is_unimplemented = true;               \
1901   };
1902 UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
1903 #undef TRUE_OVERRIDE
1904 
1905 static constexpr bool kIsIntrinsicUnimplemented[] = {
1906     false,  // kNone
1907 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1908     IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1909     ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1910 #undef IS_UNIMPLEMENTED
1911 };
1912 
1913 }  // namespace detail
1914 
1915 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
1916                                            const CompilerOptions& compiler_options,
1917                                            OptimizingCompilerStats* stats)
1918     : CodeGenerator(graph,
1919                     kNumberOfCoreRegisters,
1920                     kNumberOfSRegisters,
1921                     kNumberOfRegisterPairs,
1922                     kCoreCalleeSaves.GetList(),
1923                     ComputeSRegisterListMask(kFpuCalleeSaves),
1924                     compiler_options,
1925                     stats,
1926                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1927       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1928       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1929       location_builder_(graph, this),
1930       instruction_visitor_(graph, this),
1931       move_resolver_(graph->GetAllocator(), this),
1932       assembler_(graph->GetAllocator()),
1933       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1934       app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1935       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1936       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1937       app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1938       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1939       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1940       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1941       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1942       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1943       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1944       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1945       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1946       uint32_literals_(std::less<uint32_t>(),
1947                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1948       jit_string_patches_(StringReferenceValueComparator(),
1949                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1950       jit_class_patches_(TypeReferenceValueComparator(),
1951                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1952       jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1953                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1954   // Always save the LR register to mimic Quick.
1955   AddAllocatedRegister(Location::RegisterLocation(LR));
1956   // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
1957   // S0-S31, which alias to D0-D15.
1958   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
1959   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
1960 }
1961 
1962 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
1963   uint32_t num_entries = switch_instr_->GetNumEntries();
1964   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1965 
1966   // We are about to use the assembler to place literals directly. Make sure we have enough
1967   // underlying code buffer and we have generated a jump table of the right size, using
1968   // codegen->GetVIXLAssembler()->GetBuffer().Align();
1969   ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
1970                          num_entries * sizeof(int32_t),
1971                          CodeBufferCheckScope::kMaximumSize);
1972   // TODO(VIXL): Check that using lower case bind is fine here.
1973   codegen->GetVIXLAssembler()->bind(&table_start_);
1974   for (uint32_t i = 0; i < num_entries; i++) {
1975     codegen->GetVIXLAssembler()->place(bb_addresses_[i].get());
1976   }
1977 }
1978 
1979 void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
1980   uint32_t num_entries = switch_instr_->GetNumEntries();
1981   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1982 
1983   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
1984   for (uint32_t i = 0; i < num_entries; i++) {
1985     vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
1986     DCHECK(target_label->IsBound());
1987     int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
1988     // When doing a BX to an address, the lower bit needs to be set to 1 in T32 (Thumb state).
1989     if (codegen->GetVIXLAssembler()->IsUsingT32()) {
1990       jump_offset++;
1991     }
1992     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
1993     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
1994 
1995     bb_addresses_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer());
1996   }
1997 }
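// Each jump table entry therefore holds the offset of its target block relative to `table_start_`,
// with the low bit set when assembling T32 so that the computed branch address keeps the Thumb bit.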
1998 
1999 void CodeGeneratorARMVIXL::FixJumpTables() {
2000   for (auto&& jump_table : jump_tables_) {
2001     jump_table->FixTable(this);
2002   }
2003 }
2004 
2005 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->  // NOLINT
2006 
2007 void CodeGeneratorARMVIXL::Finalize() {
2008   FixJumpTables();
2009 
2010   // Emit JIT baker read barrier slow paths.
2011   DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
2012   for (auto& entry : jit_baker_read_barrier_slow_paths_) {
2013     uint32_t encoded_data = entry.first;
2014     vixl::aarch32::Label* slow_path_entry = &entry.second.label;
2015     __ Bind(slow_path_entry);
2016     CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
2017   }
2018 
2019   GetAssembler()->FinalizeCode();
2020   CodeGenerator::Finalize();
2021 
2022   // Verify Baker read barrier linker patches.
2023   if (kIsDebugBuild) {
2024     ArrayRef<const uint8_t> code(GetCode());
2025     for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
2026       DCHECK(info.label.IsBound());
2027       uint32_t literal_offset = info.label.GetLocation();
2028       DCHECK_ALIGNED(literal_offset, 2u);
2029 
2030       auto GetInsn16 = [&code](uint32_t offset) {
2031         DCHECK_ALIGNED(offset, 2u);
2032         return (static_cast<uint32_t>(code[offset + 0]) << 0) +
2033                (static_cast<uint32_t>(code[offset + 1]) << 8);
2034       };
2035       auto GetInsn32 = [=](uint32_t offset) {
2036         return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
2037       };
2038 
2039       uint32_t encoded_data = info.custom_data;
2040       BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
2041       // Check that the next instruction matches the expected LDR.
2042       switch (kind) {
2043         case BakerReadBarrierKind::kField: {
2044           BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2045           if (width == BakerReadBarrierWidth::kWide) {
2046             DCHECK_GE(code.size() - literal_offset, 8u);
2047             uint32_t next_insn = GetInsn32(literal_offset + 4u);
2048             // LDR (immediate), encoding T3, with correct base_reg.
2049             CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
2050             const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2051             CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
2052           } else {
2053             DCHECK_GE(code.size() - literal_offset, 6u);
2054             uint32_t next_insn = GetInsn16(literal_offset + 4u);
2055             // LDR (immediate), encoding T1, with correct base_reg.
2056             CheckValidReg(next_insn & 0x7u);  // Check destination register.
2057             const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2058             CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
2059           }
2060           break;
2061         }
2062         case BakerReadBarrierKind::kArray: {
2063           DCHECK_GE(code.size() - literal_offset, 8u);
2064           uint32_t next_insn = GetInsn32(literal_offset + 4u);
2065           // LDR (register), encoding T2, with correct base_reg and LSL #2 (LDR Rt, [Rn, Rm, LSL #2]).
2066           CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
2067           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2068           CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
2069           CheckValidReg(next_insn & 0xf);  // Check index register
2070           break;
2071         }
2072         case BakerReadBarrierKind::kGcRoot: {
2073           BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2074           if (width == BakerReadBarrierWidth::kWide) {
2075             DCHECK_GE(literal_offset, 4u);
2076             uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2077             // LDR (immediate), encoding T3, with correct root_reg.
2078             const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2079             CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
2080           } else {
2081             DCHECK_GE(literal_offset, 2u);
2082             uint32_t prev_insn = GetInsn16(literal_offset - 2u);
2083             const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2084             // Usually LDR (immediate), encoding T1, with correct root_reg but we may have
2085             // a `MOV marked, old_value` for intrinsic CAS where `marked` is a low register.
2086             if ((prev_insn & 0xff87u) != (0x4600 | root_reg)) {
2087               CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
2088             }
2089           }
2090           break;
2091         }
2092         case BakerReadBarrierKind::kIntrinsicCas: {
2093           DCHECK_GE(literal_offset, 4u);
2094           uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2095           // MOV (register), encoding T3, with correct root_reg.
2096           const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2097           DCHECK_GE(root_reg, 8u);  // Used only for high registers.
2098           CHECK_EQ(prev_insn & 0xfffffff0u, 0xea4f0000u | (root_reg << 8));
2099           break;
2100         }
2101         default:
2102           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
2103           UNREACHABLE();
2104       }
2105     }
2106   }
2107 }
2108 
2109 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
2110   // Stack register, LR and PC are always reserved.
2111   blocked_core_registers_[SP] = true;
2112   blocked_core_registers_[LR] = true;
2113   blocked_core_registers_[PC] = true;
2114 
2115   // TODO: We don't need to reserve marking-register for userfaultfd GC. But
2116   // that would require some work in the assembler code as the right GC is
2117   // chosen at load-time and not compile time.
2118   if (kReserveMarkingRegister) {
2119     // Reserve marking register.
2120     blocked_core_registers_[MR] = true;
2121   }
2122 
2123   // Reserve thread register.
2124   blocked_core_registers_[TR] = true;
2125 
2126   // Reserve temp register.
2127   blocked_core_registers_[IP] = true;
2128 
2129   if (GetGraph()->IsDebuggable()) {
2130     // Stubs do not save callee-save floating point registers. If the graph
2131     // is debuggable, we need to deal with these registers differently. For
2132     // now, just block them.
2133     for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
2134          i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
2135          ++i) {
2136       blocked_fpu_registers_[i] = true;
2137     }
2138   }
2139 }
2140 
2141 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
2142                                                                  CodeGeneratorARMVIXL* codegen)
2143       : InstructionCodeGenerator(graph, codegen),
2144         assembler_(codegen->GetAssembler()),
2145         codegen_(codegen) {}
2146 
2147 void CodeGeneratorARMVIXL::ComputeSpillMask() {
2148   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
2149   DCHECK_NE(core_spill_mask_ & (1u << kLrCode), 0u)
2150       << "At least the return address register must be saved";
2151   // 16-bit PUSH/POP (T1) can save/restore just the LR/PC.
2152   DCHECK(GetVIXLAssembler()->IsUsingT32());
2153   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
2154   // We use vpush and vpop for saving and restoring floating point registers, which take
2155   // a SRegister and the number of registers to save/restore after that SRegister. We
2156   // therefore update the `fpu_spill_mask_` to also contain those registers not allocated,
2157   // but in the range.
2158   if (fpu_spill_mask_ != 0) {
2159     uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
2160     uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
2161     for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
2162       fpu_spill_mask_ |= (1 << i);
2163     }
2164   }
2165 }
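// For example, if only s16 and s19 were allocated among the FP callee-saves, s17 and s18 are added
// to `fpu_spill_mask_` as well, so that a single contiguous `vpush {s16-s19}` / `vpop {s16-s19}`
// can be used on frame entry and exit.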
2166 
2167 void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) {
2168   LocationSummary* locations = new (GetGraph()->GetAllocator())
2169       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2170   locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType()));
2171   // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2172   // compute the address to store the timestamp counter.
2173   locations->AddRegisterTemps(3);
2174 }
2175 
2176 void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) {
2177   LocationSummary* locations = instruction->GetLocations();
2178   vixl32::Register addr = RegisterFrom(locations->GetTemp(0));
2179   vixl32::Register value = RegisterFrom(locations->GetTemp(1));
2180   vixl32::Register tmp = RegisterFrom(locations->GetTemp(2));
2181 
2182   SlowPathCodeARMVIXL* slow_path =
2183       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
2184   codegen_->AddSlowPath(slow_path);
2185 
2186   if (instruction->IsMethodExitHook()) {
2187     // Check whether we need to check if the caller requires a deoptimization. Strictly speaking,
2188     // it would be sufficient to check whether the CheckCallerForDeopt bit is set, but it is faster
2189     // to check for any non-zero value. The kCHA bit isn't used in debuggable runtimes, since CHA
2190     // optimization is disabled there. The other bit is used when this method itself requires a
2191     // deoptimization due to redefinition, so it is safe to just check for a non-zero value here.
2192     GetAssembler()->LoadFromOffset(
2193         kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
2194     __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel());
2195   }
2196 
2197   MemberOffset offset = instruction->IsMethodExitHook() ?
2198       instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
2199       instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
2200   uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
2201   __ Mov(addr, address + offset.Int32Value());
2202   __ Ldrb(value, MemOperand(addr, 0));
2203   __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners);
2204   // Check if there are any trace method entry / exit listeners. If not, continue.
2205   __ B(lt, slow_path->GetExitLabel());
2206   // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
2207   // If yes, just take the slow path.
2208   __ B(gt, slow_path->GetEntryLabel());
2209 
2210   // Check if there is space in the buffer to store a new entry; if not, take the slow path.
2211   uint32_t trace_buffer_curr_entry_offset =
2212       Thread::TraceBufferCurrPtrOffset<kArmPointerSize>().Int32Value();
2213   vixl32::Register curr_entry = value;
2214   vixl32::Register init_entry = addr;
2215   __ Ldr(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));
2216   __ Subs(curr_entry, curr_entry, static_cast<uint32_t>(kNumEntriesForWallClock * sizeof(void*)));
2217   __ Ldr(init_entry, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue()));
2218   __ Cmp(curr_entry, init_entry);
2219   __ B(lt, slow_path->GetEntryLabel());
2220 
2221   // Update the index in the `Thread`.
2222   __ Str(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));
2223 
2224   // Record method pointer and trace action.
2225   __ Ldr(tmp, MemOperand(sp, 0));
2226   // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
2227   // so there is no need to set the bits since they are already 0.
2228   if (instruction->IsMethodExitHook()) {
2229     DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
2230     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
2231     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
2232     __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
2233   }
2234   __ Str(tmp, MemOperand(curr_entry, kMethodOffsetInBytes));
2235 
2236   vixl32::Register tmp1 = init_entry;
2237   // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34.
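  // The MRRC below (coproc 15, opc1 1, CRm c14) reads the 64-bit virtual counter CNTVCT, which
  // serves as the trace timestamp.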
2238   __ Mrrc(/* lower 32-bit */ tmp,
2239           /* higher 32-bit */ tmp1,
2240           /* coproc= */ 15,
2241           /* opc1= */ 1,
2242           /* crm= */ 14);
2243   static_assert(kHighTimestampOffsetInBytes ==
2244                 kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize));
2245   __ Strd(tmp, tmp1, MemOperand(curr_entry, kTimestampOffsetInBytes));
2246   __ Bind(slow_path->GetExitLabel());
2247 }
2248 
2249 void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instruction) {
2250   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2251   DCHECK(codegen_->RequiresCurrentMethod());
2252   GenerateMethodEntryExitHook(instruction);
2253 }
2254 
2255 void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
2256   LocationSummary* locations = new (GetGraph()->GetAllocator())
2257       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2258   // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2259   // compute the address to store the timestamp counter.
2260   locations->AddRegisterTemps(3);
2261 }
2262 
2263 void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) {
2264   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2265   DCHECK(codegen_->RequiresCurrentMethod());
2266   GenerateMethodEntryExitHook(instruction);
2267 }
2268 
2269 void CodeGeneratorARMVIXL::MaybeIncrementHotness(HSuspendCheck* suspend_check,
2270                                                  bool is_frame_entry) {
2271   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
2272     UseScratchRegisterScope temps(GetVIXLAssembler());
2273     vixl32::Register temp = temps.Acquire();
2274     static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
2275     if (!is_frame_entry) {
2276       __ Push(vixl32::Register(kMethodRegister));
2277       GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2278       GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
2279     }
2280     // Load with zero extend to clear the high bits for integer overflow check.
2281     __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2282     vixl::aarch32::Label done;
2283     DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
2284     __ CompareAndBranchIfZero(temp, &done, /* is_far_target= */ false);
2285     __ Add(temp, temp, -1);
2286     __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2287     __ Bind(&done);
2288     if (!is_frame_entry) {
2289       __ Pop(vixl32::Register(kMethodRegister));
2290       GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2291     }
2292   }
2293 
2294   if (GetGraph()->IsCompilingBaseline() &&
2295       GetGraph()->IsUsefulOptimizing() &&
2296       !Runtime::Current()->IsAotCompiler()) {
2297     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2298     DCHECK(info != nullptr);
2299     DCHECK(!HasEmptyFrame());
2300     uint32_t address = reinterpret_cast32<uint32_t>(info);
2301     UseScratchRegisterScope temps(GetVIXLAssembler());
2302     vixl32::Register tmp = temps.Acquire();
2303     SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(
2304         suspend_check, /* profiling_info= */ lr);
2305     AddSlowPath(slow_path);
2306     __ Mov(lr, address);
2307     __ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2308     __ Adds(tmp, tmp, -1);
2309     __ B(cc, slow_path->GetEntryLabel());
2310     __ Strh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2311     __ Bind(slow_path->GetExitLabel());
2312   }
2313 }
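// Note on the baseline path above: `Adds(tmp, tmp, -1)` leaves the carry flag clear only when the
// counter was already zero, so `B(cc, ...)` enters CompileOptimizedSlowPathARMVIXL exactly when the
// hotness budget is exhausted; otherwise the decremented counter is stored back.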
2314 
2315 void CodeGeneratorARMVIXL::GenerateFrameEntry() {
2316   bool skip_overflow_check =
2317       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
2318   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
2319 
2320   // Check if we need to generate the clinit check. We will jump to the
2321   // resolution stub if the class is not initialized and the executing thread is
2322   // not the thread initializing it.
2323   // We do this before constructing the frame to get the correct stack trace if
2324   // an exception is thrown.
2325   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
2326     UseScratchRegisterScope temps(GetVIXLAssembler());
2327     vixl32::Label resolution;
2328     vixl32::Label memory_barrier;
2329 
2330     // Check if we're visibly initialized.
2331 
2332     vixl32::Register temp1 = temps.Acquire();
2333     // Use r4 as other temporary register.
2334     DCHECK(!blocked_core_registers_[R4]);
2335     DCHECK(!kCoreCalleeSaves.Includes(r4));
2336     vixl32::Register temp2 = r4;
2337     for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2338       DCHECK(!reg.Is(r4));
2339     }
2340 
2341     // We don't emit a read barrier here to save on code size. We rely on the
2342     // resolution trampoline to do a suspend check before re-entering this code.
2343     __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
2344     __ Ldrb(temp2, MemOperand(temp1, kClassStatusByteOffset));
2345     __ Cmp(temp2, kShiftedVisiblyInitializedValue);
2346     __ B(cs, &frame_entry_label_);
2347 
2348     // Check if we're initialized and jump to code that does a memory barrier if
2349     // so.
2350     __ Cmp(temp2, kShiftedInitializedValue);
2351     __ B(cs, &memory_barrier);
2352 
2353     // Check if we're initializing and the thread initializing is the one
2354     // executing the code.
2355     __ Cmp(temp2, kShiftedInitializingValue);
2356     __ B(lo, &resolution);
2357 
2358     __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
2359     __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
2360     __ Cmp(temp1, temp2);
2361     __ B(eq, &frame_entry_label_);
2362     __ Bind(&resolution);
2363 
2364     // Jump to the resolution stub.
2365     ThreadOffset32 entrypoint_offset =
2366         GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
2367     __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
2368     __ Bx(temp1);
2369 
2370     __ Bind(&memory_barrier);
2371     GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2372   }
2373 
2374   __ Bind(&frame_entry_label_);
2375 
2376   if (HasEmptyFrame()) {
2377     // Ensure that the CFI opcode list is not empty.
2378     GetAssembler()->cfi().Nop();
2379     MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2380     return;
2381   }
2382 
2383   // Make sure the frame size isn't unreasonably large.
2384   DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
2385 
2386   if (!skip_overflow_check) {
2387     // Using r4 instead of IP saves 2 bytes.
2388     UseScratchRegisterScope temps(GetVIXLAssembler());
2389     vixl32::Register temp;
2390     // TODO: Remove this check when R4 is made a callee-save register
2391     // in ART compiled code (b/72801708). Currently we need to make
2392     // sure r4 is not blocked, e.g. by the special-purpose
2393     // TestCodeGeneratorARMVIXL; we also assert that r4 is available
2394     // here.
2395     if (!blocked_core_registers_[R4]) {
2396       for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2397         DCHECK(!reg.Is(r4));
2398       }
2399       DCHECK(!kCoreCalleeSaves.Includes(r4));
2400       temp = r4;
2401     } else {
2402       temp = temps.Acquire();
2403     }
2404     __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
2405     // The load must immediately precede RecordPcInfo.
2406     ExactAssemblyScope aas(GetVIXLAssembler(),
2407                            vixl32::kMaxInstructionSizeInBytes,
2408                            CodeBufferCheckScope::kMaximumSize);
2409     __ ldr(temp, MemOperand(temp));
2410     RecordPcInfoForFrameOrBlockEntry();
2411   }
2412 
2413   uint32_t frame_size = GetFrameSize();
2414   uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2415   uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2416   if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2417       core_spills_offset <= 3u * kArmWordSize) {
2418     // Do a single PUSH for core registers including the method and up to two
2419     // filler registers. Then store the single FP spill if any.
2420     // (The worst case is when the method is not required and we actually
2421     // store 3 extra registers but they are stored in the same properly
2422     // aligned 16-byte chunk where we're already writing anyway.)
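    // For example (illustrative masks): with core_spill_mask_ = {r5, r6, lr} and
    // core_spills_offset == 8, extra_regs is {r0, r1} and the single `push {r0, r1, r5, r6, lr}`
    // spills the callee-saves and reserves the two filler words, r0 being the method register.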
2423     DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2424     uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize);
2425     DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(core_spill_mask_));
2426     __ Push(RegisterList(core_spill_mask_ | extra_regs));
2427     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
2428     GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2429                                            core_spills_offset,
2430                                            core_spill_mask_,
2431                                            kArmWordSize);
2432     if (fpu_spill_mask_ != 0u) {
2433       DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2434       vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2435       GetAssembler()->StoreSToOffset(sreg, sp, fp_spills_offset);
2436       GetAssembler()->cfi().RelOffset(DWARFReg(sreg), /*offset=*/ fp_spills_offset);
2437     }
2438   } else {
2439     __ Push(RegisterList(core_spill_mask_));
2440     GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
2441     GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2442                                            /*offset=*/ 0,
2443                                            core_spill_mask_,
2444                                            kArmWordSize);
2445     if (fpu_spill_mask_ != 0) {
2446       uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2447 
2448       // Check that list is contiguous.
2449       DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
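      // (After shifting out the trailing zeros, exactly POPCOUNT(fpu_spill_mask_) low bits
      // must remain set, i.e. the spilled S registers form one contiguous range for VPUSH.)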
2450 
2451       __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2452       GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
2453       GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0),
2454                                              /*offset=*/ 0,
2455                                              fpu_spill_mask_,
2456                                              kArmWordSize);
2457     }
2458 
2459     // Adjust SP and save the current method if we need it. Note that we do
2460     // not save the method in HCurrentMethod, as the instruction might have
2461     // been removed in the SSA graph.
2462     if (RequiresCurrentMethod() && fp_spills_offset <= 3 * kArmWordSize) {
2463       DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2464       __ Push(RegisterList(MaxInt<uint32_t>(fp_spills_offset / kArmWordSize)));
2465       GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset);
2466     } else {
2467       IncreaseFrame(fp_spills_offset);
2468       if (RequiresCurrentMethod()) {
2469         GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2470       }
2471     }
2472   }
2473 
2474   if (GetGraph()->HasShouldDeoptimizeFlag()) {
2475     UseScratchRegisterScope temps(GetVIXLAssembler());
2476     vixl32::Register temp = temps.Acquire();
2477     // Initialize should_deoptimize flag to 0.
2478     __ Mov(temp, 0);
2479     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
2480   }
2481 
2482   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2483   MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
2484 }
2485 
2486 void CodeGeneratorARMVIXL::GenerateFrameExit() {
2487   if (HasEmptyFrame()) {
2488     __ Bx(lr);
2489     return;
2490   }
2491 
2492   // Pop LR into PC to return.
2493   DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
2494   uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
2495 
2496   uint32_t frame_size = GetFrameSize();
2497   uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2498   uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2499   if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2500       // r4 is blocked by TestCodeGeneratorARMVIXL used by some tests.
2501       core_spills_offset <= (blocked_core_registers_[r4.GetCode()] ? 2u : 3u) * kArmWordSize) {
2502     // Load the FP spill if any and then do a single POP including the method
2503     // and up to two filler registers. If we have no FP spills, this also has
2504     // the advantage that we do not need to emit CFI directives.
2505     if (fpu_spill_mask_ != 0u) {
2506       DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2507       vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2508       GetAssembler()->cfi().RememberState();
2509       GetAssembler()->LoadSFromOffset(sreg, sp, fp_spills_offset);
2510       GetAssembler()->cfi().Restore(DWARFReg(sreg));
2511     }
2512     // Clobber registers r2-r4 as they are caller-save in ART managed ABI and
2513     // never hold the return value.
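    // Illustrative example: with core_spills_offset = 12 (and r4 not blocked), the mask is
    // 0b111 << 2 = {r2, r3, r4}, so the POP below releases the method slot and padding
    // together with the callee saves, without a separate SP adjustment.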
2514     uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize) << r2.GetCode();
2515     DCHECK_EQ(extra_regs & kCoreCalleeSaves.GetList(), 0u);
2516     DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(pop_mask));
2517     __ Pop(RegisterList(pop_mask | extra_regs));
2518     if (fpu_spill_mask_ != 0u) {
2519       GetAssembler()->cfi().RestoreState();
2520     }
2521   } else {
2522     GetAssembler()->cfi().RememberState();
2523     DecreaseFrame(fp_spills_offset);
2524     if (fpu_spill_mask_ != 0) {
2525       uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2526 
2527       // Check that list is contiguous.
2528       DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2529 
2530       __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2531       GetAssembler()->cfi().AdjustCFAOffset(
2532           -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
2533       GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
2534     }
2535     __ Pop(RegisterList(pop_mask));
2536     GetAssembler()->cfi().RestoreState();
2537     GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
2538   }
2539 }
2540 
2541 void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
2542   __ Bind(GetLabelOf(block));
2543 }
2544 
2545 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2546   switch (type) {
2547     case DataType::Type::kReference:
2548     case DataType::Type::kBool:
2549     case DataType::Type::kUint8:
2550     case DataType::Type::kInt8:
2551     case DataType::Type::kUint16:
2552     case DataType::Type::kInt16:
2553     case DataType::Type::kInt32: {
2554       uint32_t index = gp_index_++;
2555       uint32_t stack_index = stack_index_++;
2556       if (index < calling_convention.GetNumberOfRegisters()) {
2557         return LocationFrom(calling_convention.GetRegisterAt(index));
2558       } else {
2559         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2560       }
2561     }
2562 
2563     case DataType::Type::kInt64: {
2564       uint32_t index = gp_index_;
2565       uint32_t stack_index = stack_index_;
2566       gp_index_ += 2;
2567       stack_index_ += 2;
2568       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2569         if (calling_convention.GetRegisterAt(index).Is(r1)) {
2570           // Skip R1, and use R2_R3 instead.
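          // Illustrative example: for (int, long) arguments the int takes r1 and the long
          // takes the aligned pair r2_r3; for a leading long argument, r1 is skipped and
          // the pair r2_r3 is used, leaving r1 unused.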
2571           gp_index_++;
2572           index++;
2573         }
2574       }
2575       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2576         DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
2577                   calling_convention.GetRegisterAt(index + 1).GetCode());
2578 
2579         return LocationFrom(calling_convention.GetRegisterAt(index),
2580                             calling_convention.GetRegisterAt(index + 1));
2581       } else {
2582         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2583       }
2584     }
2585 
2586     case DataType::Type::kFloat32: {
2587       uint32_t stack_index = stack_index_++;
2588       if (float_index_ % 2 == 0) {
2589         float_index_ = std::max(double_index_, float_index_);
2590       }
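      // Single-precision arguments back-fill S registers left free by earlier doubles.
      // Illustrative example: for (float, double, float) the locations are s0, s2/s3,
      // and then s1.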
2591       if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
2592         return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
2593       } else {
2594         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2595       }
2596     }
2597 
2598     case DataType::Type::kFloat64: {
2599       double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
2600       uint32_t stack_index = stack_index_;
2601       stack_index_ += 2;
2602       if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
2603         uint32_t index = double_index_;
2604         double_index_ += 2;
2605         Location result = LocationFrom(
2606           calling_convention.GetFpuRegisterAt(index),
2607           calling_convention.GetFpuRegisterAt(index + 1));
2608         DCHECK(ExpectedPairLayout(result));
2609         return result;
2610       } else {
2611         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2612       }
2613     }
2614 
2615     case DataType::Type::kUint32:
2616     case DataType::Type::kUint64:
2617     case DataType::Type::kVoid:
2618       LOG(FATAL) << "Unexpected parameter type " << type;
2619       UNREACHABLE();
2620   }
2621   return Location::NoLocation();
2622 }
2623 
2624 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
2625   switch (type) {
2626     case DataType::Type::kReference:
2627     case DataType::Type::kBool:
2628     case DataType::Type::kUint8:
2629     case DataType::Type::kInt8:
2630     case DataType::Type::kUint16:
2631     case DataType::Type::kInt16:
2632     case DataType::Type::kUint32:
2633     case DataType::Type::kInt32: {
2634       return LocationFrom(r0);
2635     }
2636 
2637     case DataType::Type::kFloat32: {
2638       return LocationFrom(s0);
2639     }
2640 
2641     case DataType::Type::kUint64:
2642     case DataType::Type::kInt64: {
2643       return LocationFrom(r0, r1);
2644     }
2645 
2646     case DataType::Type::kFloat64: {
2647       return LocationFrom(s0, s1);
2648     }
2649 
2650     case DataType::Type::kVoid:
2651       return Location::NoLocation();
2652   }
2653 
2654   UNREACHABLE();
2655 }
2656 
2657 Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2658   return LocationFrom(kMethodRegister);
2659 }
2660 
2661 Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2662   DCHECK_NE(type, DataType::Type::kReference);
2663 
2664   // Native ABI uses the same registers as managed, except that the method register r0
2665   // is a normal argument.
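  // Illustrative example: for a native (jlong, jint) signature, the 64-bit argument is
  // aligned to an even GPR index and takes the pair (r0, r1), and the following int
  // takes r2.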
2666   Location location = Location::NoLocation();
2667   if (DataType::Is64BitType(type)) {
2668     gpr_index_ = RoundUp(gpr_index_, 2u);
2669     stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
2670     if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2671       location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
2672                               kParameterCoreRegistersVIXL[gpr_index_]);
2673       gpr_index_ += 2u;
2674     }
2675   } else {
2676     if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2677       location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
2678       ++gpr_index_;
2679     }
2680   }
2681   if (location.IsInvalid()) {
2682     if (DataType::Is64BitType(type)) {
2683       location = Location::DoubleStackSlot(stack_offset_);
2684       stack_offset_ += 2 * kFramePointerSize;
2685     } else {
2686       location = Location::StackSlot(stack_offset_);
2687       stack_offset_ += kFramePointerSize;
2688     }
2689 
2690     if (for_register_allocation_) {
2691       location = Location::Any();
2692     }
2693   }
2694   return location;
2695 }
2696 
2697 Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
2698     const {
2699   // We perform conversion to the managed ABI return register after the call if needed.
2700   InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
2701   return dex_calling_convention.GetReturnLocation(type);
2702 }
2703 
2704 Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2705   // Pass the method in the hidden argument R4.
2706   return Location::RegisterLocation(R4);
2707 }
2708 
2709 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
2710   if (source.Equals(destination)) {
2711     return;
2712   }
2713   if (destination.IsRegister()) {
2714     if (source.IsRegister()) {
2715       __ Mov(RegisterFrom(destination), RegisterFrom(source));
2716     } else if (source.IsFpuRegister()) {
2717       __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
2718     } else {
2719       GetAssembler()->LoadFromOffset(kLoadWord,
2720                                      RegisterFrom(destination),
2721                                      sp,
2722                                      source.GetStackIndex());
2723     }
2724   } else if (destination.IsFpuRegister()) {
2725     if (source.IsRegister()) {
2726       __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
2727     } else if (source.IsFpuRegister()) {
2728       __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
2729     } else {
2730       GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
2731     }
2732   } else {
2733     DCHECK(destination.IsStackSlot()) << destination;
2734     if (source.IsRegister()) {
2735       GetAssembler()->StoreToOffset(kStoreWord,
2736                                     RegisterFrom(source),
2737                                     sp,
2738                                     destination.GetStackIndex());
2739     } else if (source.IsFpuRegister()) {
2740       GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
2741     } else {
2742       DCHECK(source.IsStackSlot()) << source;
2743       UseScratchRegisterScope temps(GetVIXLAssembler());
2744       vixl32::Register temp = temps.Acquire();
2745       GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
2746       GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
2747     }
2748   }
2749 }
2750 
2751 void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
2752   DCHECK(location.IsRegister());
2753   __ Mov(RegisterFrom(location), value);
2754 }
2755 
2756 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
2757   // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
2758   // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
2759   HParallelMove move(GetGraph()->GetAllocator());
2760   move.AddMove(src, dst, dst_type, nullptr);
2761   GetMoveResolver()->EmitNativeCode(&move);
2762 }
2763 
2764 void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
2765   if (location.IsRegister()) {
2766     locations->AddTemp(location);
2767   } else if (location.IsRegisterPair()) {
2768     locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
2769     locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
2770   } else {
2771     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2772   }
2773 }
2774 
2775 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
2776                                          HInstruction* instruction,
2777                                          SlowPathCode* slow_path) {
2778   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2779 
2780   ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint);
2781   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2782   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2783   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2784   if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2785     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2786     // Ensure the pc position is recorded immediately after the `blx` instruction.
2787     // blx in T32 has only a 16-bit encoding, hence the stricter size check for the scope.
2788     ExactAssemblyScope aas(GetVIXLAssembler(),
2789                            vixl32::k16BitT32InstructionSizeInBytes,
2790                            CodeBufferCheckScope::kExactSize);
2791     __ blx(lr);
2792     if (EntrypointRequiresStackMap(entrypoint)) {
2793       RecordPcInfo(instruction, slow_path);
2794     }
2795   } else {
2796     // Ensure the pc position is recorded immediately after the `bl` instruction.
2797     ExactAssemblyScope aas(GetVIXLAssembler(),
2798                            vixl32::k32BitT32InstructionSizeInBytes,
2799                            CodeBufferCheckScope::kExactSize);
2800     EmitEntrypointThunkCall(entrypoint_offset);
2801     if (EntrypointRequiresStackMap(entrypoint)) {
2802       RecordPcInfo(instruction, slow_path);
2803     }
2804   }
2805 }
2806 
2807 void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2808                                                                HInstruction* instruction,
2809                                                                SlowPathCode* slow_path) {
2810   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2811   __ Ldr(lr, MemOperand(tr, entry_point_offset));
2812   __ Blx(lr);
2813 }
2814 
2815 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2816   if (successor->IsExitBlock()) {
2817     DCHECK(got->GetPrevious()->AlwaysThrows());
2818     return;  // no code needed
2819   }
2820 
2821   HBasicBlock* block = got->GetBlock();
2822   HInstruction* previous = got->GetPrevious();
2823   HLoopInformation* info = block->GetLoopInformation();
2824 
2825   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2826     codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2827     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2828     return;
2829   }
2830   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2831     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2832     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
2833   }
2834   if (!codegen_->GoesToNextBlock(block, successor)) {
2835     __ B(codegen_->GetLabelOf(successor));
2836   }
2837 }
2838 
2839 void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
2840   got->SetLocations(nullptr);
2841 }
2842 
2843 void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
2844   HandleGoto(got, got->GetSuccessor());
2845 }
2846 
2847 void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2848   try_boundary->SetLocations(nullptr);
2849 }
2850 
2851 void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2852   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2853   if (!successor->IsExitBlock()) {
2854     HandleGoto(try_boundary, successor);
2855   }
2856 }
2857 
2858 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
2859   exit->SetLocations(nullptr);
2860 }
2861 
2862 void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {}
2863 
2864 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
2865                                                                    vixl32::Label* true_target,
2866                                                                    vixl32::Label* false_target,
2867                                                                    bool is_far_target) {
2868   if (true_target == false_target) {
2869     DCHECK(true_target != nullptr);
2870     __ B(true_target);
2871     return;
2872   }
2873 
2874   vixl32::Label* non_fallthrough_target;
2875   bool invert;
2876   bool emit_both_branches;
2877 
2878   if (true_target == nullptr) {
2879     // The true target is fallthrough.
2880     DCHECK(false_target != nullptr);
2881     non_fallthrough_target = false_target;
2882     invert = true;
2883     emit_both_branches = false;
2884   } else {
2885     non_fallthrough_target = true_target;
2886     invert = false;
2887     // Either the false target is fallthrough, or there is no fallthrough
2888     // and both branches must be emitted.
2889     emit_both_branches = (false_target != nullptr);
2890   }
2891 
2892   const auto cond = GenerateTest(condition, invert, codegen_);
2893 
2894   __ B(cond.first, non_fallthrough_target, is_far_target);
2895 
2896   if (emit_both_branches) {
2897     // Neither target falls through, so we also need to branch to the false target.
2898     __ B(false_target);
2899   }
2900 }
2901 
2902 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
2903                                                             size_t condition_input_index,
2904                                                             vixl32::Label* true_target,
2905                                                             vixl32::Label* false_target,
2906                                                             bool far_target) {
2907   HInstruction* cond = instruction->InputAt(condition_input_index);
2908 
2909   if (true_target == nullptr && false_target == nullptr) {
2910     // Nothing to do. The code always falls through.
2911     return;
2912   } else if (cond->IsIntConstant()) {
2913     // Constant condition, statically compared against "true" (integer value 1).
2914     if (cond->AsIntConstant()->IsTrue()) {
2915       if (true_target != nullptr) {
2916         __ B(true_target);
2917       }
2918     } else {
2919       DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond);
2920       if (false_target != nullptr) {
2921         __ B(false_target);
2922       }
2923     }
2924     return;
2925   }
2926 
2927   // The following code generates these patterns:
2928   //  (1) true_target == nullptr && false_target != nullptr
2929   //        - opposite condition true => branch to false_target
2930   //  (2) true_target != nullptr && false_target == nullptr
2931   //        - condition true => branch to true_target
2932   //  (3) true_target != nullptr && false_target != nullptr
2933   //        - condition true => branch to true_target
2934   //        - branch to false_target
2935   if (IsBooleanValueOrMaterializedCondition(cond)) {
2936     // Condition has been materialized, compare the output to 0.
2937     if (kIsDebugBuild) {
2938       Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
2939       DCHECK(cond_val.IsRegister());
2940     }
2941     if (true_target == nullptr) {
2942       __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
2943                                 false_target,
2944                                 far_target);
2945     } else {
2946       __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
2947                                    true_target,
2948                                    far_target);
2949     }
2950   } else {
2951     // Condition has not been materialized. Use its inputs as the comparison and
2952     // its condition as the branch condition.
2953     HCondition* condition = cond->AsCondition();
2954 
2955     // If this is a long or FP comparison that has been folded into
2956     // the HCondition, generate the comparison directly.
2957     DataType::Type type = condition->InputAt(0)->GetType();
2958     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2959       GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
2960       return;
2961     }
2962 
2963     vixl32::Label* non_fallthrough_target;
2964     vixl32::Condition arm_cond = vixl32::Condition::None();
2965     const vixl32::Register left = InputRegisterAt(cond, 0);
2966     const Operand right = InputOperandAt(cond, 1);
2967 
2968     if (true_target == nullptr) {
2969       arm_cond = ARMCondition(condition->GetOppositeCondition());
2970       non_fallthrough_target = false_target;
2971     } else {
2972       arm_cond = ARMCondition(condition->GetCondition());
2973       non_fallthrough_target = true_target;
2974     }
2975 
2976     if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
2977       if (arm_cond.Is(eq)) {
2978         __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
2979       } else {
2980         DCHECK(arm_cond.Is(ne));
2981         __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
2982       }
2983     } else {
2984       __ Cmp(left, right);
2985       __ B(arm_cond, non_fallthrough_target, far_target);
2986     }
2987   }
2988 
2989   // If neither branch falls through (case 3), the conditional branch to `true_target`
2990   // was already emitted (case 2) and we need to emit a jump to `false_target`.
2991   if (true_target != nullptr && false_target != nullptr) {
2992     __ B(false_target);
2993   }
2994 }
2995 
2996 void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
2997   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2998   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2999     locations->SetInAt(0, Location::RequiresRegister());
3000     if (GetGraph()->IsCompilingBaseline() &&
3001         codegen_->GetCompilerOptions().ProfileBranches() &&
3002         !Runtime::Current()->IsAotCompiler()) {
3003       locations->AddTemp(Location::RequiresRegister());
3004     }
3005   }
3006 }
3007 
3008 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
3009   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3010   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3011   vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
3012       nullptr : codegen_->GetLabelOf(true_successor);
3013   vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
3014       nullptr : codegen_->GetLabelOf(false_successor);
3015   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3016     if (GetGraph()->IsCompilingBaseline() &&
3017         codegen_->GetCompilerOptions().ProfileBranches() &&
3018         !Runtime::Current()->IsAotCompiler()) {
3019       DCHECK(if_instr->InputAt(0)->IsCondition());
3020       ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3021       DCHECK(info != nullptr);
3022       BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
3023       // Currently, not all If branches are profiled.
3024       if (cache != nullptr) {
3025         uint32_t address =
3026             reinterpret_cast32<uint32_t>(cache) + BranchCache::FalseOffset().Int32Value();
3027         static_assert(
3028             BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
3029             "Unexpected offsets for BranchCache");
3030         vixl32::Label done;
3031         UseScratchRegisterScope temps(GetVIXLAssembler());
3032         vixl32::Register temp = temps.Acquire();
3033         vixl32::Register counter = RegisterFrom(if_instr->GetLocations()->GetTemp(0));
3034         vixl32::Register condition = InputRegisterAt(if_instr, 0);
3035         __ Mov(temp, address);
3036         __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
3037         __ Adds(counter, counter, 1);
3038         __ Uxth(counter, counter);
3039         __ CompareAndBranchIfZero(counter, &done);
3040         __ Strh(counter, MemOperand(temp, condition, LSL, 1));
3041         __ Bind(&done);
3042       }
3043     }
3044   }
3045   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3046 }
3047 
3048 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3049   LocationSummary* locations = new (GetGraph()->GetAllocator())
3050       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3051   InvokeRuntimeCallingConventionARMVIXL calling_convention;
3052   RegisterSet caller_saves = RegisterSet::Empty();
3053   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
3054   locations->SetCustomSlowPathCallerSaves(caller_saves);
3055   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3056     locations->SetInAt(0, Location::RequiresRegister());
3057   }
3058 }
3059 
3060 void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3061   SlowPathCodeARMVIXL* slow_path =
3062       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
3063   GenerateTestAndBranch(deoptimize,
3064                         /* condition_input_index= */ 0,
3065                         slow_path->GetEntryLabel(),
3066                         /* false_target= */ nullptr);
3067 }
3068 
3069 void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3070   LocationSummary* locations = new (GetGraph()->GetAllocator())
3071       LocationSummary(flag, LocationSummary::kNoCall);
3072   locations->SetOut(Location::RequiresRegister());
3073 }
3074 
3075 void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3076   GetAssembler()->LoadFromOffset(kLoadWord,
3077                                  OutputRegister(flag),
3078                                  sp,
3079                                  codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
3080 }
3081 
3082 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
3083   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3084   const bool is_floating_point = DataType::IsFloatingPointType(select->GetType());
3085 
3086   if (is_floating_point) {
3087     locations->SetInAt(0, Location::RequiresFpuRegister());
3088     locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
3089   } else {
3090     locations->SetInAt(0, Location::RequiresRegister());
3091     locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
3092   }
3093 
3094   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3095     locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
3096     // The code generator handles overlap with the values, but not with the condition.
3097     locations->SetOut(Location::SameAsFirstInput());
3098   } else if (is_floating_point) {
3099     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3100   } else {
3101     if (!locations->InAt(1).IsConstant()) {
3102       locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
3103     }
3104 
3105     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3106   }
3107 }
3108 
3109 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
3110   HInstruction* const condition = select->GetCondition();
3111   const LocationSummary* const locations = select->GetLocations();
3112   const DataType::Type type = select->GetType();
3113   const Location first = locations->InAt(0);
3114   const Location out = locations->Out();
3115   const Location second = locations->InAt(1);
3116 
3117   // In the unlucky case where the output of this instruction overlaps
3118   // with an input of an "emitted-at-use-site" condition, and the output
3119   // is not one of this instruction's own inputs, we need to fall back
3120   // to branches instead of conditional ARM instructions.
3121   bool output_overlaps_with_condition_inputs =
3122       !IsBooleanValueOrMaterializedCondition(condition) &&
3123       !out.Equals(first) &&
3124       !out.Equals(second) &&
3125       (condition->GetLocations()->InAt(0).Equals(out) ||
3126        condition->GetLocations()->InAt(1).Equals(out));
3127   DCHECK_IMPLIES(output_overlaps_with_condition_inputs, condition->IsCondition());
3128   Location src;
3129 
3130   if (condition->IsIntConstant()) {
3131     if (condition->AsIntConstant()->IsFalse()) {
3132       src = first;
3133     } else {
3134       src = second;
3135     }
3136 
3137     codegen_->MoveLocation(out, src, type);
3138     return;
3139   }
3140 
3141   if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
3142     bool invert = false;
3143 
3144     if (out.Equals(second)) {
3145       src = first;
3146       invert = true;
3147     } else if (out.Equals(first)) {
3148       src = second;
3149     } else if (second.IsConstant()) {
3150       DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
3151       src = second;
3152     } else if (first.IsConstant()) {
3153       DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
3154       src = first;
3155       invert = true;
3156     } else {
3157       src = second;
3158     }
3159 
3160     if (CanGenerateConditionalMove(out, src)) {
3161       if (!out.Equals(first) && !out.Equals(second)) {
3162         codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
3163       }
3164 
3165       std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
3166 
3167       if (IsBooleanValueOrMaterializedCondition(condition)) {
3168         __ Cmp(InputRegisterAt(select, 2), 0);
3169         cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
3170       } else {
3171         cond = GenerateTest(condition->AsCondition(), invert, codegen_);
3172       }
3173 
3174       const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
3175       // We use the scope because of the IT block that follows.
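      // Each `it` + conditional `mov` pair below is two 16-bit T32 instructions, hence
      // two instructions for a single register and four for a register pair.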
3176       ExactAssemblyScope guard(GetVIXLAssembler(),
3177                                instr_count * vixl32::k16BitT32InstructionSizeInBytes,
3178                                CodeBufferCheckScope::kExactSize);
3179 
3180       if (out.IsRegister()) {
3181         __ it(cond.first);
3182         __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
3183       } else {
3184         DCHECK(out.IsRegisterPair());
3185 
3186         Operand operand_high(0);
3187         Operand operand_low(0);
3188 
3189         if (src.IsConstant()) {
3190           const int64_t value = Int64ConstantFrom(src);
3191 
3192           operand_high = High32Bits(value);
3193           operand_low = Low32Bits(value);
3194         } else {
3195           DCHECK(src.IsRegisterPair());
3196           operand_high = HighRegisterFrom(src);
3197           operand_low = LowRegisterFrom(src);
3198         }
3199 
3200         __ it(cond.first);
3201         __ mov(cond.first, LowRegisterFrom(out), operand_low);
3202         __ it(cond.first);
3203         __ mov(cond.first, HighRegisterFrom(out), operand_high);
3204       }
3205 
3206       return;
3207     }
3208   }
3209 
3210   vixl32::Label* false_target = nullptr;
3211   vixl32::Label* true_target = nullptr;
3212   vixl32::Label select_end;
3213   vixl32::Label other_case;
3214   vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
3215 
3216   if (out.Equals(second)) {
3217     true_target = target;
3218     src = first;
3219   } else {
3220     false_target = target;
3221     src = second;
3222 
3223     if (!out.Equals(first)) {
3224       if (output_overlaps_with_condition_inputs) {
3225         false_target = &other_case;
3226       } else {
3227         codegen_->MoveLocation(out, first, type);
3228       }
3229     }
3230   }
3231 
3232   GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false);
3233   codegen_->MoveLocation(out, src, type);
3234   if (output_overlaps_with_condition_inputs) {
3235     __ B(target);
3236     __ Bind(&other_case);
3237     codegen_->MoveLocation(out, first, type);
3238   }
3239 
3240   if (select_end.IsReferenced()) {
3241     __ Bind(&select_end);
3242   }
3243 }
3244 
3245 void LocationsBuilderARMVIXL::VisitNop(HNop* nop) {
3246   new (GetGraph()->GetAllocator()) LocationSummary(nop);
3247 }
3248 
3249 void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) {
3250   // The environment recording already happened in CodeGenerator::Compile.
3251 }
3252 
3253 void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
3254   __ Claim(adjustment);
3255   GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3256 }
3257 
3258 void CodeGeneratorARMVIXL::DecreaseFrame(size_t adjustment) {
3259   __ Drop(adjustment);
3260   GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3261 }
3262 
3263 void CodeGeneratorARMVIXL::GenerateNop() {
3264   __ Nop();
3265 }
3266 
3267 // `temp` is an extra temporary register that is used for some conditions;
3268 // callers may not specify it, in which case the method will use a scratch
3269 // register instead.
3270 void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
3271                                                      vixl32::Register out,
3272                                                      vixl32::Register in,
3273                                                      vixl32::Register temp) {
3274   switch (condition) {
3275     case kCondEQ:
3276     // x <= 0 iff x == 0 when the comparison is unsigned.
3277     case kCondBE:
3278       if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
3279         temp = out;
3280       }
3281 
3282       // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
3283       // different as well.
3284       if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
3285         // temp = - in; only 0 sets the carry flag.
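        // (On ARM, a subtraction sets the carry flag when there is no borrow, and
        // 0 - in borrows for every non-zero `in`.)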
3286         __ Rsbs(temp, in, 0);
3287 
3288         if (out.Is(in)) {
3289           std::swap(in, temp);
3290         }
3291 
3292         // out = - in + in + carry = carry
3293         __ Adc(out, temp, in);
3294       } else {
3295         // If `in` is 0, it has 32 leading zeros; otherwise it has fewer.
3296         __ Clz(out, in);
3297         // Any number less than 32 logically shifted right by 5 bits results in 0;
3298         // the same operation on 32 yields 1.
3299         __ Lsr(out, out, 5);
3300       }
3301 
3302       break;
3303     case kCondNE:
3304     // x > 0 iff x != 0 when the comparison is unsigned.
3305     case kCondA: {
3306       UseScratchRegisterScope temps(GetVIXLAssembler());
3307 
3308       if (out.Is(in)) {
3309         if (!temp.IsValid() || in.Is(temp)) {
3310           temp = temps.Acquire();
3311         }
3312       } else if (!temp.IsValid() || !temp.IsLow()) {
3313         temp = out;
3314       }
3315 
3316       // temp = in - 1; only 0 does not set the carry flag.
3317       __ Subs(temp, in, 1);
3318       // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
3319       __ Sbc(out, in, temp);
3320       break;
3321     }
3322     case kCondGE:
3323       __ Mvn(out, in);
3324       in = out;
3325       FALLTHROUGH_INTENDED;
3326     case kCondLT:
3327       // We only care about the sign bit.
3328       __ Lsr(out, in, 31);
3329       break;
3330     case kCondAE:
3331       // Trivially true.
3332       __ Mov(out, 1);
3333       break;
3334     case kCondB:
3335       // Trivially false.
3336       __ Mov(out, 0);
3337       break;
3338     default:
3339       LOG(FATAL) << "Unexpected condition " << condition;
3340       UNREACHABLE();
3341   }
3342 }
3343 
3344 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
3345   LocationSummary* locations =
3346       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
3347   const DataType::Type type = cond->InputAt(0)->GetType();
3348   if (DataType::IsFloatingPointType(type)) {
3349     locations->SetInAt(0, Location::RequiresFpuRegister());
3350     locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
3351   } else {
3352     locations->SetInAt(0, Location::RequiresRegister());
3353     locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
3354   }
3355   if (!cond->IsEmittedAtUseSite()) {
3356     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3357   }
3358 }
3359 
3360 void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
3361   if (cond->IsEmittedAtUseSite()) {
3362     return;
3363   }
3364 
3365   const DataType::Type type = cond->GetLeft()->GetType();
3366 
3367   if (DataType::IsFloatingPointType(type)) {
3368     GenerateConditionGeneric(cond, codegen_);
3369     return;
3370   }
3371 
3372   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
3373 
3374   const IfCondition condition = cond->GetCondition();
3375 
3376   // A condition with only one boolean input, or with two boolean inputs that is not an
3377   // equality or inequality, results from transformations done by the instruction simplifier
3378   // and is handled as a regular condition with integral inputs.
3379   if (type == DataType::Type::kBool &&
3380       cond->GetRight()->GetType() == DataType::Type::kBool &&
3381       (condition == kCondEQ || condition == kCondNE)) {
3382     vixl32::Register left = InputRegisterAt(cond, 0);
3383     const vixl32::Register out = OutputRegister(cond);
3384     const Location right_loc = cond->GetLocations()->InAt(1);
3385 
3386     // The constant case is handled by the instruction simplifier.
3387     DCHECK(!right_loc.IsConstant());
3388 
3389     vixl32::Register right = RegisterFrom(right_loc);
3390 
3391     // Avoid 32-bit instructions if possible.
3392     if (out.Is(right)) {
3393       std::swap(left, right);
3394     }
3395 
3396     __ Eor(out, left, right);
3397 
3398     if (condition == kCondEQ) {
3399       __ Eor(out, out, 1);
3400     }
3401 
3402     return;
3403   }
3404 
3405   GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
3406 }
3407 
3408 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
3409   HandleCondition(comp);
3410 }
3411 
3412 void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) {
3413   HandleCondition(comp);
3414 }
3415 
3416 void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) {
3417   HandleCondition(comp);
3418 }
3419 
3420 void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) {
3421   HandleCondition(comp);
3422 }
3423 
3424 void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) {
3425   HandleCondition(comp);
3426 }
3427 
3428 void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) {
3429   HandleCondition(comp);
3430 }
3431 
3432 void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3433   HandleCondition(comp);
3434 }
3435 
3436 void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3437   HandleCondition(comp);
3438 }
3439 
3440 void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3441   HandleCondition(comp);
3442 }
3443 
3444 void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3445   HandleCondition(comp);
3446 }
3447 
3448 void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3449   HandleCondition(comp);
3450 }
3451 
3452 void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3453   HandleCondition(comp);
3454 }
3455 
3456 void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) {
3457   HandleCondition(comp);
3458 }
3459 
3460 void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) {
3461   HandleCondition(comp);
3462 }
3463 
3464 void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3465   HandleCondition(comp);
3466 }
3467 
3468 void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3469   HandleCondition(comp);
3470 }
3471 
3472 void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) {
3473   HandleCondition(comp);
3474 }
3475 
3476 void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) {
3477   HandleCondition(comp);
3478 }
3479 
3480 void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3481   HandleCondition(comp);
3482 }
3483 
3484 void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3485   HandleCondition(comp);
3486 }
3487 
3488 void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
3489   LocationSummary* locations =
3490       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3491   locations->SetOut(Location::ConstantLocation(constant));
3492 }
3493 
3494 void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
3495   // Will be generated at use site.
3496 }
3497 
3498 void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
3499   LocationSummary* locations =
3500       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3501   locations->SetOut(Location::ConstantLocation(constant));
3502 }
3503 
3504 void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
3505   // Will be generated at use site.
3506 }
3507 
3508 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
3509   LocationSummary* locations =
3510       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3511   locations->SetOut(Location::ConstantLocation(constant));
3512 }
3513 
3514 void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
3515   // Will be generated at use site.
3516 }
3517 
3518 void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
3519   LocationSummary* locations =
3520       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3521   locations->SetOut(Location::ConstantLocation(constant));
3522 }
3523 
3524 void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
3525     [[maybe_unused]] HFloatConstant* constant) {
3526   // Will be generated at use site.
3527 }
3528 
3529 void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
3530   LocationSummary* locations =
3531       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3532   locations->SetOut(Location::ConstantLocation(constant));
3533 }
3534 
3535 void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
3536     [[maybe_unused]] HDoubleConstant* constant) {
3537   // Will be generated at use site.
3538 }
3539 
3540 void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
3541   constructor_fence->SetLocations(nullptr);
3542 }
3543 
3544 void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
3545     [[maybe_unused]] HConstructorFence* constructor_fence) {
3546   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3547 }
3548 
3549 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3550   memory_barrier->SetLocations(nullptr);
3551 }
3552 
3553 void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3554   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
3555 }
3556 
3557 void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
3558   ret->SetLocations(nullptr);
3559 }
3560 
3561 void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
3562   codegen_->GenerateFrameExit();
3563 }
3564 
3565 void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) {
3566   LocationSummary* locations =
3567       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
3568   locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
3569 }
3570 
3571 void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret) {
3572   if (GetGraph()->IsCompilingOsr()) {
3573     // To simplify callers of an OSR method, we put the return value in both
3574     // floating point and core registers.
3575     switch (ret->InputAt(0)->GetType()) {
3576       case DataType::Type::kFloat32:
3577         __ Vmov(r0, s0);
3578         break;
3579       case DataType::Type::kFloat64:
3580         __ Vmov(r0, r1, d0);
3581         break;
3582       default:
3583         break;
3584     }
3585   }
3586   codegen_->GenerateFrameExit();
3587 }
3588 
3589 void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3590   // The trampoline uses the same calling convention as the dex calling convention,
3591   // except that instead of loading arg0/r0 with the target Method*, arg0/r0 contains
3592   // the method_idx.
3593   HandleInvoke(invoke);
3594 }
3595 
3596 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3597   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3598   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3);
3599 }
3600 
3601 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3602   // Explicit clinit checks triggered by static invokes must have been pruned by
3603   // art::PrepareForRegisterAllocation.
3604   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3605 
3606   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3607   if (intrinsic.TryDispatch(invoke)) {
3608     return;
3609   }
3610 
3611   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3612     CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
3613         /*for_register_allocation=*/ true);
3614     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3615   } else {
3616     HandleInvoke(invoke);
3617   }
3618 }
3619 
3620 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3621   if (invoke->GetLocations()->Intrinsified()) {
3622     IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
3623     intrinsic.Dispatch(invoke);
3624     return true;
3625   }
3626   return false;
3627 }
3628 
3629 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3630   // Explicit clinit checks triggered by static invokes must have been pruned by
3631   // art::PrepareForRegisterAllocation.
3632   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3633 
3634   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3635     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4);
3636     return;
3637   }
3638 
3639   LocationSummary* locations = invoke->GetLocations();
3640   codegen_->GenerateStaticOrDirectCall(
3641       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3642 
3643   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5);
3644 }
3645 
3646 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
3647   InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
3648   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3649 }
3650 
3651 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3652   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3653   if (intrinsic.TryDispatch(invoke)) {
3654     return;
3655   }
3656 
3657   HandleInvoke(invoke);
3658 }
3659 
3660 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3661   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3662     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6);
3663     return;
3664   }
3665 
3666   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3667   DCHECK(!codegen_->IsLeafMethod());
3668 
3669   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7);
3670 }
3671 
3672 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3673   HandleInvoke(invoke);
3674   // Add the hidden argument.
3675   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3676     // We cannot request r12 as it's blocked by the register allocator.
3677     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
3678   }
3679 }
3680 
3681 void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3682                                                          vixl32::Register klass) {
3683   DCHECK_EQ(r0.GetCode(), klass.GetCode());
3684   if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3685     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3686     InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3687         info, GetCompilerOptions(), instruction->AsInvoke());
3688     if (cache != nullptr) {
3689       uint32_t address = reinterpret_cast32<uint32_t>(cache);
3690       vixl32::Label done;
3691       UseScratchRegisterScope temps(GetVIXLAssembler());
3692       temps.Exclude(ip);
3693       __ Mov(r4, address);
3694       __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
3695       // Fast path for a monomorphic cache.
3696       __ Cmp(klass, ip);
3697       __ B(eq, &done, /* is_far_target= */ false);
3698       InvokeRuntime(kQuickUpdateInlineCache, instruction);
3699       __ Bind(&done);
3700     } else {
3701       // This is unexpected, but we don't guarantee stable compilation across
3702       // JIT runs, so we just warn about it.
3703       ScopedObjectAccess soa(Thread::Current());
3704       LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3705     }
3706   }
3707 }
3708 
3709 void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3710   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3711   LocationSummary* locations = invoke->GetLocations();
3712   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3713   Location receiver = locations->InAt(0);
3714   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3715 
3716   DCHECK(!receiver.IsStackSlot());
3717 
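  // In outline, the dispatch emitted below is (a sketch; offsets as used in the code):
  //   temp = receiver->klass_
  //   temp = *(temp + ImtPtrOffset)                          // IMT table pointer
  //   temp = *(temp + ImTable::OffsetOfElement(imt_index))   // ArtMethod* from the IMT slot
  //   lr   = temp->entry_point_from_quick_compiled_code_
  //   r12  = hidden argument (the interface method, used to resolve IMT conflicts)
  //   blx lr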
3718   // Ensure the pc position is recorded immediately after the `ldr` instruction.
3719   {
3720     ExactAssemblyScope aas(GetVIXLAssembler(),
3721                            vixl32::kMaxInstructionSizeInBytes,
3722                            CodeBufferCheckScope::kMaximumSize);
3723     // /* HeapReference<Class> */ temp = receiver->klass_
3724     __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset));
3725     codegen_->MaybeRecordImplicitNullCheck(invoke);
3726   }
3727   // Instead of simply (possibly) unpoisoning `temp` here, we should
3728   // emit a read barrier for the previous class reference load.
3729   // However this is not required in practice, as this is an
3730   // intermediate/temporary reference and because the current
3731   // concurrent copying collector keeps the from-space memory
3732   // intact/accessible until the end of the marking phase (though the
3733   // concurrent copying collector may not do so in the future).
3734   GetAssembler()->MaybeUnpoisonHeapReference(temp);
3735 
3736   // If we're compiling baseline, update the inline cache.
3737   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3738 
3739   GetAssembler()->LoadFromOffset(kLoadWord,
3740                                  temp,
3741                                  temp,
3742                                  mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
3743 
3744   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3745       invoke->GetImtIndex(), kArmPointerSize));
3746   // temp = temp->GetImtEntryAt(method_offset);
3747   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
3748   uint32_t entry_point =
3749       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
3750   // LR = temp->GetEntryPoint();
3751   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
3752 
3753   {
3754     // Set the hidden (in r12) argument. It is done here, right before the BLX, to prevent other
3755     // instructions from clobbering it, as they might use r12 as a scratch register.
3756     Location hidden_reg = Location::RegisterLocation(r12.GetCode());
3757     // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
3758     // so it checks if the application is using them (by passing them to the macro assembler
3759     // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
3760     // what is available, and is the opposite of the standard usage: Instead of requesting a
3761     // temporary location, it imposes an external constraint (i.e. a specific register is reserved
3762     // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
3763     // (to materialize the constant), since the destination register becomes available for such use
3764     // internally for the duration of the macro instruction.
3765     UseScratchRegisterScope temps(GetVIXLAssembler());
3766     temps.Exclude(RegisterFrom(hidden_reg));
3767     if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3768       Location current_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
3769       if (current_method.IsStackSlot()) {
3770         GetAssembler()->LoadFromOffset(
3771             kLoadWord, RegisterFrom(hidden_reg), sp, current_method.GetStackIndex());
3772       } else {
3773         __ Mov(RegisterFrom(hidden_reg), RegisterFrom(current_method));
3774       }
3775     } else if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3776       // We pass the method from the IMT in case of a conflict. This will ensure
3777       // we go into the runtime to resolve the actual method.
3778       CHECK_NE(temp.GetCode(), lr.GetCode());
3779       __ Mov(RegisterFrom(hidden_reg), temp);
3780     } else {
3781       codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3782     }
3783   }
3784   {
3785     // Ensure the pc position is recorded immediately after the `blx` instruction.
3786     // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used.
3787     ExactAssemblyScope aas(GetVIXLAssembler(),
3788                            vixl32::k16BitT32InstructionSizeInBytes,
3789                            CodeBufferCheckScope::kExactSize);
3790     // Call the method's entry point, which was loaded into LR above.
3791     __ blx(lr);
3792     codegen_->RecordPcInfo(invoke);
3793     DCHECK(!codegen_->IsLeafMethod());
3794   }
3795 
3796   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8);
3797 }
3798 
3799 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3800   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3801   if (intrinsic.TryDispatch(invoke)) {
3802     return;
3803   }
3804   HandleInvoke(invoke);
3805 }
3806 
3807 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3808   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3809     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9);
3810     return;
3811   }
3812   codegen_->GenerateInvokePolymorphicCall(invoke);
3813   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10);
3814 }
3815 
3816 void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3817   HandleInvoke(invoke);
3818 }
3819 
3820 void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3821   codegen_->GenerateInvokeCustomCall(invoke);
3822   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11);
3823 }
3824 
3825 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
3826   LocationSummary* locations =
3827       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3828   switch (neg->GetResultType()) {
3829     case DataType::Type::kInt32: {
3830       locations->SetInAt(0, Location::RequiresRegister());
3831       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3832       break;
3833     }
3834     case DataType::Type::kInt64: {
3835       locations->SetInAt(0, Location::RequiresRegister());
3836       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
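      // The output is marked as overlapping the inputs because the code generator writes
      // out.lo and out.hi before it reads in.hi (see the Rsbs/Sbc/Sub sequence), so out
      // must not be allocated to the same registers as the input pair.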
3837       break;
3838     }
3839 
3840     case DataType::Type::kFloat32:
3841     case DataType::Type::kFloat64:
3842       locations->SetInAt(0, Location::RequiresFpuRegister());
3843       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3844       break;
3845 
3846     default:
3847       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3848   }
3849 }
3850 
3851 void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
3852   LocationSummary* locations = neg->GetLocations();
3853   Location out = locations->Out();
3854   Location in = locations->InAt(0);
3855   switch (neg->GetResultType()) {
3856     case DataType::Type::kInt32:
3857       __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
3858       break;
3859 
3860     case DataType::Type::kInt64:
3861       // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
3862       __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
3863       // We cannot emit an RSC (Reverse Subtract with Carry)
3864       // instruction here, as it does not exist in the Thumb-2
3865       // instruction set.  We use the following approach
3866       // using SBC and SUB instead.
3867       //
3868       // out.hi = -C
3869       __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
3870       // out.hi = out.hi - in.hi
3871       __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
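      // For example, negating in = 0x0000000000000001:
      //   rsbs out.lo, in.lo, #0      -> out.lo = 0xFFFFFFFF, borrow (C == 0)
      //   sbc  out.hi, out.hi, out.hi -> out.hi = C - 1 = -1
      //   sub  out.hi, out.hi, in.hi  -> out.hi = 0xFFFFFFFF
      // which yields the expected result -1.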
3872       break;
3873 
3874     case DataType::Type::kFloat32:
3875     case DataType::Type::kFloat64:
3876       __ Vneg(OutputVRegister(neg), InputVRegister(neg));
3877       break;
3878 
3879     default:
3880       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3881   }
3882 }
3883 
3884 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3885   DataType::Type result_type = conversion->GetResultType();
3886   DataType::Type input_type = conversion->GetInputType();
3887   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3888       << input_type << " -> " << result_type;
3889 
3890   // The float-to-long, double-to-long and long-to-float type conversions
3891   // rely on a call to the runtime.
3892   LocationSummary::CallKind call_kind =
3893       (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3894         && result_type == DataType::Type::kInt64)
3895        || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32))
3896       ? LocationSummary::kCallOnMainOnly
3897       : LocationSummary::kNoCall;
3898   LocationSummary* locations =
3899       new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3900 
3901   switch (result_type) {
3902     case DataType::Type::kUint8:
3903     case DataType::Type::kInt8:
3904     case DataType::Type::kUint16:
3905     case DataType::Type::kInt16:
3906       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3907       locations->SetInAt(0, Location::RequiresRegister());
3908       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3909       break;
3910 
3911     case DataType::Type::kInt32:
3912       switch (input_type) {
3913         case DataType::Type::kInt64:
3914           locations->SetInAt(0, Location::Any());
3915           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3916           break;
3917 
3918         case DataType::Type::kFloat32:
3919           locations->SetInAt(0, Location::RequiresFpuRegister());
3920           locations->SetOut(Location::RequiresRegister());
3921           locations->AddTemp(Location::RequiresFpuRegister());
3922           break;
3923 
3924         case DataType::Type::kFloat64:
3925           locations->SetInAt(0, Location::RequiresFpuRegister());
3926           locations->SetOut(Location::RequiresRegister());
3927           locations->AddTemp(Location::RequiresFpuRegister());
3928           break;
3929 
3930         default:
3931           LOG(FATAL) << "Unexpected type conversion from " << input_type
3932                      << " to " << result_type;
3933       }
3934       break;
3935 
3936     case DataType::Type::kInt64:
3937       switch (input_type) {
3938         case DataType::Type::kBool:
3939         case DataType::Type::kUint8:
3940         case DataType::Type::kInt8:
3941         case DataType::Type::kUint16:
3942         case DataType::Type::kInt16:
3943         case DataType::Type::kInt32:
3944           locations->SetInAt(0, Location::RequiresRegister());
3945           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3946           break;
3947 
3948         case DataType::Type::kFloat32: {
3949           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3950           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3951           locations->SetOut(LocationFrom(r0, r1));
3952           break;
3953         }
3954 
3955         case DataType::Type::kFloat64: {
3956           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3957           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
3958                                              calling_convention.GetFpuRegisterAt(1)));
3959           locations->SetOut(LocationFrom(r0, r1));
3960           break;
3961         }
3962 
3963         default:
3964           LOG(FATAL) << "Unexpected type conversion from " << input_type
3965                      << " to " << result_type;
3966       }
3967       break;
3968 
3969     case DataType::Type::kFloat32:
3970       switch (input_type) {
3971         case DataType::Type::kBool:
3972         case DataType::Type::kUint8:
3973         case DataType::Type::kInt8:
3974         case DataType::Type::kUint16:
3975         case DataType::Type::kInt16:
3976         case DataType::Type::kInt32:
3977           locations->SetInAt(0, Location::RequiresRegister());
3978           locations->SetOut(Location::RequiresFpuRegister());
3979           break;
3980 
3981         case DataType::Type::kInt64: {
3982           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3983           locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
3984                                              calling_convention.GetRegisterAt(1)));
3985           locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3986           break;
3987         }
3988 
3989         case DataType::Type::kFloat64:
3990           locations->SetInAt(0, Location::RequiresFpuRegister());
3991           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3992           break;
3993 
3994         default:
3995           LOG(FATAL) << "Unexpected type conversion from " << input_type
3996                      << " to " << result_type;
3997       }
3998       break;
3999 
4000     case DataType::Type::kFloat64:
4001       switch (input_type) {
4002         case DataType::Type::kBool:
4003         case DataType::Type::kUint8:
4004         case DataType::Type::kInt8:
4005         case DataType::Type::kUint16:
4006         case DataType::Type::kInt16:
4007         case DataType::Type::kInt32:
4008           locations->SetInAt(0, Location::RequiresRegister());
4009           locations->SetOut(Location::RequiresFpuRegister());
4010           break;
4011 
4012         case DataType::Type::kInt64:
4013           locations->SetInAt(0, Location::RequiresRegister());
4014           locations->SetOut(Location::RequiresFpuRegister());
4015           locations->AddTemp(Location::RequiresFpuRegister());
4016           locations->AddTemp(Location::RequiresFpuRegister());
4017           break;
4018 
4019         case DataType::Type::kFloat32:
4020           locations->SetInAt(0, Location::RequiresFpuRegister());
4021           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4022           break;
4023 
4024         default:
4025           LOG(FATAL) << "Unexpected type conversion from " << input_type
4026                      << " to " << result_type;
4027       }
4028       break;
4029 
4030     default:
4031       LOG(FATAL) << "Unexpected type conversion from " << input_type
4032                  << " to " << result_type;
4033   }
4034 }
4035 
4036 void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
4037   LocationSummary* locations = conversion->GetLocations();
4038   Location out = locations->Out();
4039   Location in = locations->InAt(0);
4040   DataType::Type result_type = conversion->GetResultType();
4041   DataType::Type input_type = conversion->GetInputType();
4042   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
4043       << input_type << " -> " << result_type;
4044   switch (result_type) {
4045     case DataType::Type::kUint8:
4046       switch (input_type) {
4047         case DataType::Type::kInt8:
4048         case DataType::Type::kUint16:
4049         case DataType::Type::kInt16:
4050         case DataType::Type::kInt32:
4051           __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4052           break;
4053         case DataType::Type::kInt64:
4054           __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4055           break;
4056 
4057         default:
4058           LOG(FATAL) << "Unexpected type conversion from " << input_type
4059                      << " to " << result_type;
4060       }
4061       break;
4062 
4063     case DataType::Type::kInt8:
4064       switch (input_type) {
4065         case DataType::Type::kUint8:
4066         case DataType::Type::kUint16:
4067         case DataType::Type::kInt16:
4068         case DataType::Type::kInt32:
4069           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4070           break;
4071         case DataType::Type::kInt64:
4072           __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4073           break;
4074 
4075         default:
4076           LOG(FATAL) << "Unexpected type conversion from " << input_type
4077                      << " to " << result_type;
4078       }
4079       break;
4080 
4081     case DataType::Type::kUint16:
4082       switch (input_type) {
4083         case DataType::Type::kInt8:
4084         case DataType::Type::kInt16:
4085         case DataType::Type::kInt32:
4086           __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4087           break;
4088         case DataType::Type::kInt64:
4089           __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4090           break;
4091 
4092         default:
4093           LOG(FATAL) << "Unexpected type conversion from " << input_type
4094                      << " to " << result_type;
4095       }
4096       break;
4097 
4098     case DataType::Type::kInt16:
4099       switch (input_type) {
4100         case DataType::Type::kUint16:
4101         case DataType::Type::kInt32:
4102           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4103           break;
4104         case DataType::Type::kInt64:
4105           __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4106           break;
4107 
4108         default:
4109           LOG(FATAL) << "Unexpected type conversion from " << input_type
4110                      << " to " << result_type;
4111       }
4112       break;
4113 
4114     case DataType::Type::kInt32:
4115       switch (input_type) {
4116         case DataType::Type::kInt64:
4117           DCHECK(out.IsRegister());
4118           if (in.IsRegisterPair()) {
4119             __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
4120           } else if (in.IsDoubleStackSlot()) {
4121             GetAssembler()->LoadFromOffset(kLoadWord,
4122                                            OutputRegister(conversion),
4123                                            sp,
4124                                            in.GetStackIndex());
4125           } else {
4126             DCHECK(in.IsConstant());
4127             DCHECK(in.GetConstant()->IsLongConstant());
4128             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
4129             __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
4130           }
4131           break;
4132 
4133         case DataType::Type::kFloat32: {
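          // Note: VCVT to S32 truncates toward zero and saturates out-of-range values
          // (with NaN converting to 0), which matches the Java float-to-int semantics,
          // so no extra fix-up is needed here or in the kFloat64 case below.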
4134           vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
4135           __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
4136           __ Vmov(OutputRegister(conversion), temp);
4137           break;
4138         }
4139 
4140         case DataType::Type::kFloat64: {
4141           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4142           __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
4143           __ Vmov(OutputRegister(conversion), temp_s);
4144           break;
4145         }
4146 
4147         default:
4148           LOG(FATAL) << "Unexpected type conversion from " << input_type
4149                      << " to " << result_type;
4150       }
4151       break;
4152 
4153     case DataType::Type::kInt64:
4154       switch (input_type) {
4155         case DataType::Type::kBool:
4156         case DataType::Type::kUint8:
4157         case DataType::Type::kInt8:
4158         case DataType::Type::kUint16:
4159         case DataType::Type::kInt16:
4160         case DataType::Type::kInt32:
4161           DCHECK(out.IsRegisterPair());
4162           DCHECK(in.IsRegister());
4163           __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
4164           // Sign extension.
4165           __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
4166           break;
4167 
4168         case DataType::Type::kFloat32:
4169           codegen_->InvokeRuntime(kQuickF2l, conversion);
4170           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
4171           break;
4172 
4173         case DataType::Type::kFloat64:
4174           codegen_->InvokeRuntime(kQuickD2l, conversion);
4175           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
4176           break;
4177 
4178         default:
4179           LOG(FATAL) << "Unexpected type conversion from " << input_type
4180                      << " to " << result_type;
4181       }
4182       break;
4183 
4184     case DataType::Type::kFloat32:
4185       switch (input_type) {
4186         case DataType::Type::kBool:
4187         case DataType::Type::kUint8:
4188         case DataType::Type::kInt8:
4189         case DataType::Type::kUint16:
4190         case DataType::Type::kInt16:
4191         case DataType::Type::kInt32:
4192           __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
4193           __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
4194           break;
4195 
4196         case DataType::Type::kInt64:
4197           codegen_->InvokeRuntime(kQuickL2f, conversion);
4198           CheckEntrypointTypes<kQuickL2f, float, int64_t>();
4199           break;
4200 
4201         case DataType::Type::kFloat64:
4202           __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
4203           break;
4204 
4205         default:
4206           LOG(FATAL) << "Unexpected type conversion from " << input_type
4207                      << " to " << result_type;
4208       }
4209       break;
4210 
4211     case DataType::Type::kFloat64:
4212       switch (input_type) {
4213         case DataType::Type::kBool:
4214         case DataType::Type::kUint8:
4215         case DataType::Type::kInt8:
4216         case DataType::Type::kUint16:
4217         case DataType::Type::kInt16:
4218         case DataType::Type::kInt32:
4219           __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
4220           __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
4221           break;
4222 
4223         case DataType::Type::kInt64: {
4224           vixl32::Register low = LowRegisterFrom(in);
4225           vixl32::Register high = HighRegisterFrom(in);
4226           vixl32::SRegister out_s = LowSRegisterFrom(out);
4227           vixl32::DRegister out_d = DRegisterFrom(out);
4228           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4229           vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
4230           vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
4231 
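          // In effect, this computes
          //   out_d = (double) (int32_t) high * 2^32 + (double) (uint32_t) low
          // where the multiplication by 2^32 is exact, so only the final accumulate rounds.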
4232           // temp_d = int-to-double(high)
4233           __ Vmov(temp_s, high);
4234           __ Vcvt(F64, S32, temp_d, temp_s);
4235           // constant_d = k2Pow32EncodingForDouble
4236           __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
4237           // out_d = unsigned-to-double(low)
4238           __ Vmov(out_s, low);
4239           __ Vcvt(F64, U32, out_d, out_s);
4240           // out_d += temp_d * constant_d
4241           __ Vmla(F64, out_d, temp_d, constant_d);
4242           break;
4243         }
4244 
4245         case DataType::Type::kFloat32:
4246           __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
4247           break;
4248 
4249         default:
4250           LOG(FATAL) << "Unexpected type conversion from " << input_type
4251                      << " to " << result_type;
4252       }
4253       break;
4254 
4255     default:
4256       LOG(FATAL) << "Unexpected type conversion from " << input_type
4257                  << " to " << result_type;
4258   }
4259 }
4260 
4261 void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) {
4262   LocationSummary* locations =
4263       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
4264   switch (add->GetResultType()) {
4265     case DataType::Type::kInt32: {
4266       locations->SetInAt(0, Location::RequiresRegister());
4267       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
4268       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4269       break;
4270     }
4271 
4272     case DataType::Type::kInt64: {
4273       locations->SetInAt(0, Location::RequiresRegister());
4274       locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
4275       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4276       break;
4277     }
4278 
4279     case DataType::Type::kFloat32:
4280     case DataType::Type::kFloat64: {
4281       locations->SetInAt(0, Location::RequiresFpuRegister());
4282       locations->SetInAt(1, Location::RequiresFpuRegister());
4283       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4284       break;
4285     }
4286 
4287     default:
4288       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4289   }
4290 }
4291 
4292 void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) {
4293   LocationSummary* locations = add->GetLocations();
4294   Location out = locations->Out();
4295   Location first = locations->InAt(0);
4296   Location second = locations->InAt(1);
4297 
4298   switch (add->GetResultType()) {
4299     case DataType::Type::kInt32: {
4300       __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1));
4301       break;
4302     }
4303 
4304     case DataType::Type::kInt64: {
4305       if (second.IsConstant()) {
4306         uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4307         GenerateAddLongConst(out, first, value);
4308       } else {
4309         DCHECK(second.IsRegisterPair());
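        // 64-bit addition as a carry chain: add the low words and set the carry flag,
        // then add the high words together with that carry.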
4310         __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4311         __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4312       }
4313       break;
4314     }
4315 
4316     case DataType::Type::kFloat32:
4317     case DataType::Type::kFloat64:
4318       __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
4319       break;
4320 
4321     default:
4322       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4323   }
4324 }
4325 
4326 void LocationsBuilderARMVIXL::VisitSub(HSub* sub) {
4327   LocationSummary* locations =
4328       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
4329   switch (sub->GetResultType()) {
4330     case DataType::Type::kInt32: {
4331       locations->SetInAt(0, Location::RequiresRegister());
4332       locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
4333       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4334       break;
4335     }
4336 
4337     case DataType::Type::kInt64: {
4338       locations->SetInAt(0, Location::RequiresRegister());
4339       locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
4340       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4341       break;
4342     }
4343     case DataType::Type::kFloat32:
4344     case DataType::Type::kFloat64: {
4345       locations->SetInAt(0, Location::RequiresFpuRegister());
4346       locations->SetInAt(1, Location::RequiresFpuRegister());
4347       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4348       break;
4349     }
4350     default:
4351       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4352   }
4353 }
4354 
4355 void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) {
4356   LocationSummary* locations = sub->GetLocations();
4357   Location out = locations->Out();
4358   Location first = locations->InAt(0);
4359   Location second = locations->InAt(1);
4360   switch (sub->GetResultType()) {
4361     case DataType::Type::kInt32: {
4362       __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
4363       break;
4364     }
4365 
4366     case DataType::Type::kInt64: {
4367       if (second.IsConstant()) {
4368         uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4369         GenerateAddLongConst(out, first, -value);
4370       } else {
4371         DCHECK(second.IsRegisterPair());
4372         __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4373         __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4374       }
4375       break;
4376     }
4377 
4378     case DataType::Type::kFloat32:
4379     case DataType::Type::kFloat64:
4380       __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
4381       break;
4382 
4383     default:
4384       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4385   }
4386 }
4387 
4388 void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
4389   LocationSummary* locations =
4390       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4391   switch (mul->GetResultType()) {
4392     case DataType::Type::kInt32:
4393     case DataType::Type::kInt64:  {
4394       locations->SetInAt(0, Location::RequiresRegister());
4395       locations->SetInAt(1, Location::RequiresRegister());
4396       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4397       break;
4398     }
4399 
4400     case DataType::Type::kFloat32:
4401     case DataType::Type::kFloat64: {
4402       locations->SetInAt(0, Location::RequiresFpuRegister());
4403       locations->SetInAt(1, Location::RequiresFpuRegister());
4404       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4405       break;
4406     }
4407 
4408     default:
4409       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4410   }
4411 }
4412 
4413 void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
4414   LocationSummary* locations = mul->GetLocations();
4415   Location out = locations->Out();
4416   Location first = locations->InAt(0);
4417   Location second = locations->InAt(1);
4418   switch (mul->GetResultType()) {
4419     case DataType::Type::kInt32: {
4420       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4421       break;
4422     }
4423     case DataType::Type::kInt64: {
4424       vixl32::Register out_hi = HighRegisterFrom(out);
4425       vixl32::Register out_lo = LowRegisterFrom(out);
4426       vixl32::Register in1_hi = HighRegisterFrom(first);
4427       vixl32::Register in1_lo = LowRegisterFrom(first);
4428       vixl32::Register in2_hi = HighRegisterFrom(second);
4429       vixl32::Register in2_lo = LowRegisterFrom(second);
4430 
4431       // Extra checks to protect against register overlaps caused by the existence of R1_R2.
4432       // The algorithm is wrong if out.hi is either in1.lo or in2.lo
4433       // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2).
4434       DCHECK(!out_hi.Is(in1_lo));
4435       DCHECK(!out_hi.Is(in2_lo));
4436 
4437       // input: in1 - 64 bits, in2 - 64 bits
4438       // output: out
4439       // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
4440       // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
4441       // parts: out.lo = (in1.lo * in2.lo)[31:0]
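      // The in1.hi * in2.hi term is omitted: it only contributes to bits 64 and above,
      // which are not part of the 64-bit result.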
4442 
4443       UseScratchRegisterScope temps(GetVIXLAssembler());
4444       vixl32::Register temp = temps.Acquire();
4445       // temp <- in1.lo * in2.hi
4446       __ Mul(temp, in1_lo, in2_hi);
4447       // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo
4448       __ Mla(out_hi, in1_hi, in2_lo, temp);
4449       // out.lo <- (in1.lo * in2.lo)[31:0];
4450       __ Umull(out_lo, temp, in1_lo, in2_lo);
4451       // out.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
4452       __ Add(out_hi, out_hi, temp);
4453       break;
4454     }
4455 
4456     case DataType::Type::kFloat32:
4457     case DataType::Type::kFloat64:
4458       __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
4459       break;
4460 
4461     default:
4462       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4463   }
4464 }
4465 
4466 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4467   DCHECK(instruction->IsDiv() || instruction->IsRem());
4468   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4469 
4470   Location second = instruction->GetLocations()->InAt(1);
4471   DCHECK(second.IsConstant());
4472 
4473   vixl32::Register out = OutputRegister(instruction);
4474   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4475   int32_t imm = Int32ConstantFrom(second);
4476   DCHECK(imm == 1 || imm == -1);
4477 
4478   if (instruction->IsRem()) {
4479     __ Mov(out, 0);
4480   } else {
4481     if (imm == 1) {
4482       __ Mov(out, dividend);
4483     } else {
4484       __ Rsb(out, dividend, 0);
4485     }
4486   }
4487 }
4488 
4489 void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
4490   DCHECK(instruction->IsDiv() || instruction->IsRem());
4491   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4492 
4493   LocationSummary* locations = instruction->GetLocations();
4494   Location second = locations->InAt(1);
4495   DCHECK(second.IsConstant());
4496 
4497   vixl32::Register out = OutputRegister(instruction);
4498   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4499   int32_t imm = Int32ConstantFrom(second);
4500   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4501   int ctz_imm = CTZ(abs_imm);
4502 
4503   auto generate_div_code = [this, imm, ctz_imm](vixl32::Register out, vixl32::Register in) {
4504     __ Asr(out, in, ctz_imm);
4505     if (imm < 0) {
4506       __ Rsb(out, out, 0);
4507     }
4508   };
4509 
4510   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
4511     // No need to adjust the result for non-negative dividends or the INT32_MIN dividend.
4512     // NOTE: The generated code for HDiv/HRem correctly works for the INT32_MIN dividend:
4513     //   imm == 2
4514     //     HDiv
4515     //      add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
4516     //      asr out, out(0x80000001), #1 => out = 0xc0000000
4517     //      This is the same as 'asr out, dividend(0x80000000), #1'
4518     //
4519     //   imm > 2
4520     //     HDiv
4521     //      asr out, dividend(0x80000000), #31 => out = -1
4522     //      add out, dividend(0x80000000), out(-1), lsr #(32 - ctz_imm) => out = 0b10..01..1,
4523     //          where the number of the rightmost 1s is ctz_imm.
4524     //      asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
4525     //          leftmost 1s is ctz_imm + 1.
4526     //      This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
4527     //
4528     //   imm == INT32_MIN
4529     //     HDiv
4530     //      asr out, dividend(0x80000000), #31 => out = -1
4531     //      add out, dividend(0x80000000), out(-1), lsr #1 => out = 0xc0000000
4532     //      asr out, out(0xc0000000), #31 => out = -1
4533     //      rsb out, out(-1), #0 => out = 1
4534     //      This is the same as
4535     //        asr out, dividend(0x80000000), #31
4536     //        rsb out, out, #0
4537     //
4538     //
4539     //   INT_MIN % imm must be 0 for any imm that is a power of 2. 'and' and 'ubfx' work only with bits
4540     //   0..30 of a dividend. For INT32_MIN those bits are zeros. So 'and' and 'ubfx' always
4541     //   produce zero.
4542     if (instruction->IsDiv()) {
4543       generate_div_code(out, dividend);
4544     } else {
4545       if (GetVIXLAssembler()->IsModifiedImmediate(abs_imm - 1)) {
4546         __ And(out, dividend, abs_imm - 1);
4547       } else {
4548         __ Ubfx(out, dividend, 0, ctz_imm);
4549       }
4550       return;
4551     }
4552   } else {
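    // For a negative dividend, add (2^ctz_imm - 1) before the arithmetic shift so that
    // the division rounds toward zero, e.g. -7 / 4: (-7 + 3) >> 2 == -1.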
4553     vixl32::Register add_right_input = dividend;
4554     if (ctz_imm > 1) {
4555       __ Asr(out, dividend, 31);
4556       add_right_input = out;
4557     }
4558     __ Add(out, dividend, Operand(add_right_input, vixl32::LSR, 32 - ctz_imm));
4559 
4560     if (instruction->IsDiv()) {
4561       generate_div_code(out, out);
4562     } else {
4563       __ Bfc(out, 0, ctz_imm);
4564       __ Sub(out, dividend, out);
4565     }
4566   }
4567 }
4568 
4569 void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4570   DCHECK(instruction->IsDiv() || instruction->IsRem());
4571   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4572 
4573   LocationSummary* locations = instruction->GetLocations();
4574   Location second = locations->InAt(1);
4575   DCHECK(second.IsConstant());
4576 
4577   vixl32::Register out = OutputRegister(instruction);
4578   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4579   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
4580   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
4581   int32_t imm = Int32ConstantFrom(second);
4582 
4583   int64_t magic;
4584   int shift;
4585   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
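  // This is the usual multiply-by-magic-number technique (cf. Hacker's Delight, chapter 10):
  // roughly, quotient = (int32_t) (((int64_t) dividend * magic) >> (32 + shift)),
  // followed by a sign correction for negative dividends.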
4586 
4587   auto generate_unsigned_div_code =[this, magic, shift](vixl32::Register out,
4588                                                         vixl32::Register dividend,
4589                                                         vixl32::Register temp1,
4590                                                         vixl32::Register temp2) {
4591     // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4592     __ Mov(temp1, static_cast<int32_t>(magic));
4593     if (magic > 0 && shift == 0) {
4594       __ Smull(temp2, out, dividend, temp1);
4595     } else {
4596       __ Smull(temp2, temp1, dividend, temp1);
4597       if (magic < 0) {
4598         // The negative magic M = static_cast<int>(m) means that the multiplier m is greater
4599         // than INT32_MAX. In such a case shift is never 0.
4600         // Proof:
4601         //   m = (2^p + d - 2^p % d) / d, where p = 32 + shift, d > 2
4602         //
4603         //   If shift == 0, m = (2^32 + d - 2^32 % d) / d =
4604         //   = (2^32 + d - (2^32 - (2^32 / d) * d)) / d =
4605         //   = (d + (2^32 / d) * d) / d = 1 + (2^32 / d), here '/' is the integer division.
4606         //
4607         //   1 + (2^32 / d) is decreasing when d is increasing.
4608         //   The maximum is 1 431 655 766, when d == 3. This value is less than INT32_MAX.
4609         //   The minimum is 3, when d == 2^31 - 1.
4610         //   So for all values of d in [3, INT32_MAX], m with p == 32 is in [3, INT32_MAX) and
4611         //   is never less than 0.
4612         __ Add(temp1, temp1, dividend);
4613       }
4614       DCHECK_NE(shift, 0);
4615       __ Lsr(out, temp1, shift);
4616     }
4617   };
4618 
4619   if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
4620     // No need to adjust the result for a non-negative dividend and a positive divisor.
4621     if (instruction->IsDiv()) {
4622       generate_unsigned_div_code(out, dividend, temp1, temp2);
4623     } else {
4624       generate_unsigned_div_code(temp1, dividend, temp1, temp2);
4625       __ Mov(temp2, imm);
4626       __ Mls(out, temp1, temp2, dividend);
4627     }
4628   } else {
4629     // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4630     __ Mov(temp1, static_cast<int32_t>(magic));
4631     __ Smull(temp2, temp1, dividend, temp1);
4632 
4633     if (imm > 0 && magic < 0) {
4634       __ Add(temp1, temp1, dividend);
4635     } else if (imm < 0 && magic > 0) {
4636       __ Sub(temp1, temp1, dividend);
4637     }
4638 
4639     if (shift != 0) {
4640       __ Asr(temp1, temp1, shift);
4641     }
4642 
4643     if (instruction->IsDiv()) {
4644       __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4645     } else {
4646       __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4647       // TODO: Strength reduction for mls.
4648       __ Mov(temp2, imm);
4649       __ Mls(out, temp1, temp2, dividend);
4650     }
4651   }
4652 }
4653 
4654 void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
4655     HBinaryOperation* instruction) {
4656   DCHECK(instruction->IsDiv() || instruction->IsRem());
4657   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4658 
4659   Location second = instruction->GetLocations()->InAt(1);
4660   DCHECK(second.IsConstant());
4661 
4662   int32_t imm = Int32ConstantFrom(second);
4663   if (imm == 0) {
4664     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4665   } else if (imm == 1 || imm == -1) {
4666     DivRemOneOrMinusOne(instruction);
4667   } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4668     DivRemByPowerOfTwo(instruction);
4669   } else {
4670     DCHECK(imm <= -2 || imm >= 2);
4671     GenerateDivRemWithAnyConstant(instruction);
4672   }
4673 }
4674 
4675 void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
4676   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4677   if (div->GetResultType() == DataType::Type::kInt64) {
4678     // pLdiv runtime call.
4679     call_kind = LocationSummary::kCallOnMainOnly;
4680   } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) {
4681     // sdiv will be replaced by another instruction sequence.
4682   } else if (div->GetResultType() == DataType::Type::kInt32 &&
4683              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4684     // pIdivmod runtime call.
4685     call_kind = LocationSummary::kCallOnMainOnly;
4686   }
4687 
4688   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4689 
4690   switch (div->GetResultType()) {
4691     case DataType::Type::kInt32: {
4692       HInstruction* divisor = div->InputAt(1);
4693       if (divisor->IsConstant()) {
4694         locations->SetInAt(0, Location::RequiresRegister());
4695         locations->SetInAt(1, Location::ConstantLocation(divisor));
4696         int32_t value = Int32ConstantFrom(divisor);
4697         Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4698         if (value == 1 || value == 0 || value == -1) {
4699           // No temp register required.
4700         } else if (IsPowerOfTwo(AbsOrMin(value)) &&
4701                    value != 2 &&
4702                    value != -2 &&
4703                    !HasNonNegativeOrMinIntInputAt(div, 0)) {
4704           // The "out" register is used as a temporary, so it overlaps with the inputs.
4705           out_overlaps = Location::kOutputOverlap;
4706         } else {
4707           locations->AddRegisterTemps(2);
4708         }
4709         locations->SetOut(Location::RequiresRegister(), out_overlaps);
4710       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4711         locations->SetInAt(0, Location::RequiresRegister());
4712         locations->SetInAt(1, Location::RequiresRegister());
4713         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4714       } else {
4715         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4716         locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4717         locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4718         // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4719         //       we only need the former.
4720         locations->SetOut(LocationFrom(r0));
4721       }
4722       break;
4723     }
4724     case DataType::Type::kInt64: {
4725       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4726       locations->SetInAt(0, LocationFrom(
4727           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4728       locations->SetInAt(1, LocationFrom(
4729           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4730       locations->SetOut(LocationFrom(r0, r1));
4731       break;
4732     }
4733     case DataType::Type::kFloat32:
4734     case DataType::Type::kFloat64: {
4735       locations->SetInAt(0, Location::RequiresFpuRegister());
4736       locations->SetInAt(1, Location::RequiresFpuRegister());
4737       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4738       break;
4739     }
4740 
4741     default:
4742       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4743   }
4744 }
4745 
4746 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
4747   Location lhs = div->GetLocations()->InAt(0);
4748   Location rhs = div->GetLocations()->InAt(1);
4749 
4750   switch (div->GetResultType()) {
4751     case DataType::Type::kInt32: {
4752       if (rhs.IsConstant()) {
4753         GenerateDivRemConstantIntegral(div);
4754       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4755         __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
4756       } else {
4757         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4758         DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
4759         DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
4760         DCHECK(r0.Is(OutputRegister(div)));
4761 
4762         codegen_->InvokeRuntime(kQuickIdivmod, div);
4763         CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4764       }
4765       break;
4766     }
4767 
4768     case DataType::Type::kInt64: {
4769       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4770       DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
4771       DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
4772       DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
4773       DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
4774       DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
4775       DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
4776 
4777       codegen_->InvokeRuntime(kQuickLdiv, div);
4778       CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4779       break;
4780     }
4781 
4782     case DataType::Type::kFloat32:
4783     case DataType::Type::kFloat64:
4784       __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
4785       break;
4786 
4787     default:
4788       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4789   }
4790 }
4791 
4792 void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
4793   DataType::Type type = rem->GetResultType();
4794 
4795   // Most remainders are implemented in the runtime.
4796   LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
4797   if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) {
4798     // sdiv will be replaced by another instruction sequence.
4799     call_kind = LocationSummary::kNoCall;
4800   } else if ((rem->GetResultType() == DataType::Type::kInt32)
4801              && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4802     // We have a hardware divide instruction for int; do it with three instructions.
4803     call_kind = LocationSummary::kNoCall;
4804   }
4805 
4806   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4807 
4808   switch (type) {
4809     case DataType::Type::kInt32: {
4810       HInstruction* divisor = rem->InputAt(1);
4811       if (divisor->IsConstant()) {
4812         locations->SetInAt(0, Location::RequiresRegister());
4813         locations->SetInAt(1, Location::ConstantLocation(divisor));
4814         int32_t value = Int32ConstantFrom(divisor);
4815         Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4816         if (value == 1 || value == 0 || value == -1) {
4817           // No temp register required.
4818         } else if (IsPowerOfTwo(AbsOrMin(value)) && !HasNonNegativeOrMinIntInputAt(rem, 0)) {
4819           // The "out" register is used as a temporary, so it overlaps with the inputs.
4820           out_overlaps = Location::kOutputOverlap;
4821         } else {
4822           locations->AddRegisterTemps(2);
4823         }
4824         locations->SetOut(Location::RequiresRegister(), out_overlaps);
4825       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4826         locations->SetInAt(0, Location::RequiresRegister());
4827         locations->SetInAt(1, Location::RequiresRegister());
4828         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4829         locations->AddTemp(Location::RequiresRegister());
4830       } else {
4831         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4832         locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4833         locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4834         // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4835         //       we only need the latter.
4836         locations->SetOut(LocationFrom(r1));
4837       }
4838       break;
4839     }
4840     case DataType::Type::kInt64: {
4841       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4842       locations->SetInAt(0, LocationFrom(
4843           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4844       locations->SetInAt(1, LocationFrom(
4845           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4846       // The runtime helper puts the output in R2,R3.
4847       locations->SetOut(LocationFrom(r2, r3));
4848       break;
4849     }
4850     case DataType::Type::kFloat32: {
4851       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4852       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4853       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
4854       locations->SetOut(LocationFrom(s0));
4855       break;
4856     }
4857 
4858     case DataType::Type::kFloat64: {
4859       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4860       locations->SetInAt(0, LocationFrom(
4861           calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
4862       locations->SetInAt(1, LocationFrom(
4863           calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
4864       locations->SetOut(LocationFrom(s0, s1));
4865       break;
4866     }
4867 
4868     default:
4869       LOG(FATAL) << "Unexpected rem type " << type;
4870   }
4871 }
4872 
4873 void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
4874   LocationSummary* locations = rem->GetLocations();
4875   Location second = locations->InAt(1);
4876 
4877   DataType::Type type = rem->GetResultType();
4878   switch (type) {
4879     case DataType::Type::kInt32: {
4880       vixl32::Register reg1 = InputRegisterAt(rem, 0);
4881       vixl32::Register out_reg = OutputRegister(rem);
4882       if (second.IsConstant()) {
4883         GenerateDivRemConstantIntegral(rem);
4884       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4885         vixl32::Register reg2 = RegisterFrom(second);
4886         vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
4887 
4888         // temp = reg1 / reg2  (integer division)
4889         // dest = reg1 - temp * reg2
4890         __ Sdiv(temp, reg1, reg2);
4891         __ Mls(out_reg, temp, reg2, reg1);
4892       } else {
4893         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4894         DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
4895         DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
4896         DCHECK(out_reg.Is(r1));
4897 
4898         codegen_->InvokeRuntime(kQuickIdivmod, rem);
4899         CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4900       }
4901       break;
4902     }
4903 
4904     case DataType::Type::kInt64: {
4905       codegen_->InvokeRuntime(kQuickLmod, rem);
4906       CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4907       break;
4908     }
4909 
4910     case DataType::Type::kFloat32: {
4911       codegen_->InvokeRuntime(kQuickFmodf, rem);
4912       CheckEntrypointTypes<kQuickFmodf, float, float, float>();
4913       break;
4914     }
4915 
4916     case DataType::Type::kFloat64: {
4917       codegen_->InvokeRuntime(kQuickFmod, rem);
4918       CheckEntrypointTypes<kQuickFmod, double, double, double>();
4919       break;
4920     }
4921 
4922     default:
4923       LOG(FATAL) << "Unexpected rem type " << type;
4924   }
4925 }
4926 
4927 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4928   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4929   switch (minmax->GetResultType()) {
4930     case DataType::Type::kInt32:
4931       locations->SetInAt(0, Location::RequiresRegister());
4932       locations->SetInAt(1, Location::RequiresRegister());
4933       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4934       break;
4935     case DataType::Type::kInt64:
4936       locations->SetInAt(0, Location::RequiresRegister());
4937       locations->SetInAt(1, Location::RequiresRegister());
4938       locations->SetOut(Location::SameAsFirstInput());
4939       break;
4940     case DataType::Type::kFloat32:
4941       locations->SetInAt(0, Location::RequiresFpuRegister());
4942       locations->SetInAt(1, Location::RequiresFpuRegister());
4943       locations->SetOut(Location::SameAsFirstInput());
4944       locations->AddTemp(Location::RequiresRegister());
4945       break;
4946     case DataType::Type::kFloat64:
4947       locations->SetInAt(0, Location::RequiresFpuRegister());
4948       locations->SetInAt(1, Location::RequiresFpuRegister());
4949       locations->SetOut(Location::SameAsFirstInput());
4950       break;
4951     default:
4952       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4953   }
4954 }
4955 
4956 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
4957   Location op1_loc = locations->InAt(0);
4958   Location op2_loc = locations->InAt(1);
4959   Location out_loc = locations->Out();
4960 
4961   vixl32::Register op1 = RegisterFrom(op1_loc);
4962   vixl32::Register op2 = RegisterFrom(op2_loc);
4963   vixl32::Register out = RegisterFrom(out_loc);
4964 
4965   __ Cmp(op1, op2);
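  // Conditionally select the result with an IT block:
  //   min: out = (op1 < op2) ? op1 : op2
  //   max: out = (op1 > op2) ? op1 : op2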
4966 
4967   {
4968     ExactAssemblyScope aas(GetVIXLAssembler(),
4969                            3 * kMaxInstructionSizeInBytes,
4970                            CodeBufferCheckScope::kMaximumSize);
4971 
4972     __ ite(is_min ? lt : gt);
4973     __ mov(is_min ? lt : gt, out, op1);
4974     __ mov(is_min ? ge : le, out, op2);
4975   }
4976 }
4977 
4978 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
4979   Location op1_loc = locations->InAt(0);
4980   Location op2_loc = locations->InAt(1);
4981   Location out_loc = locations->Out();
4982 
4983   // Optimization: don't generate any code if inputs are the same.
4984   if (op1_loc.Equals(op2_loc)) {
4985     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
4986     return;
4987   }
4988 
4989   vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
4990   vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
4991   vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
4992   vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
4993   vixl32::Register out_lo = LowRegisterFrom(out_loc);
4994   vixl32::Register out_hi = HighRegisterFrom(out_loc);
4995   UseScratchRegisterScope temps(GetVIXLAssembler());
4996   const vixl32::Register temp = temps.Acquire();
4997 
4998   DCHECK(op1_lo.Is(out_lo));
4999   DCHECK(op1_hi.Is(out_hi));
5000 
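       // CMP on the low words followed by SBCS on the high words performs a full
       // 64-bit subtraction; only the flags are kept (`temp` is a scratch), so the
       // signed GE/LT conditions below are valid for the whole long comparison.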
5001   // Compare op1 >= op2, or op1 < op2.
5002   __ Cmp(out_lo, op2_lo);
5003   __ Sbcs(temp, out_hi, op2_hi);
5004 
5005   // Now GE/LT condition code is correct for the long comparison.
5006   {
5007     vixl32::ConditionType cond = is_min ? ge : lt;
5008     ExactAssemblyScope it_scope(GetVIXLAssembler(),
5009                                 3 * kMaxInstructionSizeInBytes,
5010                                 CodeBufferCheckScope::kMaximumSize);
5011     __ itt(cond);
5012     __ mov(cond, out_lo, op2_lo);
5013     __ mov(cond, out_hi, op2_hi);
5014   }
5015 }
5016 
5017 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
5018   LocationSummary* locations = minmax->GetLocations();
5019   Location op1_loc = locations->InAt(0);
5020   Location op2_loc = locations->InAt(1);
5021   Location out_loc = locations->Out();
5022 
5023   // Optimization: don't generate any code if inputs are the same.
5024   if (op1_loc.Equals(op2_loc)) {
5025     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
5026     return;
5027   }
5028 
5029   vixl32::SRegister op1 = SRegisterFrom(op1_loc);
5030   vixl32::SRegister op2 = SRegisterFrom(op2_loc);
5031   vixl32::SRegister out = SRegisterFrom(out_loc);
5032 
5033   UseScratchRegisterScope temps(GetVIXLAssembler());
5034   const vixl32::Register temp1 = temps.Acquire();
5035   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
5036   vixl32::Label nan, done;
5037   vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5038 
5039   DCHECK(op1.Is(out));
5040 
5041   __ Vcmp(op1, op2);
5042   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5043   __ B(vs, &nan, /* is_far_target= */ false);  // if un-ordered, go to NaN handling.
5044 
5045   // op1 <> op2
5046   vixl32::ConditionType cond = is_min ? gt : lt;
5047   {
5048     ExactAssemblyScope it_scope(GetVIXLAssembler(),
5049                                 2 * kMaxInstructionSizeInBytes,
5050                                 CodeBufferCheckScope::kMaximumSize);
5051     __ it(cond);
5052     __ vmov(cond, F32, out, op2);
5053   }
5054   // For <> (not equal), the min/max calculation is already done.
5055   __ B(ne, final_label, /* is_far_target= */ false);
5056 
5057   // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
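       // Equal inputs are resolved on the raw bit patterns: ORing picks -0.0 for min
       // (the sign bit survives), ANDing picks +0.0 for max (the sign bit is cleared).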
5058   __ Vmov(temp1, op1);
5059   __ Vmov(temp2, op2);
5060   if (is_min) {
5061     __ Orr(temp1, temp1, temp2);
5062   } else {
5063     __ And(temp1, temp1, temp2);
5064   }
5065   __ Vmov(out, temp1);
5066   __ B(final_label);
5067 
5068   // handle NaN input.
5069   __ Bind(&nan);
5070   __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
5071   __ Vmov(out, temp1);
5072 
5073   if (done.IsReferenced()) {
5074     __ Bind(&done);
5075   }
5076 }
5077 
5078 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
5079   LocationSummary* locations = minmax->GetLocations();
5080   Location op1_loc = locations->InAt(0);
5081   Location op2_loc = locations->InAt(1);
5082   Location out_loc = locations->Out();
5083 
5084   // Optimization: don't generate any code if inputs are the same.
5085   if (op1_loc.Equals(op2_loc)) {
5086     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
5087     return;
5088   }
5089 
5090   vixl32::DRegister op1 = DRegisterFrom(op1_loc);
5091   vixl32::DRegister op2 = DRegisterFrom(op2_loc);
5092   vixl32::DRegister out = DRegisterFrom(out_loc);
5093   vixl32::Label handle_nan_eq, done;
5094   vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5095 
5096   DCHECK(op1.Is(out));
5097 
5098   __ Vcmp(op1, op2);
5099   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5100   __ B(vs, &handle_nan_eq, /* is_far_target= */ false);  // if un-ordered, go to NaN handling.
5101 
5102   // op1 <> op2
5103   vixl32::ConditionType cond = is_min ? gt : lt;
5104   {
5105     ExactAssemblyScope it_scope(GetVIXLAssembler(),
5106                                 2 * kMaxInstructionSizeInBytes,
5107                                 CodeBufferCheckScope::kMaximumSize);
5108     __ it(cond);
5109     __ vmov(cond, F64, out, op2);
5110   }
5111   // For <> (not equal), the min/max calculation is already done.
5112   __ B(ne, final_label, /* is_far_target= */ false);
5113 
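       // The zero/NaN tail below works on raw bit patterns: AND picks +0.0 for max,
       // while OR picks -0.0 for min and also keeps an all-ones exponent, so a NaN
       // input still produces a NaN.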
5114   // handle op1 == op2, max(+0.0,-0.0).
5115   if (!is_min) {
5116     __ Vand(F64, out, op1, op2);
5117     __ B(final_label);
5118   }
5119 
5120   // handle op1 == op2, min(+0.0,-0.0), NaN input.
5121   __ Bind(&handle_nan_eq);
5122   __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.
5123 
5124   if (done.IsReferenced()) {
5125     __ Bind(&done);
5126   }
5127 }
5128 
5129 void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
5130   DataType::Type type = minmax->GetResultType();
5131   switch (type) {
5132     case DataType::Type::kInt32:
5133       GenerateMinMaxInt(minmax->GetLocations(), is_min);
5134       break;
5135     case DataType::Type::kInt64:
5136       GenerateMinMaxLong(minmax->GetLocations(), is_min);
5137       break;
5138     case DataType::Type::kFloat32:
5139       GenerateMinMaxFloat(minmax, is_min);
5140       break;
5141     case DataType::Type::kFloat64:
5142       GenerateMinMaxDouble(minmax, is_min);
5143       break;
5144     default:
5145       LOG(FATAL) << "Unexpected type for HMinMax " << type;
5146   }
5147 }
5148 
5149 void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
5150   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
5151 }
5152 
5153 void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
5154   GenerateMinMax(min, /*is_min*/ true);
5155 }
5156 
5157 void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
5158   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
5159 }
5160 
5161 void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
5162   GenerateMinMax(max, /*is_min*/ false);
5163 }
5164 
5165 void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
5166   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5167   switch (abs->GetResultType()) {
5168     case DataType::Type::kInt32:
5169     case DataType::Type::kInt64:
5170       locations->SetInAt(0, Location::RequiresRegister());
5171       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5172       locations->AddTemp(Location::RequiresRegister());
5173       break;
5174     case DataType::Type::kFloat32:
5175     case DataType::Type::kFloat64:
5176       locations->SetInAt(0, Location::RequiresFpuRegister());
5177       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5178       break;
5179     default:
5180       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5181   }
5182 }
5183 
5184 void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
5185   LocationSummary* locations = abs->GetLocations();
5186   switch (abs->GetResultType()) {
5187     case DataType::Type::kInt32: {
5188       vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
5189       vixl32::Register out_reg = RegisterFrom(locations->Out());
5190       vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
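           // Branchless abs: mask = in >> 31 is 0 or -1, and (in + mask) ^ mask
           // negates the value only when it was negative.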
5191       __ Asr(mask, in_reg, 31);
5192       __ Add(out_reg, in_reg, mask);
5193       __ Eor(out_reg, out_reg, mask);
5194       break;
5195     }
5196     case DataType::Type::kInt64: {
5197       Location in = locations->InAt(0);
5198       vixl32::Register in_reg_lo = LowRegisterFrom(in);
5199       vixl32::Register in_reg_hi = HighRegisterFrom(in);
5200       Location output = locations->Out();
5201       vixl32::Register out_reg_lo = LowRegisterFrom(output);
5202       vixl32::Register out_reg_hi = HighRegisterFrom(output);
5203       DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
5204       vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
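           // Same branchless-abs trick across the register pair: Adds/Adc propagate the
           // carry of the 64-bit add, then both halves are XORed with the sign mask.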
5205       __ Asr(mask, in_reg_hi, 31);
5206       __ Adds(out_reg_lo, in_reg_lo, mask);
5207       __ Adc(out_reg_hi, in_reg_hi, mask);
5208       __ Eor(out_reg_lo, out_reg_lo, mask);
5209       __ Eor(out_reg_hi, out_reg_hi, mask);
5210       break;
5211     }
5212     case DataType::Type::kFloat32:
5213     case DataType::Type::kFloat64:
5214       __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
5215       break;
5216     default:
5217       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5218   }
5219 }
5220 
5221 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5222   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5223   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5224 }
5225 
5226 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5227   DivZeroCheckSlowPathARMVIXL* slow_path =
5228       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction);
5229   codegen_->AddSlowPath(slow_path);
5230 
5231   LocationSummary* locations = instruction->GetLocations();
5232   Location value = locations->InAt(0);
5233 
5234   switch (instruction->GetType()) {
5235     case DataType::Type::kBool:
5236     case DataType::Type::kUint8:
5237     case DataType::Type::kInt8:
5238     case DataType::Type::kUint16:
5239     case DataType::Type::kInt16:
5240     case DataType::Type::kInt32: {
5241       if (value.IsRegister()) {
5242         __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
5243       } else {
5244         DCHECK(value.IsConstant()) << value;
5245         if (Int32ConstantFrom(value) == 0) {
5246           __ B(slow_path->GetEntryLabel());
5247         }
5248       }
5249       break;
5250     }
5251     case DataType::Type::kInt64: {
5252       if (value.IsRegisterPair()) {
5253         UseScratchRegisterScope temps(GetVIXLAssembler());
5254         vixl32::Register temp = temps.Acquire();
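             // ORRS of the two halves sets Z only when the full 64-bit value is zero.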
5255         __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
5256         __ B(eq, slow_path->GetEntryLabel());
5257       } else {
5258         DCHECK(value.IsConstant()) << value;
5259         if (Int64ConstantFrom(value) == 0) {
5260           __ B(slow_path->GetEntryLabel());
5261         }
5262       }
5263       break;
5264     }
5265     default:
5266       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
5267   }
5268 }
5269 
5270 void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HBinaryOperation* rotate) {
5271   LocationSummary* locations = rotate->GetLocations();
5272   vixl32::Register in = InputRegisterAt(rotate, 0);
5273   Location rhs = locations->InAt(1);
5274   vixl32::Register out = OutputRegister(rotate);
5275 
5276   if (rhs.IsConstant()) {
5277     // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
5278     // so map all rotations to a positive equivalent in that range.
5279     // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
5280     uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
5281     if (rotate->IsRol()) {
5282       rot = -rot;
5283     }
5284     rot &= 0x1f;
5285 
5286     if (rot) {
5287       // Rotate, mapping left rotations to right equivalents if necessary.
5288       // (e.g. left by 2 bits == right by 30.)
5289       __ Ror(out, in, rot);
5290     } else if (!out.Is(in)) {
5291       __ Mov(out, in);
5292     }
5293   } else {
5294     if (rotate->IsRol()) {
5295       UseScratchRegisterScope temps(GetVIXLAssembler());
5296 
5297       vixl32::Register negated = temps.Acquire();
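           // Rotate-left by n is rotate-right by -n; ROR only looks at the low bits of
           // the register amount, so the plain negation is enough.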
5298       __ Rsb(negated, RegisterFrom(rhs), 0);
5299       __ Ror(out, in, negated);
5300     } else {
5301       DCHECK(rotate->IsRor());
5302       __ Ror(out, in, RegisterFrom(rhs));
5303     }
5304   }
5305 }
5306 
5307 // Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
5308 // rotates by swapping input regs (effectively rotating by the first 32-bits of
5309 // a larger rotation) or flipping direction (thus treating larger right/left
5310 // rotations as sub-word sized rotations in the other direction) as appropriate.
5311 void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HBinaryOperation* rotate) {
5312   LocationSummary* locations = rotate->GetLocations();
5313   vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
5314   vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
5315   Location rhs = locations->InAt(1);
5316   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
5317   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
5318 
5319   if (rhs.IsConstant()) {
5320     uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
5321 
5322     if (rotate->IsRol()) {
5323       rot = -rot;
5324     }
5325 
5326     // Map all rotations to positive equivalents on the interval [0,63].
5327     rot &= kMaxLongShiftDistance;
5328     // For rotations larger than a word, 'pre-rotate' by 32 bits so that the rotate
5329     // logic below reduces to a simple pair of binary ORRs.
5330     // (e.g. 34 bits == in_reg swap + 2 bits right.)
5331     if (rot >= kArmBitsPerWord) {
5332       rot -= kArmBitsPerWord;
5333       std::swap(in_reg_hi, in_reg_lo);
5334     }
5335     // Rotate, or mov to out for zero or word size rotations.
5336     if (rot != 0u) {
5337       __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot));
5338       __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
5339       __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot));
5340       __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
5341     } else {
5342       __ Mov(out_reg_lo, in_reg_lo);
5343       __ Mov(out_reg_hi, in_reg_hi);
5344     }
5345   } else {
5346     vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
5347     vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
5348     vixl32::Label end;
5349     vixl32::Label shift_by_32_plus_shift_right;
5350     vixl32::Label* final_label = codegen_->GetFinalLabel(rotate, &end);
5351 
5352     // Negate rhs (same sequence as in VisitNeg).
5353     if (rotate->IsRol()) {
5354       Location negated = locations->GetTemp(2);
5355       Location in = rhs;
5356 
5357       __ Rsb(RegisterFrom(negated), RegisterFrom(in), 0);
5358 
5359       rhs = negated;
5360     }
5361 
5362     __ And(shift_right, RegisterFrom(rhs), 0x1F);
5363     __ Lsrs(shift_left, RegisterFrom(rhs), 6);
5364     __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
5365     __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false);
5366 
5367     // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
5368     // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
5369     __ Lsl(out_reg_hi, in_reg_hi, shift_left);
5370     __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5371     __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5372     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5373     __ Lsr(shift_left, in_reg_hi, shift_right);
5374     __ Add(out_reg_lo, out_reg_lo, shift_left);
5375     __ B(final_label);
5376 
5377     __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
5378     // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
5379     // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
5380     __ Lsr(out_reg_hi, in_reg_hi, shift_right);
5381     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5382     __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5383     __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5384     __ Lsl(shift_right, in_reg_hi, shift_left);
5385     __ Add(out_reg_lo, out_reg_lo, shift_right);
5386 
5387     if (end.IsReferenced()) {
5388       __ Bind(&end);
5389     }
5390   }
5391 }
5392 
5393 void LocationsBuilderARMVIXL::HandleRotate(HBinaryOperation* rotate) {
5394   LocationSummary* locations =
5395       new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5396   HInstruction* shift = rotate->InputAt(1);
5397   switch (rotate->GetResultType()) {
5398     case DataType::Type::kInt32: {
5399       locations->SetInAt(0, Location::RequiresRegister());
5400       locations->SetInAt(1, Location::RegisterOrConstant(shift));
5401       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5402       break;
5403     }
5404     case DataType::Type::kInt64: {
5405       locations->SetInAt(0, Location::RequiresRegister());
5406       if (shift->IsConstant()) {
5407         locations->SetInAt(1, Location::ConstantLocation(shift));
5408       } else {
5409         locations->SetInAt(1, Location::RequiresRegister());
5410 
5411         if (rotate->IsRor()) {
5412           locations->AddRegisterTemps(2);
5413         } else {
5414           DCHECK(rotate->IsRol());
5415           locations->AddRegisterTemps(3);
5416         }
5417       }
5418       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5419       break;
5420     }
5421     default:
5422       LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5423   }
5424 }
5425 
5426 void LocationsBuilderARMVIXL::VisitRol(HRol* rol) {
5427   HandleRotate(rol);
5428 }
5429 
5430 void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
5431   HandleRotate(ror);
5432 }
5433 
5434 void InstructionCodeGeneratorARMVIXL::HandleRotate(HBinaryOperation* rotate) {
5435   DataType::Type type = rotate->GetResultType();
5436   switch (type) {
5437     case DataType::Type::kInt32: {
5438       HandleIntegerRotate(rotate);
5439       break;
5440     }
5441     case DataType::Type::kInt64: {
5442       HandleLongRotate(rotate);
5443       break;
5444     }
5445     default:
5446       LOG(FATAL) << "Unexpected operation type " << type;
5447       UNREACHABLE();
5448   }
5449 }
5450 
5451 void InstructionCodeGeneratorARMVIXL::VisitRol(HRol* rol) {
5452   HandleRotate(rol);
5453 }
5454 
5455 void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
5456   HandleRotate(ror);
5457 }
5458 
5459 void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
5460   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5461 
5462   LocationSummary* locations =
5463       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
5464 
5465   HInstruction* shift = op->InputAt(1);
5466   switch (op->GetResultType()) {
5467     case DataType::Type::kInt32: {
5468       locations->SetInAt(0, Location::RequiresRegister());
5469       if (shift->IsConstant()) {
5470         locations->SetInAt(1, Location::ConstantLocation(shift));
5471         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5472       } else {
5473         locations->SetInAt(1, Location::RequiresRegister());
5474         // Make the output overlap, as it will be used to hold the masked
5475         // second input.
5476         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5477       }
5478       break;
5479     }
5480     case DataType::Type::kInt64: {
5481       locations->SetInAt(0, Location::RequiresRegister());
5482       if (shift->IsConstant()) {
5483         locations->SetInAt(1, Location::ConstantLocation(shift));
5484         // For simplicity, use kOutputOverlap even though we only require that low registers
5485         // don't clash with high registers which the register allocator currently guarantees.
5486         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5487       } else {
5488         locations->SetInAt(1, Location::RequiresRegister());
5489         locations->AddTemp(Location::RequiresRegister());
5490         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5491       }
5492       break;
5493     }
5494     default:
5495       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5496   }
5497 }
5498 
5499 void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
5500   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5501 
5502   LocationSummary* locations = op->GetLocations();
5503   Location out = locations->Out();
5504   Location first = locations->InAt(0);
5505   Location second = locations->InAt(1);
5506 
5507   DataType::Type type = op->GetResultType();
5508   switch (type) {
5509     case DataType::Type::kInt32: {
5510       vixl32::Register out_reg = OutputRegister(op);
5511       vixl32::Register first_reg = InputRegisterAt(op, 0);
5512       if (second.IsRegister()) {
5513         vixl32::Register second_reg = RegisterFrom(second);
5514         // ARM doesn't mask the shift count so we need to do it ourselves.
5515         __ And(out_reg, second_reg, kMaxIntShiftDistance);
5516         if (op->IsShl()) {
5517           __ Lsl(out_reg, first_reg, out_reg);
5518         } else if (op->IsShr()) {
5519           __ Asr(out_reg, first_reg, out_reg);
5520         } else {
5521           __ Lsr(out_reg, first_reg, out_reg);
5522         }
5523       } else {
5524         int32_t cst = Int32ConstantFrom(second);
5525         uint32_t shift_value = cst & kMaxIntShiftDistance;
5526         if (shift_value == 0) {  // ARM does not support shifting with 0 immediate.
5527           __ Mov(out_reg, first_reg);
5528         } else if (op->IsShl()) {
5529           __ Lsl(out_reg, first_reg, shift_value);
5530         } else if (op->IsShr()) {
5531           __ Asr(out_reg, first_reg, shift_value);
5532         } else {
5533           __ Lsr(out_reg, first_reg, shift_value);
5534         }
5535       }
5536       break;
5537     }
5538     case DataType::Type::kInt64: {
5539       vixl32::Register o_h = HighRegisterFrom(out);
5540       vixl32::Register o_l = LowRegisterFrom(out);
5541 
5542       vixl32::Register high = HighRegisterFrom(first);
5543       vixl32::Register low = LowRegisterFrom(first);
5544 
5545       if (second.IsRegister()) {
5546         vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5547 
5548         vixl32::Register second_reg = RegisterFrom(second);
5549 
5550         if (op->IsShl()) {
5551           __ And(o_l, second_reg, kMaxLongShiftDistance);
5552           // Shift the high part
5553           __ Lsl(o_h, high, o_l);
5554           // Shift the low part and `or` what overflowed onto the high part
5555           __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord));
5556           __ Lsr(temp, low, temp);
5557           __ Orr(o_h, o_h, temp);
5558           // If the shift is > 32 bits, override the high part
5559           __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
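               // After SUBS, `pl` (non-negative) means the masked shift is >= 32; the high
               // result is then just the low word shifted left by (shift - 32), which is
               // exactly what the conditional LSL below computes.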
5560           {
5561             ExactAssemblyScope guard(GetVIXLAssembler(),
5562                                      2 * vixl32::kMaxInstructionSizeInBytes,
5563                                      CodeBufferCheckScope::kMaximumSize);
5564             __ it(pl);
5565             __ lsl(pl, o_h, low, temp);
5566           }
5567           // Shift the low part
5568           __ Lsl(o_l, low, o_l);
5569         } else if (op->IsShr()) {
5570           __ And(o_h, second_reg, kMaxLongShiftDistance);
5571           // Shift the low part
5572           __ Lsr(o_l, low, o_h);
5573           // Shift the high part and `or` what underflowed onto the low part
5574           __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5575           __ Lsl(temp, high, temp);
5576           __ Orr(o_l, o_l, temp);
5577           // If the shift is > 32 bits, override the low part
5578           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5579           {
5580             ExactAssemblyScope guard(GetVIXLAssembler(),
5581                                      2 * vixl32::kMaxInstructionSizeInBytes,
5582                                      CodeBufferCheckScope::kMaximumSize);
5583             __ it(pl);
5584             __ asr(pl, o_l, high, temp);
5585           }
5586           // Shift the high part
5587           __ Asr(o_h, high, o_h);
5588         } else {
5589           __ And(o_h, second_reg, kMaxLongShiftDistance);
5590           // Same as Shr, except we use `Lsr`s and not `Asr`s.
5591           __ Lsr(o_l, low, o_h);
5592           __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5593           __ Lsl(temp, high, temp);
5594           __ Orr(o_l, o_l, temp);
5595           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5596           {
5597             ExactAssemblyScope guard(GetVIXLAssembler(),
5598                                      2 * vixl32::kMaxInstructionSizeInBytes,
5599                                      CodeBufferCheckScope::kMaximumSize);
5600           __ it(pl);
5601           __ lsr(pl, o_l, high, temp);
5602           }
5603           __ Lsr(o_h, high, o_h);
5604         }
5605       } else {
5606         // Register allocator doesn't create partial overlap.
5607         DCHECK(!o_l.Is(high));
5608         DCHECK(!o_h.Is(low));
5609         int32_t cst = Int32ConstantFrom(second);
5610         uint32_t shift_value = cst & kMaxLongShiftDistance;
5611         if (shift_value > 32) {
5612           if (op->IsShl()) {
5613             __ Lsl(o_h, low, shift_value - 32);
5614             __ Mov(o_l, 0);
5615           } else if (op->IsShr()) {
5616             __ Asr(o_l, high, shift_value - 32);
5617             __ Asr(o_h, high, 31);
5618           } else {
5619             __ Lsr(o_l, high, shift_value - 32);
5620             __ Mov(o_h, 0);
5621           }
5622         } else if (shift_value == 32) {
5623           if (op->IsShl()) {
5624             __ Mov(o_h, low);
5625             __ Mov(o_l, 0);
5626           } else if (op->IsShr()) {
5627             __ Mov(o_l, high);
5628             __ Asr(o_h, high, 31);
5629           } else {
5630             __ Mov(o_l, high);
5631             __ Mov(o_h, 0);
5632           }
5633         } else if (shift_value == 1) {
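                 // A shift by 1 goes through the carry flag: ADC doubles the high word and
                 // adds the bit carried out of the low word; RRX rotates the carried-out bit
                 // into the top of the low word.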
5634           if (op->IsShl()) {
5635             __ Lsls(o_l, low, 1);
5636             __ Adc(o_h, high, high);
5637           } else if (op->IsShr()) {
5638             __ Asrs(o_h, high, 1);
5639             __ Rrx(o_l, low);
5640           } else {
5641             __ Lsrs(o_h, high, 1);
5642             __ Rrx(o_l, low);
5643           }
5644         } else if (shift_value == 0) {
5645           __ Mov(o_l, low);
5646           __ Mov(o_h, high);
5647         } else {
5648           DCHECK(0 < shift_value && shift_value < 32) << shift_value;
5649           if (op->IsShl()) {
5650             __ Lsl(o_h, high, shift_value);
5651             __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
5652             __ Lsl(o_l, low, shift_value);
5653           } else if (op->IsShr()) {
5654             __ Lsr(o_l, low, shift_value);
5655             __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5656             __ Asr(o_h, high, shift_value);
5657           } else {
5658             __ Lsr(o_l, low, shift_value);
5659             __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5660             __ Lsr(o_h, high, shift_value);
5661           }
5662         }
5663       }
5664       break;
5665     }
5666     default:
5667       LOG(FATAL) << "Unexpected operation type " << type;
5668       UNREACHABLE();
5669   }
5670 }
5671 
5672 void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
5673   HandleShift(shl);
5674 }
5675 
5676 void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
5677   HandleShift(shl);
5678 }
5679 
5680 void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
5681   HandleShift(shr);
5682 }
5683 
5684 void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
5685   HandleShift(shr);
5686 }
5687 
5688 void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
5689   HandleShift(ushr);
5690 }
5691 
5692 void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
5693   HandleShift(ushr);
5694 }
5695 
5696 void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5697   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5698       instruction, LocationSummary::kCallOnMainOnly);
5699   InvokeRuntimeCallingConventionARMVIXL calling_convention;
5700   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5701   locations->SetOut(LocationFrom(r0));
5702 }
5703 
5704 void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5705   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction);
5706   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5707   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12);
5708 }
5709 
5710 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
5711   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5712       instruction, LocationSummary::kCallOnMainOnly);
5713   InvokeRuntimeCallingConventionARMVIXL calling_convention;
5714   locations->SetOut(LocationFrom(r0));
5715   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5716   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5717 }
5718 
5719 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
5720   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5721   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5722   codegen_->InvokeRuntime(entrypoint, instruction);
5723   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5724   DCHECK(!codegen_->IsLeafMethod());
5725   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13);
5726 }
5727 
5728 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
5729   LocationSummary* locations =
5730       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5731   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
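       // Stack-passed arguments live in the caller's outgoing area, so rebase their
       // slot index by this method's frame size to address them from the current SP.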
5732   if (location.IsStackSlot()) {
5733     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5734   } else if (location.IsDoubleStackSlot()) {
5735     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5736   }
5737   locations->SetOut(location);
5738 }
5739 
5740 void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
5741     [[maybe_unused]] HParameterValue* instruction) {
5742   // Nothing to do, the parameter is already at its location.
5743 }
5744 
5745 void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
5746   LocationSummary* locations =
5747       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5748   locations->SetOut(LocationFrom(kMethodRegister));
5749 }
5750 
5751 void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
5752     [[maybe_unused]] HCurrentMethod* instruction) {
5753   // Nothing to do, the method is already at its location.
5754 }
5755 
5756 void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
5757   LocationSummary* locations =
5758       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5759   locations->SetInAt(0, Location::RequiresRegister());
5760   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5761 }
5762 
5763 void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
5764   LocationSummary* locations = not_->GetLocations();
5765   Location out = locations->Out();
5766   Location in = locations->InAt(0);
5767   switch (not_->GetResultType()) {
5768     case DataType::Type::kInt32:
5769       __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
5770       break;
5771 
5772     case DataType::Type::kInt64:
5773       __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
5774       __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
5775       break;
5776 
5777     default:
5778       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5779   }
5780 }
5781 
5782 void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5783   LocationSummary* locations =
5784       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5785   locations->SetInAt(0, Location::RequiresRegister());
5786   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5787 }
5788 
5789 void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
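       // Booleans are materialized as 0 or 1, so a single EOR with 1 negates them.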
5790   __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);
5791 }
5792 
5793 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
5794   LocationSummary* locations =
5795       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5796   switch (compare->GetComparisonType()) {
5797     case DataType::Type::kBool:
5798     case DataType::Type::kUint8:
5799     case DataType::Type::kInt8:
5800     case DataType::Type::kUint16:
5801     case DataType::Type::kInt16:
5802     case DataType::Type::kInt32:
5803     case DataType::Type::kUint32:
5804     case DataType::Type::kInt64:
5805     case DataType::Type::kUint64: {
5806       locations->SetInAt(0, Location::RequiresRegister());
5807       locations->SetInAt(1, Location::RequiresRegister());
5808       // Output overlaps because it is written before doing the low comparison.
5809       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5810       break;
5811     }
5812     case DataType::Type::kFloat32:
5813     case DataType::Type::kFloat64: {
5814       locations->SetInAt(0, Location::RequiresFpuRegister());
5815       locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
5816       locations->SetOut(Location::RequiresRegister());
5817       break;
5818     }
5819     default:
5820       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5821   }
5822 }
5823 
5824 void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
5825   LocationSummary* locations = compare->GetLocations();
5826   vixl32::Register out = OutputRegister(compare);
5827   Location left = locations->InAt(0);
5828   Location right = locations->InAt(1);
5829 
5830   vixl32::Label less, greater, done;
5831   vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
5832   DataType::Type type = compare->GetComparisonType();
5833   vixl32::Condition less_cond = vixl32::ConditionType::lt;
5834   vixl32::Condition greater_cond = vixl32::ConditionType::gt;
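       // Result protocol: `out` is preset to 0 for the equal case and overwritten
       // with 1 or -1 on the greater/less paths after the switch.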
5835   switch (type) {
5836     case DataType::Type::kUint32:
5837       less_cond = vixl32::ConditionType::lo;
5838       // greater_cond is not needed below.
5839       FALLTHROUGH_INTENDED;
5840     case DataType::Type::kBool:
5841     case DataType::Type::kUint8:
5842     case DataType::Type::kInt8:
5843     case DataType::Type::kUint16:
5844     case DataType::Type::kInt16:
5845     case DataType::Type::kInt32: {
5846       // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
5847       __ Mov(out, 0);
5848       __ Cmp(RegisterFrom(left), RegisterFrom(right));
5849       break;
5850     }
5851     case DataType::Type::kUint64:
5852       less_cond = vixl32::ConditionType::lo;
5853       greater_cond = vixl32::ConditionType::hi;
5854       FALLTHROUGH_INTENDED;
5855     case DataType::Type::kInt64: {
5856       __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));  // High part compare.
5857       __ B(less_cond, &less, /* is_far_target= */ false);
5858       __ B(greater_cond, &greater, /* is_far_target= */ false);
5859       // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
5860       __ Mov(out, 0);
5861       __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));  // Unsigned compare.
5862       less_cond = vixl32::ConditionType::lo;
5863       // greater_cond is not needed below.
5864       break;
5865     }
5866     case DataType::Type::kFloat32:
5867     case DataType::Type::kFloat64: {
5868       __ Mov(out, 0);
5869       GenerateVcmp(compare, codegen_);
5870       // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
5871       __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5872       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
5873       break;
5874     }
5875     default:
5876       LOG(FATAL) << "Unexpected compare type " << type;
5877       UNREACHABLE();
5878   }
5879 
5880   __ B(eq, final_label, /* is_far_target= */ false);
5881   __ B(less_cond, &less, /* is_far_target= */ false);
5882 
5883   __ Bind(&greater);
5884   __ Mov(out, 1);
5885   __ B(final_label);
5886 
5887   __ Bind(&less);
5888   __ Mov(out, -1);
5889 
5890   if (done.IsReferenced()) {
5891     __ Bind(&done);
5892   }
5893 }
5894 
5895 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
5896   LocationSummary* locations =
5897       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5898   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5899     locations->SetInAt(i, Location::Any());
5900   }
5901   locations->SetOut(Location::Any());
5902 }
5903 
5904 void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) {
5905   LOG(FATAL) << "Unreachable";
5906 }
5907 
5908 void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
5909   // TODO (ported from quick): revisit ARM barrier kinds.
5910   DmbOptions flavor = DmbOptions::ISH;  // Quiet C++ warnings.
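       // kStoreStore only needs to order stores against stores, so the cheaper ISHST
       // option is used; every other kind gets a full inner-shareable DMB ISH.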
5911   switch (kind) {
5912     case MemBarrierKind::kAnyStore:
5913     case MemBarrierKind::kLoadAny:
5914     case MemBarrierKind::kAnyAny: {
5915       flavor = DmbOptions::ISH;
5916       break;
5917     }
5918     case MemBarrierKind::kStoreStore: {
5919       flavor = DmbOptions::ISHST;
5920       break;
5921     }
5922     default:
5923       LOG(FATAL) << "Unexpected memory barrier " << kind;
5924   }
5925   __ Dmb(flavor);
5926 }
5927 
5928 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
5929                                                              uint32_t offset,
5930                                                              vixl32::Register out_lo,
5931                                                              vixl32::Register out_hi) {
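       // A single LDREXD performs a single-copy atomic 64-bit load on ARMv7; the
       // exclusive monitor it sets is simply never used by a matching STREXD here.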
5932   UseScratchRegisterScope temps(GetVIXLAssembler());
5933   if (offset != 0) {
5934     vixl32::Register temp = temps.Acquire();
5935     __ Add(temp, addr, offset);
5936     addr = temp;
5937   }
5938   __ Ldrexd(out_lo, out_hi, MemOperand(addr));
5939 }
5940 
5941 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
5942                                                               uint32_t offset,
5943                                                               vixl32::Register value_lo,
5944                                                               vixl32::Register value_hi,
5945                                                               vixl32::Register temp1,
5946                                                               vixl32::Register temp2,
5947                                                               HInstruction* instruction) {
5948   UseScratchRegisterScope temps(GetVIXLAssembler());
5949   vixl32::Label fail;
5950   if (offset != 0) {
5951     vixl32::Register temp = temps.Acquire();
5952     __ Add(temp, addr, offset);
5953     addr = temp;
5954   }
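       // Classic load-linked/store-conditional loop: LDREXD claims exclusive access,
       // STREXD writes back only if the location is still exclusive and returns 0 on
       // success, so we loop until the store sticks.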
5955   __ Bind(&fail);
5956   {
5957     // Ensure the pc position is recorded immediately after the `ldrexd` instruction.
5958     ExactAssemblyScope aas(GetVIXLAssembler(),
5959                            vixl32::kMaxInstructionSizeInBytes,
5960                            CodeBufferCheckScope::kMaximumSize);
5961     // We need a load followed by store. (The address used in a STREX instruction must
5962     // be the same as the address in the most recently executed LDREX instruction.)
5963     __ ldrexd(temp1, temp2, MemOperand(addr));
5964     codegen_->MaybeRecordImplicitNullCheck(instruction);
5965   }
5966   __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
5967   __ CompareAndBranchIfNonZero(temp1, &fail);
5968 }
5969 
5970 void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction,
5971                                              const FieldInfo& field_info,
5972                                              WriteBarrierKind write_barrier_kind) {
5973   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5974 
5975   LocationSummary* locations =
5976       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5977   locations->SetInAt(0, Location::RequiresRegister());
5978 
5979   DataType::Type field_type = field_info.GetFieldType();
5980   if (DataType::IsFloatingPointType(field_type)) {
5981     locations->SetInAt(1, Location::RequiresFpuRegister());
5982   } else {
5983     locations->SetInAt(1, Location::RequiresRegister());
5984   }
5985 
5986   bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64;
5987   bool generate_volatile = field_info.IsVolatile()
5988       && is_wide
5989       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5990   bool needs_write_barrier =
5991       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5992   bool check_gc_card =
5993       codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
5994 
5995   // Temporary registers for the write barrier.
5996   // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
5997   if (needs_write_barrier || check_gc_card) {
5998     locations->AddRegisterTemps(2);
5999   } else if (generate_volatile) {
6000     // The ARM encoding has some additional constraints for ldrexd/strexd:
6001     // - registers need to be consecutive
6002     // - the first register should be even but not R14.
6003     // We don't test for ARM yet, and the assertion makes sure that we
6004     // revisit this if we ever enable ARM encoding.
6005     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6006     locations->AddRegisterTemps(2);
6007     if (field_type == DataType::Type::kFloat64) {
6008       // For doubles we need two more registers to copy the value.
6009       locations->AddTemp(LocationFrom(r2));
6010       locations->AddTemp(LocationFrom(r3));
6011     }
6012   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6013     locations->AddTemp(Location::RequiresRegister());
6014   }
6015 }
6016 
6017 void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
6018                                                      const FieldInfo& field_info,
6019                                                      bool value_can_be_null,
6020                                                      WriteBarrierKind write_barrier_kind) {
6021   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6022 
6023   LocationSummary* locations = instruction->GetLocations();
6024   vixl32::Register base = InputRegisterAt(instruction, 0);
6025   Location value = locations->InAt(1);
6026 
6027   bool is_volatile = field_info.IsVolatile();
6028   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6029   DataType::Type field_type = field_info.GetFieldType();
6030   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6031   bool needs_write_barrier =
6032       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6033 
6034   if (is_volatile) {
6035     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
6036   }
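       // For volatile fields the kAnyStore barrier above and the kAnyAny barrier after
       // the store implement the standard ARM mapping for a volatile store
       // (dmb; str; dmb).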
6037 
6038   switch (field_type) {
6039     case DataType::Type::kBool:
6040     case DataType::Type::kUint8:
6041     case DataType::Type::kInt8:
6042     case DataType::Type::kUint16:
6043     case DataType::Type::kInt16:
6044     case DataType::Type::kInt32: {
6045       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6046       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6047       StoreOperandType operand_type = GetStoreOperandType(field_type);
6048       GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset);
6049       codegen_->MaybeRecordImplicitNullCheck(instruction);
6050       break;
6051     }
6052 
6053     case DataType::Type::kReference: {
6054       vixl32::Register value_reg = RegisterFrom(value);
6055       if (kPoisonHeapReferences) {
6056         DCHECK_EQ(field_type, DataType::Type::kReference);
6057         value_reg = RegisterFrom(locations->GetTemp(0));
6058         __ Mov(value_reg, RegisterFrom(value));
6059         GetAssembler()->PoisonHeapReference(value_reg);
6060       }
6061       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6062       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6063       GetAssembler()->StoreToOffset(kStoreWord, value_reg, base, offset);
6064       codegen_->MaybeRecordImplicitNullCheck(instruction);
6065       break;
6066     }
6067 
6068     case DataType::Type::kInt64: {
6069       if (is_volatile && !atomic_ldrd_strd) {
6070         GenerateWideAtomicStore(base,
6071                                 offset,
6072                                 LowRegisterFrom(value),
6073                                 HighRegisterFrom(value),
6074                                 RegisterFrom(locations->GetTemp(0)),
6075                                 RegisterFrom(locations->GetTemp(1)),
6076                                 instruction);
6077       } else {
6078         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6079         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6080         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
6081         codegen_->MaybeRecordImplicitNullCheck(instruction);
6082       }
6083       break;
6084     }
6085 
6086     case DataType::Type::kFloat32: {
6087       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6088       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6089       GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
6090       codegen_->MaybeRecordImplicitNullCheck(instruction);
6091       break;
6092     }
6093 
6094     case DataType::Type::kFloat64: {
6095       vixl32::DRegister value_reg = DRegisterFrom(value);
6096       if (is_volatile && !atomic_ldrd_strd) {
6097         vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
6098         vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
6099 
6100         __ Vmov(value_reg_lo, value_reg_hi, value_reg);
6101 
6102         GenerateWideAtomicStore(base,
6103                                 offset,
6104                                 value_reg_lo,
6105                                 value_reg_hi,
6106                                 RegisterFrom(locations->GetTemp(2)),
6107                                 RegisterFrom(locations->GetTemp(3)),
6108                                 instruction);
6109       } else {
6110         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6111         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6112         GetAssembler()->StoreDToOffset(value_reg, base, offset);
6113         codegen_->MaybeRecordImplicitNullCheck(instruction);
6114       }
6115       break;
6116     }
6117 
6118     case DataType::Type::kUint32:
6119     case DataType::Type::kUint64:
6120     case DataType::Type::kVoid:
6121       LOG(FATAL) << "Unreachable type " << field_type;
6122       UNREACHABLE();
6123   }
6124 
6125   if (needs_write_barrier) {
6126     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6127     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6128     codegen_->MaybeMarkGCCard(
6129         temp,
6130         card,
6131         base,
6132         RegisterFrom(value),
6133         value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
6134   } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
6135     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6136     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6137     codegen_->CheckGCCardIsValid(temp, card, base);
6138   }
6139 
6140   if (is_volatile) {
6141     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6142   }
6143 }
6144 
6145 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
6146                                              const FieldInfo& field_info) {
6147   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6148 
6149   bool object_field_get_with_read_barrier =
6150       (field_info.GetFieldType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6151   LocationSummary* locations =
6152       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6153                                                        object_field_get_with_read_barrier
6154                                                            ? LocationSummary::kCallOnSlowPath
6155                                                            : LocationSummary::kNoCall);
6156   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6157     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6158   }
6159   // Input for object receiver.
6160   locations->SetInAt(0, Location::RequiresRegister());
6161 
6162   bool volatile_for_double = field_info.IsVolatile()
6163       && (field_info.GetFieldType() == DataType::Type::kFloat64)
6164       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6165   // The output overlaps in case of volatile long: we don't want the code generated by
6166   // `GenerateWideAtomicLoad()` to overwrite the object's location.  Likewise, in the case
6167   // of an object field get with non-Baker read barriers enabled, we do not want the load
6168   // to overwrite the object's location, as we need it to emit the read barrier.
6169   // Baker read barrier implementation with introspection does not have this restriction.
6170   bool overlap =
6171       (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) ||
6172       (object_field_get_with_read_barrier && !kUseBakerReadBarrier);
6173 
6174   if (DataType::IsFloatingPointType(instruction->GetType())) {
6175     locations->SetOut(Location::RequiresFpuRegister());
6176   } else {
6177     locations->SetOut(Location::RequiresRegister(),
6178                       (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
6179   }
6180   if (volatile_for_double) {
6181     // The ARM encoding has some additional constraints for ldrexd/strexd:
6182     // - registers need to be consecutive
6183     // - the first register should be even but not R14.
6184     // We don't test for ARM yet, and the assertion makes sure that we
6185     // revisit this if we ever enable ARM encoding.
6186     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6187     locations->AddRegisterTemps(2);
6188   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6189     // We need a temporary register for the read barrier load in
6190     // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6191     // only if the offset is too big.
6192     if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
6193       locations->AddTemp(Location::RequiresRegister());
6194     }
6195   }
6196 }
6197 
6198 Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
6199   DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
6200   if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
6201       (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
6202     return Location::ConstantLocation(input);
6203   } else {
6204     return Location::RequiresFpuRegister();
6205   }
6206 }
6207 
6208 Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
6209                                                                  Opcode opcode) {
6210   DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
6211   if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
6212     return Location::ConstantLocation(constant);
6213   }
6214   return Location::RequiresRegister();
6215 }
6216 
6217 static bool CanEncode32BitConstantAsImmediate(
6218     CodeGeneratorARMVIXL* codegen,
6219     uint32_t value,
6220     Opcode opcode,
6221     vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) {
6222   ArmVIXLAssembler* assembler = codegen->GetAssembler();
6223   if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) {
6224     return true;
6225   }
6226   Opcode neg_opcode = kNoOperand;
6227   uint32_t neg_value = 0;
6228   switch (opcode) {
6229     case AND: neg_opcode = BIC; neg_value = ~value; break;
6230     case ORR: neg_opcode = ORN; neg_value = ~value; break;
6231     case ADD: neg_opcode = SUB; neg_value = -value; break;
6232     case ADC: neg_opcode = SBC; neg_value = ~value; break;
6233     case SUB: neg_opcode = ADD; neg_value = -value; break;
6234     case SBC: neg_opcode = ADC; neg_value = ~value; break;
6235     case MOV: neg_opcode = MVN; neg_value = ~value; break;
6236     default:
6237       return false;
6238   }
6239 
6240   if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) {
6241     return true;
6242   }
6243 
6244   return opcode == AND && IsPowerOfTwo(value + 1);
6245 }
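// Worked example (assuming the usual T32 modified-immediate rules): AND with 0xFFFFFFF0 has no
// direct immediate encoding, but its complement 0x0000000F fits, so the BIC path above accepts
// it. AND with 0x0001FFFF fails both the AND and BIC forms, yet 0x0001FFFF + 1 is a power of two,
// so the final check accepts it; such low-bit masks can be materialized with a bit-field extract
// or a shift pair instead of an immediate.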
6246 
6247 bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) {
6248   uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
6249   if (DataType::Is64BitType(input_cst->GetType())) {
6250     Opcode high_opcode = opcode;
6251     vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare;
6252     switch (opcode) {
6253       case SUB:
6254         // Flip the operation to an ADD.
6255         value = -value;
6256         opcode = ADD;
6257         FALLTHROUGH_INTENDED;
6258       case ADD:
6259         if (Low32Bits(value) == 0u) {
6260           return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode);
6261         }
6262         high_opcode = ADC;
6263         low_flags_update = vixl32::FlagsUpdate::SetFlags;
6264         break;
6265       default:
6266         break;
6267     }
6268     return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) &&
6269            CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update);
6270   } else {
6271     return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode);
6272   }
6273 }
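// Worked example (a sketch of the checks above): for a 64-bit ADD of 0x0000000500000000 the low
// word is zero, so only the high word needs to encode (a plain ADD of #5 on the high register)
// and no carry is involved. For ADD of 0x0000000500000001 the low half must encode as a
// flag-setting ADD operand and the high half as an ADC operand, which is exactly the
// SetFlags/ADC combination checked above.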
6274 
6275 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
6276                                                      const FieldInfo& field_info) {
6277   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6278 
6279   LocationSummary* locations = instruction->GetLocations();
6280   uint32_t receiver_input = 0;
6281   vixl32::Register base = InputRegisterAt(instruction, receiver_input);
6282   Location out = locations->Out();
6283   bool is_volatile = field_info.IsVolatile();
6284   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6285   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6286   DataType::Type load_type = instruction->GetType();
6287   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6288 
6289   switch (load_type) {
6290     case DataType::Type::kBool:
6291     case DataType::Type::kUint8:
6292     case DataType::Type::kInt8:
6293     case DataType::Type::kUint16:
6294     case DataType::Type::kInt16:
6295     case DataType::Type::kInt32: {
6296       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6297       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6298       LoadOperandType operand_type = GetLoadOperandType(load_type);
6299       GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
6300       codegen_->MaybeRecordImplicitNullCheck(instruction);
6301       break;
6302     }
6303 
6304     case DataType::Type::kReference: {
6305       // /* HeapReference<Object> */ out = *(base + offset)
6306       if (codegen_->EmitBakerReadBarrier()) {
6307         Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6308         // Note that a potential implicit null check is handled in this
6309         // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
6310         codegen_->GenerateFieldLoadWithBakerReadBarrier(
6311             instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true);
6312         if (is_volatile) {
6313           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6314         }
6315       } else {
6316         {
6317           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6318           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6319           GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
6320           codegen_->MaybeRecordImplicitNullCheck(instruction);
6321         }
6322         if (is_volatile) {
6323           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6324         }
6325         // If read barriers are enabled, emit read barriers other than
6326         // Baker's using a slow path (and also unpoison the loaded
6327         // reference, if heap poisoning is enabled).
6328         codegen_->MaybeGenerateReadBarrierSlow(
6329             instruction, out, out, locations->InAt(receiver_input), offset);
6330       }
6331       break;
6332     }
6333 
6334     case DataType::Type::kInt64: {
6335       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6336       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6337       if (is_volatile && !atomic_ldrd_strd) {
6338         GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
6339       } else {
6340         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
6341       }
6342       codegen_->MaybeRecordImplicitNullCheck(instruction);
6343       break;
6344     }
6345 
6346     case DataType::Type::kFloat32: {
6347       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6348       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6349       GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
6350       codegen_->MaybeRecordImplicitNullCheck(instruction);
6351       break;
6352     }
6353 
6354     case DataType::Type::kFloat64: {
6355       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6356       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6357       vixl32::DRegister out_dreg = DRegisterFrom(out);
6358       if (is_volatile && !atomic_ldrd_strd) {
6359         vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
6360         vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
6361         GenerateWideAtomicLoad(base, offset, lo, hi);
6362         codegen_->MaybeRecordImplicitNullCheck(instruction);
6363         __ Vmov(out_dreg, lo, hi);
6364       } else {
6365         GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
6366         codegen_->MaybeRecordImplicitNullCheck(instruction);
6367       }
6368       break;
6369     }
6370 
6371     case DataType::Type::kUint32:
6372     case DataType::Type::kUint64:
6373     case DataType::Type::kVoid:
6374       LOG(FATAL) << "Unreachable type " << load_type;
6375       UNREACHABLE();
6376   }
6377 
6378   if (is_volatile) {
6379     if (load_type == DataType::Type::kReference) {
6380       // Memory barriers for reference loads have already been emitted
6381       // in the kReference case of the switch above.
6382     } else {
6383       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6384     }
6385   }
6386 }
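// Illustrative sketch of the volatile kInt64 path above, assuming no atomic LDRD/STRD support
// (registers are placeholders):
//   add    <tmp>, <obj>, #<offset>
//   ldrexd <out_lo>, <out_hi>, [<tmp>]   ; single-copy atomic 64-bit load
// followed by the kLoadAny barrier (a DMB) emitted at the end of this function. On cores with
// atomic LDRD the same access is just an LDRD followed by that barrier.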
6387 
6388 void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6389   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6390 }
6391 
6392 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6393   HandleFieldSet(instruction,
6394                  instruction->GetFieldInfo(),
6395                  instruction->GetValueCanBeNull(),
6396                  instruction->GetWriteBarrierKind());
6397 }
6398 
6399 void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6400   HandleFieldGet(instruction, instruction->GetFieldInfo());
6401 }
6402 
6403 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6404   HandleFieldGet(instruction, instruction->GetFieldInfo());
6405 }
6406 
6407 void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6408   HandleFieldGet(instruction, instruction->GetFieldInfo());
6409 }
6410 
6411 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6412   HandleFieldGet(instruction, instruction->GetFieldInfo());
6413 }
6414 
6415 void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6416   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6417 }
6418 
6419 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6420   HandleFieldSet(instruction,
6421                  instruction->GetFieldInfo(),
6422                  instruction->GetValueCanBeNull(),
6423                  instruction->GetWriteBarrierKind());
6424 }
6425 
6426 void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6427   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0));
6428 }
6429 
6430 void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6431   __ Mov(r0, instruction->GetFormat()->GetValue());
6432   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction);
6433 }
6434 
6435 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
6436     HUnresolvedInstanceFieldGet* instruction) {
6437   FieldAccessCallingConventionARMVIXL calling_convention;
6438   codegen_->CreateUnresolvedFieldLocationSummary(
6439       instruction, instruction->GetFieldType(), calling_convention);
6440 }
6441 
6442 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
6443     HUnresolvedInstanceFieldGet* instruction) {
6444   FieldAccessCallingConventionARMVIXL calling_convention;
6445   codegen_->GenerateUnresolvedFieldAccess(instruction,
6446                                           instruction->GetFieldType(),
6447                                           instruction->GetFieldIndex(),
6448                                           calling_convention);
6449 }
6450 
6451 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
6452     HUnresolvedInstanceFieldSet* instruction) {
6453   FieldAccessCallingConventionARMVIXL calling_convention;
6454   codegen_->CreateUnresolvedFieldLocationSummary(
6455       instruction, instruction->GetFieldType(), calling_convention);
6456 }
6457 
6458 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
6459     HUnresolvedInstanceFieldSet* instruction) {
6460   FieldAccessCallingConventionARMVIXL calling_convention;
6461   codegen_->GenerateUnresolvedFieldAccess(instruction,
6462                                           instruction->GetFieldType(),
6463                                           instruction->GetFieldIndex(),
6464                                           calling_convention);
6465 }
6466 
6467 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
6468     HUnresolvedStaticFieldGet* instruction) {
6469   FieldAccessCallingConventionARMVIXL calling_convention;
6470   codegen_->CreateUnresolvedFieldLocationSummary(
6471       instruction, instruction->GetFieldType(), calling_convention);
6472 }
6473 
6474 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
6475     HUnresolvedStaticFieldGet* instruction) {
6476   FieldAccessCallingConventionARMVIXL calling_convention;
6477   codegen_->GenerateUnresolvedFieldAccess(instruction,
6478                                           instruction->GetFieldType(),
6479                                           instruction->GetFieldIndex(),
6480                                           calling_convention);
6481 }
6482 
6483 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
6484     HUnresolvedStaticFieldSet* instruction) {
6485   FieldAccessCallingConventionARMVIXL calling_convention;
6486   codegen_->CreateUnresolvedFieldLocationSummary(
6487       instruction, instruction->GetFieldType(), calling_convention);
6488 }
6489 
6490 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
6491     HUnresolvedStaticFieldSet* instruction) {
6492   FieldAccessCallingConventionARMVIXL calling_convention;
6493   codegen_->GenerateUnresolvedFieldAccess(instruction,
6494                                           instruction->GetFieldType(),
6495                                           instruction->GetFieldIndex(),
6496                                           calling_convention);
6497 }
6498 
6499 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6500   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6501   locations->SetInAt(0, Location::RequiresRegister());
6502 }
6503 
6504 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
6505   if (CanMoveNullCheckToUser(instruction)) {
6506     return;
6507   }
6508 
6509   UseScratchRegisterScope temps(GetVIXLAssembler());
6510   // Ensure the pc position is recorded immediately after the `ldr` instruction.
6511   ExactAssemblyScope aas(GetVIXLAssembler(),
6512                          vixl32::kMaxInstructionSizeInBytes,
6513                          CodeBufferCheckScope::kMaximumSize);
6514   __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
6515   RecordPcInfo(instruction);
6516 }
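// Illustrative note (hedged): the single `ldr` above intentionally dereferences the object with
// no offset; if the reference is null the load faults, and the runtime's fault handler uses the
// pc recorded by RecordPcInfo() to turn the signal into a NullPointerException, so no explicit
// compare-and-branch is needed on the fast path.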
6517 
6518 void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
6519   NullCheckSlowPathARMVIXL* slow_path =
6520       new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction);
6521   AddSlowPath(slow_path);
6522   __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
6523 }
6524 
6525 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6526   codegen_->GenerateNullCheck(instruction);
6527 }
6528 
6529 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type,
6530                                                     Location out_loc,
6531                                                     vixl32::Register base,
6532                                                     vixl32::Register reg_index,
6533                                                     vixl32::Condition cond) {
6534   uint32_t shift_count = DataType::SizeShift(type);
6535   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6536 
6537   switch (type) {
6538     case DataType::Type::kBool:
6539     case DataType::Type::kUint8:
6540       __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
6541       break;
6542     case DataType::Type::kInt8:
6543       __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
6544       break;
6545     case DataType::Type::kUint16:
6546       __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
6547       break;
6548     case DataType::Type::kInt16:
6549       __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
6550       break;
6551     case DataType::Type::kReference:
6552     case DataType::Type::kInt32:
6553       __ Ldr(cond, RegisterFrom(out_loc), mem_address);
6554       break;
6555     // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
6556     case DataType::Type::kInt64:
6557     case DataType::Type::kFloat32:
6558     case DataType::Type::kFloat64:
6559     default:
6560       LOG(FATAL) << "Unreachable type " << type;
6561       UNREACHABLE();
6562   }
6563 }
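// Usage sketch (placeholder registers): for an int[] element the size shift is 2, so the call
// above emits
//   ldr  <out>, [<base>, <index>, lsl #2]
// and for a char element (kUint16, shift 1)
//   ldrh <out>, [<base>, <index>, lsl #1]
// with <base> already pointing at the array's data, not at the array object itself.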
6564 
6565 void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
6566                                                    Location loc,
6567                                                    vixl32::Register base,
6568                                                    vixl32::Register reg_index,
6569                                                    vixl32::Condition cond) {
6570   uint32_t shift_count = DataType::SizeShift(type);
6571   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6572 
6573   switch (type) {
6574     case DataType::Type::kBool:
6575     case DataType::Type::kUint8:
6576     case DataType::Type::kInt8:
6577       __ Strb(cond, RegisterFrom(loc), mem_address);
6578       break;
6579     case DataType::Type::kUint16:
6580     case DataType::Type::kInt16:
6581       __ Strh(cond, RegisterFrom(loc), mem_address);
6582       break;
6583     case DataType::Type::kReference:
6584     case DataType::Type::kInt32:
6585       __ Str(cond, RegisterFrom(loc), mem_address);
6586       break;
6587     // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
6588     case DataType::Type::kInt64:
6589     case DataType::Type::kFloat32:
6590     case DataType::Type::kFloat64:
6591     default:
6592       LOG(FATAL) << "Unreachable type " << type;
6593       UNREACHABLE();
6594   }
6595 }
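// Note (assuming standard T32 addressing modes): LDRD/STRD and VLDR/VSTR only take immediate
// offsets in T32, which is why the 64-bit and floating-point cases in the two helpers above are
// unreachable; callers instead fold the scaled index into the base with an ADD and then use an
// immediate-offset load or store.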
6596 
6597 void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6598   bool object_array_get_with_read_barrier =
6599       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6600   LocationSummary* locations =
6601       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6602                                                        object_array_get_with_read_barrier
6603                                                            ? LocationSummary::kCallOnSlowPath
6604                                                            : LocationSummary::kNoCall);
6605   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6606     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6607   }
6608   locations->SetInAt(0, Location::RequiresRegister());
6609   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6610   if (DataType::IsFloatingPointType(instruction->GetType())) {
6611     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6612   } else {
6613     // The output overlaps for an object array get for non-Baker read barriers: we do not want
6614     // the load to overwrite the object's location, as we need it to emit the read barrier.
6615     // Baker read barrier implementation with introspection does not have this restriction.
6616     bool overlap = object_array_get_with_read_barrier && !kUseBakerReadBarrier;
6617     locations->SetOut(Location::RequiresRegister(),
6618                       overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
6619   }
6620   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6621     if (instruction->GetIndex()->IsConstant()) {
6622       // Array loads with constant index are treated as field loads.
6623       // We need a temporary register for the read barrier load in
6624       // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6625       // only if the offset is too big.
6626       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
6627       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
6628       offset += index << DataType::SizeShift(DataType::Type::kReference);
6629       if (offset >= kReferenceLoadMinFarOffset) {
6630         locations->AddTemp(Location::RequiresRegister());
6631       }
6632     } else {
6633       // We need a non-scratch temporary for the array data pointer in
6634       // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
6635       locations->AddTemp(Location::RequiresRegister());
6636     }
6637   } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6638     // Also need a temporary for String compression feature.
6639     locations->AddTemp(Location::RequiresRegister());
6640   }
6641 }
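// Illustrative note (threshold value not restated here): for a reference array, element i lives
// at data_offset + 4 * i, so a large constant index can push the field-style Baker read barrier
// load past kReferenceLoadMinFarOffset; the extra temp reserved above presumably gives
// GenerateFieldLoadWithBakerReadBarrier() a register to rebase the address so the introspected
// load can keep a small immediate offset.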
6642 
6643 void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6644   LocationSummary* locations = instruction->GetLocations();
6645   Location obj_loc = locations->InAt(0);
6646   vixl32::Register obj = InputRegisterAt(instruction, 0);
6647   Location index = locations->InAt(1);
6648   Location out_loc = locations->Out();
6649   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6650   DataType::Type type = instruction->GetType();
6651   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
6652                                         instruction->IsStringCharAt();
6653   HInstruction* array_instr = instruction->GetArray();
6654   bool has_intermediate_address = array_instr->IsIntermediateAddress();
6655 
6656   switch (type) {
6657     case DataType::Type::kBool:
6658     case DataType::Type::kUint8:
6659     case DataType::Type::kInt8:
6660     case DataType::Type::kUint16:
6661     case DataType::Type::kInt16:
6662     case DataType::Type::kInt32: {
6663       vixl32::Register length;
6664       if (maybe_compressed_char_at) {
6665         length = RegisterFrom(locations->GetTemp(0));
6666         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6667         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6668         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6669         GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
6670         codegen_->MaybeRecordImplicitNullCheck(instruction);
6671       }
6672       if (index.IsConstant()) {
6673         int32_t const_index = Int32ConstantFrom(index);
6674         if (maybe_compressed_char_at) {
6675           vixl32::Label uncompressed_load, done;
6676           vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6677           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
6678           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6679                         "Expecting 0=compressed, 1=uncompressed");
6680           __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6681           GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
6682                                          RegisterFrom(out_loc),
6683                                          obj,
6684                                          data_offset + const_index);
6685           __ B(final_label);
6686           __ Bind(&uncompressed_load);
6687           GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16),
6688                                          RegisterFrom(out_loc),
6689                                          obj,
6690                                          data_offset + (const_index << 1));
6691           if (done.IsReferenced()) {
6692             __ Bind(&done);
6693           }
6694         } else {
6695           uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type));
6696 
6697           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6698           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6699           LoadOperandType load_type = GetLoadOperandType(type);
6700           GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
6701           codegen_->MaybeRecordImplicitNullCheck(instruction);
6702         }
6703       } else {
6704         UseScratchRegisterScope temps(GetVIXLAssembler());
6705         vixl32::Register temp = temps.Acquire();
6706 
6707         if (has_intermediate_address) {
6708           // We do not need to compute the intermediate address from the array: the
6709           // input instruction has done it already. See the comment in
6710           // `TryExtractArrayAccessAddress()`.
6711           if (kIsDebugBuild) {
6712             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6713             DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6714           }
6715           temp = obj;
6716         } else {
6717           __ Add(temp, obj, data_offset);
6718         }
6719         if (maybe_compressed_char_at) {
6720           vixl32::Label uncompressed_load, done;
6721           vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6722           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
6723           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6724                         "Expecting 0=compressed, 1=uncompressed");
6725           __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6726           __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
6727           __ B(final_label);
6728           __ Bind(&uncompressed_load);
6729           __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
6730           if (done.IsReferenced()) {
6731             __ Bind(&done);
6732           }
6733         } else {
6734           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6735           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6736           codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6737           codegen_->MaybeRecordImplicitNullCheck(instruction);
6738         }
6739       }
6740       break;
6741     }
6742 
6743     case DataType::Type::kReference: {
6744       // The read barrier instrumentation of object ArrayGet
6745       // instructions does not support the HIntermediateAddress
6746       // instruction.
6747       DCHECK(!(has_intermediate_address && codegen_->EmitReadBarrier()));
6748 
6749       static_assert(
6750           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6751           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6752       // /* HeapReference<Object> */ out =
6753       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6754       if (codegen_->EmitBakerReadBarrier()) {
6755         // Note that a potential implicit null check is handled in this
6756         // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
6757         DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
6758         if (index.IsConstant()) {
6759           // Array load with a constant index can be treated as a field load.
6760           Location maybe_temp =
6761               (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6762           data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
6763           codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6764                                                           out_loc,
6765                                                           obj,
6766                                                           data_offset,
6767                                                           maybe_temp,
6768                                                           /* needs_null_check= */ false);
6769         } else {
6770           Location temp = locations->GetTemp(0);
6771           codegen_->GenerateArrayLoadWithBakerReadBarrier(
6772               out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false);
6773         }
6774       } else {
6775         vixl32::Register out = OutputRegister(instruction);
6776         if (index.IsConstant()) {
6777           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6778           {
6779             // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6780             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6781             GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
6782             codegen_->MaybeRecordImplicitNullCheck(instruction);
6783           }
6784           // If read barriers are enabled, emit read barriers other than
6785           // Baker's using a slow path (and also unpoison the loaded
6786           // reference, if heap poisoning is enabled).
6787           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6788         } else {
6789           UseScratchRegisterScope temps(GetVIXLAssembler());
6790           vixl32::Register temp = temps.Acquire();
6791 
6792           if (has_intermediate_address) {
6793             // We do not need to compute the intermediate address from the array: the
6794             // input instruction has done it already. See the comment in
6795             // `TryExtractArrayAccessAddress()`.
6796             if (kIsDebugBuild) {
6797               HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6798               DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6799             }
6800             temp = obj;
6801           } else {
6802             __ Add(temp, obj, data_offset);
6803           }
6804           {
6805             // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6806             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6807             codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6808             temps.Close();
6809             codegen_->MaybeRecordImplicitNullCheck(instruction);
6810           }
6811           // If read barriers are enabled, emit read barriers other than
6812           // Baker's using a slow path (and also unpoison the loaded
6813           // reference, if heap poisoning is enabled).
6814           codegen_->MaybeGenerateReadBarrierSlow(
6815               instruction, out_loc, out_loc, obj_loc, data_offset, index);
6816         }
6817       }
6818       break;
6819     }
6820 
6821     case DataType::Type::kInt64: {
6822       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6823       // As two macro instructions can be emitted the max size is doubled.
6824       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6825       if (index.IsConstant()) {
6826         size_t offset =
6827             (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6828         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
6829       } else {
6830         UseScratchRegisterScope temps(GetVIXLAssembler());
6831         vixl32::Register temp = temps.Acquire();
6832         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6833         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
6834       }
6835       codegen_->MaybeRecordImplicitNullCheck(instruction);
6836       break;
6837     }
6838 
6839     case DataType::Type::kFloat32: {
6840       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6841       // As two macro instructions can be emitted the max size is doubled.
6842       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6843       vixl32::SRegister out = SRegisterFrom(out_loc);
6844       if (index.IsConstant()) {
6845         size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6846         GetAssembler()->LoadSFromOffset(out, obj, offset);
6847       } else {
6848         UseScratchRegisterScope temps(GetVIXLAssembler());
6849         vixl32::Register temp = temps.Acquire();
6850         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6851         GetAssembler()->LoadSFromOffset(out, temp, data_offset);
6852       }
6853       codegen_->MaybeRecordImplicitNullCheck(instruction);
6854       break;
6855     }
6856 
6857     case DataType::Type::kFloat64: {
6858       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6859       // As two macro instructions can be emitted the max size is doubled.
6860       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6861       if (index.IsConstant()) {
6862         size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6863         GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
6864       } else {
6865         UseScratchRegisterScope temps(GetVIXLAssembler());
6866         vixl32::Register temp = temps.Acquire();
6867         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6868         GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
6869       }
6870       codegen_->MaybeRecordImplicitNullCheck(instruction);
6871       break;
6872     }
6873 
6874     case DataType::Type::kUint32:
6875     case DataType::Type::kUint64:
6876     case DataType::Type::kVoid:
6877       LOG(FATAL) << "Unreachable type " << type;
6878       UNREACHABLE();
6879   }
6880 }
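// Note on the compressed-string paths above (grounded in the static_assert): String.count packs
// (length << 1) | compression_flag, with 0 meaning compressed. `Lsrs length, length, #1` both
// recovers the character count and shifts the flag into the carry, so `B cs` branches to the
// 16-bit load only for uncompressed strings and falls through to the 8-bit load otherwise.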
6881 
6882 void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
6883   DataType::Type value_type = instruction->GetComponentType();
6884 
6885   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6886   bool needs_write_barrier =
6887       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6888   bool check_gc_card =
6889       codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
6890 
6891   bool needs_type_check = instruction->NeedsTypeCheck();
6892 
6893   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6894       instruction,
6895       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6896 
6897   locations->SetInAt(0, Location::RequiresRegister());
6898   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6899   if (DataType::IsFloatingPointType(value_type)) {
6900     locations->SetInAt(2, Location::RequiresFpuRegister());
6901   } else {
6902     locations->SetInAt(2, Location::RequiresRegister());
6903   }
6904   if (needs_write_barrier || check_gc_card || instruction->NeedsTypeCheck()) {
6905     // Temporary registers for type checking, write barrier, checking the dirty bit, or register
6906     // poisoning.
6907     locations->AddRegisterTemps(2);
6908   } else if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
6909     locations->AddTemp(Location::RequiresRegister());
6910   }
6911 }
6912 
6913 void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
6914   LocationSummary* locations = instruction->GetLocations();
6915   vixl32::Register array = InputRegisterAt(instruction, 0);
6916   Location index = locations->InAt(1);
6917   DataType::Type value_type = instruction->GetComponentType();
6918   bool needs_type_check = instruction->NeedsTypeCheck();
6919   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6920   bool needs_write_barrier =
6921       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6922   uint32_t data_offset =
6923       mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
6924   Location value_loc = locations->InAt(2);
6925   HInstruction* array_instr = instruction->GetArray();
6926   bool has_intermediate_address = array_instr->IsIntermediateAddress();
6927 
6928   switch (value_type) {
6929     case DataType::Type::kBool:
6930     case DataType::Type::kUint8:
6931     case DataType::Type::kInt8:
6932     case DataType::Type::kUint16:
6933     case DataType::Type::kInt16:
6934     case DataType::Type::kInt32: {
6935       if (index.IsConstant()) {
6936         int32_t const_index = Int32ConstantFrom(index);
6937         uint32_t full_offset =
6938             data_offset + (const_index << DataType::SizeShift(value_type));
6939         StoreOperandType store_type = GetStoreOperandType(value_type);
6940         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6941         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6942         GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
6943         codegen_->MaybeRecordImplicitNullCheck(instruction);
6944       } else {
6945         UseScratchRegisterScope temps(GetVIXLAssembler());
6946         vixl32::Register temp = temps.Acquire();
6947 
6948         if (has_intermediate_address) {
6949           // We do not need to compute the intermediate address from the array: the
6950           // input instruction has done it already. See the comment in
6951           // `TryExtractArrayAccessAddress()`.
6952           if (kIsDebugBuild) {
6953             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6954             DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6955           }
6956           temp = array;
6957         } else {
6958           __ Add(temp, array, data_offset);
6959         }
6960         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6961         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6962         codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6963         codegen_->MaybeRecordImplicitNullCheck(instruction);
6964       }
6965       break;
6966     }
6967 
6968     case DataType::Type::kReference: {
6969       vixl32::Register value = RegisterFrom(value_loc);
6970       // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
6971       // See the comment in instruction_simplifier_shared.cc.
6972       DCHECK(!has_intermediate_address);
6973 
6974       if (instruction->InputAt(2)->IsNullConstant()) {
6975         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6976         // As two macro instructions can be emitted the max size is doubled.
6977         EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6978         // Just setting null.
6979         if (index.IsConstant()) {
6980           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6981           GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
6982         } else {
6983           DCHECK(index.IsRegister()) << index;
6984           UseScratchRegisterScope temps(GetVIXLAssembler());
6985           vixl32::Register temp = temps.Acquire();
6986           __ Add(temp, array, data_offset);
6987           codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6988         }
6989         codegen_->MaybeRecordImplicitNullCheck(instruction);
6990         if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6991           // We need to set a write barrier here even though we are writing null, since this write
6992           // barrier is being relied on.
6993           DCHECK(needs_write_barrier);
6994           vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
6995           vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
6996           codegen_->MarkGCCard(temp1, temp2, array);
6997         }
6998         DCHECK(!needs_type_check);
6999         break;
7000       }
7001 
7002       const bool can_value_be_null = instruction->GetValueCanBeNull();
7003       // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
7004       // value is null (without an extra CompareAndBranchIfZero since we already checked if the
7005       // value is null for the type check).
7006       const bool skip_marking_gc_card =
7007           can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
7008       vixl32::Label do_store;
7009       vixl32::Label skip_writing_card;
7010       if (can_value_be_null) {
7011         if (skip_marking_gc_card) {
7012           __ CompareAndBranchIfZero(value, &skip_writing_card, /* is_far_target= */ false);
7013         } else {
7014           __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false);
7015         }
7016       }
7017 
7018       SlowPathCodeARMVIXL* slow_path = nullptr;
7019       if (needs_type_check) {
7020         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
7021         codegen_->AddSlowPath(slow_path);
7022 
7023         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7024         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7025         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7026 
7027         // Note that when read barriers are enabled, the type checks
7028         // are performed without read barriers.  This is fine, even in
7029         // the case where a class object is in the from-space after
7030         // the flip, as a comparison involving such a type would not
7031         // produce a false positive; it may of course produce a false
7032         // negative, in which case we would take the ArraySet slow
7033         // path.
7034 
7035         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7036         vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7037 
7038         {
7039           // Ensure we record the pc position immediately after the `ldr` instruction.
7040           ExactAssemblyScope aas(GetVIXLAssembler(),
7041                                  vixl32::kMaxInstructionSizeInBytes,
7042                                  CodeBufferCheckScope::kMaximumSize);
7043           // /* HeapReference<Class> */ temp1 = array->klass_
7044           __ ldr(temp1, MemOperand(array, class_offset));
7045           codegen_->MaybeRecordImplicitNullCheck(instruction);
7046         }
7047         GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7048 
7049         // /* HeapReference<Class> */ temp1 = temp1->component_type_
7050         GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
7051         // /* HeapReference<Class> */ temp2 = value->klass_
7052         GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
7053         // If heap poisoning is enabled, no need to unpoison `temp1`
7054         // nor `temp2`, as we are comparing two poisoned references.
7055         __ Cmp(temp1, temp2);
7056 
7057         if (instruction->StaticTypeOfArrayIsObjectArray()) {
7058           vixl32::Label do_put;
7059           __ B(eq, &do_put, /* is_far_target= */ false);
7060           // If heap poisoning is enabled, the `temp1` reference has
7061           // not been unpoisoned yet; unpoison it now.
7062           GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7063 
7064           // /* HeapReference<Class> */ temp1 = temp1->super_class_
7065           GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
7066           // If heap poisoning is enabled, no need to unpoison
7067           // `temp1`, as we are comparing against null below.
7068           __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
7069           __ Bind(&do_put);
7070         } else {
7071           __ B(ne, slow_path->GetEntryLabel());
7072         }
7073       }
7074 
7075       if (can_value_be_null && !skip_marking_gc_card) {
7076         DCHECK(do_store.IsReferenced());
7077         __ Bind(&do_store);
7078       }
7079 
7080       if (needs_write_barrier) {
7081         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7082         vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7083         codegen_->MarkGCCard(temp1, temp2, array);
7084       } else if (codegen_->ShouldCheckGCCard(
7085                      value_type, instruction->GetValue(), write_barrier_kind)) {
7086         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7087         vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7088         codegen_->CheckGCCardIsValid(temp1, temp2, array);
7089       }
7090 
7091       if (skip_marking_gc_card) {
7092         // Note that we don't check that the GC card is valid, as the card may legitimately be clean.
7093         DCHECK(skip_writing_card.IsReferenced());
7094         __ Bind(&skip_writing_card);
7095       }
7096 
7097       vixl32::Register source = value;
7098       if (kPoisonHeapReferences) {
7099         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7100         DCHECK_EQ(value_type, DataType::Type::kReference);
7101         __ Mov(temp1, value);
7102         GetAssembler()->PoisonHeapReference(temp1);
7103         source = temp1;
7104       }
7105 
7106       {
7107         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7108         // As two macro instructions can be emitted the max size is doubled.
7109         EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7110         if (index.IsConstant()) {
7111           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7112           GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
7113         } else {
7114           DCHECK(index.IsRegister()) << index;
7115 
7116           UseScratchRegisterScope temps(GetVIXLAssembler());
7117           vixl32::Register temp = temps.Acquire();
7118           __ Add(temp, array, data_offset);
7119           codegen_->StoreToShiftedRegOffset(value_type,
7120                                             LocationFrom(source),
7121                                             temp,
7122                                             RegisterFrom(index));
7123         }
7124 
7125         if (can_value_be_null || !needs_type_check) {
7126           codegen_->MaybeRecordImplicitNullCheck(instruction);
7127         }
7128       }
7129 
7130       if (slow_path != nullptr) {
7131         __ Bind(slow_path->GetExitLabel());
7132       }
7133 
7134       break;
7135     }
7136 
7137     case DataType::Type::kInt64: {
7138       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7139       // As two macro instructions can be emitted the max size is doubled.
7140       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7141       Location value = locations->InAt(2);
7142       if (index.IsConstant()) {
7143         size_t offset =
7144             (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7145         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
7146       } else {
7147         UseScratchRegisterScope temps(GetVIXLAssembler());
7148         vixl32::Register temp = temps.Acquire();
7149         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7150         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
7151       }
7152       codegen_->MaybeRecordImplicitNullCheck(instruction);
7153       break;
7154     }
7155 
7156     case DataType::Type::kFloat32: {
7157       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7158       // As two macro instructions can be emitted the max size is doubled.
7159       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7160       Location value = locations->InAt(2);
7161       DCHECK(value.IsFpuRegister());
7162       if (index.IsConstant()) {
7163         size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7164         GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
7165       } else {
7166         UseScratchRegisterScope temps(GetVIXLAssembler());
7167         vixl32::Register temp = temps.Acquire();
7168         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
7169         GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
7170       }
7171       codegen_->MaybeRecordImplicitNullCheck(instruction);
7172       break;
7173     }
7174 
7175     case DataType::Type::kFloat64: {
7176       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7177       // As two macro instructions can be emitted the max size is doubled.
7178       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7179       Location value = locations->InAt(2);
7180       DCHECK(value.IsFpuRegisterPair());
7181       if (index.IsConstant()) {
7182         size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7183         GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
7184       } else {
7185         UseScratchRegisterScope temps(GetVIXLAssembler());
7186         vixl32::Register temp = temps.Acquire();
7187         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7188         GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
7189       }
7190       codegen_->MaybeRecordImplicitNullCheck(instruction);
7191       break;
7192     }
7193 
7194     case DataType::Type::kUint32:
7195     case DataType::Type::kUint64:
7196     case DataType::Type::kVoid:
7197       LOG(FATAL) << "Unreachable type " << value_type;
7198       UNREACHABLE();
7199   }
7200 }
7201 
7202 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7203   LocationSummary* locations =
7204       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7205   locations->SetInAt(0, Location::RequiresRegister());
7206   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7207 }
7208 
7209 void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7210   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
7211   vixl32::Register obj = InputRegisterAt(instruction, 0);
7212   vixl32::Register out = OutputRegister(instruction);
7213   {
7214     ExactAssemblyScope aas(GetVIXLAssembler(),
7215                            vixl32::kMaxInstructionSizeInBytes,
7216                            CodeBufferCheckScope::kMaximumSize);
7217     __ ldr(out, MemOperand(obj, offset));
7218     codegen_->MaybeRecordImplicitNullCheck(instruction);
7219   }
7220   // Mask out compression flag from String's array length.
7221   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
7222     __ Lsr(out, out, 1u);
7223   }
7224 }
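// Note: plain array lengths are stored unpacked, so the extra `Lsr #1` above applies only to
// String.length(), where it drops the compression flag from the packed count field read by the
// `ldr`.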
7225 
7226 void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7227   LocationSummary* locations =
7228       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7229 
7230   locations->SetInAt(0, Location::RequiresRegister());
7231   locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
7232   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7233 }
7234 
7235 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7236   vixl32::Register out = OutputRegister(instruction);
7237   vixl32::Register first = InputRegisterAt(instruction, 0);
7238   Location second = instruction->GetLocations()->InAt(1);
7239 
7240   if (second.IsRegister()) {
7241     __ Add(out, first, RegisterFrom(second));
7242   } else {
7243     __ Add(out, first, Int32ConstantFrom(second));
7244   }
7245 }
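// Usage sketch (illustrative registers, assuming a 12-byte header for arrays of 4-byte
// elements): HIntermediateAddress is what lets a loop hoist `array + data_offset` once, e.g.
//   add r2, r0, #12            ; r0 = array object
//   ldr r3, [r2, r1, lsl #2]   ; per-iteration element load
// so each array access in the loop body only pays for the shifted-index load.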
7246 
7247 void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
7248     HIntermediateAddressIndex* instruction) {
7249   LOG(FATAL) << "Unreachable " << instruction->GetId();
7250 }
7251 
7252 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
7253     HIntermediateAddressIndex* instruction) {
7254   LOG(FATAL) << "Unreachable " << instruction->GetId();
7255 }
7256 
7257 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7258   RegisterSet caller_saves = RegisterSet::Empty();
7259   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7260   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
7261   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
7262   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
7263 
7264   HInstruction* index = instruction->InputAt(0);
7265   HInstruction* length = instruction->InputAt(1);
7266   // If both index and length are constants, we can check the bounds statically. However, if at
7267   // least one of them is not encodable, ArmEncodableConstantOrRegister would create a
7268   // Location::RequiresRegister(), which we do not want here. Instead, we create constant
7269   // locations for both inputs.
7270   bool both_const = index->IsConstant() && length->IsConstant();
7271   locations->SetInAt(0, both_const
7272       ? Location::ConstantLocation(index)
7273       : ArmEncodableConstantOrRegister(index, CMP));
7274   locations->SetInAt(1, both_const
7275       ? Location::ConstantLocation(length)
7276       : ArmEncodableConstantOrRegister(length, CMP));
7277 }
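// Illustrative note: with, say, index 8 and length 4 both constant, the two constant locations
// above let the code generator below skip the Cmp entirely and branch straight to the slow path;
// with a valid constant pair it emits nothing at all.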
7278 
7279 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7280   LocationSummary* locations = instruction->GetLocations();
7281   Location index_loc = locations->InAt(0);
7282   Location length_loc = locations->InAt(1);
7283 
7284   if (length_loc.IsConstant()) {
7285     int32_t length = Int32ConstantFrom(length_loc);
7286     if (index_loc.IsConstant()) {
7287       // BCE will remove the bounds check if we are guaranteed to pass.
7288       int32_t index = Int32ConstantFrom(index_loc);
7289       if (index < 0 || index >= length) {
7290         SlowPathCodeARMVIXL* slow_path =
7291             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7292         codegen_->AddSlowPath(slow_path);
7293         __ B(slow_path->GetEntryLabel());
7294       } else {
7295         // Some optimization after BCE may have generated this, and we should not
7296         // generate a bounds check if it is a valid range.
7297       }
7298       return;
7299     }
7300 
7301     SlowPathCodeARMVIXL* slow_path =
7302         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7303     __ Cmp(RegisterFrom(index_loc), length);
7304     codegen_->AddSlowPath(slow_path);
7305     __ B(hs, slow_path->GetEntryLabel());
7306   } else {
7307     SlowPathCodeARMVIXL* slow_path =
7308         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7309     __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
7310     codegen_->AddSlowPath(slow_path);
7311     __ B(ls, slow_path->GetEntryLabel());
7312   }
7313 }
7314 
7315 void CodeGeneratorARMVIXL::MaybeMarkGCCard(vixl32::Register temp,
7316                                            vixl32::Register card,
7317                                            vixl32::Register object,
7318                                            vixl32::Register value,
7319                                            bool emit_null_check) {
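  // A null `value` never needs a card mark: storing null does not create a reference for the
  // GC to remember, so it is safe to skip the marking below in that case.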
7320   vixl32::Label is_null;
7321   if (emit_null_check) {
7322     __ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false);
7323   }
7324   MarkGCCard(temp, card, object);
7325   if (emit_null_check) {
7326     __ Bind(&is_null);
7327   }
7328 }
7329 
7330 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
7331                                       vixl32::Register card,
7332                                       vixl32::Register object) {
7333   // Load the address of the card table into `card`.
7334   GetAssembler()->LoadFromOffset(
7335       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7336   // Calculate the offset (in the card table) of the card corresponding to `object`.
7337   __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7338   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
7339   // `object`'s card.
7340   //
7341   // Register `card` contains the address of the card table. Note that the card
7342   // table's base is biased during its creation so that it always starts at an
7343   // address whose least-significant byte is equal to `kCardDirty` (see
7344   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
7345   // below writes the `kCardDirty` (byte) value into the `object`'s card
7346   // (located at `card + object >> kCardShift`).
7347   //
7348   // This dual use of the value in register `card` (1. to calculate the location
7349   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
7350   // (no need to explicitly load `kCardDirty` as an immediate value).
7351   __ Strb(card, MemOperand(card, temp));
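  // Illustration (example values, not taken from this file): with a card shift of 10, an
  // object at address A dirties the byte at `biased_begin + (A >> 10)`, and the byte written
  // is the low byte of `biased_begin` itself, which the biasing described above guarantees
  // to equal `kCardDirty`:
  //     uint8_t* card_entry = biased_begin + (A >> kCardShift);
  //     *card_entry = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_begin));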
7352 }
7353 
7354 void CodeGeneratorARMVIXL::CheckGCCardIsValid(vixl32::Register temp,
7355                                               vixl32::Register card,
7356                                               vixl32::Register object) {
7357   vixl32::Label done;
7358   // Load the address of the card table into `card`.
7359   GetAssembler()->LoadFromOffset(
7360       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7361   // Calculate the offset (in the card table) of the card corresponding to `object`.
7362   __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7363   // assert (!clean || !self->is_gc_marking)
7364   __ Ldrb(temp, MemOperand(card, temp));
7365   static_assert(gc::accounting::CardTable::kCardClean == 0);
7366   __ CompareAndBranchIfNonZero(temp, &done, /*is_far_target=*/false);
7367   __ CompareAndBranchIfZero(mr, &done, /*is_far_target=*/false);
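  // Reaching this point means the card is still clean while the GC is concurrently marking;
  // that would indicate a missed card mark, so trap (BKPT) to catch the bug early.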
7368   __ Bkpt(0);
7369   __ Bind(&done);
7370 }
7371 
7372 void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
7373   LOG(FATAL) << "Unreachable";
7374 }
7375 
7376 void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
7377   if (instruction->GetNext()->IsSuspendCheck() &&
7378       instruction->GetBlock()->GetLoopInformation() != nullptr) {
7379     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
7380     // The back edge will generate the suspend check.
7381     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
7382   }
7383 
7384   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
7385 }
7386 
7387 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7388   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7389       instruction, LocationSummary::kCallOnSlowPath);
7390   locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7391 }
7392 
7393 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7394   HBasicBlock* block = instruction->GetBlock();
7395   if (block->GetLoopInformation() != nullptr) {
7396     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7397     // The back edge will generate the suspend check.
7398     return;
7399   }
7400   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7401     // The goto will generate the suspend check.
7402     return;
7403   }
7404   GenerateSuspendCheck(instruction, nullptr);
7405   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14);
7406 }
7407 
7408 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
7409                                                            HBasicBlock* successor) {
7410   SuspendCheckSlowPathARMVIXL* slow_path =
7411       down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
7412   if (slow_path == nullptr) {
7413     slow_path =
7414         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor);
7415     instruction->SetSlowPath(slow_path);
7416     codegen_->AddSlowPath(slow_path);
7417     if (successor != nullptr) {
7418       DCHECK(successor->IsLoopHeader());
7419     }
7420   } else {
7421     DCHECK_EQ(slow_path->GetSuccessor(), successor);
7422   }
7423 
7424   UseScratchRegisterScope temps(GetVIXLAssembler());
7425   vixl32::Register temp = temps.Acquire();
7426   GetAssembler()->LoadFromOffset(
7427       kLoadWord, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
7428   __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
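  // TST sets Z only when none of the suspend/checkpoint bits are set, so `ne` below means
  // "a suspend or checkpoint request is pending" and `eq` means there is nothing to do.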
7429   if (successor == nullptr) {
7430     __ B(ne, slow_path->GetEntryLabel());
7431     __ Bind(slow_path->GetReturnLabel());
7432   } else {
7433     __ B(eq, codegen_->GetLabelOf(successor));
7434     __ B(slow_path->GetEntryLabel());
7435   }
7436 }
7437 
7438 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
7439   return codegen_->GetAssembler();
7440 }
7441 
7442 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
7443   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7444   MoveOperands* move = moves_[index];
7445   Location source = move->GetSource();
7446   Location destination = move->GetDestination();
7447 
7448   if (source.IsRegister()) {
7449     if (destination.IsRegister()) {
7450       __ Mov(RegisterFrom(destination), RegisterFrom(source));
7451     } else if (destination.IsFpuRegister()) {
7452       __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
7453     } else {
7454       DCHECK(destination.IsStackSlot());
7455       GetAssembler()->StoreToOffset(kStoreWord,
7456                                     RegisterFrom(source),
7457                                     sp,
7458                                     destination.GetStackIndex());
7459     }
7460   } else if (source.IsStackSlot()) {
7461     if (destination.IsRegister()) {
7462       GetAssembler()->LoadFromOffset(kLoadWord,
7463                                      RegisterFrom(destination),
7464                                      sp,
7465                                      source.GetStackIndex());
7466     } else if (destination.IsFpuRegister()) {
7467       GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
7468     } else {
7469       DCHECK(destination.IsStackSlot());
7470       vixl32::Register temp = temps.Acquire();
7471       GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
7472       GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7473     }
7474   } else if (source.IsFpuRegister()) {
7475     if (destination.IsRegister()) {
7476       __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
7477     } else if (destination.IsFpuRegister()) {
7478       __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
7479     } else {
7480       DCHECK(destination.IsStackSlot());
7481       GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
7482     }
7483   } else if (source.IsDoubleStackSlot()) {
7484     if (destination.IsDoubleStackSlot()) {
7485       vixl32::DRegister temp = temps.AcquireD();
7486       GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
7487       GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
7488     } else if (destination.IsRegisterPair()) {
7489       DCHECK(ExpectedPairLayout(destination));
7490       GetAssembler()->LoadFromOffset(
7491           kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
7492     } else {
7493       DCHECK(destination.IsFpuRegisterPair()) << destination;
7494       GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
7495     }
7496   } else if (source.IsRegisterPair()) {
7497     if (destination.IsRegisterPair()) {
7498       __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
7499       __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
7500     } else if (destination.IsFpuRegisterPair()) {
7501       __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
7502     } else {
7503       DCHECK(destination.IsDoubleStackSlot()) << destination;
7504       DCHECK(ExpectedPairLayout(source));
7505       GetAssembler()->StoreToOffset(kStoreWordPair,
7506                                     LowRegisterFrom(source),
7507                                     sp,
7508                                     destination.GetStackIndex());
7509     }
7510   } else if (source.IsFpuRegisterPair()) {
7511     if (destination.IsRegisterPair()) {
7512       __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
7513     } else if (destination.IsFpuRegisterPair()) {
7514       __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
7515     } else {
7516       DCHECK(destination.IsDoubleStackSlot()) << destination;
7517       GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
7518     }
7519   } else {
7520     DCHECK(source.IsConstant()) << source;
7521     HConstant* constant = source.GetConstant();
7522     if (constant->IsIntConstant() || constant->IsNullConstant()) {
7523       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7524       if (destination.IsRegister()) {
7525         __ Mov(RegisterFrom(destination), value);
7526       } else {
7527         DCHECK(destination.IsStackSlot());
7528         vixl32::Register temp = temps.Acquire();
7529         __ Mov(temp, value);
7530         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7531       }
7532     } else if (constant->IsLongConstant()) {
7533       int64_t value = Int64ConstantFrom(source);
7534       if (destination.IsRegisterPair()) {
7535         __ Mov(LowRegisterFrom(destination), Low32Bits(value));
7536         __ Mov(HighRegisterFrom(destination), High32Bits(value));
7537       } else {
7538         DCHECK(destination.IsDoubleStackSlot()) << destination;
7539         vixl32::Register temp = temps.Acquire();
7540         __ Mov(temp, Low32Bits(value));
7541         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7542         __ Mov(temp, High32Bits(value));
7543         GetAssembler()->StoreToOffset(kStoreWord,
7544                                       temp,
7545                                       sp,
7546                                       destination.GetHighStackIndex(kArmWordSize));
7547       }
7548     } else if (constant->IsDoubleConstant()) {
7549       double value = constant->AsDoubleConstant()->GetValue();
7550       if (destination.IsFpuRegisterPair()) {
7551         __ Vmov(DRegisterFrom(destination), value);
7552       } else {
7553         DCHECK(destination.IsDoubleStackSlot()) << destination;
7554         uint64_t int_value = bit_cast<uint64_t, double>(value);
7555         vixl32::Register temp = temps.Acquire();
7556         __ Mov(temp, Low32Bits(int_value));
7557         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7558         __ Mov(temp, High32Bits(int_value));
7559         GetAssembler()->StoreToOffset(kStoreWord,
7560                                       temp,
7561                                       sp,
7562                                       destination.GetHighStackIndex(kArmWordSize));
7563       }
7564     } else {
7565       DCHECK(constant->IsFloatConstant()) << constant->DebugName();
7566       float value = constant->AsFloatConstant()->GetValue();
7567       if (destination.IsFpuRegister()) {
7568         __ Vmov(SRegisterFrom(destination), value);
7569       } else {
7570         DCHECK(destination.IsStackSlot());
7571         vixl32::Register temp = temps.Acquire();
7572         __ Mov(temp, bit_cast<int32_t, float>(value));
7573         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7574       }
7575     }
7576   }
7577 }
7578 
7579 void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
7580   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7581   vixl32::Register temp = temps.Acquire();
7582   __ Mov(temp, reg);
7583   GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
7584   GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7585 }
7586 
7587 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
7588   // TODO(VIXL32): Double check the performance of this implementation.
7589   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7590   vixl32::Register temp1 = temps.Acquire();
7591   ScratchRegisterScope ensure_scratch(
7592       this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
7593   vixl32::Register temp2(ensure_scratch.GetRegister());
7594 
7595   int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
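  // If the extra scratch register had to be spilled, SpillScratch() pushed it, moving `sp`
  // down by one word; the original sp-relative offsets must be adjusted accordingly.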
7596   GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
7597   GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
7598   GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
7599   GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
7600 }
7601 
7602 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
7603   MoveOperands* move = moves_[index];
7604   Location source = move->GetSource();
7605   Location destination = move->GetDestination();
7606   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7607 
7608   if (source.IsRegister() && destination.IsRegister()) {
7609     vixl32::Register temp = temps.Acquire();
7610     DCHECK(!RegisterFrom(source).Is(temp));
7611     DCHECK(!RegisterFrom(destination).Is(temp));
7612     __ Mov(temp, RegisterFrom(destination));
7613     __ Mov(RegisterFrom(destination), RegisterFrom(source));
7614     __ Mov(RegisterFrom(source), temp);
7615   } else if (source.IsRegister() && destination.IsStackSlot()) {
7616     Exchange(RegisterFrom(source), destination.GetStackIndex());
7617   } else if (source.IsStackSlot() && destination.IsRegister()) {
7618     Exchange(RegisterFrom(destination), source.GetStackIndex());
7619   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7620     Exchange(source.GetStackIndex(), destination.GetStackIndex());
7621   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7622     vixl32::Register temp = temps.Acquire();
7623     __ Vmov(temp, SRegisterFrom(source));
7624     __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
7625     __ Vmov(SRegisterFrom(destination), temp);
7626   } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
7627     vixl32::DRegister temp = temps.AcquireD();
7628     __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
7629     __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
7630     __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
7631     __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
7632   } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
7633     vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
7634     int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
7635     DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
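    // Thanks to the pair layout checked above, the high half of the pair is `low_reg + 1`, so
    // the pair can be parked in a single D register and the stack slot reloaded with a
    // word-pair load.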
7636     vixl32::DRegister temp = temps.AcquireD();
7637     __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
7638     GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
7639     GetAssembler()->StoreDToOffset(temp, sp, mem);
7640   } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
7641     vixl32::DRegister first = DRegisterFrom(source);
7642     vixl32::DRegister second = DRegisterFrom(destination);
7643     vixl32::DRegister temp = temps.AcquireD();
7644     __ Vmov(temp, first);
7645     __ Vmov(first, second);
7646     __ Vmov(second, temp);
7647   } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
7648     vixl32::DRegister reg = source.IsFpuRegisterPair()
7649         ? DRegisterFrom(source)
7650         : DRegisterFrom(destination);
7651     int mem = source.IsFpuRegisterPair()
7652         ? destination.GetStackIndex()
7653         : source.GetStackIndex();
7654     vixl32::DRegister temp = temps.AcquireD();
7655     __ Vmov(temp, reg);
7656     GetAssembler()->LoadDFromOffset(reg, sp, mem);
7657     GetAssembler()->StoreDToOffset(temp, sp, mem);
7658   } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
7659     vixl32::SRegister reg = source.IsFpuRegister()
7660         ? SRegisterFrom(source)
7661         : SRegisterFrom(destination);
7662     int mem = source.IsFpuRegister()
7663         ? destination.GetStackIndex()
7664         : source.GetStackIndex();
7665     vixl32::Register temp = temps.Acquire();
7666     __ Vmov(temp, reg);
7667     GetAssembler()->LoadSFromOffset(reg, sp, mem);
7668     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7669   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
7670     vixl32::DRegister temp1 = temps.AcquireD();
7671     vixl32::DRegister temp2 = temps.AcquireD();
7672     __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
7673     __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
7674     __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
7675     __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
7676   } else {
7677     LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
7678   }
7679 }
7680 
7681 void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
7682   __ Push(vixl32::Register(reg));
7683 }
7684 
7685 void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
7686   __ Pop(vixl32::Register(reg));
7687 }
7688 
7689 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
7690     HLoadClass::LoadKind desired_class_load_kind) {
7691   switch (desired_class_load_kind) {
7692     case HLoadClass::LoadKind::kInvalid:
7693       LOG(FATAL) << "UNREACHABLE";
7694       UNREACHABLE();
7695     case HLoadClass::LoadKind::kReferrersClass:
7696       break;
7697     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7698     case HLoadClass::LoadKind::kBootImageRelRo:
7699     case HLoadClass::LoadKind::kAppImageRelRo:
7700     case HLoadClass::LoadKind::kBssEntry:
7701     case HLoadClass::LoadKind::kBssEntryPublic:
7702     case HLoadClass::LoadKind::kBssEntryPackage:
7703       DCHECK(!GetCompilerOptions().IsJitCompiler());
7704       break;
7705     case HLoadClass::LoadKind::kJitBootImageAddress:
7706     case HLoadClass::LoadKind::kJitTableAddress:
7707       DCHECK(GetCompilerOptions().IsJitCompiler());
7708       break;
7709     case HLoadClass::LoadKind::kRuntimeCall:
7710       break;
7711   }
7712   return desired_class_load_kind;
7713 }
7714 
7715 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
7716   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7717   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7718     InvokeRuntimeCallingConventionARMVIXL calling_convention;
7719     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7720         cls,
7721         LocationFrom(calling_convention.GetRegisterAt(0)),
7722         LocationFrom(r0));
7723     DCHECK(calling_convention.GetRegisterAt(0).Is(r0));
7724     return;
7725   }
7726   DCHECK_EQ(cls->NeedsAccessCheck(),
7727             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7728                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7729 
7730   const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
7731   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7732       ? LocationSummary::kCallOnSlowPath
7733       : LocationSummary::kNoCall;
7734   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7735   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7736     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7737   }
7738 
7739   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
7740     locations->SetInAt(0, Location::RequiresRegister());
7741   }
7742   locations->SetOut(Location::RequiresRegister());
7743   if (load_kind == HLoadClass::LoadKind::kBssEntry ||
7744       load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7745       load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
7746     if (codegen_->EmitNonBakerReadBarrier()) {
7747       // For non-Baker read barrier we have a temp-clobbering call.
7748     } else {
7749       // Rely on the type resolution or initialization and marking to save everything we need.
7750       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7751     }
7752   }
7753 }
7754 
7755 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7756 // move.
7757 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7758   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7759   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7760     codegen_->GenerateLoadClassRuntimeCall(cls);
7761     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15);
7762     return;
7763   }
7764   DCHECK_EQ(cls->NeedsAccessCheck(),
7765             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7766                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7767 
7768   LocationSummary* locations = cls->GetLocations();
7769   Location out_loc = locations->Out();
7770   vixl32::Register out = OutputRegister(cls);
7771 
7772   const ReadBarrierOption read_barrier_option =
7773       cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
7774   bool generate_null_check = false;
7775   switch (load_kind) {
7776     case HLoadClass::LoadKind::kReferrersClass: {
7777       DCHECK(!cls->CanCallRuntime());
7778       DCHECK(!cls->MustGenerateClinitCheck());
7779       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7780       vixl32::Register current_method = InputRegisterAt(cls, 0);
7781       codegen_->GenerateGcRootFieldLoad(cls,
7782                                         out_loc,
7783                                         current_method,
7784                                         ArtMethod::DeclaringClassOffset().Int32Value(),
7785                                         read_barrier_option);
7786       break;
7787     }
7788     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7789       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7790              codegen_->GetCompilerOptions().IsBootImageExtension());
7791       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7792       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7793           codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7794       codegen_->EmitMovwMovtPlaceholder(labels, out);
7795       break;
7796     }
7797     case HLoadClass::LoadKind::kBootImageRelRo: {
7798       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7799       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
7800       codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
7801       break;
7802     }
7803     case HLoadClass::LoadKind::kAppImageRelRo: {
7804       DCHECK(codegen_->GetCompilerOptions().IsAppImage());
7805       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7806       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7807           codegen_->NewAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7808       codegen_->EmitMovwMovtPlaceholder(labels, out);
7809       __ Ldr(out, MemOperand(out, /*offset=*/ 0));
7810       break;
7811     }
7812     case HLoadClass::LoadKind::kBssEntry:
7813     case HLoadClass::LoadKind::kBssEntryPublic:
7814     case HLoadClass::LoadKind::kBssEntryPackage: {
7815       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls);
7816       codegen_->EmitMovwMovtPlaceholder(labels, out);
7817       // All aligned loads are implicitly atomic consume operations on ARM.
7818       codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7819       generate_null_check = true;
7820       break;
7821     }
7822     case HLoadClass::LoadKind::kJitBootImageAddress: {
7823       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7824       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7825       DCHECK_NE(address, 0u);
7826       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7827       break;
7828     }
7829     case HLoadClass::LoadKind::kJitTableAddress: {
7830       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
7831                                                        cls->GetTypeIndex(),
7832                                                        cls->GetClass()));
7833       // /* GcRoot<mirror::Class> */ out = *out
7834       codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7835       break;
7836     }
7837     case HLoadClass::LoadKind::kRuntimeCall:
7838     case HLoadClass::LoadKind::kInvalid:
7839       LOG(FATAL) << "UNREACHABLE";
7840       UNREACHABLE();
7841   }
7842 
7843   if (generate_null_check || cls->MustGenerateClinitCheck()) {
7844     DCHECK(cls->CanCallRuntime());
7845     LoadClassSlowPathARMVIXL* slow_path =
7846         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls);
7847     codegen_->AddSlowPath(slow_path);
7848     if (generate_null_check) {
7849       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7850     }
7851     if (cls->MustGenerateClinitCheck()) {
7852       GenerateClassInitializationCheck(slow_path, out);
7853     } else {
7854       __ Bind(slow_path->GetExitLabel());
7855     }
7856     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16);
7857   }
7858 }
7859 
7860 void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7861   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7862   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7863   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7864 }
7865 
7866 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7867   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7868 }
7869 
7870 void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7871   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7872   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7873   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7874 }
7875 
7876 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7877   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7878 }
7879 
7880 void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7881   LocationSummary* locations =
7882       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7883   locations->SetInAt(0, Location::RequiresRegister());
7884   if (check->HasUses()) {
7885     locations->SetOut(Location::SameAsFirstInput());
7886   }
7887   // Rely on the type initialization to save everything we need.
7888   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7889 }
7890 
7891 void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7892   // We assume the class is not null.
7893   LoadClassSlowPathARMVIXL* slow_path =
7894       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check);
7895   codegen_->AddSlowPath(slow_path);
7896   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
7897 }
7898 
7899 void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
7900     LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
7901   UseScratchRegisterScope temps(GetVIXLAssembler());
7902   vixl32::Register temp = temps.Acquire();
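  // The class status is encoded in the most significant bits of the 32-bit `status_` field;
  // kClassStatusByteOffset and kShiftedVisiblyInitializedValue account for that encoding, so
  // loading that single byte and comparing unsigned sends every state below "visibly
  // initialized" to the slow path.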
7903   __ Ldrb(temp, MemOperand(class_reg, kClassStatusByteOffset));
7904   __ Cmp(temp, kShiftedVisiblyInitializedValue);
7905   __ B(lo, slow_path->GetEntryLabel());
7906   __ Bind(slow_path->GetExitLabel());
7907 }
7908 
7909 void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
7910     HTypeCheckInstruction* check,
7911     vixl32::Register temp,
7912     vixl32::FlagsUpdate flags_update) {
7913   uint32_t path_to_root = check->GetBitstringPathToRoot();
7914   uint32_t mask = check->GetBitstringMask();
7915   DCHECK(IsPowerOfTwo(mask + 1));
7916   size_t mask_bits = WhichPowerOf2(mask + 1);
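  // Worked example (illustrative values only): with mask == 0xffff (mask_bits == 16) the
  // bitstring is the low halfword of the status word, so a single LDRH plus a compare against
  // `path_to_root` performs the whole check. For narrower masks, the code below isolates the
  // low `mask_bits` bits (by shifting or UBFX) around the comparison, choosing the cheapest
  // encodable form.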
7917 
7918   // Note that HInstanceOf only needs `temp` to end up zero on success, whereas HCheckCast
7919   // needs the Z flag set for its BNE; the `flags_update` parameter selects which.
7920   if (mask_bits == 16u) {
7921     // Load only the bitstring part of the status word.
7922     __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7923     // Check if the bitstring bits are equal to `path_to_root`.
7924     if (flags_update == SetFlags) {
7925       __ Cmp(temp, path_to_root);
7926     } else {
7927       __ Sub(temp, temp, path_to_root);
7928     }
7929   } else {
7930     // /* uint32_t */ temp = temp->status_
7931     __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7932     if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
7933       // Compare the bitstring bits using SUB.
7934       __ Sub(temp, temp, path_to_root);
7935       // Shift out bits that do not contribute to the comparison.
7936       __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7937     } else if (IsUint<16>(path_to_root)) {
7938       if (temp.IsLow()) {
7939         // Note: Optimized for size but contains one more dependent instruction than necessary.
7940         //       MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
7941         //       macro assembler would use the high reg IP for the constant by default.
7942         // Compare the bitstring bits using SUB.
7943         __ Sub(temp, temp, path_to_root & 0x00ffu);  // 16-bit SUB (immediate) T2
7944         __ Sub(temp, temp, path_to_root & 0xff00u);  // 32-bit SUB (immediate) T3
7945         // Shift out bits that do not contribute to the comparison.
7946         __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7947       } else {
7948         // Extract the bitstring bits.
7949         __ Ubfx(temp, temp, 0, mask_bits);
7950         // Check if the bitstring bits are equal to `path_to_root`.
7951         if (flags_update == SetFlags) {
7952           __ Cmp(temp, path_to_root);
7953         } else {
7954           __ Sub(temp, temp, path_to_root);
7955         }
7956       }
7957     } else {
7958       // Shift out bits that do not contribute to the comparison.
7959       __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7960       // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
7961       if (flags_update == SetFlags) {
7962         __ Cmp(temp, path_to_root << (32u - mask_bits));
7963       } else {
7964         __ Sub(temp, temp, path_to_root << (32u - mask_bits));
7965       }
7966     }
7967   }
7968 }
7969 
7970 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
7971     HLoadString::LoadKind desired_string_load_kind) {
7972   switch (desired_string_load_kind) {
7973     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7974     case HLoadString::LoadKind::kBootImageRelRo:
7975     case HLoadString::LoadKind::kBssEntry:
7976       DCHECK(!GetCompilerOptions().IsJitCompiler());
7977       break;
7978     case HLoadString::LoadKind::kJitBootImageAddress:
7979     case HLoadString::LoadKind::kJitTableAddress:
7980       DCHECK(GetCompilerOptions().IsJitCompiler());
7981       break;
7982     case HLoadString::LoadKind::kRuntimeCall:
7983       break;
7984   }
7985   return desired_string_load_kind;
7986 }
7987 
7988 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
7989   LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
7990   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7991   HLoadString::LoadKind load_kind = load->GetLoadKind();
7992   if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7993     locations->SetOut(LocationFrom(r0));
7994   } else {
7995     locations->SetOut(Location::RequiresRegister());
7996     if (load_kind == HLoadString::LoadKind::kBssEntry) {
7997       if (codegen_->EmitNonBakerReadBarrier()) {
7998         // For non-Baker read barrier we have a temp-clobbering call.
7999       } else {
8000         // Rely on the pResolveString and marking to save everything we need, including temps.
8001         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
8002       }
8003     }
8004   }
8005 }
8006 
8007 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
8008 // move.
8009 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
8010   LocationSummary* locations = load->GetLocations();
8011   Location out_loc = locations->Out();
8012   vixl32::Register out = OutputRegister(load);
8013   HLoadString::LoadKind load_kind = load->GetLoadKind();
8014 
8015   switch (load_kind) {
8016     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
8017       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
8018              codegen_->GetCompilerOptions().IsBootImageExtension());
8019       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
8020           codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
8021       codegen_->EmitMovwMovtPlaceholder(labels, out);
8022       return;
8023     }
8024     case HLoadString::LoadKind::kBootImageRelRo: {
8025       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
8026       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
8027       codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
8028       return;
8029     }
8030     case HLoadString::LoadKind::kBssEntry: {
8031       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
8032           codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
8033       codegen_->EmitMovwMovtPlaceholder(labels, out);
8034       // All aligned loads are implicitly atomic consume operations on ARM.
8035       codegen_->GenerateGcRootFieldLoad(
8036           load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
8037       LoadStringSlowPathARMVIXL* slow_path =
8038           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
8039       codegen_->AddSlowPath(slow_path);
8040       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
8041       __ Bind(slow_path->GetExitLabel());
8042       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17);
8043       return;
8044     }
8045     case HLoadString::LoadKind::kJitBootImageAddress: {
8046       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
8047       DCHECK_NE(address, 0u);
8048       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
8049       return;
8050     }
8051     case HLoadString::LoadKind::kJitTableAddress: {
8052       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
8053                                                         load->GetStringIndex(),
8054                                                         load->GetString()));
8055       // /* GcRoot<mirror::String> */ out = *out
8056       codegen_->GenerateGcRootFieldLoad(
8057           load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
8058       return;
8059     }
8060     default:
8061       break;
8062   }
8063 
8064   DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
8065   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8066   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
8067   codegen_->InvokeRuntime(kQuickResolveString, load);
8068   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
8069   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18);
8070 }
8071 
8072 static int32_t GetExceptionTlsOffset() {
8073   return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
8074 }
8075 
8076 void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
8077   LocationSummary* locations =
8078       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
8079   locations->SetOut(Location::RequiresRegister());
8080 }
8081 
8082 void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
8083   vixl32::Register out = OutputRegister(load);
8084   GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
8085 }
8086 
8087 
8088 void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
8089   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
8090 }
8091 
8092 void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) {
8093   UseScratchRegisterScope temps(GetVIXLAssembler());
8094   vixl32::Register temp = temps.Acquire();
8095   __ Mov(temp, 0);
8096   GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
8097 }
8098 
8099 void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
8100   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8101       instruction, LocationSummary::kCallOnMainOnly);
8102   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8103   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8104 }
8105 
8106 void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
8107   codegen_->InvokeRuntime(kQuickDeliverException, instruction);
8108   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
8109 }
8110 
8111 // Temp is used for read barrier.
8112 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8113   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8114     return 1;
8115   }
8116   if (emit_read_barrier &&
8117        (kUseBakerReadBarrier ||
8118           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
8119           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
8120           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
8121     return 1;
8122   }
8123   return 0;
8124 }
8125 
8126 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
8127 // interface pointer, one for loading the current interface.
8128 // The other checks have one temp for loading the object's class.
8129 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8130   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8131     return 3;
8132   }
8133   return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
8134 }
8135 
8136 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8137   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
8138   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8139   bool baker_read_barrier_slow_path = false;
8140   switch (type_check_kind) {
8141     case TypeCheckKind::kExactCheck:
8142     case TypeCheckKind::kAbstractClassCheck:
8143     case TypeCheckKind::kClassHierarchyCheck:
8144     case TypeCheckKind::kArrayObjectCheck:
8145     case TypeCheckKind::kInterfaceCheck: {
8146       bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
8147       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
8148       baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
8149                                      (type_check_kind != TypeCheckKind::kInterfaceCheck);
8150       break;
8151     }
8152     case TypeCheckKind::kArrayCheck:
8153     case TypeCheckKind::kUnresolvedCheck:
8154       call_kind = LocationSummary::kCallOnSlowPath;
8155       break;
8156     case TypeCheckKind::kBitstringCheck:
8157       break;
8158   }
8159 
8160   LocationSummary* locations =
8161       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8162   if (baker_read_barrier_slow_path) {
8163     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
8164   }
8165   locations->SetInAt(0, Location::RequiresRegister());
8166   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8167     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8168     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8169     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8170   } else {
8171     locations->SetInAt(1, Location::RequiresRegister());
8172   }
8173   // The "out" register is used as a temporary, so it overlaps with the inputs.
8174   // Note that TypeCheckSlowPathARM uses this register too.
8175   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
8176   locations->AddRegisterTemps(
8177       NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
8178 }
8179 
8180 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8181   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8182   LocationSummary* locations = instruction->GetLocations();
8183   Location obj_loc = locations->InAt(0);
8184   vixl32::Register obj = InputRegisterAt(instruction, 0);
8185   vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8186       ? vixl32::Register()
8187       : InputRegisterAt(instruction, 1);
8188   Location out_loc = locations->Out();
8189   vixl32::Register out = OutputRegister(instruction);
8190   const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
8191   DCHECK_LE(num_temps, 1u);
8192   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
8193   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8194   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8195   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8196   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8197   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8198   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8199   const uint32_t object_array_data_offset =
8200       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8201   vixl32::Label done;
8202   vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
8203   SlowPathCodeARMVIXL* slow_path = nullptr;
8204 
8205   // Return 0 if `obj` is null.
8206   // Avoid the null check if we know `obj` is not null.
8207   if (instruction->MustDoNullCheck()) {
8208     DCHECK(!out.Is(obj));
8209     __ Mov(out, 0);
8210     __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8211   }
8212 
8213   switch (type_check_kind) {
8214     case TypeCheckKind::kExactCheck: {
8215       ReadBarrierOption read_barrier_option =
8216           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8217       // /* HeapReference<Class> */ out = obj->klass_
8218       GenerateReferenceLoadTwoRegisters(instruction,
8219                                         out_loc,
8220                                         obj_loc,
8221                                         class_offset,
8222                                         maybe_temp_loc,
8223                                         read_barrier_option);
8224       // Classes must be equal for the instanceof to succeed.
8225       __ Cmp(out, cls);
8226       // We speculatively set the result to false without changing the condition
8227       // flags, which allows us to avoid some branching later.
8228       __ Mov(LeaveFlags, out, 0);
8229 
8230       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8231       // we check that the output is in a low register, so that a 16-bit MOV
8232       // encoding can be used.
8233       if (out.IsLow()) {
8234         // We use the scope because of the IT block that follows.
8235         ExactAssemblyScope guard(GetVIXLAssembler(),
8236                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8237                                  CodeBufferCheckScope::kExactSize);
8238 
8239         __ it(eq);
8240         __ mov(eq, out, 1);
8241       } else {
8242         __ B(ne, final_label, /* is_far_target= */ false);
8243         __ Mov(out, 1);
8244       }
8245 
8246       break;
8247     }
8248 
8249     case TypeCheckKind::kAbstractClassCheck: {
8250       ReadBarrierOption read_barrier_option =
8251           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8252       // /* HeapReference<Class> */ out = obj->klass_
8253       GenerateReferenceLoadTwoRegisters(instruction,
8254                                         out_loc,
8255                                         obj_loc,
8256                                         class_offset,
8257                                         maybe_temp_loc,
8258                                         read_barrier_option);
8259       // If the class is abstract, we eagerly fetch the super class of the
8260       // object to avoid doing a comparison we know will fail.
8261       vixl32::Label loop;
8262       __ Bind(&loop);
8263       // /* HeapReference<Class> */ out = out->super_class_
8264       GenerateReferenceLoadOneRegister(instruction,
8265                                        out_loc,
8266                                        super_offset,
8267                                        maybe_temp_loc,
8268                                        read_barrier_option);
8269       // If `out` is null, we use it for the result, and jump to the final label.
8270       __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8271       __ Cmp(out, cls);
8272       __ B(ne, &loop, /* is_far_target= */ false);
8273       __ Mov(out, 1);
8274       break;
8275     }
8276 
8277     case TypeCheckKind::kClassHierarchyCheck: {
8278       ReadBarrierOption read_barrier_option =
8279           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8280       // /* HeapReference<Class> */ out = obj->klass_
8281       GenerateReferenceLoadTwoRegisters(instruction,
8282                                         out_loc,
8283                                         obj_loc,
8284                                         class_offset,
8285                                         maybe_temp_loc,
8286                                         read_barrier_option);
8287       // Walk over the class hierarchy to find a match.
8288       vixl32::Label loop, success;
8289       __ Bind(&loop);
8290       __ Cmp(out, cls);
8291       __ B(eq, &success, /* is_far_target= */ false);
8292       // /* HeapReference<Class> */ out = out->super_class_
8293       GenerateReferenceLoadOneRegister(instruction,
8294                                        out_loc,
8295                                        super_offset,
8296                                        maybe_temp_loc,
8297                                        read_barrier_option);
8298       // This is essentially a null check, but it sets the condition flags to the
8299       // proper value for the code that follows the loop, i.e. not `eq`.
8300       __ Cmp(out, 1);
8301       __ B(hs, &loop, /* is_far_target= */ false);
8302 
8303       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8304       // we check that the output is in a low register, so that a 16-bit MOV
8305       // encoding can be used.
8306       if (out.IsLow()) {
8307         // If `out` is null, we use it for the result, and the condition flags
8308         // have already been set to `ne`, so the IT block that comes afterwards
8309         // (and which handles the successful case) turns into a NOP (instead of
8310         // overwriting `out`).
8311         __ Bind(&success);
8312 
8313         // We use the scope because of the IT block that follows.
8314         ExactAssemblyScope guard(GetVIXLAssembler(),
8315                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8316                                  CodeBufferCheckScope::kExactSize);
8317 
8318         // There is only one branch to the `success` label (which is bound to this
8319         // IT block), and it has the same condition, `eq`, so in that case the MOV
8320         // is executed.
8321         __ it(eq);
8322         __ mov(eq, out, 1);
8323       } else {
8324         // If `out` is null, we use it for the result, and jump to the final label.
8325         __ B(final_label);
8326         __ Bind(&success);
8327         __ Mov(out, 1);
8328       }
8329 
8330       break;
8331     }
8332 
8333     case TypeCheckKind::kArrayObjectCheck: {
8334       ReadBarrierOption read_barrier_option =
8335           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8336       // /* HeapReference<Class> */ out = obj->klass_
8337       GenerateReferenceLoadTwoRegisters(instruction,
8338                                         out_loc,
8339                                         obj_loc,
8340                                         class_offset,
8341                                         maybe_temp_loc,
8342                                         read_barrier_option);
8343       // Do an exact check.
8344       vixl32::Label exact_check;
8345       __ Cmp(out, cls);
8346       __ B(eq, &exact_check, /* is_far_target= */ false);
8347       // Otherwise, we need to check that the object's class is a non-primitive array.
8348       // /* HeapReference<Class> */ out = out->component_type_
8349       GenerateReferenceLoadOneRegister(instruction,
8350                                        out_loc,
8351                                        component_offset,
8352                                        maybe_temp_loc,
8353                                        read_barrier_option);
8354       // If `out` is null, we use it for the result, and jump to the final label.
8355       __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8356       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
8357       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8358       __ Cmp(out, 0);
8359       // We speculatively set the result to false without changing the condition
8360       // flags, which allows us to avoid some branching later.
8361       __ Mov(LeaveFlags, out, 0);
8362 
8363       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8364       // we check that the output is in a low register, so that a 16-bit MOV
8365       // encoding can be used.
8366       if (out.IsLow()) {
8367         __ Bind(&exact_check);
8368 
8369         // We use the scope because of the IT block that follows.
8370         ExactAssemblyScope guard(GetVIXLAssembler(),
8371                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8372                                  CodeBufferCheckScope::kExactSize);
8373 
8374         __ it(eq);
8375         __ mov(eq, out, 1);
8376       } else {
8377         __ B(ne, final_label, /* is_far_target= */ false);
8378         __ Bind(&exact_check);
8379         __ Mov(out, 1);
8380       }
8381 
8382       break;
8383     }
8384 
8385     case TypeCheckKind::kArrayCheck: {
8386       // No read barrier since the slow path will retry upon failure.
8387       // /* HeapReference<Class> */ out = obj->klass_
8388       GenerateReferenceLoadTwoRegisters(instruction,
8389                                         out_loc,
8390                                         obj_loc,
8391                                         class_offset,
8392                                         maybe_temp_loc,
8393                                         kWithoutReadBarrier);
8394       __ Cmp(out, cls);
8395       DCHECK(locations->OnlyCallsOnSlowPath());
8396       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8397           instruction, /* is_fatal= */ false);
8398       codegen_->AddSlowPath(slow_path);
8399       __ B(ne, slow_path->GetEntryLabel());
8400       __ Mov(out, 1);
8401       break;
8402     }
8403 
8404     case TypeCheckKind::kInterfaceCheck: {
8405       if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
8406         DCHECK(locations->OnlyCallsOnSlowPath());
8407         slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8408             instruction, /* is_fatal= */ false);
8409         codegen_->AddSlowPath(slow_path);
8410         if (codegen_->EmitNonBakerReadBarrier()) {
8411           __ B(slow_path->GetEntryLabel());
8412           break;
8413         }
8414         // For Baker read barrier, take the slow path while marking.
8415         __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
8416       }
8417 
8418       // Fast-path without read barriers.
8419       UseScratchRegisterScope temps(GetVIXLAssembler());
8420       vixl32::Register temp = RegisterFrom(maybe_temp_loc);
8421       vixl32::Register temp2 = temps.Acquire();
8422       // /* HeapReference<Class> */ temp = obj->klass_
8423       __ Ldr(temp, MemOperand(obj, class_offset));
8424       GetAssembler()->MaybeUnpoisonHeapReference(temp);
8425       // /* HeapReference<Class> */ temp = temp->iftable_
8426       __ Ldr(temp, MemOperand(temp, iftable_offset));
8427       GetAssembler()->MaybeUnpoisonHeapReference(temp);
8428       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8429       __ Ldr(out, MemOperand(temp, array_length_offset));
8430       // Loop through the `IfTable` and check if any class matches.
8431       vixl32::Label loop;
8432       __ Bind(&loop);
8433       // If taken, the result in `out` is already 0 (false).
8434       __ CompareAndBranchIfZero(out, &done, /* is_far_target= */ false);
8435       __ Ldr(temp2, MemOperand(temp, object_array_data_offset));
8436       GetAssembler()->MaybeUnpoisonHeapReference(temp2);
8437       // Go to next interface.
8438       __ Add(temp, temp, static_cast<uint32_t>(2 * kHeapReferenceSize));
8439       __ Sub(out, out, 2);
8440       // Compare the classes and continue the loop if they do not match.
8441       __ Cmp(cls, temp2);
8442       __ B(ne, &loop);
8443       __ Mov(out, 1);
8444       break;
8445     }
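    // Illustrative pseudocode for the fast path above (a sketch, not the emitted code):
    //   int32_t remaining = obj->klass_->iftable_->length_;   // two slots per interface
    //   for (; remaining != 0; iftable += 2, remaining -= 2) {
    //     if (iftable[0] == cls) { out = 1; goto done; }       // slot 0 holds the interface class
    //   }
    //   // Falling out of the loop leaves `out` == 0, i.e. false.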
8446 
8447     case TypeCheckKind::kUnresolvedCheck: {
8448       // Note that we indeed only call on the slow path, but we always go
8449       // into the slow path for the unresolved check case.
8450       //
8451       // We cannot directly call the InstanceofNonTrivial runtime
8452       // entry point without resorting to a type checking slow path
8453       // here (i.e. by calling InvokeRuntime directly), as it would
8454       // require assigning fixed registers for the inputs of this
8455       // HInstanceOf instruction (following the runtime calling
8456       // convention), which might be cluttered by the potential first
8457       // read barrier emission at the beginning of this method.
8458       //
8459       // TODO: Introduce a new runtime entry point taking the object
8460       // to test (instead of its class) as argument, and let it deal
8461       // with the read barrier issues. This will let us refactor this
8462       // case of the `switch` code as it was previously (with a direct
8463       // call to the runtime not using a type checking slow path).
8464       // This should also be beneficial for the other cases above.
8465       DCHECK(locations->OnlyCallsOnSlowPath());
8466       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8467           instruction, /* is_fatal= */ false);
8468       codegen_->AddSlowPath(slow_path);
8469       __ B(slow_path->GetEntryLabel());
8470       break;
8471     }
8472 
8473     case TypeCheckKind::kBitstringCheck: {
8474       // /* HeapReference<Class> */ temp = obj->klass_
8475       GenerateReferenceLoadTwoRegisters(instruction,
8476                                         out_loc,
8477                                         obj_loc,
8478                                         class_offset,
8479                                         maybe_temp_loc,
8480                                         kWithoutReadBarrier);
8481 
8482       GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
8483       // If `out` is a low reg and we would have another low reg temp, we could
8484       // optimize this as RSBS+ADC, see GenerateConditionWithZero().
8485       //
8486       // Also, in some cases when `out` is a low reg and we're loading a constant to IP
8487       // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
8488       // would be the same and we would have fewer direct data dependencies.
8489       codegen_->GenerateConditionWithZero(kCondEQ, out, out);  // CLZ+LSR
8490       break;
8491     }
8492   }
8493 
8494   if (done.IsReferenced()) {
8495     __ Bind(&done);
8496   }
8497 
8498   if (slow_path != nullptr) {
8499     __ Bind(slow_path->GetExitLabel());
8500   }
8501 }
8502 
8503 void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8504   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8505   LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
8506   LocationSummary* locations =
8507       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8508   locations->SetInAt(0, Location::RequiresRegister());
8509   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8510     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8511     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8512     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8513   } else {
8514     locations->SetInAt(1, Location::RequiresRegister());
8515   }
8516   locations->AddRegisterTemps(
8517       NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
8518 }
8519 
8520 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8521   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8522   LocationSummary* locations = instruction->GetLocations();
8523   Location obj_loc = locations->InAt(0);
8524   vixl32::Register obj = InputRegisterAt(instruction, 0);
8525   vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8526       ? vixl32::Register()
8527       : InputRegisterAt(instruction, 1);
8528   Location temp_loc = locations->GetTemp(0);
8529   vixl32::Register temp = RegisterFrom(temp_loc);
8530   const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
8531   DCHECK_LE(num_temps, 3u);
8532   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8533   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
8534   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8535   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8536   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8537   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8538   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8539   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8540   const uint32_t object_array_data_offset =
8541       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8542 
8543   bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
8544   SlowPathCodeARMVIXL* type_check_slow_path =
8545       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8546           instruction, is_type_check_slow_path_fatal);
8547   codegen_->AddSlowPath(type_check_slow_path);
8548 
8549   vixl32::Label done;
8550   vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
8551   // Avoid null check if we know obj is not null.
8552   if (instruction->MustDoNullCheck()) {
8553     __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8554   }
8555 
8556   switch (type_check_kind) {
8557     case TypeCheckKind::kExactCheck:
8558     case TypeCheckKind::kArrayCheck: {
8559       // /* HeapReference<Class> */ temp = obj->klass_
8560       GenerateReferenceLoadTwoRegisters(instruction,
8561                                         temp_loc,
8562                                         obj_loc,
8563                                         class_offset,
8564                                         maybe_temp2_loc,
8565                                         kWithoutReadBarrier);
8566 
8567       __ Cmp(temp, cls);
8568       // Jump to slow path for throwing the exception or doing a
8569       // more involved array check.
8570       __ B(ne, type_check_slow_path->GetEntryLabel());
8571       break;
8572     }
8573 
8574     case TypeCheckKind::kAbstractClassCheck: {
8575       // /* HeapReference<Class> */ temp = obj->klass_
8576       GenerateReferenceLoadTwoRegisters(instruction,
8577                                         temp_loc,
8578                                         obj_loc,
8579                                         class_offset,
8580                                         maybe_temp2_loc,
8581                                         kWithoutReadBarrier);
8582 
8583       // If the class is abstract, we eagerly fetch the super class of the
8584       // object to avoid doing a comparison we know will fail.
8585       vixl32::Label loop;
8586       __ Bind(&loop);
8587       // /* HeapReference<Class> */ temp = temp->super_class_
8588       GenerateReferenceLoadOneRegister(instruction,
8589                                        temp_loc,
8590                                        super_offset,
8591                                        maybe_temp2_loc,
8592                                        kWithoutReadBarrier);
8593 
8594       // If the class reference currently in `temp` is null, jump to the slow path to throw the
8595       // exception.
8596       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8597 
8598       // Otherwise, compare the classes.
8599       __ Cmp(temp, cls);
8600       __ B(ne, &loop, /* is_far_target= */ false);
8601       break;
8602     }
8603 
8604     case TypeCheckKind::kClassHierarchyCheck: {
8605       // /* HeapReference<Class> */ temp = obj->klass_
8606       GenerateReferenceLoadTwoRegisters(instruction,
8607                                         temp_loc,
8608                                         obj_loc,
8609                                         class_offset,
8610                                         maybe_temp2_loc,
8611                                         kWithoutReadBarrier);
8612 
8613       // Walk over the class hierarchy to find a match.
8614       vixl32::Label loop;
8615       __ Bind(&loop);
8616       __ Cmp(temp, cls);
8617       __ B(eq, final_label, /* is_far_target= */ false);
8618 
8619       // /* HeapReference<Class> */ temp = temp->super_class_
8620       GenerateReferenceLoadOneRegister(instruction,
8621                                        temp_loc,
8622                                        super_offset,
8623                                        maybe_temp2_loc,
8624                                        kWithoutReadBarrier);
8625 
8626       // If the class reference currently in `temp` is null, jump to the slow path to throw the
8627       // exception.
8628       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8629       // Otherwise, jump to the beginning of the loop.
8630       __ B(&loop);
8631       break;
8632     }
8633 
8634     case TypeCheckKind::kArrayObjectCheck:  {
8635       // /* HeapReference<Class> */ temp = obj->klass_
8636       GenerateReferenceLoadTwoRegisters(instruction,
8637                                         temp_loc,
8638                                         obj_loc,
8639                                         class_offset,
8640                                         maybe_temp2_loc,
8641                                         kWithoutReadBarrier);
8642 
8643       // Do an exact check.
8644       __ Cmp(temp, cls);
8645       __ B(eq, final_label, /* is_far_target= */ false);
8646 
8647       // Otherwise, we need to check that the object's class is a non-primitive array.
8648       // /* HeapReference<Class> */ temp = temp->component_type_
8649       GenerateReferenceLoadOneRegister(instruction,
8650                                        temp_loc,
8651                                        component_offset,
8652                                        maybe_temp2_loc,
8653                                        kWithoutReadBarrier);
8654       // If the component type is null, jump to the slow path to throw the exception.
8655       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8656       // Otherwise, the object is indeed an array; further check that its
8657       // component type is not a primitive type.
8658       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
8659       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8660       __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
8661       break;
8662     }
8663 
8664     case TypeCheckKind::kUnresolvedCheck:
8665       // We always go into the type check slow path for the unresolved check case.
8666       // We cannot directly call the CheckCast runtime entry point
8667       // without resorting to a type checking slow path here (i.e. by
8668       // calling InvokeRuntime directly), as it would require assigning
8669       // fixed registers for the inputs of this HCheckCast
8670       // instruction (following the runtime calling convention), which
8671       // might be cluttered by the potential first read barrier
8672       // emission at the beginning of this method.
8673 
8674       __ B(type_check_slow_path->GetEntryLabel());
8675       break;
8676 
8677     case TypeCheckKind::kInterfaceCheck: {
8678       // Avoid read barriers to improve performance of the fast path. We cannot get false
8679       // positives by doing this.
8680       // /* HeapReference<Class> */ temp = obj->klass_
8681       GenerateReferenceLoadTwoRegisters(instruction,
8682                                         temp_loc,
8683                                         obj_loc,
8684                                         class_offset,
8685                                         maybe_temp2_loc,
8686                                         kWithoutReadBarrier);
8687 
8688       // /* HeapReference<Class> */ temp = temp->iftable_
8689       GenerateReferenceLoadOneRegister(instruction,
8690                                        temp_loc,
8691                                        iftable_offset,
8692                                        maybe_temp2_loc,
8693                                        kWithoutReadBarrier);
8694       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8695       __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
8696       // Loop through the iftable and check if any class matches.
8697       vixl32::Label start_loop;
8698       __ Bind(&start_loop);
8699       __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
8700                                 type_check_slow_path->GetEntryLabel());
8701       __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
8702       GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
8703       // Go to next interface.
8704       __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
8705       __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
8706       // Compare the classes and continue the loop if they do not match.
8707       __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
8708       __ B(ne, &start_loop, /* is_far_target= */ false);
8709       break;
8710     }
8711 
8712     case TypeCheckKind::kBitstringCheck: {
8713       // /* HeapReference<Class> */ temp = obj->klass_
8714       GenerateReferenceLoadTwoRegisters(instruction,
8715                                         temp_loc,
8716                                         obj_loc,
8717                                         class_offset,
8718                                         maybe_temp2_loc,
8719                                         kWithoutReadBarrier);
8720 
8721       GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
8722       __ B(ne, type_check_slow_path->GetEntryLabel());
8723       break;
8724     }
8725   }
8726   if (done.IsReferenced()) {
8727     __ Bind(&done);
8728   }
8729 
8730   __ Bind(type_check_slow_path->GetExitLabel());
8731 }
8732 
8733 void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8734   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8735       instruction, LocationSummary::kCallOnMainOnly);
8736   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8737   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8738 }
8739 
8740 void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8741   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
8742                           instruction);
8743   if (instruction->IsEnter()) {
8744     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8745   } else {
8746     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8747   }
8748   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 19);
8749 }
8750 
8751 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
8752   HandleBitwiseOperation(instruction, AND);
8753 }
8754 
8755 void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
8756   HandleBitwiseOperation(instruction, ORR);
8757 }
8758 
8759 void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
8760   HandleBitwiseOperation(instruction, EOR);
8761 }
8762 
8763 void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
8764   LocationSummary* locations =
8765       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8766   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8767          || instruction->GetResultType() == DataType::Type::kInt64);
8768   // Note: GVN reorders commutative operations to have the constant on the right-hand side.
8769   locations->SetInAt(0, Location::RequiresRegister());
8770   locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
8771   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8772 }
8773 
8774 void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
8775   HandleBitwiseOperation(instruction);
8776 }
8777 
8778 void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
8779   HandleBitwiseOperation(instruction);
8780 }
8781 
8782 void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
8783   HandleBitwiseOperation(instruction);
8784 }
8785 
8786 void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8787   LocationSummary* locations =
8788       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8789   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8790          || instruction->GetResultType() == DataType::Type::kInt64);
8791 
8792   locations->SetInAt(0, Location::RequiresRegister());
8793   locations->SetInAt(1, Location::RequiresRegister());
8794   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8795 }
8796 
8797 void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8798   LocationSummary* locations = instruction->GetLocations();
8799   Location first = locations->InAt(0);
8800   Location second = locations->InAt(1);
8801   Location out = locations->Out();
8802 
8803   if (instruction->GetResultType() == DataType::Type::kInt32) {
8804     vixl32::Register first_reg = RegisterFrom(first);
8805     vixl32::Register second_reg = RegisterFrom(second);
8806     vixl32::Register out_reg = RegisterFrom(out);
8807 
8808     switch (instruction->GetOpKind()) {
8809       case HInstruction::kAnd:
8810         __ Bic(out_reg, first_reg, second_reg);
8811         break;
8812       case HInstruction::kOr:
8813         __ Orn(out_reg, first_reg, second_reg);
8814         break;
8815       // There is no EON instruction on ARM, so kXor is not expected here.
8816       case HInstruction::kXor:
8817       default:
8818         LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8819         UNREACHABLE();
8820     }
8821     return;
8822 
8823   } else {
8824     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8825     vixl32::Register first_low = LowRegisterFrom(first);
8826     vixl32::Register first_high = HighRegisterFrom(first);
8827     vixl32::Register second_low = LowRegisterFrom(second);
8828     vixl32::Register second_high = HighRegisterFrom(second);
8829     vixl32::Register out_low = LowRegisterFrom(out);
8830     vixl32::Register out_high = HighRegisterFrom(out);
8831 
8832     switch (instruction->GetOpKind()) {
8833       case HInstruction::kAnd:
8834         __ Bic(out_low, first_low, second_low);
8835         __ Bic(out_high, first_high, second_high);
8836         break;
8837       case HInstruction::kOr:
8838         __ Orn(out_low, first_low, second_low);
8839         __ Orn(out_high, first_high, second_high);
8840         break;
8841       // There is no EON instruction on ARM, so kXor is not expected here.
8842       case HInstruction::kXor:
8843       default:
8844         LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8845         UNREACHABLE();
8846     }
8847   }
8848 }
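// Illustrative mapping for the cases above (a sketch, not generated code): HBitwiseNegatedRight
// computes op(x, ~y), so `x & ~y` becomes a single BIC and `x | ~y` a single ORN, e.g.
//   BIC out, first, second   ; out = first & ~second
//   ORN out, first, second   ; out = first | ~second
// For Int64 the same instruction is simply emitted once per 32-bit half.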
8849 
8850 void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
8851     HDataProcWithShifterOp* instruction) {
8852   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
8853          instruction->GetType() == DataType::Type::kInt64);
8854   LocationSummary* locations =
8855       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8856   const bool overlap = instruction->GetType() == DataType::Type::kInt64 &&
8857                        HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
8858 
8859   locations->SetInAt(0, Location::RequiresRegister());
8860   locations->SetInAt(1, Location::RequiresRegister());
8861   locations->SetOut(Location::RequiresRegister(),
8862                     overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
8863 }
8864 
8865 void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
8866     HDataProcWithShifterOp* instruction) {
8867   const LocationSummary* const locations = instruction->GetLocations();
8868   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
8869   const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
8870 
8871   if (instruction->GetType() == DataType::Type::kInt32) {
8872     const vixl32::Register first = InputRegisterAt(instruction, 0);
8873     const vixl32::Register output = OutputRegister(instruction);
8874     const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64
8875         ? LowRegisterFrom(locations->InAt(1))
8876         : InputRegisterAt(instruction, 1);
8877 
8878     if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8879       DCHECK_EQ(kind, HInstruction::kAdd);
8880 
8881       switch (op_kind) {
8882         case HDataProcWithShifterOp::kUXTB:
8883           __ Uxtab(output, first, second);
8884           break;
8885         case HDataProcWithShifterOp::kUXTH:
8886           __ Uxtah(output, first, second);
8887           break;
8888         case HDataProcWithShifterOp::kSXTB:
8889           __ Sxtab(output, first, second);
8890           break;
8891         case HDataProcWithShifterOp::kSXTH:
8892           __ Sxtah(output, first, second);
8893           break;
8894         default:
8895           LOG(FATAL) << "Unexpected operation kind: " << op_kind;
8896           UNREACHABLE();
8897       }
8898     } else {
8899       GenerateDataProcInstruction(kind,
8900                                   output,
8901                                   first,
8902                                   Operand(second,
8903                                           ShiftFromOpKind(op_kind),
8904                                           instruction->GetShiftAmount()),
8905                                   codegen_);
8906     }
8907   } else {
8908     DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
8909 
8910     if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8911       const vixl32::Register second = InputRegisterAt(instruction, 1);
8912 
8913       DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
8914       GenerateDataProc(kind,
8915                        locations->Out(),
8916                        locations->InAt(0),
8917                        second,
8918                        Operand(second, ShiftType::ASR, 31),
8919                        codegen_);
8920     } else {
8921       GenerateLongDataProc(instruction, codegen_);
8922     }
8923   }
8924 }
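// Example of the extension-op path above (illustrative): an HAdd whose right-hand side is a
// zero-extended byte, i.e. `first + (second & 0xff)`, is emitted as the single instruction
//   UXTAB output, first, second
// rather than a separate UXTB followed by an ADD.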
8925 
8926 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8927 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
8928                                                        vixl32::Register first,
8929                                                        uint32_t value) {
8930   // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
8931   if (value == 0xffffffffu) {
8932     if (!out.Is(first)) {
8933       __ Mov(out, first);
8934     }
8935     return;
8936   }
8937   if (value == 0u) {
8938     __ Mov(out, 0);
8939     return;
8940   }
8941   if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
8942     __ And(out, first, value);
8943   } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
8944     __ Bic(out, first, ~value);
8945   } else {
8946     DCHECK(IsPowerOfTwo(value + 1));
8947     __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
8948   }
8949 }
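// Example of the UBFX fallback above (illustrative): for value == 0x0000ffffu neither the value
// nor its complement is a valid modified immediate, but value + 1 is a power of two, so the
// masking is emitted as
//   UBFX out, first, #0, #16   ; out = first & 0xffff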
8950 
8951 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8952 void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
8953                                                        vixl32::Register first,
8954                                                        uint32_t value) {
8955   // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
8956   if (value == 0u) {
8957     if (!out.Is(first)) {
8958       __ Mov(out, first);
8959     }
8960     return;
8961   }
8962   if (value == 0xffffffffu) {
8963     __ Mvn(out, 0);
8964     return;
8965   }
8966   if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
8967     __ Orr(out, first, value);
8968   } else {
8969     DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
8970     __ Orn(out, first, ~value);
8971   }
8972 }
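// Example of the ORN fallback above (illustrative): value == 0xffffff00u cannot be encoded as an
// ORR immediate, but ~value == 0x000000ffu can, so the code emits
//   ORN out, first, #0xff   ; out = first | ~0xff = first | 0xffffff00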
8973 
8974 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8975 void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
8976                                                        vixl32::Register first,
8977                                                        uint32_t value) {
8978   // Optimize the special case for individual halves of `xor-long` (`xor` is simplified earlier).
8979   if (value == 0u) {
8980     if (!out.Is(first)) {
8981       __ Mov(out, first);
8982     }
8983     return;
8984   }
8985   __ Eor(out, first, value);
8986 }
8987 
8988 void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
8989                                                            Location first,
8990                                                            uint64_t value) {
8991   vixl32::Register out_low = LowRegisterFrom(out);
8992   vixl32::Register out_high = HighRegisterFrom(out);
8993   vixl32::Register first_low = LowRegisterFrom(first);
8994   vixl32::Register first_high = HighRegisterFrom(first);
8995   uint32_t value_low = Low32Bits(value);
8996   uint32_t value_high = High32Bits(value);
8997   if (value_low == 0u) {
8998     if (!out_low.Is(first_low)) {
8999       __ Mov(out_low, first_low);
9000     }
9001     __ Add(out_high, first_high, value_high);
9002     return;
9003   }
9004   __ Adds(out_low, first_low, value_low);
9005   if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) {
9006     __ Adc(out_high, first_high, value_high);
9007   } else {
9008     DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high));
9009     __ Sbc(out_high, first_high, ~value_high);
9010   }
9011 }
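// Worked example for the path above (illustrative): adding the 64-bit constant 1 emits
//   ADDS out_low,  first_low,  #1   ; sets the carry flag
//   ADC  out_high, first_high, #0   ; propagates the carry into the high word
// while a constant with a zero low word (e.g. 1 << 32) needs only a register move (if any)
// for the low word and a plain ADD on the high word.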
9012 
9013 void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
9014   LocationSummary* locations = instruction->GetLocations();
9015   Location first = locations->InAt(0);
9016   Location second = locations->InAt(1);
9017   Location out = locations->Out();
9018 
9019   if (second.IsConstant()) {
9020     uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
9021     uint32_t value_low = Low32Bits(value);
9022     if (instruction->GetResultType() == DataType::Type::kInt32) {
9023       vixl32::Register first_reg = InputRegisterAt(instruction, 0);
9024       vixl32::Register out_reg = OutputRegister(instruction);
9025       if (instruction->IsAnd()) {
9026         GenerateAndConst(out_reg, first_reg, value_low);
9027       } else if (instruction->IsOr()) {
9028         GenerateOrrConst(out_reg, first_reg, value_low);
9029       } else {
9030         DCHECK(instruction->IsXor());
9031         GenerateEorConst(out_reg, first_reg, value_low);
9032       }
9033     } else {
9034       DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
9035       uint32_t value_high = High32Bits(value);
9036       vixl32::Register first_low = LowRegisterFrom(first);
9037       vixl32::Register first_high = HighRegisterFrom(first);
9038       vixl32::Register out_low = LowRegisterFrom(out);
9039       vixl32::Register out_high = HighRegisterFrom(out);
9040       if (instruction->IsAnd()) {
9041         GenerateAndConst(out_low, first_low, value_low);
9042         GenerateAndConst(out_high, first_high, value_high);
9043       } else if (instruction->IsOr()) {
9044         GenerateOrrConst(out_low, first_low, value_low);
9045         GenerateOrrConst(out_high, first_high, value_high);
9046       } else {
9047         DCHECK(instruction->IsXor());
9048         GenerateEorConst(out_low, first_low, value_low);
9049         GenerateEorConst(out_high, first_high, value_high);
9050       }
9051     }
9052     return;
9053   }
9054 
9055   if (instruction->GetResultType() == DataType::Type::kInt32) {
9056     vixl32::Register first_reg = InputRegisterAt(instruction, 0);
9057     vixl32::Register second_reg = InputRegisterAt(instruction, 1);
9058     vixl32::Register out_reg = OutputRegister(instruction);
9059     if (instruction->IsAnd()) {
9060       __ And(out_reg, first_reg, second_reg);
9061     } else if (instruction->IsOr()) {
9062       __ Orr(out_reg, first_reg, second_reg);
9063     } else {
9064       DCHECK(instruction->IsXor());
9065       __ Eor(out_reg, first_reg, second_reg);
9066     }
9067   } else {
9068     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
9069     vixl32::Register first_low = LowRegisterFrom(first);
9070     vixl32::Register first_high = HighRegisterFrom(first);
9071     vixl32::Register second_low = LowRegisterFrom(second);
9072     vixl32::Register second_high = HighRegisterFrom(second);
9073     vixl32::Register out_low = LowRegisterFrom(out);
9074     vixl32::Register out_high = HighRegisterFrom(out);
9075     if (instruction->IsAnd()) {
9076       __ And(out_low, first_low, second_low);
9077       __ And(out_high, first_high, second_high);
9078     } else if (instruction->IsOr()) {
9079       __ Orr(out_low, first_low, second_low);
9080       __ Orr(out_high, first_high, second_high);
9081     } else {
9082       DCHECK(instruction->IsXor());
9083       __ Eor(out_low, first_low, second_low);
9084       __ Eor(out_high, first_high, second_high);
9085     }
9086   }
9087 }
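// Example for the constant path above (illustrative): `and-long` with the constant
// 0x00000000ffffffff dispatches to GenerateAndConst() once per 32-bit half, which emits only a
// register move for the low word (mask 0xffffffff) and `MOV out_high, #0` for the high word
// (mask 0x0); no AND instruction is needed at all.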
9088 
9089 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
9090     HInstruction* instruction,
9091     Location out,
9092     uint32_t offset,
9093     Location maybe_temp,
9094     ReadBarrierOption read_barrier_option) {
9095   vixl32::Register out_reg = RegisterFrom(out);
9096   if (read_barrier_option == kWithReadBarrier) {
9097     DCHECK(codegen_->EmitReadBarrier());
9098     DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9099     if (kUseBakerReadBarrier) {
9100       // Load with fast path based Baker's read barrier.
9101       // /* HeapReference<Object> */ out = *(out + offset)
9102       codegen_->GenerateFieldLoadWithBakerReadBarrier(
9103           instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false);
9104     } else {
9105       // Load with slow path based read barrier.
9106       // Save the value of `out` into `maybe_temp` before overwriting it
9107       // in the following load operation, as we will need it for the
9108       // read barrier below.
9109       __ Mov(RegisterFrom(maybe_temp), out_reg);
9110       // /* HeapReference<Object> */ out = *(out + offset)
9111       GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9112       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
9113     }
9114   } else {
9115     // Plain load with no read barrier.
9116     // /* HeapReference<Object> */ out = *(out + offset)
9117     GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9118     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9119   }
9120 }
9121 
9122 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
9123     HInstruction* instruction,
9124     Location out,
9125     Location obj,
9126     uint32_t offset,
9127     Location maybe_temp,
9128     ReadBarrierOption read_barrier_option) {
9129   vixl32::Register out_reg = RegisterFrom(out);
9130   vixl32::Register obj_reg = RegisterFrom(obj);
9131   if (read_barrier_option == kWithReadBarrier) {
9132     DCHECK(codegen_->EmitReadBarrier());
9133     if (kUseBakerReadBarrier) {
9134       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9135       // Load with fast path based Baker's read barrier.
9136       // /* HeapReference<Object> */ out = *(obj + offset)
9137       codegen_->GenerateFieldLoadWithBakerReadBarrier(
9138           instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false);
9139     } else {
9140       // Load with slow path based read barrier.
9141       // /* HeapReference<Object> */ out = *(obj + offset)
9142       GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9143       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
9144     }
9145   } else {
9146     // Plain load with no read barrier.
9147     // /* HeapReference<Object> */ out = *(obj + offset)
9148     GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9149     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9150   }
9151 }
9152 
9153 void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
9154     HInstruction* instruction,
9155     Location root,
9156     vixl32::Register obj,
9157     uint32_t offset,
9158     ReadBarrierOption read_barrier_option) {
9159   vixl32::Register root_reg = RegisterFrom(root);
9160   if (read_barrier_option == kWithReadBarrier) {
9161     DCHECK(EmitReadBarrier());
9162     if (kUseBakerReadBarrier) {
9163       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
9164       // Baker's read barriers are used.
9165 
9166       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
9167       // the Marking Register) to decide whether we need to enter
9168       // the slow path to mark the GC root.
9169       //
9170       // We use shared thunks for the slow path; shared within the method
9171       // for JIT, across methods for AOT. That thunk checks the reference
9172       // and jumps to the entrypoint if needed.
9173       //
9174       //     lr = &return_address;
9175       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
9176       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9177       //       goto gc_root_thunk<root_reg>(lr)
9178       //     }
9179       //   return_address:
9180 
9181       UseScratchRegisterScope temps(GetVIXLAssembler());
9182       temps.Exclude(ip);
9183       bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
9184       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
9185 
9186       size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
9187       size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
9188       size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9189                           narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9190       ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9191       vixl32::Label return_address;
9192       EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9193       __ cmp(mr, Operand(0));
9194       // Currently the offset is always within range. If that changes,
9195       // we shall have to split the load the same way as for fields.
9196       DCHECK_LT(offset, kReferenceLoadMinFarOffset);
9197       ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9198       __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
9199       EmitBakerReadBarrierBne(custom_data);
9200       __ bind(&return_address);
9201       DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9202                 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9203                        : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
9204     } else {
9205       // GC root loaded through a slow path for read barriers other
9206       // than Baker's.
9207       // /* GcRoot<mirror::Object>* */ root = obj + offset
9208       __ Add(root_reg, obj, offset);
9209       // /* mirror::Object* */ root = root->Read()
9210       GenerateReadBarrierForRootSlow(instruction, root, root);
9211     }
9212   } else {
9213     // Plain GC root load with no read barrier.
9214     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
9215     GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
9216     // Note that GC roots are not affected by heap poisoning, thus we
9217     // do not have to unpoison `root_reg` here.
9218   }
9219   MaybeGenerateMarkingRegisterCheck(/* code= */ 20);
9220 }
9221 
9222 void CodeGeneratorARMVIXL::GenerateIntrinsicMoveWithBakerReadBarrier(
9223     vixl::aarch32::Register marked_old_value,
9224     vixl::aarch32::Register old_value) {
9225   DCHECK(EmitBakerReadBarrier());
9226 
9227   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
9228   // For low registers, we can reuse the GC root narrow entrypoint; for high registers
9229   // we use a specialized entrypoint because the register bits are 8-11 instead of 12-15.
9230   bool narrow_mov = marked_old_value.IsLow();
9231   uint32_t custom_data = narrow_mov
9232       ? EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode(), /*narrow=*/ true)
9233       : EncodeBakerReadBarrierIntrinsicCasData(marked_old_value.GetCode());
9234 
9235   size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* MOV */ (narrow_mov ? 1u : 0u);
9236   size_t wide_instructions = /* ADR+CMP+MOV+BNE */ 4u - narrow_instructions;
9237   size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9238                       narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9239   ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9240   vixl32::Label return_address;
9241   EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9242   __ cmp(mr, Operand(0));
9243   ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9244   __ mov(EncodingSize(narrow_mov ? Narrow : Wide), marked_old_value, old_value);
9245   EmitBakerReadBarrierBne(custom_data);
9246   __ bind(&return_address);
9247   DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9248             narrow_mov
9249                 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9250                 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET);
9251 }
9252 
9253 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9254                                                                  Location ref,
9255                                                                  vixl32::Register obj,
9256                                                                  const vixl32::MemOperand& src,
9257                                                                  bool needs_null_check) {
9258   DCHECK(EmitBakerReadBarrier());
9259 
9260   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9261   // Marking Register) to decide whether we need to enter the slow
9262   // path to mark the reference. Then, in the slow path, check the
9263   // gray bit in the lock word of the reference's holder (`obj`) to
9264   // decide whether to mark `ref` or not.
9265   //
9266   // We use shared thunks for the slow path; shared within the method
9267   // for JIT, across methods for AOT. That thunk checks the holder
9268   // and jumps to the entrypoint if needed. If the holder is not gray,
9269   // it creates a fake dependency and returns to the LDR instruction.
9270   //
9271   //     lr = &gray_return_address;
9272   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9273   //       goto field_thunk<holder_reg, base_reg>(lr)
9274   //     }
9275   //   not_gray_return_address:
9276   //     // Original reference load. If the offset is too large to fit
9277   //     // into LDR, we use an adjusted base register here.
9278   //     HeapReference<mirror::Object> reference = *(obj+offset);
9279   //   gray_return_address:
9280 
9281   DCHECK(src.GetAddrMode() == vixl32::Offset);
9282   DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
9283   vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9284   bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
9285 
9286   UseScratchRegisterScope temps(GetVIXLAssembler());
9287   temps.Exclude(ip);
9288   uint32_t custom_data =
9289       EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
9290 
9291   {
9292     size_t narrow_instructions =
9293         /* CMP */ (mr.IsLow() ? 1u : 0u) +
9294         /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
9295     size_t wide_instructions =
9296         /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9297     size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9298                         narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9299     ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9300     vixl32::Label return_address;
9301     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9302     __ cmp(mr, Operand(0));
9303     EmitBakerReadBarrierBne(custom_data);
9304     ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9305     __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
9306     if (needs_null_check) {
9307       MaybeRecordImplicitNullCheck(instruction);
9308     }
9309     // Note: We need a specific width for the unpoisoning NEG.
9310     if (kPoisonHeapReferences) {
9311       if (narrow) {
9312         // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
9313         __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
9314       } else {
9315         __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9316       }
9317     }
9318     __ bind(&return_address);
9319     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9320               narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
9321                      : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
9322   }
9323   MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip));
9324 }
9325 
9326 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9327                                                                  Location ref,
9328                                                                  vixl32::Register obj,
9329                                                                  uint32_t offset,
9330                                                                  Location maybe_temp,
9331                                                                  bool needs_null_check) {
9332   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
9333   vixl32::Register base = obj;
9334   if (offset >= kReferenceLoadMinFarOffset) {
9335     base = RegisterFrom(maybe_temp);
9336     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
9337     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
9338     offset &= (kReferenceLoadMinFarOffset - 1u);
9339   }
9340   GenerateFieldLoadWithBakerReadBarrier(
9341       instruction, ref, obj, MemOperand(base, offset), needs_null_check);
9342 }
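// Illustrative example of the offset split above, assuming (for the sake of the example) that
// kReferenceLoadMinFarOffset is 0x1000: a field at offset 0x1234 would be loaded as
//   ADD base, obj, #0x1000      ; base = obj + (offset & ~0xfff)
//   LDR ref, [base, #0x234]     ; remaining offset = offset & 0xfff
// so that the immediate of the LDR on the fast path stays within the supported range.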
9343 
9344 void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
9345                                                                  vixl32::Register obj,
9346                                                                  uint32_t data_offset,
9347                                                                  Location index,
9348                                                                  Location temp,
9349                                                                  bool needs_null_check) {
9350   DCHECK(EmitBakerReadBarrier());
9351 
9352   static_assert(
9353       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
9354       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
9355   ScaleFactor scale_factor = TIMES_4;
9356 
9357   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9358   // Marking Register) to decide whether we need to enter the slow
9359   // path to mark the reference. Then, in the slow path, check the
9360   // gray bit in the lock word of the reference's holder (`obj`) to
9361   // decide whether to mark `ref` or not.
9362   //
9363   // We use shared thunks for the slow path; shared within the method
9364   // for JIT, across methods for AOT. That thunk checks the holder
9365   // and jumps to the entrypoint if needed. If the holder is not gray,
9366   // it creates a fake dependency and returns to the LDR instruction.
9367   //
9368   //     lr = &gray_return_address;
9369   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9370   //       goto array_thunk<base_reg>(lr)
9371   //     }
9372   //   not_gray_return_address:
9373   //     // Original reference load. If the offset is too large to fit
9374   //     // into LDR, we use an adjusted base register here.
9375   //     HeapReference<mirror::Object> reference = data[index];
9376   //   gray_return_address:
9377 
9378   DCHECK(index.IsValid());
9379   vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
9380   vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9381   vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32);  // Raw pointer.
9382 
9383   UseScratchRegisterScope temps(GetVIXLAssembler());
9384   temps.Exclude(ip);
9385   uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
9386 
9387   __ Add(data_reg, obj, Operand(data_offset));
9388   {
9389     size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
9390     size_t wide_instructions =
9391         /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9392     size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9393                         narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9394     ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9395     vixl32::Label return_address;
9396     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9397     __ cmp(mr, Operand(0));
9398     EmitBakerReadBarrierBne(custom_data);
9399     ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9400     __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
9401     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
9402     // Note: We need a Wide NEG for the unpoisoning.
9403     if (kPoisonHeapReferences) {
9404       __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9405     }
9406     __ bind(&return_address);
9407     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9408               BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
9409   }
9410   MaybeGenerateMarkingRegisterCheck(/* code= */ 22, /* temp_loc= */ LocationFrom(ip));
9411 }
9412 
9413 void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
9414   // The following condition is a compile-time one, so it does not have a run-time cost.
9415   if (kIsDebugBuild && EmitBakerReadBarrier()) {
9416     // The following condition is a run-time one; it is executed after the
9417     // previous compile-time test, to avoid penalizing non-debug builds.
9418     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
9419       UseScratchRegisterScope temps(GetVIXLAssembler());
9420       vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
9421       GetAssembler()->GenerateMarkingRegisterCheck(temp,
9422                                                    kMarkingRegisterCheckBreakCodeBaseCode + code);
9423     }
9424   }
9425 }
9426 
9427 SlowPathCodeARMVIXL* CodeGeneratorARMVIXL::AddReadBarrierSlowPath(HInstruction* instruction,
9428                                                                   Location out,
9429                                                                   Location ref,
9430                                                                   Location obj,
9431                                                                   uint32_t offset,
9432                                                                   Location index) {
9433   SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator())
9434       ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index);
9435   AddSlowPath(slow_path);
9436   return slow_path;
9437 }
9438 
9439 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
9440                                                    Location out,
9441                                                    Location ref,
9442                                                    Location obj,
9443                                                    uint32_t offset,
9444                                                    Location index) {
9445   DCHECK(EmitReadBarrier());
9446 
9447   // Insert a slow path based read barrier *after* the reference load.
9448   //
9449   // If heap poisoning is enabled, the unpoisoning of the loaded
9450   // reference will be carried out by the runtime within the slow
9451   // path.
9452   //
9453   // Note that `ref` currently does not get unpoisoned (when heap
9454   // poisoning is enabled), which is alright as the `ref` argument is
9455   // not used by the artReadBarrierSlow entry point.
9456   //
9457   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
9458   SlowPathCodeARMVIXL* slow_path =
9459       AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
9460 
9461   __ B(slow_path->GetEntryLabel());
9462   __ Bind(slow_path->GetExitLabel());
9463 }
9464 
9465 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
9466                                                         Location out,
9467                                                         Location ref,
9468                                                         Location obj,
9469                                                         uint32_t offset,
9470                                                         Location index) {
9471   if (EmitReadBarrier()) {
9472     // Baker's read barriers shall be handled by the fast path
9473     // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
9474     DCHECK(!kUseBakerReadBarrier);
9475     // If heap poisoning is enabled, unpoisoning will be taken care of
9476     // by the runtime within the slow path.
9477     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
9478   } else if (kPoisonHeapReferences) {
9479     GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
9480   }
9481 }
9482 
9483 void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
9484                                                           Location out,
9485                                                           Location root) {
9486   DCHECK(EmitReadBarrier());
9487 
9488   // Insert a slow path based read barrier *after* the GC root load.
9489   //
9490   // Note that GC roots are not affected by heap poisoning, so we do
9491   // not need to do anything special for this here.
9492   SlowPathCodeARMVIXL* slow_path =
9493       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root);
9494   AddSlowPath(slow_path);
9495 
9496   __ B(slow_path->GetEntryLabel());
9497   __ Bind(slow_path->GetExitLabel());
9498 }
9499 
9500 // Check if the desired_dispatch_info is supported. If it is, return it,
9501 // otherwise return a fall-back info that should be used instead.
9502 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
9503     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
9504     ArtMethod* method) {
9505   if (method->IsIntrinsic() &&
9506       desired_dispatch_info.code_ptr_location == CodePtrLocation::kCallCriticalNative) {
9507     // As a work-around for soft-float native ABI interfering with type checks, we are
9508     // inserting fake calls to Float.floatToRawIntBits() or Double.doubleToRawLongBits()
9509     // when a float or double argument is passed in core registers but we cannot do that
9510     // for actual intrinsic implementations that expect them in FP registers. Therefore
9511     // we do not use `kCallCriticalNative` for intrinsics with FP arguments; if they are
9512     // properly intrinsified, the dispatch type does not matter anyway.
9513     ScopedObjectAccess soa(Thread::Current());
9514     uint32_t shorty_len;
9515     const char* shorty = method->GetShorty(&shorty_len);
9516     for (uint32_t i = 1; i != shorty_len; ++i) {
9517       if (shorty[i] == 'D' || shorty[i] == 'F') {
9518         HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
9519         dispatch_info.code_ptr_location = CodePtrLocation::kCallArtMethod;
9520         return dispatch_info;
9521       }
9522     }
9523   }
9524   return desired_dispatch_info;
9525 }
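
// Illustrative example (not emitted code): a @CriticalNative candidate whose shorty is "IFJ"
// (int f(float, long)) has 'F' at shorty index 1, so the loop above falls back to
// CodePtrLocation::kCallArtMethod; a shorty such as "IIJ" keeps kCallCriticalNative.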
9526 
9527 
9528 void CodeGeneratorARMVIXL::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
9529   switch (load_kind) {
9530     case MethodLoadKind::kBootImageLinkTimePcRelative: {
9531       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9532       PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
9533       vixl32::Register temp_reg = RegisterFrom(temp);
9534       EmitMovwMovtPlaceholder(labels, temp_reg);
9535       break;
9536     }
9537     case MethodLoadKind::kBootImageRelRo: {
9538       uint32_t boot_image_offset = GetBootImageOffset(invoke);
9539       LoadBootImageRelRoEntry(RegisterFrom(temp), boot_image_offset);
9540       break;
9541     }
9542     case MethodLoadKind::kAppImageRelRo: {
9543       DCHECK(GetCompilerOptions().IsAppImage());
9544       PcRelativePatchInfo* labels = NewAppImageMethodPatch(invoke->GetResolvedMethodReference());
9545       vixl32::Register temp_reg = RegisterFrom(temp);
9546       EmitMovwMovtPlaceholder(labels, temp_reg);
9547       __ Ldr(temp_reg, MemOperand(temp_reg, /*offset=*/ 0));
9548       break;
9549     }
9550     case MethodLoadKind::kBssEntry: {
9551       PcRelativePatchInfo* labels = NewMethodBssEntryPatch(invoke->GetMethodReference());
9552       vixl32::Register temp_reg = RegisterFrom(temp);
9553       EmitMovwMovtPlaceholder(labels, temp_reg);
9554       // All aligned loads are implicitly atomic consume operations on ARM.
9555       GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9556       break;
9557     }
9558     case MethodLoadKind::kJitDirectAddress: {
9559       __ Mov(RegisterFrom(temp), Operand::From(invoke->GetResolvedMethod()));
9560       break;
9561     }
9562     case MethodLoadKind::kRuntimeCall: {
9563       // Test situation, don't do anything.
9564       break;
9565     }
9566     default: {
9567       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
9568       UNREACHABLE();
9569     }
9570   }
9571 }
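
// Rough sketch of what LoadMethod() emits for MethodLoadKind::kBssEntry (the immediates are
// placeholders until the linker patches them):
//   movw temp, #<placeholder>   ; low 16 bits of the PC-relative offset
//   movt temp, #<placeholder>   ; high 16 bits
//   add  temp, temp, pc         ; temp = address of the .bss entry
//   ldr  temp, [temp]           ; temp = ArtMethod*, resolved lazily at runtime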
9572 
9573 void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
9574     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
9575   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
9576   switch (invoke->GetMethodLoadKind()) {
9577     case MethodLoadKind::kStringInit: {
9578       uint32_t offset =
9579           GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
9580       // temp = thread->string_init_entrypoint
9581       GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
9582       break;
9583     }
9584     case MethodLoadKind::kRecursive: {
9585       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
9586       break;
9587     }
9588     case MethodLoadKind::kRuntimeCall: {
9589       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
9590       return;  // No code pointer retrieval; the runtime performs the call directly.
9591     }
9592     case MethodLoadKind::kBootImageLinkTimePcRelative:
9593       // Note: Unlike arm64, x86 and x86-64, we do not avoid the materialization of method
9594       // pointer for kCallCriticalNative because it would not save us an instruction from
9595       // the current sequence MOVW+MOVT+ADD(pc)+LDR+BL. The ADD(pc) separates the patched
9596       // offset instructions MOVW+MOVT from the entrypoint load, so they cannot be fused.
9597       FALLTHROUGH_INTENDED;
9598     default: {
9599       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
9600       break;
9601     }
9602   }
9603 
9604   auto call_code_pointer_member = [&](MemberOffset offset) {
9605     // LR = callee_method->member;
9606     GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
9607     {
9608       // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9609       // blx in T32 has only a 16-bit encoding, which is why we use a stricter scope check.
9610       ExactAssemblyScope aas(GetVIXLAssembler(),
9611                              vixl32::k16BitT32InstructionSizeInBytes,
9612                              CodeBufferCheckScope::kExactSize);
9613       // LR()
9614       __ blx(lr);
9615       RecordPcInfo(invoke, slow_path);
9616     }
9617   };
9618   switch (invoke->GetCodePtrLocation()) {
9619     case CodePtrLocation::kCallSelf:
9620       {
9621         DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
9622         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9623         ExactAssemblyScope aas(GetVIXLAssembler(),
9624                                vixl32::k32BitT32InstructionSizeInBytes,
9625                                CodeBufferCheckScope::kMaximumSize);
9626         __ bl(GetFrameEntryLabel());
9627         RecordPcInfo(invoke, slow_path);
9628       }
9629       break;
9630     case CodePtrLocation::kCallCriticalNative: {
9631       size_t out_frame_size =
9632           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
9633                                     kAapcsStackAlignment,
9634                                     GetCriticalNativeDirectCallFrameSize>(invoke);
9635       call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
9636       // Move the result when needed due to native and managed ABI mismatch.
9637       switch (invoke->GetType()) {
9638         case DataType::Type::kFloat32:
9639           __ Vmov(s0, r0);
9640           break;
9641         case DataType::Type::kFloat64:
9642           __ Vmov(d0, r0, r1);
9643           break;
9644         case DataType::Type::kBool:
9645         case DataType::Type::kInt8:
9646         case DataType::Type::kUint16:
9647         case DataType::Type::kInt16:
9648         case DataType::Type::kInt32:
9649         case DataType::Type::kInt64:
9650         case DataType::Type::kVoid:
9651           break;
9652         default:
9653           DCHECK(false) << invoke->GetType();
9654           break;
9655       }
9656       if (out_frame_size != 0u) {
9657         DecreaseFrame(out_frame_size);
9658       }
9659       break;
9660     }
9661     case CodePtrLocation::kCallArtMethod:
9662       call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
9663       break;
9664   }
9665 
9666   DCHECK(!IsLeafMethod());
9667 }
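
// Rough shape of a kCallArtMethod dispatch once the callee ArtMethod* is in `callee_method`
// (see `call_code_pointer_member` above; the field name below is informal):
//   ldr lr, [callee_method, #entry_point_from_quick_compiled_code]
//   blx lr                      ; RecordPcInfo() records the pc right after this instruction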
9668 
9669 void CodeGeneratorARMVIXL::GenerateVirtualCall(
9670     HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
9671   vixl32::Register temp = RegisterFrom(temp_location);
9672   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9673       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
9674 
9675   // Use the calling convention instead of the location of the receiver, as
9676   // intrinsics may have put the receiver in a different register. In the intrinsics
9677   // slow path, the arguments have been moved to the right place, so here we are
9678   // guaranteed that the receiver is the first register of the calling convention.
9679   InvokeDexCallingConventionARMVIXL calling_convention;
9680   vixl32::Register receiver = calling_convention.GetRegisterAt(0);
9681   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
9682   {
9683     // Make sure the pc is recorded immediately after the `ldr` instruction.
9684     ExactAssemblyScope aas(GetVIXLAssembler(),
9685                            vixl32::kMaxInstructionSizeInBytes,
9686                            CodeBufferCheckScope::kMaximumSize);
9687     // /* HeapReference<Class> */ temp = receiver->klass_
9688     __ ldr(temp, MemOperand(receiver, class_offset));
9689     MaybeRecordImplicitNullCheck(invoke);
9690   }
9691   // Instead of simply (possibly) unpoisoning `temp` here, we should
9692   // emit a read barrier for the previous class reference load.
9693   // However, this is not required in practice: this is an
9694   // intermediate/temporary reference, and the current
9695   // concurrent copying collector keeps the from-space memory
9696   // intact/accessible until the end of the marking phase
9697   // (future collectors may not).
9698   GetAssembler()->MaybeUnpoisonHeapReference(temp);
9699 
9700   // If we're compiling baseline, update the inline cache.
9701   MaybeGenerateInlineCacheCheck(invoke, temp);
9702 
9703   // temp = temp->GetMethodAt(method_offset);
9704   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
9705       kArmPointerSize).Int32Value();
9706   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
9707   // LR = temp->GetEntryPoint();
9708   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
9709   {
9710     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9711     // blx in T32 has only a 16-bit encoding, which is why we use a stricter scope check.
9712     ExactAssemblyScope aas(GetVIXLAssembler(),
9713                            vixl32::k16BitT32InstructionSizeInBytes,
9714                            CodeBufferCheckScope::kExactSize);
9715     // LR();
9716     __ blx(lr);
9717     RecordPcInfo(invoke, slow_path);
9718   }
9719 }
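
// Rough shape of the emitted virtual dispatch (read barrier and inline cache code omitted):
//   ldr temp, [receiver, #class_offset]      ; temp = receiver->klass_
//   ldr temp, [temp, #vtable_entry_offset]   ; temp = vtable entry (ArtMethod*)
//   ldr lr,   [temp, #entry_point_offset]    ; lr = quick entry point
//   blx lr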
9720 
9721 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
9722     uint32_t intrinsic_data) {
9723   return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_other_patches_);
9724 }
9725 
9726 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
9727     uint32_t boot_image_offset) {
9728   return NewPcRelativePatch(/* dex_file= */ nullptr,
9729                             boot_image_offset,
9730                             &boot_image_other_patches_);
9731 }
9732 
9733 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
9734     MethodReference target_method) {
9735   return NewPcRelativePatch(
9736       target_method.dex_file, target_method.index, &boot_image_method_patches_);
9737 }
9738 
9739 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewAppImageMethodPatch(
9740     MethodReference target_method) {
9741   return NewPcRelativePatch(
9742       target_method.dex_file, target_method.index, &app_image_method_patches_);
9743 }
9744 
9745 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
9746     MethodReference target_method) {
9747   return NewPcRelativePatch(
9748       target_method.dex_file, target_method.index, &method_bss_entry_patches_);
9749 }
9750 
9751 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch(
9752     const DexFile& dex_file, dex::TypeIndex type_index) {
9753   return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_);
9754 }
9755 
9756 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewAppImageTypePatch(
9757     const DexFile& dex_file, dex::TypeIndex type_index) {
9758   return NewPcRelativePatch(&dex_file, type_index.index_, &app_image_type_patches_);
9759 }
9760 
9761 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
9762     HLoadClass* load_class) {
9763   const DexFile& dex_file = load_class->GetDexFile();
9764   dex::TypeIndex type_index = load_class->GetTypeIndex();
9765   ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
9766   switch (load_class->GetLoadKind()) {
9767     case HLoadClass::LoadKind::kBssEntry:
9768       patches = &type_bss_entry_patches_;
9769       break;
9770     case HLoadClass::LoadKind::kBssEntryPublic:
9771       patches = &public_type_bss_entry_patches_;
9772       break;
9773     case HLoadClass::LoadKind::kBssEntryPackage:
9774       patches = &package_type_bss_entry_patches_;
9775       break;
9776     default:
9777       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
9778       UNREACHABLE();
9779   }
9780   return NewPcRelativePatch(&dex_file, type_index.index_, patches);
9781 }
9782 
9783 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch(
9784     const DexFile& dex_file, dex::StringIndex string_index) {
9785   return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_);
9786 }
9787 
9788 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
9789     const DexFile& dex_file, dex::StringIndex string_index) {
9790   return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_);
9791 }
9792 
9793 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
9794     const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
9795   patches->emplace_back(dex_file, offset_or_index);
9796   return &patches->back();
9797 }
9798 
9799 void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) {
9800   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
9801   DCHECK(!GetCompilerOptions().IsJitCompiler());
9802   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
9803   vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label;
9804   __ bind(bl_label);
9805   vixl32::Label placeholder_label;
9806   __ bl(&placeholder_label);  // Placeholder, patched at link-time.
9807   __ bind(&placeholder_label);
9808 }
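
// Note: the `bl` above initially targets the next instruction; it is only a placeholder that
// the linker redirects to a thunk performing `ldr pc, [tr, #entrypoint_offset]` (see the
// kCallEntrypoint case in EmitThunkCode() below).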
9809 
9810 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
9811   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
9812   if (GetCompilerOptions().IsJitCompiler()) {
9813     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
9814     vixl::aarch32::Label* slow_path_entry = &it->second.label;
9815     __ b(ne, EncodingSize(Wide), slow_path_entry);
9816   } else {
9817     baker_read_barrier_patches_.emplace_back(custom_data);
9818     vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
9819     __ bind(patch_label);
9820     vixl32::Label placeholder_label;
9821     __ b(ne, EncodingSize(Wide), &placeholder_label);  // Placeholder, patched at link-time.
9822     __ bind(&placeholder_label);
9823   }
9824 }
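
// Note: for AOT, the wide `b.ne` above is a placeholder that the linker redirects to the
// Baker read barrier thunk selected by `custom_data`; for JIT, it branches directly to the
// shared slow path entry emitted alongside the compiled method.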
9825 
9826 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
9827   return DeduplicateUint32Literal(address, &uint32_literals_);
9828 }
9829 
9830 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
9831     const DexFile& dex_file,
9832     dex::StringIndex string_index,
9833     Handle<mirror::String> handle) {
9834   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
9835   return jit_string_patches_.GetOrCreate(
9836       StringReference(&dex_file, string_index),
9837       [this]() {
9838         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9839       });
9840 }
9841 
9842 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
9843                                                       dex::TypeIndex type_index,
9844                                                       Handle<mirror::Class> handle) {
9845   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
9846   return jit_class_patches_.GetOrCreate(
9847       TypeReference(&dex_file, type_index),
9848       [this]() {
9849         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9850       });
9851 }
9852 
9853 void CodeGeneratorARMVIXL::LoadBootImageRelRoEntry(vixl32::Register reg,
9854                                                    uint32_t boot_image_offset) {
9855   CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
9856   EmitMovwMovtPlaceholder(labels, reg);
9857   __ Ldr(reg, MemOperand(reg, /*offset=*/ 0));
9858 }
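
// Rough sketch of the emitted sequence: the movw/movt/add pc placeholder materializes the
// address of the boot image RelRo entry, and the final ldr loads the boot image address
// stored there.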
9859 
9860 void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
9861                                                 uint32_t boot_image_reference) {
9862   if (GetCompilerOptions().IsBootImage()) {
9863     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9864         NewBootImageIntrinsicPatch(boot_image_reference);
9865     EmitMovwMovtPlaceholder(labels, reg);
9866   } else if (GetCompilerOptions().GetCompilePic()) {
9867     LoadBootImageRelRoEntry(reg, boot_image_reference);
9868   } else {
9869     DCHECK(GetCompilerOptions().IsJitCompiler());
9870     gc::Heap* heap = Runtime::Current()->GetHeap();
9871     DCHECK(!heap->GetBootImageSpaces().empty());
9872     uintptr_t address =
9873         reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
9874     __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
9875   }
9876 }
9877 
9878 void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,
9879                                                          TypeReference target_type) {
9880   // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
9881   DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9882   PcRelativePatchInfo* labels =
9883       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
9884   EmitMovwMovtPlaceholder(labels, reg);
9885 }
9886 
9887 void CodeGeneratorARMVIXL::LoadIntrinsicDeclaringClass(vixl32::Register reg, HInvoke* invoke) {
9888   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
9889   if (GetCompilerOptions().IsBootImage()) {
9890     MethodReference target_method = invoke->GetResolvedMethodReference();
9891     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
9892     LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
9893   } else {
9894     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
9895     LoadBootImageAddress(reg, boot_image_offset);
9896   }
9897 }
9898 
9899 void CodeGeneratorARMVIXL::LoadClassRootForIntrinsic(vixl::aarch32::Register reg,
9900                                                      ClassRoot class_root) {
9901   if (GetCompilerOptions().IsBootImage()) {
9902     ScopedObjectAccess soa(Thread::Current());
9903     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
9904     TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
9905     LoadTypeForBootImageIntrinsic(reg, target_type);
9906   } else {
9907     uint32_t boot_image_offset = GetBootImageOffset(class_root);
9908     LoadBootImageAddress(reg, boot_image_offset);
9909   }
9910 }
9911 
9912 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
9913 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
9914     const ArenaDeque<PcRelativePatchInfo>& infos,
9915     ArenaVector<linker::LinkerPatch>* linker_patches) {
9916   for (const PcRelativePatchInfo& info : infos) {
9917     const DexFile* dex_file = info.target_dex_file;
9918     size_t offset_or_index = info.offset_or_index;
9919     DCHECK(info.add_pc_label.IsBound());
9920     uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
9921     // Add MOVW patch.
9922     DCHECK(info.movw_label.IsBound());
9923     uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
9924     linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index));
9925     // Add MOVT patch.
9926     DCHECK(info.movt_label.IsBound());
9927     uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
9928     linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index));
9929   }
9930 }
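
// Example: a single PC-relative reference contributes two linker patches that share the
// ADD(pc) anchor, i.e. Factory(movw_offset, dex_file, add_pc_offset, offset_or_index) and
// Factory(movt_offset, dex_file, add_pc_offset, offset_or_index), as emitted above.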
9931 
9932 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
9933 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
9934                                      const DexFile* target_dex_file,
9935                                      uint32_t pc_insn_offset,
9936                                      uint32_t boot_image_offset) {
9937   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
9938   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
9939 }
9940 
9941 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
9942   DCHECK(linker_patches->empty());
9943   size_t size =
9944       /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() +
9945       /* MOVW+MOVT for each entry */ 2u * app_image_method_patches_.size() +
9946       /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
9947       /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() +
9948       /* MOVW+MOVT for each entry */ 2u * app_image_type_patches_.size() +
9949       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
9950       /* MOVW+MOVT for each entry */ 2u * public_type_bss_entry_patches_.size() +
9951       /* MOVW+MOVT for each entry */ 2u * package_type_bss_entry_patches_.size() +
9952       /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
9953       /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
9954       /* MOVW+MOVT for each entry */ 2u * boot_image_other_patches_.size() +
9955       call_entrypoint_patches_.size() +
9956       baker_read_barrier_patches_.size();
9957   linker_patches->reserve(size);
9958   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
9959     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
9960         boot_image_method_patches_, linker_patches);
9961     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
9962         boot_image_type_patches_, linker_patches);
9963     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
9964         boot_image_string_patches_, linker_patches);
9965   } else {
9966     DCHECK(boot_image_method_patches_.empty());
9967     DCHECK(boot_image_type_patches_.empty());
9968     DCHECK(boot_image_string_patches_.empty());
9969   }
9970   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
9971   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
9972   if (GetCompilerOptions().IsBootImage()) {
9973     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
9974         boot_image_other_patches_, linker_patches);
9975   } else {
9976     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
9977         boot_image_other_patches_, linker_patches);
9978     EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
9979         app_image_method_patches_, linker_patches);
9980     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
9981         app_image_type_patches_, linker_patches);
9982   }
9983   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
9984       method_bss_entry_patches_, linker_patches);
9985   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
9986       type_bss_entry_patches_, linker_patches);
9987   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
9988       public_type_bss_entry_patches_, linker_patches);
9989   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
9990       package_type_bss_entry_patches_, linker_patches);
9991   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
9992       string_bss_entry_patches_, linker_patches);
9993   for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) {
9994     DCHECK(info.target_dex_file == nullptr);
9995     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
9996         info.label.GetLocation(), info.offset_or_index));
9997   }
9998   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
9999     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
10000         info.label.GetLocation(), info.custom_data));
10001   }
10002   DCHECK_EQ(size, linker_patches->size());
10003 }
10004 
10005 bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
10006   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
10007          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
10008          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
10009 }
10010 
10011 void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
10012                                          /*out*/ ArenaVector<uint8_t>* code,
10013                                          /*out*/ std::string* debug_name) {
10014   arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
10015   switch (patch.GetType()) {
10016     case linker::LinkerPatch::Type::kCallRelative: {
10017       // The thunk just uses the entry point in the ArtMethod. This works even for calls
10018       // to the generic JNI and interpreter trampolines.
10019       MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
10020       assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value());
10021       assembler.GetVIXLAssembler()->Bkpt(0);
10022       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10023         *debug_name = "MethodCallThunk";
10024       }
10025       break;
10026     }
10027     case linker::LinkerPatch::Type::kCallEntrypoint: {
10028       assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());
10029       assembler.GetVIXLAssembler()->Bkpt(0);
10030       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10031         *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset());
10032       }
10033       break;
10034     }
10035     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
10036       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
10037       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
10038       break;
10039     }
10040     default:
10041       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
10042       UNREACHABLE();
10043   }
10044 
10045   // Ensure we emit the literal pool if any.
10046   assembler.FinalizeCode();
10047   code->resize(assembler.CodeSize());
10048   MemoryRegion code_region(code->data(), code->size());
10049   assembler.CopyInstructions(code_region);
10050 }
10051 
10052 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
10053     uint32_t value,
10054     Uint32ToLiteralMap* map) {
10055   return map->GetOrCreate(
10056       value,
10057       [this, value]() {
10058         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value);
10059       });
10060 }
10061 
10062 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10063   LocationSummary* locations =
10064       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
10065   locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
10066                      Location::RequiresRegister());
10067   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
10068   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
10069   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
10070 }
10071 
10072 void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10073   vixl32::Register res = OutputRegister(instr);
10074   vixl32::Register accumulator =
10075       InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
10076   vixl32::Register mul_left =
10077       InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
10078   vixl32::Register mul_right =
10079       InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
10080 
10081   if (instr->GetOpKind() == HInstruction::kAdd) {
10082     __ Mla(res, mul_left, mul_right, accumulator);
10083   } else {
10084     __ Mls(res, mul_left, mul_right, accumulator);
10085   }
10086 }
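
// Semantics of the instructions selected above:
//   MLA res, a, b, acc  ; res = acc + a * b   (HInstruction::kAdd)
//   MLS res, a, b, acc  ; res = acc - a * b   (HInstruction::kSub)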
10087 
10088 void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10089   // Nothing to do, this should be removed during prepare for register allocator.
10090   LOG(FATAL) << "Unreachable";
10091 }
10092 
10093 void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10094   // Nothing to do, this should be removed during prepare for register allocator.
10095   LOG(FATAL) << "Unreachable";
10096 }
10097 
10098 // Simple implementation of packed switch - generate cascaded compare/jumps.
10099 void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10100   LocationSummary* locations =
10101       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
10102   locations->SetInAt(0, Location::RequiresRegister());
10103   if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
10104       codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10105     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
10106     if (switch_instr->GetStartValue() != 0) {
10107       locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
10108     }
10109   }
10110 }
10111 
10112 // TODO(VIXL): Investigate and reach the parity with old arm codegen.
10113 void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10114   int32_t lower_bound = switch_instr->GetStartValue();
10115   uint32_t num_entries = switch_instr->GetNumEntries();
10116   LocationSummary* locations = switch_instr->GetLocations();
10117   vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
10118   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
10119 
10120   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
10121       !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10122     // Create a series of compare/jumps.
10123     UseScratchRegisterScope temps(GetVIXLAssembler());
10124     vixl32::Register temp_reg = temps.Acquire();
10125     // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
10126     // store the immediate, because IP is used as the destination register. For the other
10127     // AddConstantSetFlags() and GenerateCompareWithImmediate() calls, the immediate values are
10128     // constant, and they can be encoded in the instruction without using the IP register.
10129     __ Adds(temp_reg, value_reg, -lower_bound);
10130 
10131     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
10132     // Jump to successors[0] if value == lower_bound.
10133     __ B(eq, codegen_->GetLabelOf(successors[0]));
10134     int32_t last_index = 0;
10135     for (; num_entries - last_index > 2; last_index += 2) {
10136       __ Adds(temp_reg, temp_reg, -2);
10137       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
10138       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
10139       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
10140       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
10141     }
10142     if (num_entries - last_index == 2) {
10143       // The last missing case_value.
10144       __ Cmp(temp_reg, 1);
10145       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
10146     }
10147 
10148     // And the default for any other value.
10149     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
10150       __ B(codegen_->GetLabelOf(default_block));
10151     }
10152   } else {
10153     // Create a table lookup.
10154     vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
10155 
10156     JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
10157 
10158     // Remove the bias.
10159     vixl32::Register key_reg;
10160     if (lower_bound != 0) {
10161       key_reg = RegisterFrom(locations->GetTemp(1));
10162       __ Sub(key_reg, value_reg, lower_bound);
10163     } else {
10164       key_reg = value_reg;
10165     }
10166 
10167     // Check whether the value is in the table, jump to default block if not.
10168     __ Cmp(key_reg, num_entries - 1);
10169     __ B(hi, codegen_->GetLabelOf(default_block));
10170 
10171     UseScratchRegisterScope temps(GetVIXLAssembler());
10172     vixl32::Register jump_offset = temps.Acquire();
10173 
10174     // Load jump offset from the table.
10175     {
10176       const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
10177       ExactAssemblyScope aas(GetVIXLAssembler(),
10178                              (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
10179                              CodeBufferCheckScope::kMaximumSize);
10180       __ adr(table_base, jump_table->GetTableStartLabel());
10181       __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
10182 
10183       // Jump to the target block by branching to table_base (PC-relative) + offset.
10184       vixl32::Register target_address = table_base;
10185       __ add(target_address, table_base, jump_offset);
10186       __ bx(target_address);
10187 
10188       jump_table->EmitTable(codegen_);
10189     }
10190   }
10191 }
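
// Sketch of the table-based lookup emitted above (T32 only); each jump table entry is
// expected to hold the offset of its target block relative to the table start:
//   adr table_base, <jump_table>
//   ldr jump_offset, [table_base, key_reg, lsl #2]
//   add target, table_base, jump_offset
//   bx  target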
10192 
10193 // Copy the result of a call into the given target.
10194 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) {
10195   if (!trg.IsValid()) {
10196     DCHECK_EQ(type, DataType::Type::kVoid);
10197     return;
10198   }
10199 
10200   DCHECK_NE(type, DataType::Type::kVoid);
10201 
10202   Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
10203   if (return_loc.Equals(trg)) {
10204     return;
10205   }
10206 
10207   // Let the parallel move resolver take care of all of this.
10208   HParallelMove parallel_move(GetGraph()->GetAllocator());
10209   parallel_move.AddMove(return_loc, trg, type, nullptr);
10210   GetMoveResolver()->EmitNativeCode(&parallel_move);
10211 }
10212 
10213 void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10214   LocationSummary* locations =
10215       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
10216   locations->SetInAt(0, Location::RequiresRegister());
10217   locations->SetOut(Location::RequiresRegister());
10218 }
10219 
10220 void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10221   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
10222     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
10223         instruction->GetIndex(), kArmPointerSize).SizeValue();
10224     GetAssembler()->LoadFromOffset(kLoadWord,
10225                                    OutputRegister(instruction),
10226                                    InputRegisterAt(instruction, 0),
10227                                    method_offset);
10228   } else {
10229     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
10230         instruction->GetIndex(), kArmPointerSize));
10231     GetAssembler()->LoadFromOffset(kLoadWord,
10232                                    OutputRegister(instruction),
10233                                    InputRegisterAt(instruction, 0),
10234                                    mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
10235     GetAssembler()->LoadFromOffset(kLoadWord,
10236                                    OutputRegister(instruction),
10237                                    OutputRegister(instruction),
10238                                    method_offset);
10239   }
10240 }
10241 
10242 static void PatchJitRootUse(uint8_t* code,
10243                             const uint8_t* roots_data,
10244                             VIXLUInt32Literal* literal,
10245                             uint64_t index_in_table) {
10246   DCHECK(literal->IsBound());
10247   uint32_t literal_offset = literal->GetLocation();
10248   uintptr_t address =
10249       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
10250   uint8_t* data = code + literal_offset;
10251   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
10252 }
10253 
10254 void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
10255   for (const auto& entry : jit_string_patches_) {
10256     const StringReference& string_reference = entry.first;
10257     VIXLUInt32Literal* table_entry_literal = entry.second;
10258     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
10259     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10260   }
10261   for (const auto& entry : jit_class_patches_) {
10262     const TypeReference& type_reference = entry.first;
10263     VIXLUInt32Literal* table_entry_literal = entry.second;
10264     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
10265     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10266   }
10267 }
10268 
10269 void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
10270     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
10271     vixl32::Register out) {
10272   ExactAssemblyScope aas(GetVIXLAssembler(),
10273                          3 * vixl32::kMaxInstructionSizeInBytes,
10274                          CodeBufferCheckScope::kMaximumSize);
10275   // TODO(VIXL): Think about using mov instead of movw.
10276   __ bind(&labels->movw_label);
10277   __ movw(out, /* operand= */ 0u);
10278   __ bind(&labels->movt_label);
10279   __ movt(out, /* operand= */ 0u);
10280   __ bind(&labels->add_pc_label);
10281   __ add(out, out, pc);
10282 }
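
// After linking, the placeholder pair above roughly becomes (in Thumb state, reading PC in
// the ADD yields the address of the ADD plus 4):
//   movw out, #:lower16:(target - (add_pc + 4))
//   movt out, #:upper16:(target - (add_pc + 4))
//   add  out, out, pc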
10283 
10284 #undef __
10285 #undef QUICK_ENTRY_POINT
10286 #undef TODO_VIXL32
10287 
10288 #define __ assembler.GetVIXLAssembler()->
10289 
10290 static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
10291                                      vixl32::Register base_reg,
10292                                      vixl32::MemOperand& lock_word,
10293                                      vixl32::Label* slow_path,
10294                                      int32_t raw_ldr_offset,
10295                                      vixl32::Label* throw_npe = nullptr) {
10296   // Load the lock word containing the rb_state.
10297   __ Ldr(ip, lock_word);
10298   // Given the numeric representation, it's enough to check the low bit of the rb_state.
10299   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
10300   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
10301   __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
10302   __ B(ne, slow_path, /* is_far_target= */ false);
10303   // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
10304   if (throw_npe != nullptr) {
10305     __ Bind(throw_npe);
10306   }
10307   __ Add(lr, lr, raw_ldr_offset);
10308   // Introduce a dependency on the lock_word including rb_state,
10309   // to prevent load-load reordering, and without using
10310   // a memory barrier (which would be more expensive).
10311   __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
10312   __ Bx(lr);          // And return back to the function.
10313   // Note: The fake dependency is unnecessary for the slow path.
10314 }
10315 
10316 // Load the read barrier introspection entrypoint in register `entrypoint`
10317 static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
10318   // The register where the read barrier introspection entrypoint is loaded
10319   // is the marking register. We clobber it here and the entrypoint restores it to 1.
10320   vixl32::Register entrypoint = mr;
10321   // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
10322   DCHECK_EQ(ip.GetCode(), 12u);
10323   const int32_t entry_point_offset =
10324       Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
10325   __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
10326   return entrypoint;
10327 }
10328 
10329 void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
10330                                                         uint32_t encoded_data,
10331                                                         /*out*/ std::string* debug_name) {
10332   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
10333   switch (kind) {
10334     case BakerReadBarrierKind::kField: {
10335       vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10336       CheckValidReg(base_reg.GetCode());
10337       vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
10338       CheckValidReg(holder_reg.GetCode());
10339       BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10340       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10341       temps.Exclude(ip);
10342       // In the case of a field load, if `base_reg` differs from
10343       // `holder_reg`, the offset was too large and we must have emitted (during the construction
10344       // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
10345       // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
10346       // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
10347       // not necessarily do that check before going to the thunk.
10348       vixl32::Label throw_npe_label;
10349       vixl32::Label* throw_npe = nullptr;
10350       if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
10351         throw_npe = &throw_npe_label;
10352         __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false);
10353       }
10354       // Check if the holder is gray and, if not, add fake dependency to the base register
10355       // and return to the LDR instruction to load the reference. Otherwise, use introspection
10356       // to load the reference and call the entrypoint that performs further checks on the
10357       // reference and marks it if needed.
10358       vixl32::Label slow_path;
10359       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
10360       const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
10361           ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
10362           : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
10363       EmitGrayCheckAndFastPath(
10364           assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
10365       __ Bind(&slow_path);
10366       const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10367                                  raw_ldr_offset;
10368       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10369       if (width == BakerReadBarrierWidth::kWide) {
10370         MemOperand ldr_half_address(lr, ldr_offset + 2);
10371         __ Ldrh(ip, ldr_half_address);        // Load the LDR immediate half-word with "Rt | imm12".
10372         __ Ubfx(ip, ip, 0, 12);               // Extract the offset imm12.
10373         __ Ldr(ip, MemOperand(base_reg, ip));   // Load the reference.
10374       } else {
10375         MemOperand ldr_address(lr, ldr_offset);
10376         __ Ldrh(ip, ldr_address);             // Load the LDR immediate, encoding T1.
10377         __ Add(ep_reg,                        // Adjust the entrypoint address to the entrypoint
10378                ep_reg,                        // for narrow LDR.
10379                Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
10380         __ Ubfx(ip, ip, 6, 5);                // Extract the imm5, i.e. offset / 4.
10381         __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2));   // Load the reference.
10382       }
10383       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
10384       __ Bx(ep_reg);                          // Jump to the entrypoint.
10385       break;
10386     }
10387     case BakerReadBarrierKind::kArray: {
10388       vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10389       CheckValidReg(base_reg.GetCode());
10390       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10391                 BakerReadBarrierSecondRegField::Decode(encoded_data));
10392       DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10393       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10394       temps.Exclude(ip);
10395       vixl32::Label slow_path;
10396       int32_t data_offset =
10397           mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
10398       MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
10399       DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
10400       const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
10401       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
10402       __ Bind(&slow_path);
10403       const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10404                                  raw_ldr_offset;
10405       MemOperand ldr_address(lr, ldr_offset + 2);
10406       __ Ldrb(ip, ldr_address);               // Load the LDR (register) byte with "00 | imm2 | Rm",
10407                                               // i.e. Rm+32 because the scale in imm2 is 2.
10408       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10409       __ Bfi(ep_reg, ip, 3, 6);               // Insert ip to the entrypoint address to create
10410                                               // a switch case target based on the index register.
10411       __ Mov(ip, base_reg);                   // Move the base register to ip.
10412       __ Bx(ep_reg);                          // Jump to the entrypoint's array switch case.
10413       break;
10414     }
10415     case BakerReadBarrierKind::kGcRoot:
10416     case BakerReadBarrierKind::kIntrinsicCas: {
10417       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
10418       // and it does not have a forwarding address), call the correct introspection entrypoint;
10419       // otherwise return the reference (or the extracted forwarding address).
10420       // There is no gray bit check for GC roots.
10421       vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10422       CheckValidReg(root_reg.GetCode());
10423       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10424                 BakerReadBarrierSecondRegField::Decode(encoded_data));
10425       BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10426       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10427       temps.Exclude(ip);
10428       vixl32::Label return_label, not_marked, forwarding_address;
10429       __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false);
10430       MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
10431       __ Ldr(ip, lock_word);
10432       __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
10433       __ B(eq, &not_marked);
10434       __ Bind(&return_label);
10435       __ Bx(lr);
10436       __ Bind(&not_marked);
10437       static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
10438                     "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
10439                     " the highest bits and the 'forwarding address' state to have all bits set");
10440       __ Cmp(ip, Operand(0xc0000000));
10441       __ B(hs, &forwarding_address);
10442       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10443       // Adjust the art_quick_read_barrier_mark_introspection address
10444       // in kBakerCcEntrypointRegister to one of
10445       //     art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},intrinsic_cas}.
10446       if (kind == BakerReadBarrierKind::kIntrinsicCas) {
10447         DCHECK(width == BakerReadBarrierWidth::kWide);
10448         DCHECK(!root_reg.IsLow());
10449       }
10450       int32_t entrypoint_offset =
10451           (kind == BakerReadBarrierKind::kGcRoot)
10452               ? (width == BakerReadBarrierWidth::kWide)
10453                   ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
10454                   : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
10455               : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET;
10456       __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
10457       __ Mov(ip, root_reg);
10458       __ Bx(ep_reg);
10459       __ Bind(&forwarding_address);
10460       __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
10461       __ Bx(lr);
10462       break;
10463     }
10464     default:
10465       LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
10466       UNREACHABLE();
10467   }
10468 
10469   // For JIT, the slow path is considered part of the compiled method,
10470   // so JIT should pass null as `debug_name`.
10471   DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
10472   if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10473     std::ostringstream oss;
10474     oss << "BakerReadBarrierThunk";
10475     switch (kind) {
10476       case BakerReadBarrierKind::kField:
10477         oss << "Field";
10478         if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10479           oss << "Wide";
10480         }
10481         oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
10482             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
10483         break;
10484       case BakerReadBarrierKind::kArray:
10485         oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10486         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10487                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10488         DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10489         break;
10490       case BakerReadBarrierKind::kGcRoot:
10491         oss << "GcRoot";
10492         if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10493           oss << "Wide";
10494         }
10495         oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10496         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10497                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10498         break;
10499       case BakerReadBarrierKind::kIntrinsicCas:
10500         oss << "IntrinsicCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10501         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10502                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10503         DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10504         break;
10505     }
10506     *debug_name = oss.str();
10507   }
10508 }
10509 
10510 #undef __
10511 
10512 }  // namespace arm
10513 }  // namespace art
10514