1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm_vixl.h"
18
19 #include "arch/arm/asm_support_arm.h"
20 #include "arch/arm/instruction_set_features_arm.h"
21 #include "arch/arm/jni_frame_arm.h"
22 #include "art_method-inl.h"
23 #include "base/bit_utils.h"
24 #include "base/bit_utils_iterator.h"
25 #include "class_root-inl.h"
26 #include "class_table.h"
27 #include "code_generator_utils.h"
28 #include "common_arm.h"
29 #include "entrypoints/quick/quick_entrypoints.h"
30 #include "gc/accounting/card_table.h"
31 #include "gc/space/image_space.h"
32 #include "heap_poisoning.h"
33 #include "interpreter/mterp/nterp.h"
34 #include "intrinsics.h"
35 #include "intrinsics_arm_vixl.h"
36 #include "intrinsics_utils.h"
37 #include "linker/linker_patch.h"
38 #include "mirror/array-inl.h"
39 #include "mirror/class-inl.h"
40 #include "mirror/var_handle.h"
41 #include "scoped_thread_state_change-inl.h"
42 #include "thread.h"
43 #include "utils/arm/assembler_arm_vixl.h"
44 #include "utils/arm/managed_register_arm.h"
45 #include "utils/assembler.h"
46 #include "utils/stack_checks.h"
47
48 namespace art HIDDEN {
49 namespace arm {
50
51 namespace vixl32 = vixl::aarch32;
52 using namespace vixl32; // NOLINT(build/namespaces)
53
54 using helpers::DRegisterFrom;
55 using helpers::HighRegisterFrom;
56 using helpers::InputDRegisterAt;
57 using helpers::InputOperandAt;
58 using helpers::InputRegister;
59 using helpers::InputRegisterAt;
60 using helpers::InputSRegisterAt;
61 using helpers::InputVRegister;
62 using helpers::InputVRegisterAt;
63 using helpers::Int32ConstantFrom;
64 using helpers::Int64ConstantFrom;
65 using helpers::LocationFrom;
66 using helpers::LowRegisterFrom;
67 using helpers::LowSRegisterFrom;
68 using helpers::OperandFrom;
69 using helpers::OutputRegister;
70 using helpers::OutputSRegister;
71 using helpers::OutputVRegister;
72 using helpers::RegisterFrom;
73 using helpers::SRegisterFrom;
74 using helpers::Uint64ConstantFrom;
75
76 using vixl::EmissionCheckScope;
77 using vixl::ExactAssemblyScope;
78 using vixl::CodeBufferCheckScope;
79
80 using RegisterList = vixl32::RegisterList;
81
82 static bool ExpectedPairLayout(Location location) {
83 // We expect this for both core and fpu register pairs.
84 return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
85 }
86 // Use a local definition to prevent copying mistakes.
87 static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
88 static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
89 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
90
91 // Reference load (except object array loads) uses LDR Rt, [Rn, #offset], which can handle
92 // offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
93 // For the Baker read barrier implementation using link-time generated thunks we need to split
94 // the offset explicitly.
95 constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
96
97 // Using a base helps identify when we hit Marking Register check breakpoints.
98 constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
99
100 #ifdef __
101 #error "ARM Codegen VIXL macro-assembler macro already defined."
102 #endif
103
104 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
105 #define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()-> // NOLINT
106 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
107
108 // Marker that code is yet to be, and must be, implemented.
109 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
110
111 static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
112 return rt.IsLow() && rn.IsLow() && offset < 32u;
113 }
114
115 class EmitAdrCode {
116 public:
117 EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
118 : assembler_(assembler), rd_(rd), label_(label) {
119 DCHECK(!assembler->AllowMacroInstructions()); // In ExactAssemblyScope.
120 adr_location_ = assembler->GetCursorOffset();
121 assembler->adr(EncodingSize(Wide), rd, label);
122 }
123
124 ~EmitAdrCode() {
125 DCHECK(label_->IsBound());
126 // The ADR emitted by the assembler does not set the Thumb mode bit we need.
127 // TODO: Maybe extend VIXL to allow ADR for return address?
128 uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
129 // Expecting ADR encoding T3 with `(offset & 1) == 0`.
130 DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26.
131 DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23.
132 DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15.
133 DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`.
134 // Add the Thumb mode bit.
135 raw_adr[2] |= 0x01u;
136 }
137
138 private:
139 ArmVIXLMacroAssembler* const assembler_;
140 vixl32::Register rd_;
141 vixl32::Label* const label_;
142 int32_t adr_location_;
143 };
144
145 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
146 InvokeRuntimeCallingConventionARMVIXL calling_convention;
147 RegisterSet caller_saves = RegisterSet::Empty();
148 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
149 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
150 // that the kPrimNot result register is the same as the first argument register.
151 return caller_saves;
152 }
153
154 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARMVIXL operate on sets of S registers;
155 // for each live D register they treat the two corresponding S registers as live.
156 //
157 // The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList)
158 // build a list of contiguous D registers from a list of contiguous S registers (handling the
159 // first/last S register corner cases) and save/restore this new list as D registers, thus:
160 // - decreasing code size
161 // - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is
162 // restored and then used in regular non-SlowPath code as a D register.
163 //
164 // For the following example (v means the S register is live):
165 // D names: | D0 | D1 | D2 | D3 | ...
166 // S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
167 // Live? | | v | v | v | v | v | v | | ...
168 //
169 // S1 and S6 will be saved/restored independently; the D register list (D1, D2) will be processed
170 // as D registers.
171 //
172 // TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
173 // for lists of floating-point registers.
174 static size_t SaveContiguousSRegisterList(size_t first,
175 size_t last,
176 CodeGenerator* codegen,
177 size_t stack_offset) {
178 static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
179 static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
180 DCHECK_LE(first, last);
181 if ((first == last) && (first == 0)) {
182 __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
183 return stack_offset + kSRegSizeInBytes;
184 }
185 if (first % 2 == 1) {
186 __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
187 stack_offset += kSRegSizeInBytes;
188 }
189
190 bool save_last = false;
191 if (last % 2 == 0) {
192 save_last = true;
193 --last;
194 }
195
196 if (first < last) {
197 vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
198 DCHECK_EQ((last - first + 1) % 2, 0u);
199 size_t number_of_d_regs = (last - first + 1) / 2;
200
201 if (number_of_d_regs == 1) {
202 __ Vstr(d_reg, MemOperand(sp, stack_offset));
203 } else if (number_of_d_regs > 1) {
204 UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
205 vixl32::Register base = sp;
206 if (stack_offset != 0) {
207 base = temps.Acquire();
208 __ Add(base, sp, Operand::From(stack_offset));
209 }
210 __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
211 }
212 stack_offset += number_of_d_regs * kDRegSizeInBytes;
213 }
214
215 if (save_last) {
216 __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
217 stack_offset += kSRegSizeInBytes;
218 }
219
220 return stack_offset;
221 }
222
223 static size_t RestoreContiguousSRegisterList(size_t first,
224 size_t last,
225 CodeGenerator* codegen,
226 size_t stack_offset) {
227 static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
228 static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
229 DCHECK_LE(first, last);
230 if ((first == last) && (first == 0)) {
231 __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
232 return stack_offset + kSRegSizeInBytes;
233 }
234 if (first % 2 == 1) {
235 __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
236 stack_offset += kSRegSizeInBytes;
237 }
238
239 bool restore_last = false;
240 if (last % 2 == 0) {
241 restore_last = true;
242 --last;
243 }
244
245 if (first < last) {
246 vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
247 DCHECK_EQ((last - first + 1) % 2, 0u);
248 size_t number_of_d_regs = (last - first + 1) / 2;
249 if (number_of_d_regs == 1) {
250 __ Vldr(d_reg, MemOperand(sp, stack_offset));
251 } else if (number_of_d_regs > 1) {
252 UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
253 vixl32::Register base = sp;
254 if (stack_offset != 0) {
255 base = temps.Acquire();
256 __ Add(base, sp, Operand::From(stack_offset));
257 }
258 __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
259 }
260 stack_offset += number_of_d_regs * kDRegSizeInBytes;
261 }
262
263 if (restore_last) {
264 __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
265 stack_offset += kSRegSizeInBytes;
266 }
267
268 return stack_offset;
269 }
270
271 static LoadOperandType GetLoadOperandType(DataType::Type type) {
272 switch (type) {
273 case DataType::Type::kReference:
274 return kLoadWord;
275 case DataType::Type::kBool:
276 case DataType::Type::kUint8:
277 return kLoadUnsignedByte;
278 case DataType::Type::kInt8:
279 return kLoadSignedByte;
280 case DataType::Type::kUint16:
281 return kLoadUnsignedHalfword;
282 case DataType::Type::kInt16:
283 return kLoadSignedHalfword;
284 case DataType::Type::kInt32:
285 return kLoadWord;
286 case DataType::Type::kInt64:
287 return kLoadWordPair;
288 case DataType::Type::kFloat32:
289 return kLoadSWord;
290 case DataType::Type::kFloat64:
291 return kLoadDWord;
292 default:
293 LOG(FATAL) << "Unreachable type " << type;
294 UNREACHABLE();
295 }
296 }
297
298 static StoreOperandType GetStoreOperandType(DataType::Type type) {
299 switch (type) {
300 case DataType::Type::kReference:
301 return kStoreWord;
302 case DataType::Type::kBool:
303 case DataType::Type::kUint8:
304 case DataType::Type::kInt8:
305 return kStoreByte;
306 case DataType::Type::kUint16:
307 case DataType::Type::kInt16:
308 return kStoreHalfword;
309 case DataType::Type::kInt32:
310 return kStoreWord;
311 case DataType::Type::kInt64:
312 return kStoreWordPair;
313 case DataType::Type::kFloat32:
314 return kStoreSWord;
315 case DataType::Type::kFloat64:
316 return kStoreDWord;
317 default:
318 LOG(FATAL) << "Unreachable type " << type;
319 UNREACHABLE();
320 }
321 }
322
323 void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
324 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
325 size_t orig_offset = stack_offset;
326
327 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
328 for (uint32_t i : LowToHighBits(core_spills)) {
329 // If the register holds an object, update the stack mask.
330 if (locations->RegisterContainsObject(i)) {
331 locations->SetStackBit(stack_offset / kVRegSize);
332 }
333 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
334 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
335 saved_core_stack_offsets_[i] = stack_offset;
336 stack_offset += kArmWordSize;
337 }
338
339 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
340 arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);
341
342 uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
343 orig_offset = stack_offset;
344 for (uint32_t i : LowToHighBits(fp_spills)) {
345 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
346 saved_fpu_stack_offsets_[i] = stack_offset;
347 stack_offset += kArmWordSize;
348 }
349
350 stack_offset = orig_offset;
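  // Each iteration of the loop below peels off the lowest contiguous run of set bits from
  // `fp_spills` and saves the corresponding S registers in one go. Illustrative example
  // (values not taken from real compiled code): fp_spills = 0b01100110 gives begin = 1,
  // tmp = 0b01101000 and end = 3, so S1..S2 are saved and 0b01100000 is left for the
  // next iteration.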
351 while (fp_spills != 0u) {
352 uint32_t begin = CTZ(fp_spills);
353 uint32_t tmp = fp_spills + (1u << begin);
354 fp_spills &= tmp; // Clear the contiguous range of 1s.
355 uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
356 stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
357 }
358 DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
359 }
360
361 void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
362 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
363 size_t orig_offset = stack_offset;
364
365 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
366 for (uint32_t i : LowToHighBits(core_spills)) {
367 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
368 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
369 stack_offset += kArmWordSize;
370 }
371
372 // TODO(VIXL): Check the coherency of stack_offset after this with a test.
373 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
374 arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);
375
376 uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
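  // Peel off runs of contiguous S registers exactly as in SaveLiveRegisters above.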
377 while (fp_spills != 0u) {
378 uint32_t begin = CTZ(fp_spills);
379 uint32_t tmp = fp_spills + (1u << begin);
380 fp_spills &= tmp; // Clear the contiguous range of 1s.
381 uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
382 stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
383 }
384 DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
385 }
386
387 class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
388 public:
389 explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}
390
391 void EmitNativeCode(CodeGenerator* codegen) override {
392 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
393 __ Bind(GetEntryLabel());
394 if (instruction_->CanThrowIntoCatchBlock()) {
395 // Live registers will be restored in the catch block if caught.
396 SaveLiveRegisters(codegen, instruction_->GetLocations());
397 }
398 arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
399 instruction_,
400 instruction_->GetDexPc(),
401 this);
402 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
403 }
404
405 bool IsFatal() const override { return true; }
406
407 const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }
408
409 private:
410 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
411 };
412
413 class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
414 public:
415 explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
416 : SlowPathCodeARMVIXL(instruction) {}
417
418 void EmitNativeCode(CodeGenerator* codegen) override {
419 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
420 __ Bind(GetEntryLabel());
421 arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
422 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
423 }
424
425 bool IsFatal() const override { return true; }
426
427 const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }
428
429 private:
430 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
431 };
432
433 class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
434 public:
435 SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
436 : SlowPathCodeARMVIXL(instruction), successor_(successor) {}
437
438 void EmitNativeCode(CodeGenerator* codegen) override {
439 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
440 __ Bind(GetEntryLabel());
441 arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
442 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
443 if (successor_ == nullptr) {
444 __ B(GetReturnLabel());
445 } else {
446 __ B(arm_codegen->GetLabelOf(successor_));
447 }
448 }
449
450 vixl32::Label* GetReturnLabel() {
451 DCHECK(successor_ == nullptr);
452 return &return_label_;
453 }
454
455 HBasicBlock* GetSuccessor() const {
456 return successor_;
457 }
458
459 const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }
460
461 private:
462 // If not null, the block to branch to after the suspend check.
463 HBasicBlock* const successor_;
464
465 // If `successor_` is null, the label to branch to after the suspend check.
466 vixl32::Label return_label_;
467
468 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
469 };
470
471 class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
472 public:
473 explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
474 : SlowPathCodeARMVIXL(instruction) {}
475
476 void EmitNativeCode(CodeGenerator* codegen) override {
477 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
478 LocationSummary* locations = instruction_->GetLocations();
479
480 __ Bind(GetEntryLabel());
481 if (instruction_->CanThrowIntoCatchBlock()) {
482 // Live registers will be restored in the catch block if caught.
483 SaveLiveRegisters(codegen, instruction_->GetLocations());
484 }
485 // We're moving two locations to locations that could overlap, so we need a parallel
486 // move resolver.
487 InvokeRuntimeCallingConventionARMVIXL calling_convention;
488 codegen->EmitParallelMoves(
489 locations->InAt(0),
490 LocationFrom(calling_convention.GetRegisterAt(0)),
491 DataType::Type::kInt32,
492 locations->InAt(1),
493 LocationFrom(calling_convention.GetRegisterAt(1)),
494 DataType::Type::kInt32);
495 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
496 ? kQuickThrowStringBounds
497 : kQuickThrowArrayBounds;
498 arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
499 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
500 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
501 }
502
503 bool IsFatal() const override { return true; }
504
505 const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }
506
507 private:
508 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
509 };
510
511 class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
512 public:
513 LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
514 : SlowPathCodeARMVIXL(at), cls_(cls) {
515 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
516 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
517 }
518
519 void EmitNativeCode(CodeGenerator* codegen) override {
520 LocationSummary* locations = instruction_->GetLocations();
521 Location out = locations->Out();
522 const uint32_t dex_pc = instruction_->GetDexPc();
523 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
524 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
525
526 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
527 __ Bind(GetEntryLabel());
528 SaveLiveRegisters(codegen, locations);
529
530 InvokeRuntimeCallingConventionARMVIXL calling_convention;
531 if (must_resolve_type) {
532 DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()) ||
533 arm_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
534 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
535 &cls_->GetDexFile()));
536 dex::TypeIndex type_index = cls_->GetTypeIndex();
537 __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
538 if (cls_->NeedsAccessCheck()) {
539 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
540 arm_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
541 } else {
542 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
543 arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
544 }
545 // If we also must_do_clinit, the resolved type is now in the correct register.
546 } else {
547 DCHECK(must_do_clinit);
548 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
549 arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
550 }
551 if (must_do_clinit) {
552 arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
553 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
554 }
555
556 // Move the class to the desired location.
557 if (out.IsValid()) {
558 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
559 arm_codegen->Move32(locations->Out(), LocationFrom(r0));
560 }
561 RestoreLiveRegisters(codegen, locations);
562 __ B(GetExitLabel());
563 }
564
565 const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }
566
567 private:
568 // The class this slow path will load.
569 HLoadClass* const cls_;
570
571 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
572 };
573
574 class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
575 public:
576 explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
577 : SlowPathCodeARMVIXL(instruction) {}
578
579 void EmitNativeCode(CodeGenerator* codegen) override {
580 DCHECK(instruction_->IsLoadString());
581 DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
582 LocationSummary* locations = instruction_->GetLocations();
583 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
584 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
585
586 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
587 __ Bind(GetEntryLabel());
588 SaveLiveRegisters(codegen, locations);
589
590 InvokeRuntimeCallingConventionARMVIXL calling_convention;
591 __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
592 arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
593 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
594
595 arm_codegen->Move32(locations->Out(), LocationFrom(r0));
596 RestoreLiveRegisters(codegen, locations);
597
598 __ B(GetExitLabel());
599 }
600
601 const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }
602
603 private:
604 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
605 };
606
607 class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
608 public:
609 TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
610 : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}
611
612 void EmitNativeCode(CodeGenerator* codegen) override {
613 LocationSummary* locations = instruction_->GetLocations();
614 DCHECK(instruction_->IsCheckCast()
615 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
616
617 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
618 __ Bind(GetEntryLabel());
619
620 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
621 SaveLiveRegisters(codegen, locations);
622 }
623
624 // We're moving two locations to locations that could overlap, so we need a parallel
625 // move resolver.
626 InvokeRuntimeCallingConventionARMVIXL calling_convention;
627
628 codegen->EmitParallelMoves(locations->InAt(0),
629 LocationFrom(calling_convention.GetRegisterAt(0)),
630 DataType::Type::kReference,
631 locations->InAt(1),
632 LocationFrom(calling_convention.GetRegisterAt(1)),
633 DataType::Type::kReference);
634 if (instruction_->IsInstanceOf()) {
635 arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
636 instruction_,
637 instruction_->GetDexPc(),
638 this);
639 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
640 arm_codegen->Move32(locations->Out(), LocationFrom(r0));
641 } else {
642 DCHECK(instruction_->IsCheckCast());
643 arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
644 instruction_,
645 instruction_->GetDexPc(),
646 this);
647 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
648 }
649
650 if (!is_fatal_) {
651 RestoreLiveRegisters(codegen, locations);
652 __ B(GetExitLabel());
653 }
654 }
655
656 const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }
657
658 bool IsFatal() const override { return is_fatal_; }
659
660 private:
661 const bool is_fatal_;
662
663 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
664 };
665
666 class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
667 public:
668 explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
669 : SlowPathCodeARMVIXL(instruction) {}
670
671 void EmitNativeCode(CodeGenerator* codegen) override {
672 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
673 __ Bind(GetEntryLabel());
674 LocationSummary* locations = instruction_->GetLocations();
675 SaveLiveRegisters(codegen, locations);
676 InvokeRuntimeCallingConventionARMVIXL calling_convention;
677 __ Mov(calling_convention.GetRegisterAt(0),
678 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
679
680 arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
681 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
682 }
683
684 const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }
685
686 private:
687 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
688 };
689
690 class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
691 public:
692 explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}
693
694 void EmitNativeCode(CodeGenerator* codegen) override {
695 LocationSummary* locations = instruction_->GetLocations();
696 __ Bind(GetEntryLabel());
697 SaveLiveRegisters(codegen, locations);
698
699 InvokeRuntimeCallingConventionARMVIXL calling_convention;
700 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
701 parallel_move.AddMove(
702 locations->InAt(0),
703 LocationFrom(calling_convention.GetRegisterAt(0)),
704 DataType::Type::kReference,
705 nullptr);
706 parallel_move.AddMove(
707 locations->InAt(1),
708 LocationFrom(calling_convention.GetRegisterAt(1)),
709 DataType::Type::kInt32,
710 nullptr);
711 parallel_move.AddMove(
712 locations->InAt(2),
713 LocationFrom(calling_convention.GetRegisterAt(2)),
714 DataType::Type::kReference,
715 nullptr);
716 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
717
718 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
719 arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
720 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
721 RestoreLiveRegisters(codegen, locations);
722 __ B(GetExitLabel());
723 }
724
725 const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }
726
727 private:
728 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
729 };
730
731 // Slow path generating a read barrier for a heap reference.
732 class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
733 public:
734 ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
735 Location out,
736 Location ref,
737 Location obj,
738 uint32_t offset,
739 Location index)
740 : SlowPathCodeARMVIXL(instruction),
741 out_(out),
742 ref_(ref),
743 obj_(obj),
744 offset_(offset),
745 index_(index) {
746 DCHECK(gUseReadBarrier);
747 // If `obj` is equal to `out` or `ref`, it means the initial object
748 // has been overwritten by (or after) the heap object reference load
749 // to be instrumented, e.g.:
750 //
751 // __ LoadFromOffset(kLoadWord, out, out, offset);
752 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
753 //
754 // In that case, we have lost the information about the original
755 // object, and the emitted read barrier cannot work properly.
756 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
757 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
758 }
759
760 void EmitNativeCode(CodeGenerator* codegen) override {
761 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
762 LocationSummary* locations = instruction_->GetLocations();
763 vixl32::Register reg_out = RegisterFrom(out_);
764 DCHECK(locations->CanCall());
765 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
766 DCHECK(instruction_->IsInstanceFieldGet() ||
767 instruction_->IsPredicatedInstanceFieldGet() ||
768 instruction_->IsStaticFieldGet() ||
769 instruction_->IsArrayGet() ||
770 instruction_->IsInstanceOf() ||
771 instruction_->IsCheckCast() ||
772 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
773 << "Unexpected instruction in read barrier for heap reference slow path: "
774 << instruction_->DebugName();
775 // The read barrier instrumentation of object ArrayGet
776 // instructions does not support the HIntermediateAddress
777 // instruction.
778 DCHECK(!(instruction_->IsArrayGet() &&
779 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
780
781 __ Bind(GetEntryLabel());
782 SaveLiveRegisters(codegen, locations);
783
784 // We may have to change the index's value, but as `index_` is a
785 // constant member (like other "inputs" of this slow path),
786 // introduce a copy of it, `index`.
787 Location index = index_;
788 if (index_.IsValid()) {
789 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
790 if (instruction_->IsArrayGet()) {
791 // Compute the actual memory offset and store it in `index`.
792 vixl32::Register index_reg = RegisterFrom(index_);
793 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
794 if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
795 // We are about to change the value of `index_reg` (see the
796 // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
797 // art::arm::ArmVIXLMacroAssembler::Add below), but it has
798 // not been saved by the previous call to
799 // art::SlowPathCode::SaveLiveRegisters, as it is a
800 // callee-save register --
801 // art::SlowPathCode::SaveLiveRegisters does not consider
802 // callee-save registers, as it has been designed with the
803 // assumption that callee-save registers are supposed to be
804 // handled by the called function. So, as a callee-save
805 // register, `index_reg` _would_ eventually be saved onto
806 // the stack, but it would be too late: we would have
807 // changed its value earlier. Therefore, we manually save
808 // it here into another freely available register,
809 // `free_reg`, chosen of course among the caller-save
810 // registers (as a callee-save `free_reg` register would
811 // exhibit the same problem).
812 //
813 // Note we could have requested a temporary register from
814 // the register allocator instead; but we prefer not to, as
815 // this is a slow path, and we know we can find a
816 // caller-save register that is available.
817 vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
818 __ Mov(free_reg, index_reg);
819 index_reg = free_reg;
820 index = LocationFrom(index_reg);
821 } else {
822 // The initial register stored in `index_` has already been
823 // saved in the call to art::SlowPathCode::SaveLiveRegisters
824 // (as it is not a callee-save register), so we can freely
825 // use it.
826 }
827 // Shifting the index value contained in `index_reg` by the scale
828 // factor (2) cannot overflow in practice, as the runtime is
829 // unable to allocate object arrays with a size larger than
830 // 2^26 - 1 (that is, 2^28 - 4 bytes).
831 __ Lsl(index_reg, index_reg, TIMES_4);
832 static_assert(
833 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
834 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
835 __ Add(index_reg, index_reg, offset_);
836 } else {
837 // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
838 // (as in the case of ArrayGet), as it is actually an offset to an object field within an
839 // object.
840 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
841 DCHECK(instruction_->GetLocations()->Intrinsified());
842 HInvoke* invoke = instruction_->AsInvoke();
843 DCHECK(IsUnsafeGetObject(invoke) || IsVarHandleGet(invoke) || IsVarHandleCASFamily(invoke))
844 << invoke->GetIntrinsic();
845 DCHECK_EQ(offset_, 0U);
846 // Though UnsafeGet's offset location is a register pair, we only pass the low
847 // part (high part is irrelevant for 32-bit addresses) to the slow path.
848 // For VarHandle intrinsics, the index is always just a register.
849 DCHECK(index_.IsRegister());
850 index = index_;
851 }
852 }
853
854 // We're moving two or three locations to locations that could
855 // overlap, so we need a parallel move resolver.
856 InvokeRuntimeCallingConventionARMVIXL calling_convention;
857 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
858 parallel_move.AddMove(ref_,
859 LocationFrom(calling_convention.GetRegisterAt(0)),
860 DataType::Type::kReference,
861 nullptr);
862 parallel_move.AddMove(obj_,
863 LocationFrom(calling_convention.GetRegisterAt(1)),
864 DataType::Type::kReference,
865 nullptr);
866 if (index.IsValid()) {
867 parallel_move.AddMove(index,
868 LocationFrom(calling_convention.GetRegisterAt(2)),
869 DataType::Type::kInt32,
870 nullptr);
871 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
872 } else {
873 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
874 __ Mov(calling_convention.GetRegisterAt(2), offset_);
875 }
876 arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
877 CheckEntrypointTypes<
878 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
879 arm_codegen->Move32(out_, LocationFrom(r0));
880
881 RestoreLiveRegisters(codegen, locations);
882 __ B(GetExitLabel());
883 }
884
885 const char* GetDescription() const override {
886 return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
887 }
888
889 private:
890 vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
891 uint32_t ref = RegisterFrom(ref_).GetCode();
892 uint32_t obj = RegisterFrom(obj_).GetCode();
893 for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
894 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
895 return vixl32::Register(i);
896 }
897 }
898 // We shall never fail to find a free caller-save register, as
899 // there are more than two core caller-save registers on ARM
900 // (meaning it is possible to find one which is different from
901 // `ref` and `obj`).
902 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
903 LOG(FATAL) << "Could not find a free caller-save register";
904 UNREACHABLE();
905 }
906
907 const Location out_;
908 const Location ref_;
909 const Location obj_;
910 const uint32_t offset_;
911 // An additional location containing an index to an array.
912 // Only used for HArrayGet and the UnsafeGetObject &
913 // UnsafeGetObjectVolatile intrinsics.
914 const Location index_;
915
916 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL);
917 };
918
919 // Slow path generating a read barrier for a GC root.
920 class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
921 public:
922 ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
923 : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
924 DCHECK(gUseReadBarrier);
925 }
926
927 void EmitNativeCode(CodeGenerator* codegen) override {
928 LocationSummary* locations = instruction_->GetLocations();
929 vixl32::Register reg_out = RegisterFrom(out_);
930 DCHECK(locations->CanCall());
931 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
932 DCHECK(instruction_->IsLoadClass() ||
933 instruction_->IsLoadString() ||
934 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
935 << "Unexpected instruction in read barrier for GC root slow path: "
936 << instruction_->DebugName();
937
938 __ Bind(GetEntryLabel());
939 SaveLiveRegisters(codegen, locations);
940
941 InvokeRuntimeCallingConventionARMVIXL calling_convention;
942 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
943 arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_);
944 arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
945 instruction_,
946 instruction_->GetDexPc(),
947 this);
948 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
949 arm_codegen->Move32(out_, LocationFrom(r0));
950
951 RestoreLiveRegisters(codegen, locations);
952 __ B(GetExitLabel());
953 }
954
955 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }
956
957 private:
958 const Location out_;
959 const Location root_;
960
961 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
962 };
963
964 class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
965 public:
966 explicit MethodEntryExitHooksSlowPathARMVIXL(HInstruction* instruction)
967 : SlowPathCodeARMVIXL(instruction) {}
968
969 void EmitNativeCode(CodeGenerator* codegen) override {
970 LocationSummary* locations = instruction_->GetLocations();
971 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
972 QuickEntrypointEnum entry_point =
973 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
974 __ Bind(GetEntryLabel());
975 SaveLiveRegisters(codegen, locations);
976 if (instruction_->IsMethodExitHook()) {
977 // Load frame size to pass to the exit hooks
978 __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
979 }
980 arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
981 RestoreLiveRegisters(codegen, locations);
982 __ B(GetExitLabel());
983 }
984
985 const char* GetDescription() const override {
986 return "MethodEntryExitHooksSlowPath";
987 }
988
989 private:
990 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL);
991 };
992
993 class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
994 public:
995 CompileOptimizedSlowPathARMVIXL() : SlowPathCodeARMVIXL(/* instruction= */ nullptr) {}
996
997 void EmitNativeCode(CodeGenerator* codegen) override {
998 uint32_t entry_point_offset =
999 GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
1000 __ Bind(GetEntryLabel());
1001 __ Ldr(lr, MemOperand(tr, entry_point_offset));
1002 // Note: we don't record the call here (and therefore don't generate a stack
1003 // map), as the entrypoint should never be suspended.
1004 __ Blx(lr);
1005 __ B(GetExitLabel());
1006 }
1007
1008 const char* GetDescription() const override {
1009 return "CompileOptimizedSlowPath";
1010 }
1011
1012 private:
1013 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARMVIXL);
1014 };
1015
1016 inline vixl32::Condition ARMCondition(IfCondition cond) {
1017 switch (cond) {
1018 case kCondEQ: return eq;
1019 case kCondNE: return ne;
1020 case kCondLT: return lt;
1021 case kCondLE: return le;
1022 case kCondGT: return gt;
1023 case kCondGE: return ge;
1024 case kCondB: return lo;
1025 case kCondBE: return ls;
1026 case kCondA: return hi;
1027 case kCondAE: return hs;
1028 }
1029 LOG(FATAL) << "Unreachable";
1030 UNREACHABLE();
1031 }
1032
1033 // Maps signed condition to unsigned condition.
1034 inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
1035 switch (cond) {
1036 case kCondEQ: return eq;
1037 case kCondNE: return ne;
1038 // Signed to unsigned.
1039 case kCondLT: return lo;
1040 case kCondLE: return ls;
1041 case kCondGT: return hi;
1042 case kCondGE: return hs;
1043 // Unsigned remain unchanged.
1044 case kCondB: return lo;
1045 case kCondBE: return ls;
1046 case kCondA: return hi;
1047 case kCondAE: return hs;
1048 }
1049 LOG(FATAL) << "Unreachable";
1050 UNREACHABLE();
1051 }
1052
1053 inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
1054 // The ARM condition codes can express all the necessary branches, see the
1055 // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
1056 // There is no dex instruction or HIR that would need the missing conditions
1057 // "equal or unordered" or "not equal".
1058 switch (cond) {
1059 case kCondEQ: return eq;
1060 case kCondNE: return ne /* unordered */;
1061 case kCondLT: return gt_bias ? cc : lt /* unordered */;
1062 case kCondLE: return gt_bias ? ls : le /* unordered */;
1063 case kCondGT: return gt_bias ? hi /* unordered */ : gt;
1064 case kCondGE: return gt_bias ? cs /* unordered */ : ge;
1065 default:
1066 LOG(FATAL) << "UNREACHABLE";
1067 UNREACHABLE();
1068 }
1069 }
1070
1071 inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
1072 switch (op_kind) {
1073 case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
1074 case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
1075 case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
1076 default:
1077 LOG(FATAL) << "Unexpected op kind " << op_kind;
1078 UNREACHABLE();
1079 }
1080 }
1081
1082 void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
1083 stream << vixl32::Register(reg);
1084 }
1085
1086 void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1087 stream << vixl32::SRegister(reg);
1088 }
1089
1090 const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
1091 return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
1092 }
1093
1094 static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
1095 uint32_t mask = 0;
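  // E.g. a register list covering S2..S4 yields the mask 0b11100.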
1096 for (uint32_t i = regs.GetFirstSRegister().GetCode();
1097 i <= regs.GetLastSRegister().GetCode();
1098 ++i) {
1099 mask |= (1 << i);
1100 }
1101 return mask;
1102 }
1103
1104 // Saves the register in the stack. Returns the size taken on stack.
1105 size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
1106 uint32_t reg_id ATTRIBUTE_UNUSED) {
1107 TODO_VIXL32(FATAL);
1108 UNREACHABLE();
1109 }
1110
1111 // Restores the register from the stack. Returns the size taken on stack.
1112 size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
1113 uint32_t reg_id ATTRIBUTE_UNUSED) {
1114 TODO_VIXL32(FATAL);
1115 UNREACHABLE();
1116 }
1117
1118 size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1119 uint32_t reg_id ATTRIBUTE_UNUSED) {
1120 TODO_VIXL32(FATAL);
1121 UNREACHABLE();
1122 }
1123
1124 size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1125 uint32_t reg_id ATTRIBUTE_UNUSED) {
1126 TODO_VIXL32(FATAL);
1127 UNREACHABLE();
1128 }
1129
1130 static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
1131 vixl32::Register out,
1132 vixl32::Register first,
1133 const Operand& second,
1134 CodeGeneratorARMVIXL* codegen) {
1135 if (second.IsImmediate() && second.GetImmediate() == 0) {
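    // With a zero operand, AND yields 0 while ADD/ORR/SUB/EOR yield the first operand
    // unchanged, so a single MOV is enough.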
1136 const Operand in = kind == HInstruction::kAnd
1137 ? Operand(0)
1138 : Operand(first);
1139
1140 __ Mov(out, in);
1141 } else {
1142 switch (kind) {
1143 case HInstruction::kAdd:
1144 __ Add(out, first, second);
1145 break;
1146 case HInstruction::kAnd:
1147 __ And(out, first, second);
1148 break;
1149 case HInstruction::kOr:
1150 __ Orr(out, first, second);
1151 break;
1152 case HInstruction::kSub:
1153 __ Sub(out, first, second);
1154 break;
1155 case HInstruction::kXor:
1156 __ Eor(out, first, second);
1157 break;
1158 default:
1159 LOG(FATAL) << "Unexpected instruction kind: " << kind;
1160 UNREACHABLE();
1161 }
1162 }
1163 }
1164
1165 static void GenerateDataProc(HInstruction::InstructionKind kind,
1166 const Location& out,
1167 const Location& first,
1168 const Operand& second_lo,
1169 const Operand& second_hi,
1170 CodeGeneratorARMVIXL* codegen) {
1171 const vixl32::Register first_hi = HighRegisterFrom(first);
1172 const vixl32::Register first_lo = LowRegisterFrom(first);
1173 const vixl32::Register out_hi = HighRegisterFrom(out);
1174 const vixl32::Register out_lo = LowRegisterFrom(out);
1175
1176 if (kind == HInstruction::kAdd) {
1177 __ Adds(out_lo, first_lo, second_lo);
1178 __ Adc(out_hi, first_hi, second_hi);
1179 } else if (kind == HInstruction::kSub) {
1180 __ Subs(out_lo, first_lo, second_lo);
1181 __ Sbc(out_hi, first_hi, second_hi);
1182 } else {
1183 GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
1184 GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
1185 }
1186 }
1187
1188 static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
1189 return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
1190 }
1191
1192 static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
1193 CodeGeneratorARMVIXL* codegen) {
1194 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
1195 DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
1196
1197 const LocationSummary* const locations = instruction->GetLocations();
1198 const uint32_t shift_value = instruction->GetShiftAmount();
1199 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
1200 const Location first = locations->InAt(0);
1201 const Location second = locations->InAt(1);
1202 const Location out = locations->Out();
1203 const vixl32::Register first_hi = HighRegisterFrom(first);
1204 const vixl32::Register first_lo = LowRegisterFrom(first);
1205 const vixl32::Register out_hi = HighRegisterFrom(out);
1206 const vixl32::Register out_lo = LowRegisterFrom(out);
1207 const vixl32::Register second_hi = HighRegisterFrom(second);
1208 const vixl32::Register second_lo = LowRegisterFrom(second);
1209 const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());
1210
1211 if (shift_value >= 32) {
1212 if (shift == ShiftType::LSL) {
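      // Shifting the 64-bit operand left by 32 or more leaves 0 in its low word and
      // `second_lo << (shift_value - 32)` in its high word, which the two data-processing
      // instructions below combine with `first`.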
1213 GenerateDataProcInstruction(kind,
1214 out_hi,
1215 first_hi,
1216 Operand(second_lo, ShiftType::LSL, shift_value - 32),
1217 codegen);
1218 GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
1219 } else if (shift == ShiftType::ASR) {
1220 GenerateDataProc(kind,
1221 out,
1222 first,
1223 GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
1224 Operand(second_hi, ShiftType::ASR, 31),
1225 codegen);
1226 } else {
1227 DCHECK_EQ(shift, ShiftType::LSR);
1228 GenerateDataProc(kind,
1229 out,
1230 first,
1231 GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
1232 0,
1233 codegen);
1234 }
1235 } else {
1236 DCHECK_GT(shift_value, 1U);
1237 DCHECK_LT(shift_value, 32U);
1238
1239 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1240
1241 if (shift == ShiftType::LSL) {
1242 // We are not doing this for HInstruction::kAdd because the output will require
1243 // Location::kOutputOverlap; not applicable to other cases.
1244 if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1245 GenerateDataProcInstruction(kind,
1246 out_hi,
1247 first_hi,
1248 Operand(second_hi, ShiftType::LSL, shift_value),
1249 codegen);
1250 GenerateDataProcInstruction(kind,
1251 out_hi,
1252 out_hi,
1253 Operand(second_lo, ShiftType::LSR, 32 - shift_value),
1254 codegen);
1255 GenerateDataProcInstruction(kind,
1256 out_lo,
1257 first_lo,
1258 Operand(second_lo, ShiftType::LSL, shift_value),
1259 codegen);
1260 } else {
1261 const vixl32::Register temp = temps.Acquire();
1262
1263 __ Lsl(temp, second_hi, shift_value);
1264 __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
1265 GenerateDataProc(kind,
1266 out,
1267 first,
1268 Operand(second_lo, ShiftType::LSL, shift_value),
1269 temp,
1270 codegen);
1271 }
1272 } else {
1273 DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);
1274
1275 // We are not doing this for HInstruction::kAdd because the output will require
1276 // Location::kOutputOverlap; not applicable to other cases.
1277 if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1278 GenerateDataProcInstruction(kind,
1279 out_lo,
1280 first_lo,
1281 Operand(second_lo, ShiftType::LSR, shift_value),
1282 codegen);
1283 GenerateDataProcInstruction(kind,
1284 out_lo,
1285 out_lo,
1286 Operand(second_hi, ShiftType::LSL, 32 - shift_value),
1287 codegen);
1288 GenerateDataProcInstruction(kind,
1289 out_hi,
1290 first_hi,
1291 Operand(second_hi, shift, shift_value),
1292 codegen);
1293 } else {
1294 const vixl32::Register temp = temps.Acquire();
1295
1296 __ Lsr(temp, second_lo, shift_value);
1297 __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
1298 GenerateDataProc(kind,
1299 out,
1300 first,
1301 temp,
1302 Operand(second_hi, shift, shift_value),
1303 codegen);
1304 }
1305 }
1306 }
1307 }
1308
1309 static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
1310 const Location rhs_loc = instruction->GetLocations()->InAt(1);
1311 if (rhs_loc.IsConstant()) {
1312 // 0.0 is the only immediate that can be encoded directly in
1313 // a VCMP instruction.
1314 //
1315 // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
1316 // specify that in a floating-point comparison, positive zero
1317 // and negative zero are considered equal, so we can use the
1318 // literal 0.0 for both cases here.
1319 //
1320 // Note however that some methods (Float.equal, Float.compare,
1321 // Float.compareTo, Double.equal, Double.compare,
1322 // Double.compareTo, Math.max, Math.min, StrictMath.max,
1323 // StrictMath.min) consider 0.0 to be (strictly) greater than
1324 // -0.0. So if we ever translate calls to these methods into a
1325 // HCompare instruction, we must handle the -0.0 case with
1326 // care here.
1327 DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
1328
1329 const DataType::Type type = instruction->InputAt(0)->GetType();
1330
1331 if (type == DataType::Type::kFloat32) {
1332 __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
1333 } else {
1334 DCHECK_EQ(type, DataType::Type::kFloat64);
1335 __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
1336 }
1337 } else {
1338 __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
1339 }
1340 }
1341
1342 static int64_t AdjustConstantForCondition(int64_t value,
1343 IfCondition* condition,
1344 IfCondition* opposite) {
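  // E.g. an unsigned `x < 1` is the same test as `x == 0`, and a signed `x > -1` is the
  // same as `x >= 0`; rewriting such comparisons against zero lets callers use the cheaper
  // sequences reserved for comparisons against zero.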
1345 if (value == 1) {
1346 if (*condition == kCondB) {
1347 value = 0;
1348 *condition = kCondEQ;
1349 *opposite = kCondNE;
1350 } else if (*condition == kCondAE) {
1351 value = 0;
1352 *condition = kCondNE;
1353 *opposite = kCondEQ;
1354 }
1355 } else if (value == -1) {
1356 if (*condition == kCondGT) {
1357 value = 0;
1358 *condition = kCondGE;
1359 *opposite = kCondLT;
1360 } else if (*condition == kCondLE) {
1361 value = 0;
1362 *condition = kCondLT;
1363 *opposite = kCondGE;
1364 }
1365 }
1366
1367 return value;
1368 }
1369
1370 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
1371 HCondition* condition,
1372 bool invert,
1373 CodeGeneratorARMVIXL* codegen) {
1374 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1375
1376 const LocationSummary* const locations = condition->GetLocations();
1377 IfCondition cond = condition->GetCondition();
1378 IfCondition opposite = condition->GetOppositeCondition();
1379
1380 if (invert) {
1381 std::swap(cond, opposite);
1382 }
1383
1384 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1385 const Location left = locations->InAt(0);
1386 const Location right = locations->InAt(1);
1387
1388 DCHECK(right.IsConstant());
1389
1390 const vixl32::Register left_high = HighRegisterFrom(left);
1391 const vixl32::Register left_low = LowRegisterFrom(left);
1392 int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
1393 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1394
1395 // Comparisons against 0 are common enough to deserve special attention.
1396 if (value == 0) {
1397 switch (cond) {
1398 case kCondNE:
1399 // x > 0 iff x != 0 when the comparison is unsigned.
1400 case kCondA:
1401 ret = std::make_pair(ne, eq);
1402 FALLTHROUGH_INTENDED;
1403 case kCondEQ:
1404 // x <= 0 iff x == 0 when the comparison is unsigned.
1405 case kCondBE:
1406 __ Orrs(temps.Acquire(), left_low, left_high);
1407 return ret;
1408 case kCondLT:
1409 case kCondGE:
1410 __ Cmp(left_high, 0);
1411 return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1412 // Trivially true or false.
1413 case kCondB:
1414 ret = std::make_pair(ne, eq);
1415 FALLTHROUGH_INTENDED;
1416 case kCondAE:
1417 __ Cmp(left_low, left_low);
1418 return ret;
1419 default:
1420 break;
1421 }
1422 }
1423
1424 switch (cond) {
1425 case kCondEQ:
1426 case kCondNE:
1427 case kCondB:
1428 case kCondBE:
1429 case kCondA:
1430 case kCondAE: {
1431 const uint32_t value_low = Low32Bits(value);
1432 Operand operand_low(value_low);
1433
1434 __ Cmp(left_high, High32Bits(value));
1435
1436 // ARMv8 deprecates IT blocks containing anything other than a single 16-bit
1437 // instruction, so we must ensure that the operands corresponding to the least
1438 // significant halves of the inputs fit into a 16-bit CMP encoding.
1439 if (!left_low.IsLow() || !IsUint<8>(value_low)) {
1440 operand_low = Operand(temps.Acquire());
1441 __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
1442 }
1443
1444 // We use the scope because of the IT block that follows.
1445 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1446 2 * vixl32::k16BitT32InstructionSizeInBytes,
1447 CodeBufferCheckScope::kExactSize);
1448
1449 __ it(eq);
1450 __ cmp(eq, left_low, operand_low);
1451 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1452 break;
1453 }
1454 case kCondLE:
1455 case kCondGT:
1456 // Trivially true or false.
1457 if (value == std::numeric_limits<int64_t>::max()) {
1458 __ Cmp(left_low, left_low);
1459 ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
1460 break;
1461 }
1462
1463 if (cond == kCondLE) {
1464 DCHECK_EQ(opposite, kCondGT);
1465 cond = kCondLT;
1466 opposite = kCondGE;
1467 } else {
1468 DCHECK_EQ(cond, kCondGT);
1469 DCHECK_EQ(opposite, kCondLE);
1470 cond = kCondGE;
1471 opposite = kCondLT;
1472 }
1473
1474 value++;
1475 FALLTHROUGH_INTENDED;
1476 case kCondGE:
1477 case kCondLT: {
1478 __ Cmp(left_low, Low32Bits(value));
1479 __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
1480 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1481 break;
1482 }
1483 default:
1484 LOG(FATAL) << "Unreachable";
1485 UNREACHABLE();
1486 }
1487
1488 return ret;
1489 }
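// Illustrative sketches of the sequences selected above (x assumed in r0/r1, ip standing
// for the acquired scratch register). 64-bit equality against a constant C:
//   cmp   r1, #High32Bits(C)
//   it    eq
//   cmpeq r0, #Low32Bits(C)      @ or a register holding it, if not a uint8 immediate
// Signed `x < C`:
//   cmp  r0, #Low32Bits(C)
//   sbcs ip, r1, #High32Bits(C)  @ the borrow chain makes the flags reflect all 64 bits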
1490
1491 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
1492 HCondition* condition,
1493 bool invert,
1494 CodeGeneratorARMVIXL* codegen) {
1495 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1496
1497 const LocationSummary* const locations = condition->GetLocations();
1498 IfCondition cond = condition->GetCondition();
1499 IfCondition opposite = condition->GetOppositeCondition();
1500
1501 if (invert) {
1502 std::swap(cond, opposite);
1503 }
1504
1505 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1506 Location left = locations->InAt(0);
1507 Location right = locations->InAt(1);
1508
1509 DCHECK(right.IsRegisterPair());
1510
1511 switch (cond) {
1512 case kCondEQ:
1513 case kCondNE:
1514 case kCondB:
1515 case kCondBE:
1516 case kCondA:
1517 case kCondAE: {
1518 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
1519
1520 // We use the scope because of the IT block that follows.
1521 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1522 2 * vixl32::k16BitT32InstructionSizeInBytes,
1523 CodeBufferCheckScope::kExactSize);
1524
1525 __ it(eq);
1526 __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
1527 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1528 break;
1529 }
1530 case kCondLE:
1531 case kCondGT:
1532 if (cond == kCondLE) {
1533 DCHECK_EQ(opposite, kCondGT);
1534 cond = kCondGE;
1535 opposite = kCondLT;
1536 } else {
1537 DCHECK_EQ(cond, kCondGT);
1538 DCHECK_EQ(opposite, kCondLE);
1539 cond = kCondLT;
1540 opposite = kCondGE;
1541 }
1542
1543 std::swap(left, right);
1544 FALLTHROUGH_INTENDED;
1545 case kCondGE:
1546 case kCondLT: {
1547 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1548
1549 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
1550 __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
1551 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1552 break;
1553 }
1554 default:
1555 LOG(FATAL) << "Unreachable";
1556 UNREACHABLE();
1557 }
1558
1559 return ret;
1560 }
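// Sketch of the register-register variants (operand registers assumed): equality-class
// conditions compare the high words and only compare the low words inside an IT block when
// the high words match; ordered conditions use
//   cmp  low(left), low(right)
//   sbcs ip, high(left), high(right)
// so the borrow propagates and the flags describe the full 64-bit comparison.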
1561
1562 static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
1563 bool invert,
1564 CodeGeneratorARMVIXL* codegen) {
1565 const DataType::Type type = condition->GetLeft()->GetType();
1566 IfCondition cond = condition->GetCondition();
1567 IfCondition opposite = condition->GetOppositeCondition();
1568 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1569
1570 if (invert) {
1571 std::swap(cond, opposite);
1572 }
1573
1574 if (type == DataType::Type::kInt64) {
1575 ret = condition->GetLocations()->InAt(1).IsConstant()
1576 ? GenerateLongTestConstant(condition, invert, codegen)
1577 : GenerateLongTest(condition, invert, codegen);
1578 } else if (DataType::IsFloatingPointType(type)) {
1579 GenerateVcmp(condition, codegen);
1580 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
1581 ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
1582 ARMFPCondition(opposite, condition->IsGtBias()));
1583 } else {
1584 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1585 __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
1586 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1587 }
1588
1589 return ret;
1590 }
1591
1592 static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1593 const vixl32::Register out = OutputRegister(cond);
1594 const auto condition = GenerateTest(cond, false, codegen);
1595
1596 __ Mov(LeaveFlags, out, 0);
1597
1598 if (out.IsLow()) {
1599 // We use the scope because of the IT block that follows.
1600 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1601 2 * vixl32::k16BitT32InstructionSizeInBytes,
1602 CodeBufferCheckScope::kExactSize);
1603
1604 __ it(condition.first);
1605 __ mov(condition.first, out, 1);
1606 } else {
1607 vixl32::Label done_label;
1608 vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
1609
1610 __ B(condition.second, final_label, /* is_far_target= */ false);
1611 __ Mov(out, 1);
1612
1613 if (done_label.IsReferenced()) {
1614 __ Bind(&done_label);
1615 }
1616 }
1617 }
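// Illustrative materialization (sketch, assuming `out` is the low register r0 and the test
// above resolved to `lt`):
//   mov   r0, #0     @ flag-preserving encoding, must not clobber the condition flags
//   it    lt
//   movlt r0, #1
// For a high output register a short forward branch over `mov out, #1` is used instead.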
1618
1619 static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1620 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1621
1622 const LocationSummary* const locations = cond->GetLocations();
1623 IfCondition condition = cond->GetCondition();
1624 const vixl32::Register out = OutputRegister(cond);
1625 const Location left = locations->InAt(0);
1626 const Location right = locations->InAt(1);
1627 vixl32::Register left_high = HighRegisterFrom(left);
1628 vixl32::Register left_low = LowRegisterFrom(left);
1629 vixl32::Register temp;
1630 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1631
1632 if (right.IsConstant()) {
1633 IfCondition opposite = cond->GetOppositeCondition();
1634 const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
1635 &condition,
1636 &opposite);
1637 Operand right_high = High32Bits(value);
1638 Operand right_low = Low32Bits(value);
1639
1640 // The output uses Location::kNoOutputOverlap.
1641 if (out.Is(left_high)) {
1642 std::swap(left_low, left_high);
1643 std::swap(right_low, right_high);
1644 }
1645
1646 __ Sub(out, left_low, right_low);
1647 temp = temps.Acquire();
1648 __ Sub(temp, left_high, right_high);
1649 } else {
1650 DCHECK(right.IsRegisterPair());
1651 temp = temps.Acquire();
1652 __ Sub(temp, left_high, HighRegisterFrom(right));
1653 __ Sub(out, left_low, LowRegisterFrom(right));
1654 }
1655
1656 // Need to check after calling AdjustConstantForCondition().
1657 DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
1658
1659 if (condition == kCondNE && out.IsLow()) {
1660 __ Orrs(out, out, temp);
1661
1662 // We use the scope because of the IT block that follows.
1663 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1664 2 * vixl32::k16BitT32InstructionSizeInBytes,
1665 CodeBufferCheckScope::kExactSize);
1666
1667 __ it(ne);
1668 __ mov(ne, out, 1);
1669 } else {
1670 __ Orr(out, out, temp);
1671 codegen->GenerateConditionWithZero(condition, out, out, temp);
1672 }
1673 }
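// Sketch of the idea: `out = (left_low - right_low) | (left_high - right_high)` is zero
// exactly when the 64-bit operands are equal, so only a zero test remains: ORRS plus an
// IT/MOV for kCondNE on a low register, otherwise GenerateConditionWithZero().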
1674
1675 static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1676 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1677
1678 const LocationSummary* const locations = cond->GetLocations();
1679 IfCondition condition = cond->GetCondition();
1680 const vixl32::Register out = OutputRegister(cond);
1681 const Location left = locations->InAt(0);
1682 const Location right = locations->InAt(1);
1683
1684 if (right.IsConstant()) {
1685 IfCondition opposite = cond->GetOppositeCondition();
1686
1687 // Comparisons against 0 are common enough to deserve special attention.
1688 if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
1689 switch (condition) {
1690 case kCondNE:
1691 case kCondA:
1692 if (out.IsLow()) {
1693 // We only care if both input registers are 0 or not.
1694 __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
1695
1696 // We use the scope because of the IT block that follows.
1697 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1698 2 * vixl32::k16BitT32InstructionSizeInBytes,
1699 CodeBufferCheckScope::kExactSize);
1700
1701 __ it(ne);
1702 __ mov(ne, out, 1);
1703 return;
1704 }
1705
1706 FALLTHROUGH_INTENDED;
1707 case kCondEQ:
1708 case kCondBE:
1709 // We only care if both input registers are 0 or not.
1710 __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
1711 codegen->GenerateConditionWithZero(condition, out, out);
1712 return;
1713 case kCondLT:
1714 case kCondGE:
1715 // We only care about the sign bit.
1716 FALLTHROUGH_INTENDED;
1717 case kCondAE:
1718 case kCondB:
1719 codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
1720 return;
1721 case kCondLE:
1722 case kCondGT:
1723 default:
1724 break;
1725 }
1726 }
1727 }
1728
1729 // If `out` is a low register, then the GenerateConditionGeneric()
1730 // function generates a shorter code sequence that is still branchless.
1731 if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
1732 GenerateEqualLong(cond, codegen);
1733 return;
1734 }
1735
1736 GenerateConditionGeneric(cond, codegen);
1737 }
1738
1739 static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
1740 CodeGeneratorARMVIXL* codegen) {
1741 const DataType::Type type = cond->GetLeft()->GetType();
1742
1743 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1744
1745 if (type == DataType::Type::kInt64) {
1746 GenerateConditionLong(cond, codegen);
1747 return;
1748 }
1749
1750 IfCondition condition = cond->GetCondition();
1751 vixl32::Register in = InputRegisterAt(cond, 0);
1752 const vixl32::Register out = OutputRegister(cond);
1753 const Location right = cond->GetLocations()->InAt(1);
1754 int64_t value;
1755
1756 if (right.IsConstant()) {
1757 IfCondition opposite = cond->GetOppositeCondition();
1758
1759 value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
1760
1761 // Comparisons against 0 are common enough to deserve special attention.
1762 if (value == 0) {
1763 switch (condition) {
1764 case kCondNE:
1765 case kCondA:
1766 if (out.IsLow() && out.Is(in)) {
1767 __ Cmp(out, 0);
1768
1769 // We use the scope because of the IT block that follows.
1770 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1771 2 * vixl32::k16BitT32InstructionSizeInBytes,
1772 CodeBufferCheckScope::kExactSize);
1773
1774 __ it(ne);
1775 __ mov(ne, out, 1);
1776 return;
1777 }
1778
1779 FALLTHROUGH_INTENDED;
1780 case kCondEQ:
1781 case kCondBE:
1782 case kCondLT:
1783 case kCondGE:
1784 case kCondAE:
1785 case kCondB:
1786 codegen->GenerateConditionWithZero(condition, out, in);
1787 return;
1788 case kCondLE:
1789 case kCondGT:
1790 default:
1791 break;
1792 }
1793 }
1794 }
1795
1796 if (condition == kCondEQ || condition == kCondNE) {
1797 Operand operand(0);
1798
1799 if (right.IsConstant()) {
1800 operand = Operand::From(value);
1801 } else if (out.Is(RegisterFrom(right))) {
1802 // Avoid 32-bit instructions if possible.
1803 operand = InputOperandAt(cond, 0);
1804 in = RegisterFrom(right);
1805 } else {
1806 operand = InputOperandAt(cond, 1);
1807 }
1808
1809 if (condition == kCondNE && out.IsLow()) {
1810 __ Subs(out, in, operand);
1811
1812 // We use the scope because of the IT block that follows.
1813 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1814 2 * vixl32::k16BitT32InstructionSizeInBytes,
1815 CodeBufferCheckScope::kExactSize);
1816
1817 __ it(ne);
1818 __ mov(ne, out, 1);
1819 } else {
1820 __ Sub(out, in, operand);
1821 codegen->GenerateConditionWithZero(condition, out, out);
1822 }
1823
1824 return;
1825 }
1826
1827 GenerateConditionGeneric(cond, codegen);
1828 }
1829
1830 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
1831 const DataType::Type type = constant->GetType();
1832 bool ret = false;
1833
1834 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1835
1836 if (type == DataType::Type::kInt64) {
1837 const uint64_t value = Uint64ConstantFrom(constant);
1838
1839 ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
1840 } else {
1841 ret = IsUint<8>(Int32ConstantFrom(constant));
1842 }
1843
1844 return ret;
1845 }
1846
1847 static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
1848 DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
1849
1850 if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
1851 return Location::ConstantLocation(constant);
1852 }
1853
1854 return Location::RequiresRegister();
1855 }
1856
1857 static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
1858 // ARMv8 deprecates IT blocks containing anything other than a single 16-bit
1859 // instruction, so we check that we are not dealing with floating-point output
1860 // (there is no 16-bit VMOV encoding).
1861 if (!out.IsRegister() && !out.IsRegisterPair()) {
1862 return false;
1863 }
1864
1865 // For constants, we also check that the output is in one or two low registers,
1866 // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
1867 // MOV encoding can be used.
1868 if (src.IsConstant()) {
1869 if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
1870 return false;
1871 }
1872
1873 if (out.IsRegister()) {
1874 if (!RegisterFrom(out).IsLow()) {
1875 return false;
1876 }
1877 } else {
1878 DCHECK(out.IsRegisterPair());
1879
1880 if (!HighRegisterFrom(out).IsLow()) {
1881 return false;
1882 }
1883 }
1884 }
1885
1886 return true;
1887 }
1888
1889 #undef __
1890
1891 vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
1892 vixl32::Label* final_label) {
1893 DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
1894 DCHECK_IMPLIES(instruction->IsInvoke(), !instruction->GetLocations()->CanCall());
1895
1896 const HBasicBlock* const block = instruction->GetBlock();
1897 const HLoopInformation* const info = block->GetLoopInformation();
1898 HInstruction* const next = instruction->GetNext();
1899
1900 // Avoid a branch to a branch.
1901 if (next->IsGoto() && (info == nullptr ||
1902 !info->IsBackEdge(*block) ||
1903 !info->HasSuspendCheck())) {
1904 final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
1905 }
1906
1907 return final_label;
1908 }
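// Example of the branch-to-branch elision (sketch): if the instruction (e.g. a materialized
// HCondition) is immediately followed by an HGoto to block B, and that goto is not a back
// edge carrying a suspend check, the generated code jumps straight to B's label instead of
// falling through to a separate unconditional branch.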
1909
1910 namespace detail {
1911 // Mark which intrinsics we don't have handcrafted code for.
1912 template <Intrinsics T>
1913 struct IsUnimplemented {
1914 bool is_unimplemented = false;
1915 };
1916
1917 #define TRUE_OVERRIDE(Name) \
1918 template <> \
1919 struct IsUnimplemented<Intrinsics::k##Name> { \
1920 bool is_unimplemented = true; \
1921 };
1922 UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
1923 #undef TRUE_OVERRIDE
1924
1925 #include "intrinsics_list.h"
1926 static constexpr bool kIsIntrinsicUnimplemented[] = {
1927 false, // kNone
1928 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1929 IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1930 INTRINSICS_LIST(IS_UNIMPLEMENTED)
1931 #undef IS_UNIMPLEMENTED
1932 };
1933 #undef INTRINSICS_LIST
1934
1935 } // namespace detail
1936
1937 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
1938 const CompilerOptions& compiler_options,
1939 OptimizingCompilerStats* stats)
1940 : CodeGenerator(graph,
1941 kNumberOfCoreRegisters,
1942 kNumberOfSRegisters,
1943 kNumberOfRegisterPairs,
1944 kCoreCalleeSaves.GetList(),
1945 ComputeSRegisterListMask(kFpuCalleeSaves),
1946 compiler_options,
1947 stats,
1948 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1949 block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1950 jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1951 location_builder_(graph, this),
1952 instruction_visitor_(graph, this),
1953 move_resolver_(graph->GetAllocator(), this),
1954 assembler_(graph->GetAllocator()),
1955 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1956 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1957 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1958 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1959 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1960 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1961 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1962 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1963 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1964 call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1965 baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1966 uint32_literals_(std::less<uint32_t>(),
1967 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1968 jit_string_patches_(StringReferenceValueComparator(),
1969 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1970 jit_class_patches_(TypeReferenceValueComparator(),
1971 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1972 jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1973 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1974 // Always save the LR register to mimic Quick.
1975 AddAllocatedRegister(Location::RegisterLocation(LR));
1976 // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
1977 // S0-S31, which alias to D0-D15.
1978 GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
1979 GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
1980 }
1981
1982 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
1983 uint32_t num_entries = switch_instr_->GetNumEntries();
1984 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1985
1986 // We are about to use the assembler to place literals directly. Make sure we have enough
1987 // underlying code buffer and we have generated a jump table of the right size, using
1988 // codegen->GetVIXLAssembler()->GetBuffer().Align();
1989 ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
1990 num_entries * sizeof(int32_t),
1991 CodeBufferCheckScope::kMaximumSize);
1992 // TODO(VIXL): Check that using lower case bind is fine here.
1993 codegen->GetVIXLAssembler()->bind(&table_start_);
1994 for (uint32_t i = 0; i < num_entries; i++) {
1995 codegen->GetVIXLAssembler()->place(bb_addresses_[i].get());
1996 }
1997 }
1998
1999 void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
2000 uint32_t num_entries = switch_instr_->GetNumEntries();
2001 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
2002
2003 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
2004 for (uint32_t i = 0; i < num_entries; i++) {
2005 vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
2006 DCHECK(target_label->IsBound());
2007 int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
2008 // When doing a BX to an address, the lowest bit must be set to 1 to stay in T32 (Thumb).
2009 if (codegen->GetVIXLAssembler()->IsUsingT32()) {
2010 jump_offset++;
2011 }
2012 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
2013 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
2014
2015 bb_addresses_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer());
2016 }
2017 }
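// Worked example (sketch): if the table starts at code offset 0x100 and a case target is
// bound at offset 0x148, the stored entry becomes 0x48, or 0x49 when assembling T32 so the
// later BX to `table_base + entry` keeps the Thumb state bit set.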
2018
2019 void CodeGeneratorARMVIXL::FixJumpTables() {
2020 for (auto&& jump_table : jump_tables_) {
2021 jump_table->FixTable(this);
2022 }
2023 }
2024
2025 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
2026
2027 void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
2028 FixJumpTables();
2029
2030 // Emit JIT baker read barrier slow paths.
2031 DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
2032 for (auto& entry : jit_baker_read_barrier_slow_paths_) {
2033 uint32_t encoded_data = entry.first;
2034 vixl::aarch32::Label* slow_path_entry = &entry.second.label;
2035 __ Bind(slow_path_entry);
2036 CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
2037 }
2038
2039 GetAssembler()->FinalizeCode();
2040 CodeGenerator::Finalize(allocator);
2041
2042 // Verify Baker read barrier linker patches.
2043 if (kIsDebugBuild) {
2044 ArrayRef<const uint8_t> code = allocator->GetMemory();
2045 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
2046 DCHECK(info.label.IsBound());
2047 uint32_t literal_offset = info.label.GetLocation();
2048 DCHECK_ALIGNED(literal_offset, 2u);
2049
2050 auto GetInsn16 = [&code](uint32_t offset) {
2051 DCHECK_ALIGNED(offset, 2u);
2052 return (static_cast<uint32_t>(code[offset + 0]) << 0) +
2053 (static_cast<uint32_t>(code[offset + 1]) << 8);
2054 };
2055 auto GetInsn32 = [=](uint32_t offset) {
2056 return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
2057 };
2058
2059 uint32_t encoded_data = info.custom_data;
2060 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
2061 // Check that the next instruction matches the expected LDR.
2062 switch (kind) {
2063 case BakerReadBarrierKind::kField: {
2064 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2065 if (width == BakerReadBarrierWidth::kWide) {
2066 DCHECK_GE(code.size() - literal_offset, 8u);
2067 uint32_t next_insn = GetInsn32(literal_offset + 4u);
2068 // LDR (immediate), encoding T3, with correct base_reg.
2069 CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
2070 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2071 CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
2072 } else {
2073 DCHECK_GE(code.size() - literal_offset, 6u);
2074 uint32_t next_insn = GetInsn16(literal_offset + 4u);
2075 // LDR (immediate), encoding T1, with correct base_reg.
2076 CheckValidReg(next_insn & 0x7u); // Check destination register.
2077 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2078 CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
2079 }
2080 break;
2081 }
2082 case BakerReadBarrierKind::kArray: {
2083 DCHECK_GE(code.size() - literal_offset, 8u);
2084 uint32_t next_insn = GetInsn32(literal_offset + 4u);
2085 // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]).
2086 CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
2087 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2088 CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
2089 CheckValidReg(next_insn & 0xf); // Check index register
2090 break;
2091 }
2092 case BakerReadBarrierKind::kGcRoot: {
2093 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2094 if (width == BakerReadBarrierWidth::kWide) {
2095 DCHECK_GE(literal_offset, 4u);
2096 uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2097 // LDR (immediate), encoding T3, with correct root_reg.
2098 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2099 CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
2100 } else {
2101 DCHECK_GE(literal_offset, 2u);
2102 uint32_t prev_insn = GetInsn16(literal_offset - 2u);
2103 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2104 // Usually LDR (immediate), encoding T1, with correct root_reg but we may have
2105 // a `MOV marked, old_value` for intrinsic CAS where `marked` is a low register.
2106 if ((prev_insn & 0xff87u) != (0x4600 | root_reg)) {
2107 CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
2108 }
2109 }
2110 break;
2111 }
2112 case BakerReadBarrierKind::kIntrinsicCas: {
2113 DCHECK_GE(literal_offset, 4u);
2114 uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2115 // MOV (register), encoding T3, with correct root_reg.
2116 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2117 DCHECK_GE(root_reg, 8u); // Used only for high registers.
2118 CHECK_EQ(prev_insn & 0xfffffff0u, 0xea4f0000u | (root_reg << 8));
2119 break;
2120 }
2121 default:
2122 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
2123 UNREACHABLE();
2124 }
2125 }
2126 }
2127 }
2128
2129 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
2130 // Stack register, LR and PC are always reserved.
2131 blocked_core_registers_[SP] = true;
2132 blocked_core_registers_[LR] = true;
2133 blocked_core_registers_[PC] = true;
2134
2135 // TODO: We don't need to reserve marking-register for userfaultfd GC. But
2136 // that would require some work in the assembler code as the right GC is
2137 // chosen at load-time and not compile time.
2138 if (kReserveMarkingRegister) {
2139 // Reserve marking register.
2140 blocked_core_registers_[MR] = true;
2141 }
2142
2143 // Reserve thread register.
2144 blocked_core_registers_[TR] = true;
2145
2146 // Reserve temp register.
2147 blocked_core_registers_[IP] = true;
2148
2149 if (GetGraph()->IsDebuggable()) {
2150 // Stubs do not save callee-save floating point registers. If the graph
2151 // is debuggable, we need to deal with these registers differently. For
2152 // now, just block them.
2153 for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
2154 i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
2155 ++i) {
2156 blocked_fpu_registers_[i] = true;
2157 }
2158 }
2159 }
2160
2161 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
2162 CodeGeneratorARMVIXL* codegen)
2163 : InstructionCodeGenerator(graph, codegen),
2164 assembler_(codegen->GetAssembler()),
2165 codegen_(codegen) {}
2166
2167 void CodeGeneratorARMVIXL::ComputeSpillMask() {
2168 core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
2169 DCHECK_NE(core_spill_mask_ & (1u << kLrCode), 0u)
2170 << "At least the return address register must be saved";
2171 // 16-bit PUSH/POP (T1) can save/restore just the LR/PC.
2172 DCHECK(GetVIXLAssembler()->IsUsingT32());
2173 fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
2174 // We use vpush and vpop for saving and restoring floating point registers, which take
2175 // an SRegister and the number of registers to save/restore after that SRegister. We
2176 // therefore update the `fpu_spill_mask_` to also contain those registers not allocated,
2177 // but in the range.
2178 if (fpu_spill_mask_ != 0) {
2179 uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
2180 uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
2181 for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
2182 fpu_spill_mask_ |= (1 << i);
2183 }
2184 }
2185 }
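// Example of the mask widening (sketch): if the allocator only used s16 and s19, the loop
// also sets s17 and s18 so that a single contiguous
//   vpush {s16-s19} / vpop {s16-s19}
// can be emitted, even though the middle registers hold no live values.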
2186
2187 void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) {
2188 LocationSummary* locations = new (GetGraph()->GetAllocator())
2189 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2190 locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType()));
2191 }
2192
2193 void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) {
2194 UseScratchRegisterScope temps(GetVIXLAssembler());
2195 vixl32::Register temp = temps.Acquire();
2196
2197 SlowPathCodeARMVIXL* slow_path =
2198 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
2199 codegen_->AddSlowPath(slow_path);
2200
2201 if (instruction->IsMethodExitHook()) {
2202 // Check whether we need to determine if the caller requires a deoptimization. Strictly
2203 // speaking, checking the CheckCallerForDeopt bit would suffice, but it is faster to test for
2204 // any non-zero value. The kCHA bit isn't used in debuggable runtimes because CHA optimization
2205 // is disabled there. The other bit is used when this method itself requires a deoptimization
2206 // due to redefinition. So it is safe to just check for a non-zero value here.
2207 GetAssembler()->LoadFromOffset(kLoadWord,
2208 temp,
2209 sp,
2210 codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
2211 __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
2212 }
2213
2214 MemberOffset offset = instruction->IsMethodExitHook() ?
2215 instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
2216 instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
2217 uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
2218 __ Mov(temp, address + offset.Int32Value());
2219 __ Ldrb(temp, MemOperand(temp, 0));
2220 __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
2221 __ Bind(slow_path->GetExitLabel());
2222 }
2223
2224 void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instruction) {
2225 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2226 DCHECK(codegen_->RequiresCurrentMethod());
2227 GenerateMethodEntryExitHook(instruction);
2228 }
2229
2230 void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
2231 new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2232 }
2233
2234 void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) {
2235 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2236 DCHECK(codegen_->RequiresCurrentMethod());
2237 GenerateMethodEntryExitHook(instruction);
2238 }
2239
2240 void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
2241 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
2242 UseScratchRegisterScope temps(GetVIXLAssembler());
2243 vixl32::Register temp = temps.Acquire();
2244 static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
2245 if (!is_frame_entry) {
2246 __ Push(vixl32::Register(kMethodRegister));
2247 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2248 GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
2249 }
2250 // Load with zero extend to clear the high bits for integer overflow check.
2251 __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2252 vixl::aarch32::Label done;
2253 DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
2254 __ CompareAndBranchIfZero(temp, &done, /* is_far_target= */ false);
2255 __ Add(temp, temp, -1);
2256 __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2257 __ Bind(&done);
2258 if (!is_frame_entry) {
2259 __ Pop(vixl32::Register(kMethodRegister));
2260 GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2261 }
2262 }
2263
2264 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2265 SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL();
2266 AddSlowPath(slow_path);
2267 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2268 DCHECK(info != nullptr);
2269 DCHECK(!HasEmptyFrame());
2270 uint32_t address = reinterpret_cast32<uint32_t>(info);
2271 UseScratchRegisterScope temps(GetVIXLAssembler());
2272 vixl32::Register tmp = temps.Acquire();
2273 __ Mov(lr, address);
2274 __ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2275 __ Adds(tmp, tmp, -1);
2276 __ B(cc, slow_path->GetEntryLabel());
2277 __ Strh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2278 __ Bind(slow_path->GetExitLabel());
2279 }
2280 }
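// Illustrative baseline hotness sequence (sketch; the address and offset are symbolic and
// the scratch register is assumed to be ip):
//   mov  lr, #<ProfilingInfo address>          @ materialized with movw/movt if needed
//   ldrh ip, [lr, #<baseline hotness offset>]
//   adds ip, ip, #-1                           @ carry clear means the counter was 0
//   bcc  <CompileOptimizedSlowPath>
//   strh ip, [lr, #<baseline hotness offset>]
// The counter counts down and the optimizing recompilation path is taken once it hits zero.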
2281
2282 void CodeGeneratorARMVIXL::GenerateFrameEntry() {
2283 bool skip_overflow_check =
2284 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
2285 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
2286
2287 // Check if we need to generate the clinit check. We will jump to the
2288 // resolution stub if the class is not initialized and the executing thread is
2289 // not the thread initializing it.
2290 // We do this before constructing the frame to get the correct stack trace if
2291 // an exception is thrown.
2292 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
2293 UseScratchRegisterScope temps(GetVIXLAssembler());
2294 vixl32::Label resolution;
2295 vixl32::Label memory_barrier;
2296
2297 // Check if we're visibly initialized.
2298
2299 vixl32::Register temp1 = temps.Acquire();
2300 // Use r4 as other temporary register.
2301 DCHECK(!blocked_core_registers_[R4]);
2302 DCHECK(!kCoreCalleeSaves.Includes(r4));
2303 vixl32::Register temp2 = r4;
2304 for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2305 DCHECK(!reg.Is(r4));
2306 }
2307
2308 // We don't emit a read barrier here to save on code size. We rely on the
2309 // resolution trampoline to do a suspend check before re-entering this code.
2310 __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
2311 __ Ldrb(temp2, MemOperand(temp1, status_byte_offset));
2312 __ Cmp(temp2, shifted_visibly_initialized_value);
2313 __ B(cs, &frame_entry_label_);
2314
2315 // Check if we're initialized and jump to code that does a memory barrier if
2316 // so.
2317 __ Cmp(temp2, shifted_initialized_value);
2318 __ B(cs, &memory_barrier);
2319
2320 // Check if we're initializing and the thread initializing is the one
2321 // executing the code.
2322 __ Cmp(temp2, shifted_initializing_value);
2323 __ B(lo, &resolution);
2324
2325 __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
2326 __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
2327 __ Cmp(temp1, temp2);
2328 __ B(eq, &frame_entry_label_);
2329 __ Bind(&resolution);
2330
2331 // Jump to the resolution stub.
2332 ThreadOffset32 entrypoint_offset =
2333 GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
2334 __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
2335 __ Bx(temp1);
2336
2337 __ Bind(&memory_barrier);
2338 GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2339 }
2340
2341 __ Bind(&frame_entry_label_);
2342
2343 if (HasEmptyFrame()) {
2344 // Ensure that the CFI opcode list is not empty.
2345 GetAssembler()->cfi().Nop();
2346 MaybeIncrementHotness(/* is_frame_entry= */ true);
2347 return;
2348 }
2349
2350 if (!skip_overflow_check) {
2351 // Using r4 instead of IP saves 2 bytes.
2352 UseScratchRegisterScope temps(GetVIXLAssembler());
2353 vixl32::Register temp;
2354 // TODO: Remove this check when R4 is made a callee-save register
2355 // in ART compiled code (b/72801708). Currently we need to make
2356 // sure r4 is not blocked, e.g. in special purpose
2357 // TestCodeGeneratorARMVIXL; also asserting that r4 is available
2358 // here.
2359 if (!blocked_core_registers_[R4]) {
2360 for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2361 DCHECK(!reg.Is(r4));
2362 }
2363 DCHECK(!kCoreCalleeSaves.Includes(r4));
2364 temp = r4;
2365 } else {
2366 temp = temps.Acquire();
2367 }
2368 __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
2369 // The load must immediately precede RecordPcInfo.
2370 ExactAssemblyScope aas(GetVIXLAssembler(),
2371 vixl32::kMaxInstructionSizeInBytes,
2372 CodeBufferCheckScope::kMaximumSize);
2373 __ ldr(temp, MemOperand(temp));
2374 RecordPcInfo(nullptr, 0);
2375 }
2376
2377 uint32_t frame_size = GetFrameSize();
2378 uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2379 uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2380 if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2381 core_spills_offset <= 3u * kArmWordSize) {
2382 // Do a single PUSH for core registers including the method and up to two
2383 // filler registers. Then store the single FP spill if any.
2384 // (The worst case is when the method is not required and we actually
2385 // store 3 extra registers but they are stored in the same properly
2386 // aligned 16-byte chunk where we're already writing anyway.)
2387 DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2388 uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize);
2389 DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(core_spill_mask_));
2390 __ Push(RegisterList(core_spill_mask_ | extra_regs));
2391 GetAssembler()->cfi().AdjustCFAOffset(frame_size);
2392 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2393 core_spills_offset,
2394 core_spill_mask_,
2395 kArmWordSize);
2396 if (fpu_spill_mask_ != 0u) {
2397 DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2398 vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2399 GetAssembler()->StoreSToOffset(sreg, sp, fp_spills_offset);
2400 GetAssembler()->cfi().RelOffset(DWARFReg(sreg), /*offset=*/ fp_spills_offset);
2401 }
2402 } else {
2403 __ Push(RegisterList(core_spill_mask_));
2404 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
2405 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2406 /*offset=*/ 0,
2407 core_spill_mask_,
2408 kArmWordSize);
2409 if (fpu_spill_mask_ != 0) {
2410 uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2411
2412 // Check that list is contiguous.
2413 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2414
2415 __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2416 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
2417 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0),
2418 /*offset=*/ 0,
2419 fpu_spill_mask_,
2420 kArmWordSize);
2421 }
2422
2423 // Adjust SP and save the current method if we need it. Note that we do
2424 // not save the method in HCurrentMethod, as the instruction might have
2425 // been removed in the SSA graph.
2426 if (RequiresCurrentMethod() && fp_spills_offset <= 3 * kArmWordSize) {
2427 DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2428 __ Push(RegisterList(MaxInt<uint32_t>(fp_spills_offset / kArmWordSize)));
2429 GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset);
2430 } else {
2431 IncreaseFrame(fp_spills_offset);
2432 if (RequiresCurrentMethod()) {
2433 GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2434 }
2435 }
2436 }
2437
2438 if (GetGraph()->HasShouldDeoptimizeFlag()) {
2439 UseScratchRegisterScope temps(GetVIXLAssembler());
2440 vixl32::Register temp = temps.Acquire();
2441 // Initialize should_deoptimize flag to 0.
2442 __ Mov(temp, 0);
2443 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
2444 }
2445
2446 MaybeIncrementHotness(/* is_frame_entry= */ true);
2447 MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
2448 }
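// Illustrative fast-path prologue (sketch, assuming core spills {r5, r6, lr}, no FP spills
// and core_spills_offset == 12): the method register (r0) and two filler registers (r1, r2)
// are pushed together with the spills in a single instruction,
//   push {r0, r1, r2, r5, r6, lr}
// followed by one vstr if there is exactly one FP spill.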
2449
2450 void CodeGeneratorARMVIXL::GenerateFrameExit() {
2451 if (HasEmptyFrame()) {
2452 __ Bx(lr);
2453 return;
2454 }
2455
2456 // Pop LR into PC to return.
2457 DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
2458 uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
2459
2460 uint32_t frame_size = GetFrameSize();
2461 uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2462 uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2463 if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2464 // r4 is blocked by TestCodeGeneratorARMVIXL used by some tests.
2465 core_spills_offset <= (blocked_core_registers_[r4.GetCode()] ? 2u : 3u) * kArmWordSize) {
2466 // Load the FP spill if any and then do a single POP including the method
2467 // and up to two filler registers. If we have no FP spills, this also has
2468 // the advantage that we do not need to emit CFI directives.
2469 if (fpu_spill_mask_ != 0u) {
2470 DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2471 vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2472 GetAssembler()->cfi().RememberState();
2473 GetAssembler()->LoadSFromOffset(sreg, sp, fp_spills_offset);
2474 GetAssembler()->cfi().Restore(DWARFReg(sreg));
2475 }
2476 // Clobber registers r2-r4 as they are caller-save in ART managed ABI and
2477 // never hold the return value.
2478 uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize) << r2.GetCode();
2479 DCHECK_EQ(extra_regs & kCoreCalleeSaves.GetList(), 0u);
2480 DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(pop_mask));
2481 __ Pop(RegisterList(pop_mask | extra_regs));
2482 if (fpu_spill_mask_ != 0u) {
2483 GetAssembler()->cfi().RestoreState();
2484 }
2485 } else {
2486 GetAssembler()->cfi().RememberState();
2487 DecreaseFrame(fp_spills_offset);
2488 if (fpu_spill_mask_ != 0) {
2489 uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2490
2491 // Check that list is contiguous.
2492 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2493
2494 __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2495 GetAssembler()->cfi().AdjustCFAOffset(
2496 -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
2497 GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
2498 }
2499 __ Pop(RegisterList(pop_mask));
2500 GetAssembler()->cfi().RestoreState();
2501 GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
2502 }
2503 }
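// Matching fast-path epilogue for the prologue sketched above (still assuming core spills
// {r5, r6, lr} and core_spills_offset == 12): LR's slot is popped straight into PC and the
// three filler slots go to caller-save registers that never hold the return value,
//   pop {r2, r3, r4, r5, r6, pc}
// so the restore and the return happen in a single instruction.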
2504
2505 void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
2506 __ Bind(GetLabelOf(block));
2507 }
2508
2509 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2510 switch (type) {
2511 case DataType::Type::kReference:
2512 case DataType::Type::kBool:
2513 case DataType::Type::kUint8:
2514 case DataType::Type::kInt8:
2515 case DataType::Type::kUint16:
2516 case DataType::Type::kInt16:
2517 case DataType::Type::kInt32: {
2518 uint32_t index = gp_index_++;
2519 uint32_t stack_index = stack_index_++;
2520 if (index < calling_convention.GetNumberOfRegisters()) {
2521 return LocationFrom(calling_convention.GetRegisterAt(index));
2522 } else {
2523 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2524 }
2525 }
2526
2527 case DataType::Type::kInt64: {
2528 uint32_t index = gp_index_;
2529 uint32_t stack_index = stack_index_;
2530 gp_index_ += 2;
2531 stack_index_ += 2;
2532 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2533 if (calling_convention.GetRegisterAt(index).Is(r1)) {
2534 // Skip R1, and use R2_R3 instead.
2535 gp_index_++;
2536 index++;
2537 }
2538 }
2539 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2540 DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
2541 calling_convention.GetRegisterAt(index + 1).GetCode());
2542
2543 return LocationFrom(calling_convention.GetRegisterAt(index),
2544 calling_convention.GetRegisterAt(index + 1));
2545 } else {
2546 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2547 }
2548 }
2549
2550 case DataType::Type::kFloat32: {
2551 uint32_t stack_index = stack_index_++;
2552 if (float_index_ % 2 == 0) {
2553 float_index_ = std::max(double_index_, float_index_);
2554 }
2555 if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
2556 return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
2557 } else {
2558 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2559 }
2560 }
2561
2562 case DataType::Type::kFloat64: {
2563 double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
2564 uint32_t stack_index = stack_index_;
2565 stack_index_ += 2;
2566 if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
2567 uint32_t index = double_index_;
2568 double_index_ += 2;
2569 Location result = LocationFrom(
2570 calling_convention.GetFpuRegisterAt(index),
2571 calling_convention.GetFpuRegisterAt(index + 1));
2572 DCHECK(ExpectedPairLayout(result));
2573 return result;
2574 } else {
2575 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2576 }
2577 }
2578
2579 case DataType::Type::kUint32:
2580 case DataType::Type::kUint64:
2581 case DataType::Type::kVoid:
2582 LOG(FATAL) << "Unexpected parameter type " << type;
2583 UNREACHABLE();
2584 }
2585 return Location::NoLocation();
2586 }
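// Worked example (sketch) of the managed ABI mapping above: for parameters
// (int, long, float, double) the visitor hands out r1 for the int, the aligned pair r2/r3
// for the long, s0 for the float and the aligned pair s2/s3 for the double; arguments that
// no longer fit in registers fall back to the calling convention's stack slots.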
2587
2588 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
2589 switch (type) {
2590 case DataType::Type::kReference:
2591 case DataType::Type::kBool:
2592 case DataType::Type::kUint8:
2593 case DataType::Type::kInt8:
2594 case DataType::Type::kUint16:
2595 case DataType::Type::kInt16:
2596 case DataType::Type::kUint32:
2597 case DataType::Type::kInt32: {
2598 return LocationFrom(r0);
2599 }
2600
2601 case DataType::Type::kFloat32: {
2602 return LocationFrom(s0);
2603 }
2604
2605 case DataType::Type::kUint64:
2606 case DataType::Type::kInt64: {
2607 return LocationFrom(r0, r1);
2608 }
2609
2610 case DataType::Type::kFloat64: {
2611 return LocationFrom(s0, s1);
2612 }
2613
2614 case DataType::Type::kVoid:
2615 return Location::NoLocation();
2616 }
2617
2618 UNREACHABLE();
2619 }
2620
2621 Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2622 return LocationFrom(kMethodRegister);
2623 }
2624
2625 Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2626 DCHECK_NE(type, DataType::Type::kReference);
2627
2628 // Native ABI uses the same registers as managed, except that the method register r0
2629 // is a normal argument.
2630 Location location = Location::NoLocation();
2631 if (DataType::Is64BitType(type)) {
2632 gpr_index_ = RoundUp(gpr_index_, 2u);
2633 stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
2634 if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2635 location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
2636 kParameterCoreRegistersVIXL[gpr_index_]);
2637 gpr_index_ += 2u;
2638 }
2639 } else {
2640 if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2641 location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
2642 ++gpr_index_;
2643 }
2644 }
2645 if (location.IsInvalid()) {
2646 if (DataType::Is64BitType(type)) {
2647 location = Location::DoubleStackSlot(stack_offset_);
2648 stack_offset_ += 2 * kFramePointerSize;
2649 } else {
2650 location = Location::StackSlot(stack_offset_);
2651 stack_offset_ += kFramePointerSize;
2652 }
2653
2654 if (for_register_allocation_) {
2655 location = Location::Any();
2656 }
2657 }
2658 return location;
2659 }
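// Worked example (sketch) of the @CriticalNative mapping: for native parameters
// (int, long, int) the first int takes r0 (the register that normally carries the
// ArtMethod*), the long takes the aligned pair r2/r3, and the trailing int goes to the
// first 4-byte stack slot because r1 was skipped by the 64-bit alignment.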
2660
2661 Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
2662 const {
2663 // We perform conversion to the managed ABI return register after the call if needed.
2664 InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
2665 return dex_calling_convention.GetReturnLocation(type);
2666 }
2667
2668 Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2669 // Pass the method in the hidden argument R4.
2670 return Location::RegisterLocation(R4);
2671 }
2672
2673 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
2674 if (source.Equals(destination)) {
2675 return;
2676 }
2677 if (destination.IsRegister()) {
2678 if (source.IsRegister()) {
2679 __ Mov(RegisterFrom(destination), RegisterFrom(source));
2680 } else if (source.IsFpuRegister()) {
2681 __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
2682 } else {
2683 GetAssembler()->LoadFromOffset(kLoadWord,
2684 RegisterFrom(destination),
2685 sp,
2686 source.GetStackIndex());
2687 }
2688 } else if (destination.IsFpuRegister()) {
2689 if (source.IsRegister()) {
2690 __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
2691 } else if (source.IsFpuRegister()) {
2692 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
2693 } else {
2694 GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
2695 }
2696 } else {
2697 DCHECK(destination.IsStackSlot()) << destination;
2698 if (source.IsRegister()) {
2699 GetAssembler()->StoreToOffset(kStoreWord,
2700 RegisterFrom(source),
2701 sp,
2702 destination.GetStackIndex());
2703 } else if (source.IsFpuRegister()) {
2704 GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
2705 } else {
2706 DCHECK(source.IsStackSlot()) << source;
2707 UseScratchRegisterScope temps(GetVIXLAssembler());
2708 vixl32::Register temp = temps.Acquire();
2709 GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
2710 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
2711 }
2712 }
2713 }
2714
2715 void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
2716 DCHECK(location.IsRegister());
2717 __ Mov(RegisterFrom(location), value);
2718 }
2719
2720 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
2721 // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
2722 // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
2723 HParallelMove move(GetGraph()->GetAllocator());
2724 move.AddMove(src, dst, dst_type, nullptr);
2725 GetMoveResolver()->EmitNativeCode(&move);
2726 }
2727
2728 void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
2729 if (location.IsRegister()) {
2730 locations->AddTemp(location);
2731 } else if (location.IsRegisterPair()) {
2732 locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
2733 locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
2734 } else {
2735 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2736 }
2737 }
2738
2739 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
2740 HInstruction* instruction,
2741 uint32_t dex_pc,
2742 SlowPathCode* slow_path) {
2743 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2744
2745 ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint);
2746 // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2747 // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2748 // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2749 if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2750 __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2751 // Ensure the pc position is recorded immediately after the `blx` instruction.
2752 // blx in T32 has only a 16-bit encoding, which is why a stricter scope check is used.
2753 ExactAssemblyScope aas(GetVIXLAssembler(),
2754 vixl32::k16BitT32InstructionSizeInBytes,
2755 CodeBufferCheckScope::kExactSize);
2756 __ blx(lr);
2757 if (EntrypointRequiresStackMap(entrypoint)) {
2758 RecordPcInfo(instruction, dex_pc, slow_path);
2759 }
2760 } else {
2761 // Ensure the pc position is recorded immediately after the `bl` instruction.
2762 ExactAssemblyScope aas(GetVIXLAssembler(),
2763 vixl32::k32BitT32InstructionSizeInBytes,
2764 CodeBufferCheckScope::kExactSize);
2765 EmitEntrypointThunkCall(entrypoint_offset);
2766 if (EntrypointRequiresStackMap(entrypoint)) {
2767 RecordPcInfo(instruction, dex_pc, slow_path);
2768 }
2769 }
2770 }
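// Illustrative call shapes (sketch): AOT calls with a slow path go through a shared
// per-oat-file entrypoint thunk,
//   bl  <entrypoint thunk>             @ 32-bit encoding, patched at link time
// while the JIT / no-slow-path case loads the entrypoint from the current Thread:
//   ldr lr, [tr, #<entrypoint offset>]
//   blx lr                             @ 16-bit encoding, hence the exact-size scope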
2771
2772 void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2773 HInstruction* instruction,
2774 SlowPathCode* slow_path) {
2775 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2776 __ Ldr(lr, MemOperand(tr, entry_point_offset));
2777 __ Blx(lr);
2778 }
2779
2780 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2781 if (successor->IsExitBlock()) {
2782 DCHECK(got->GetPrevious()->AlwaysThrows());
2783 return; // no code needed
2784 }
2785
2786 HBasicBlock* block = got->GetBlock();
2787 HInstruction* previous = got->GetPrevious();
2788 HLoopInformation* info = block->GetLoopInformation();
2789
2790 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2791 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
2792 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2793 return;
2794 }
2795 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2796 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2797 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
2798 }
2799 if (!codegen_->GoesToNextBlock(block, successor)) {
2800 __ B(codegen_->GetLabelOf(successor));
2801 }
2802 }
2803
VisitGoto(HGoto * got)2804 void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
2805 got->SetLocations(nullptr);
2806 }
2807
VisitGoto(HGoto * got)2808 void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
2809 HandleGoto(got, got->GetSuccessor());
2810 }
2811
VisitTryBoundary(HTryBoundary * try_boundary)2812 void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2813 try_boundary->SetLocations(nullptr);
2814 }
2815
VisitTryBoundary(HTryBoundary * try_boundary)2816 void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2817 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2818 if (!successor->IsExitBlock()) {
2819 HandleGoto(try_boundary, successor);
2820 }
2821 }
2822
VisitExit(HExit * exit)2823 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
2824 exit->SetLocations(nullptr);
2825 }
2826
VisitExit(HExit * exit ATTRIBUTE_UNUSED)2827 void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
2828 }
2829
GenerateCompareTestAndBranch(HCondition * condition,vixl32::Label * true_target,vixl32::Label * false_target,bool is_far_target)2830 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
2831 vixl32::Label* true_target,
2832 vixl32::Label* false_target,
2833 bool is_far_target) {
2834 if (true_target == false_target) {
2835 DCHECK(true_target != nullptr);
2836 __ B(true_target);
2837 return;
2838 }
2839
2840 vixl32::Label* non_fallthrough_target;
2841 bool invert;
2842 bool emit_both_branches;
2843
2844 if (true_target == nullptr) {
2845 // The true target is fallthrough.
2846 DCHECK(false_target != nullptr);
2847 non_fallthrough_target = false_target;
2848 invert = true;
2849 emit_both_branches = false;
2850 } else {
2851 non_fallthrough_target = true_target;
2852 invert = false;
2853 // Either the false target is fallthrough, or there is no fallthrough
2854 // and both branches must be emitted.
2855 emit_both_branches = (false_target != nullptr);
2856 }
2857
2858 const auto cond = GenerateTest(condition, invert, codegen_);
2859
2860 __ B(cond.first, non_fallthrough_target, is_far_target);
2861
2862 if (emit_both_branches) {
2863 // No target falls through; we need to branch.
2864 __ B(false_target);
2865 }
2866 }
2867
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,vixl32::Label * true_target,vixl32::Label * false_target,bool far_target)2868 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
2869 size_t condition_input_index,
2870 vixl32::Label* true_target,
2871 vixl32::Label* false_target,
2872 bool far_target) {
2873 HInstruction* cond = instruction->InputAt(condition_input_index);
2874
2875 if (true_target == nullptr && false_target == nullptr) {
2876 // Nothing to do. The code always falls through.
2877 return;
2878 } else if (cond->IsIntConstant()) {
2879 // Constant condition, statically compared against "true" (integer value 1).
2880 if (cond->AsIntConstant()->IsTrue()) {
2881 if (true_target != nullptr) {
2882 __ B(true_target);
2883 }
2884 } else {
2885 DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond);
2886 if (false_target != nullptr) {
2887 __ B(false_target);
2888 }
2889 }
2890 return;
2891 }
2892
2893 // The following code generates these patterns:
2894 // (1) true_target == nullptr && false_target != nullptr
2895 // - opposite condition true => branch to false_target
2896 // (2) true_target != nullptr && false_target == nullptr
2897 // - condition true => branch to true_target
2898 // (3) true_target != nullptr && false_target != nullptr
2899 // - condition true => branch to true_target
2900 // - branch to false_target
2901 if (IsBooleanValueOrMaterializedCondition(cond)) {
2902 // Condition has been materialized, compare the output to 0.
2903 if (kIsDebugBuild) {
2904 Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
2905 DCHECK(cond_val.IsRegister());
2906 }
2907 if (true_target == nullptr) {
2908 __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
2909 false_target,
2910 far_target);
2911 } else {
2912 __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
2913 true_target,
2914 far_target);
2915 }
2916 } else {
2917 // Condition has not been materialized. Use its inputs as the comparison and
2918 // its condition as the branch condition.
2919 HCondition* condition = cond->AsCondition();
2920
2921 // If this is a long or FP comparison that has been folded into
2922 // the HCondition, generate the comparison directly.
2923 DataType::Type type = condition->InputAt(0)->GetType();
2924 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2925 GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
2926 return;
2927 }
2928
2929 vixl32::Label* non_fallthrough_target;
2930 vixl32::Condition arm_cond = vixl32::Condition::None();
2931 const vixl32::Register left = InputRegisterAt(cond, 0);
2932 const Operand right = InputOperandAt(cond, 1);
2933
2934 if (true_target == nullptr) {
2935 arm_cond = ARMCondition(condition->GetOppositeCondition());
2936 non_fallthrough_target = false_target;
2937 } else {
2938 arm_cond = ARMCondition(condition->GetCondition());
2939 non_fallthrough_target = true_target;
2940 }
2941
2942 if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
2943 if (arm_cond.Is(eq)) {
2944 __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
2945 } else {
2946 DCHECK(arm_cond.Is(ne));
2947 __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
2948 }
2949 } else {
2950 __ Cmp(left, right);
2951 __ B(arm_cond, non_fallthrough_target, far_target);
2952 }
2953 }
2954
2955 // If neither branch falls through (case 3), the conditional branch to `true_target`
2956 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2957 if (true_target != nullptr && false_target != nullptr) {
2958 __ B(false_target);
2959 }
2960 }
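// The three branch-emission patterns listed in the comment above can be summarized as a small
// decision table. This is an illustrative sketch only, not used by the code generator;
// `EmittedBranches` and `PlanBranches` are hypothetical names.
struct EmittedBranches {
  bool conditional_to_true_target;     // B<cond> true_target
  bool conditional_to_false_target;    // B<!cond> false_target
  bool unconditional_to_false_target;  // trailing B false_target
};
inline EmittedBranches PlanBranches(bool has_true_target, bool has_false_target) {
  if (!has_true_target && !has_false_target) {
    return {false, false, false};  // Everything falls through.
  } else if (!has_true_target) {
    return {false, true, false};   // (1) Branch to false_target on the opposite condition.
  } else if (!has_false_target) {
    return {true, false, false};   // (2) Branch to true_target on the condition.
  } else {
    return {true, false, true};    // (3) Conditional branch to true_target, then B false_target.
  }
}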
2961
VisitIf(HIf * if_instr)2962 void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
2963 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2964 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2965 locations->SetInAt(0, Location::RequiresRegister());
2966 }
2967 }
2968
VisitIf(HIf * if_instr)2969 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
2970 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2971 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2972 vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2973 nullptr : codegen_->GetLabelOf(true_successor);
2974 vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2975 nullptr : codegen_->GetLabelOf(false_successor);
2976 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2977 }
2978
VisitDeoptimize(HDeoptimize * deoptimize)2979 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
2980 LocationSummary* locations = new (GetGraph()->GetAllocator())
2981 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2982 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2983 RegisterSet caller_saves = RegisterSet::Empty();
2984 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
2985 locations->SetCustomSlowPathCallerSaves(caller_saves);
2986 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2987 locations->SetInAt(0, Location::RequiresRegister());
2988 }
2989 }
2990
VisitDeoptimize(HDeoptimize * deoptimize)2991 void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
2992 SlowPathCodeARMVIXL* slow_path =
2993 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
2994 GenerateTestAndBranch(deoptimize,
2995 /* condition_input_index= */ 0,
2996 slow_path->GetEntryLabel(),
2997 /* false_target= */ nullptr);
2998 }
2999
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3000 void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3001 LocationSummary* locations = new (GetGraph()->GetAllocator())
3002 LocationSummary(flag, LocationSummary::kNoCall);
3003 locations->SetOut(Location::RequiresRegister());
3004 }
3005
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3006 void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3007 GetAssembler()->LoadFromOffset(kLoadWord,
3008 OutputRegister(flag),
3009 sp,
3010 codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
3011 }
3012
VisitSelect(HSelect * select)3013 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
3014 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3015 const bool is_floating_point = DataType::IsFloatingPointType(select->GetType());
3016
3017 if (is_floating_point) {
3018 locations->SetInAt(0, Location::RequiresFpuRegister());
3019 locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
3020 } else {
3021 locations->SetInAt(0, Location::RequiresRegister());
3022 locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
3023 }
3024
3025 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3026 locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
3027 // The code generator handles overlap with the values, but not with the condition.
3028 locations->SetOut(Location::SameAsFirstInput());
3029 } else if (is_floating_point) {
3030 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3031 } else {
3032 if (!locations->InAt(1).IsConstant()) {
3033 locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
3034 }
3035
3036 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3037 }
3038 }
3039
VisitSelect(HSelect * select)3040 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
3041 HInstruction* const condition = select->GetCondition();
3042 const LocationSummary* const locations = select->GetLocations();
3043 const DataType::Type type = select->GetType();
3044 const Location first = locations->InAt(0);
3045 const Location out = locations->Out();
3046 const Location second = locations->InAt(1);
3047
3048 // In the unlucky case where the output of this instruction overlaps
3049 // with an input of an "emitted-at-use-site" condition, and
3050 // the output of this instruction is not one of its inputs, we need
3051 // to fall back to branches instead of conditional ARM instructions.
3052 bool output_overlaps_with_condition_inputs =
3053 !IsBooleanValueOrMaterializedCondition(condition) &&
3054 !out.Equals(first) &&
3055 !out.Equals(second) &&
3056 (condition->GetLocations()->InAt(0).Equals(out) ||
3057 condition->GetLocations()->InAt(1).Equals(out));
3058 DCHECK_IMPLIES(output_overlaps_with_condition_inputs, condition->IsCondition());
3059 Location src;
3060
3061 if (condition->IsIntConstant()) {
3062 if (condition->AsIntConstant()->IsFalse()) {
3063 src = first;
3064 } else {
3065 src = second;
3066 }
3067
3068 codegen_->MoveLocation(out, src, type);
3069 return;
3070 }
3071
3072 if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
3073 bool invert = false;
3074
3075 if (out.Equals(second)) {
3076 src = first;
3077 invert = true;
3078 } else if (out.Equals(first)) {
3079 src = second;
3080 } else if (second.IsConstant()) {
3081 DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
3082 src = second;
3083 } else if (first.IsConstant()) {
3084 DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
3085 src = first;
3086 invert = true;
3087 } else {
3088 src = second;
3089 }
3090
3091 if (CanGenerateConditionalMove(out, src)) {
3092 if (!out.Equals(first) && !out.Equals(second)) {
3093 codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
3094 }
3095
3096 std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
3097
3098 if (IsBooleanValueOrMaterializedCondition(condition)) {
3099 __ Cmp(InputRegisterAt(select, 2), 0);
3100 cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
3101 } else {
3102 cond = GenerateTest(condition->AsCondition(), invert, codegen_);
3103 }
3104
3105 const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
3106 // We use the scope because of the IT block that follows.
3107 ExactAssemblyScope guard(GetVIXLAssembler(),
3108 instr_count * vixl32::k16BitT32InstructionSizeInBytes,
3109 CodeBufferCheckScope::kExactSize);
3110
3111 if (out.IsRegister()) {
3112 __ it(cond.first);
3113 __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
3114 } else {
3115 DCHECK(out.IsRegisterPair());
3116
3117 Operand operand_high(0);
3118 Operand operand_low(0);
3119
3120 if (src.IsConstant()) {
3121 const int64_t value = Int64ConstantFrom(src);
3122
3123 operand_high = High32Bits(value);
3124 operand_low = Low32Bits(value);
3125 } else {
3126 DCHECK(src.IsRegisterPair());
3127 operand_high = HighRegisterFrom(src);
3128 operand_low = LowRegisterFrom(src);
3129 }
3130
3131 __ it(cond.first);
3132 __ mov(cond.first, LowRegisterFrom(out), operand_low);
3133 __ it(cond.first);
3134 __ mov(cond.first, HighRegisterFrom(out), operand_high);
3135 }
3136
3137 return;
3138 }
3139 }
3140
3141 vixl32::Label* false_target = nullptr;
3142 vixl32::Label* true_target = nullptr;
3143 vixl32::Label select_end;
3144 vixl32::Label other_case;
3145 vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
3146
3147 if (out.Equals(second)) {
3148 true_target = target;
3149 src = first;
3150 } else {
3151 false_target = target;
3152 src = second;
3153
3154 if (!out.Equals(first)) {
3155 if (output_overlaps_with_condition_inputs) {
3156 false_target = &other_case;
3157 } else {
3158 codegen_->MoveLocation(out, first, type);
3159 }
3160 }
3161 }
3162
3163 GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false);
3164 codegen_->MoveLocation(out, src, type);
3165 if (output_overlaps_with_condition_inputs) {
3166 __ B(target);
3167 __ Bind(&other_case);
3168 codegen_->MoveLocation(out, first, type);
3169 }
3170
3171 if (select_end.IsReferenced()) {
3172 __ Bind(&select_end);
3173 }
3174 }
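// The conditional-move path above relies on the output register already holding one of the two
// values, so only the other value needs to be moved, predicated on the (possibly inverted)
// condition. A minimal sketch of that idea, assuming the register allocator placed the true
// value in the output; `SelectViaPredicatedMove` is an illustrative name, not backend code.
inline int32_t SelectViaPredicatedMove(bool condition, int32_t true_value, int32_t false_value) {
  int32_t out = true_value;  // `out` already holds the true value; no move is needed for it.
  if (!condition) {          // IT + MOV under the inverted condition.
    out = false_value;
  }
  return out;
}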
3175
VisitNop(HNop * nop)3176 void LocationsBuilderARMVIXL::VisitNop(HNop* nop) {
3177 new (GetGraph()->GetAllocator()) LocationSummary(nop);
3178 }
3179
VisitNop(HNop *)3180 void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) {
3181 // The environment recording already happened in CodeGenerator::Compile.
3182 }
3183
IncreaseFrame(size_t adjustment)3184 void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
3185 __ Claim(adjustment);
3186 GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3187 }
3188
DecreaseFrame(size_t adjustment)3189 void CodeGeneratorARMVIXL::DecreaseFrame(size_t adjustment) {
3190 __ Drop(adjustment);
3191 GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3192 }
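// IncreaseFrame/DecreaseFrame move SP and the recorded CFA offset in opposite directions so that
// the unwinder's invariant CFA == SP + cfa_offset keeps holding. A small illustrative model
// follows; `FrameModel` is a hypothetical name and nothing here is part of the backend.
struct FrameModel {
  uint32_t sp;
  int32_t cfa_offset;
  uint32_t Cfa() const { return sp + cfa_offset; }  // Stays constant across Claim/Drop.
  void Claim(size_t adjustment) {                   // IncreaseFrame
    sp -= adjustment;
    cfa_offset += adjustment;
  }
  void Drop(size_t adjustment) {                    // DecreaseFrame
    sp += adjustment;
    cfa_offset -= adjustment;
  }
};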
3193
GenerateNop()3194 void CodeGeneratorARMVIXL::GenerateNop() {
3195 __ Nop();
3196 }
3197
3198 // `temp` is an extra temporary register that is used for some conditions;
3199 // callers may not specify it, in which case the method will use a scratch
3200 // register instead.
GenerateConditionWithZero(IfCondition condition,vixl32::Register out,vixl32::Register in,vixl32::Register temp)3201 void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
3202 vixl32::Register out,
3203 vixl32::Register in,
3204 vixl32::Register temp) {
3205 switch (condition) {
3206 case kCondEQ:
3207 // x <= 0 iff x == 0 when the comparison is unsigned.
3208 case kCondBE:
3209 if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
3210 temp = out;
3211 }
3212
3213 // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
3214 // different as well.
3215 if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
3216 // temp = - in; only 0 sets the carry flag.
3217 __ Rsbs(temp, in, 0);
3218
3219 if (out.Is(in)) {
3220 std::swap(in, temp);
3221 }
3222
3223 // out = - in + in + carry = carry
3224 __ Adc(out, temp, in);
3225 } else {
3226 // If `in` is 0, then it has 32 leading zeros, and fewer than that otherwise.
3227 __ Clz(out, in);
3228 // Any number less than 32 logically shifted right by 5 bits results in 0;
3229 // the same operation on 32 yields 1.
3230 __ Lsr(out, out, 5);
3231 }
3232
3233 break;
3234 case kCondNE:
3235 // x > 0 iff x != 0 when the comparison is unsigned.
3236 case kCondA: {
3237 UseScratchRegisterScope temps(GetVIXLAssembler());
3238
3239 if (out.Is(in)) {
3240 if (!temp.IsValid() || in.Is(temp)) {
3241 temp = temps.Acquire();
3242 }
3243 } else if (!temp.IsValid() || !temp.IsLow()) {
3244 temp = out;
3245 }
3246
3247 // temp = in - 1; only 0 does not set the carry flag.
3248 __ Subs(temp, in, 1);
3249 // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
3250 __ Sbc(out, in, temp);
3251 break;
3252 }
3253 case kCondGE:
3254 __ Mvn(out, in);
3255 in = out;
3256 FALLTHROUGH_INTENDED;
3257 case kCondLT:
3258 // We only care about the sign bit.
3259 __ Lsr(out, in, 31);
3260 break;
3261 case kCondAE:
3262 // Trivially true.
3263 __ Mov(out, 1);
3264 break;
3265 case kCondB:
3266 // Trivially false.
3267 __ Mov(out, 0);
3268 break;
3269 default:
3270 LOG(FATAL) << "Unexpected condition " << condition;
3271 UNREACHABLE();
3272 }
3273 }
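// The flag tricks above can be checked in portable C++. This is an illustrative sketch only
// (wrap-around 32-bit unsigned arithmetic stands in for the ARM flags); the helper names are
// hypothetical and nothing here is emitted by the backend.
inline uint32_t IsZeroViaCarry(uint32_t in) {
  // RSBS temp, in, 0 sets the carry flag only when `in` is 0; ADC then computes
  // (-in) + in + carry == carry.
  uint32_t temp = 0u - in;
  uint32_t carry = (in == 0u) ? 1u : 0u;
  return temp + in + carry;
}
inline uint32_t IsZeroViaClz(uint32_t in) {
  // CLZ yields 32 only for 0; a logical shift right by 5 maps 32 to 1 and smaller counts to 0.
  uint32_t clz = 0;
  for (uint32_t bit = 1u << 31; bit != 0u && (in & bit) == 0u; bit >>= 1) {
    ++clz;
  }
  return clz >> 5;
}
inline uint32_t IsNonZeroViaCarry(uint32_t in) {
  // SUBS temp, in, 1 clears the carry flag only when `in` is 0; SBC out, in, temp then computes
  // in - (in - 1) - (1 - carry) == carry.
  uint32_t temp = in - 1u;
  uint32_t carry = (in != 0u) ? 1u : 0u;
  return in - temp - (1u - carry);
}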
3274
HandleCondition(HCondition * cond)3275 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
3276 LocationSummary* locations =
3277 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
3278 const DataType::Type type = cond->InputAt(0)->GetType();
3279 if (DataType::IsFloatingPointType(type)) {
3280 locations->SetInAt(0, Location::RequiresFpuRegister());
3281 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
3282 } else {
3283 locations->SetInAt(0, Location::RequiresRegister());
3284 locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
3285 }
3286 if (!cond->IsEmittedAtUseSite()) {
3287 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3288 }
3289 }
3290
HandleCondition(HCondition * cond)3291 void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
3292 if (cond->IsEmittedAtUseSite()) {
3293 return;
3294 }
3295
3296 const DataType::Type type = cond->GetLeft()->GetType();
3297
3298 if (DataType::IsFloatingPointType(type)) {
3299 GenerateConditionGeneric(cond, codegen_);
3300 return;
3301 }
3302
3303 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
3304
3305 const IfCondition condition = cond->GetCondition();
3306
3307 // A condition with only one boolean input, or with two boolean inputs that is neither an
3308 // equality nor an inequality, results from transformations done by the instruction simplifier
3309 // and is handled as a regular condition with integral inputs.
3310 if (type == DataType::Type::kBool &&
3311 cond->GetRight()->GetType() == DataType::Type::kBool &&
3312 (condition == kCondEQ || condition == kCondNE)) {
3313 vixl32::Register left = InputRegisterAt(cond, 0);
3314 const vixl32::Register out = OutputRegister(cond);
3315 const Location right_loc = cond->GetLocations()->InAt(1);
3316
3317 // The constant case is handled by the instruction simplifier.
3318 DCHECK(!right_loc.IsConstant());
3319
3320 vixl32::Register right = RegisterFrom(right_loc);
3321
3322 // Avoid 32-bit instructions if possible.
3323 if (out.Is(right)) {
3324 std::swap(left, right);
3325 }
3326
3327 __ Eor(out, left, right);
3328
3329 if (condition == kCondEQ) {
3330 __ Eor(out, out, 1);
3331 }
3332
3333 return;
3334 }
3335
3336 GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
3337 }
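// For 0/1 inputs, the EOR-based lowering above works because XOR is already the "not equal"
// function on booleans, and flipping the low bit turns it into "equal". A minimal illustrative
// sketch (hypothetical helpers, not backend code):
inline uint32_t BoolNotEqual(uint32_t left, uint32_t right) {  // Inputs are 0 or 1.
  return left ^ right;                                         // EOR out, left, right
}
inline uint32_t BoolEqual(uint32_t left, uint32_t right) {
  return (left ^ right) ^ 1u;                                  // Extra EOR out, out, 1
}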
3338
VisitEqual(HEqual * comp)3339 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
3340 HandleCondition(comp);
3341 }
3342
VisitEqual(HEqual * comp)3343 void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) {
3344 HandleCondition(comp);
3345 }
3346
VisitNotEqual(HNotEqual * comp)3347 void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) {
3348 HandleCondition(comp);
3349 }
3350
VisitNotEqual(HNotEqual * comp)3351 void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) {
3352 HandleCondition(comp);
3353 }
3354
VisitLessThan(HLessThan * comp)3355 void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) {
3356 HandleCondition(comp);
3357 }
3358
VisitLessThan(HLessThan * comp)3359 void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) {
3360 HandleCondition(comp);
3361 }
3362
VisitLessThanOrEqual(HLessThanOrEqual * comp)3363 void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3364 HandleCondition(comp);
3365 }
3366
VisitLessThanOrEqual(HLessThanOrEqual * comp)3367 void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3368 HandleCondition(comp);
3369 }
3370
VisitGreaterThan(HGreaterThan * comp)3371 void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3372 HandleCondition(comp);
3373 }
3374
VisitGreaterThan(HGreaterThan * comp)3375 void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3376 HandleCondition(comp);
3377 }
3378
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)3379 void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3380 HandleCondition(comp);
3381 }
3382
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)3383 void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3384 HandleCondition(comp);
3385 }
3386
VisitBelow(HBelow * comp)3387 void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) {
3388 HandleCondition(comp);
3389 }
3390
VisitBelow(HBelow * comp)3391 void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) {
3392 HandleCondition(comp);
3393 }
3394
VisitBelowOrEqual(HBelowOrEqual * comp)3395 void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3396 HandleCondition(comp);
3397 }
3398
VisitBelowOrEqual(HBelowOrEqual * comp)3399 void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3400 HandleCondition(comp);
3401 }
3402
VisitAbove(HAbove * comp)3403 void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) {
3404 HandleCondition(comp);
3405 }
3406
VisitAbove(HAbove * comp)3407 void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) {
3408 HandleCondition(comp);
3409 }
3410
VisitAboveOrEqual(HAboveOrEqual * comp)3411 void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3412 HandleCondition(comp);
3413 }
3414
VisitAboveOrEqual(HAboveOrEqual * comp)3415 void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3416 HandleCondition(comp);
3417 }
3418
VisitIntConstant(HIntConstant * constant)3419 void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
3420 LocationSummary* locations =
3421 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3422 locations->SetOut(Location::ConstantLocation(constant));
3423 }
3424
VisitIntConstant(HIntConstant * constant ATTRIBUTE_UNUSED)3425 void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
3426 // Will be generated at use site.
3427 }
3428
VisitNullConstant(HNullConstant * constant)3429 void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
3430 LocationSummary* locations =
3431 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3432 locations->SetOut(Location::ConstantLocation(constant));
3433 }
3434
VisitNullConstant(HNullConstant * constant ATTRIBUTE_UNUSED)3435 void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
3436 // Will be generated at use site.
3437 }
3438
VisitLongConstant(HLongConstant * constant)3439 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
3440 LocationSummary* locations =
3441 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3442 locations->SetOut(Location::ConstantLocation(constant));
3443 }
3444
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)3445 void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
3446 // Will be generated at use site.
3447 }
3448
VisitFloatConstant(HFloatConstant * constant)3449 void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
3450 LocationSummary* locations =
3451 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3452 locations->SetOut(Location::ConstantLocation(constant));
3453 }
3454
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)3455 void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
3456 HFloatConstant* constant ATTRIBUTE_UNUSED) {
3457 // Will be generated at use site.
3458 }
3459
VisitDoubleConstant(HDoubleConstant * constant)3460 void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
3461 LocationSummary* locations =
3462 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3463 locations->SetOut(Location::ConstantLocation(constant));
3464 }
3465
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)3466 void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
3467 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3468 // Will be generated at use site.
3469 }
3470
VisitConstructorFence(HConstructorFence * constructor_fence)3471 void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
3472 constructor_fence->SetLocations(nullptr);
3473 }
3474
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)3475 void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
3476 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
3477 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3478 }
3479
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)3480 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3481 memory_barrier->SetLocations(nullptr);
3482 }
3483
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)3484 void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3485 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
3486 }
3487
VisitReturnVoid(HReturnVoid * ret)3488 void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
3489 ret->SetLocations(nullptr);
3490 }
3491
VisitReturnVoid(HReturnVoid * ret ATTRIBUTE_UNUSED)3492 void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
3493 codegen_->GenerateFrameExit();
3494 }
3495
VisitReturn(HReturn * ret)3496 void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) {
3497 LocationSummary* locations =
3498 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
3499 locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
3500 }
3501
VisitReturn(HReturn * ret)3502 void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret) {
3503 if (GetGraph()->IsCompilingOsr()) {
3504 // To simplify callers of an OSR method, we put the return value in both
3505 // floating point and core registers.
3506 switch (ret->InputAt(0)->GetType()) {
3507 case DataType::Type::kFloat32:
3508 __ Vmov(r0, s0);
3509 break;
3510 case DataType::Type::kFloat64:
3511 __ Vmov(r0, r1, d0);
3512 break;
3513 default:
3514 break;
3515 }
3516 }
3517 codegen_->GenerateFrameExit();
3518 }
3519
VisitInvokeUnresolved(HInvokeUnresolved * invoke)3520 void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3521 // The trampoline uses the same calling convention as the dex calling convention,
3522 // except that instead of loading arg0/r0 with the target Method*, arg0/r0 contains
3523 // the method_idx.
3524 HandleInvoke(invoke);
3525 }
3526
VisitInvokeUnresolved(HInvokeUnresolved * invoke)3527 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3528 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3529 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3);
3530 }
3531
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)3532 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3533 // Explicit clinit checks triggered by static invokes must have been pruned by
3534 // art::PrepareForRegisterAllocation.
3535 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3536
3537 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3538 if (intrinsic.TryDispatch(invoke)) {
3539 return;
3540 }
3541
3542 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3543 CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
3544 /*for_register_allocation=*/ true);
3545 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3546 } else {
3547 HandleInvoke(invoke);
3548 }
3549 }
3550
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorARMVIXL * codegen)3551 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3552 if (invoke->GetLocations()->Intrinsified()) {
3553 IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
3554 intrinsic.Dispatch(invoke);
3555 return true;
3556 }
3557 return false;
3558 }
3559
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)3560 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3561 // Explicit clinit checks triggered by static invokes must have been pruned by
3562 // art::PrepareForRegisterAllocation.
3563 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3564
3565 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3566 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4);
3567 return;
3568 }
3569
3570 LocationSummary* locations = invoke->GetLocations();
3571 codegen_->GenerateStaticOrDirectCall(
3572 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3573
3574 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5);
3575 }
3576
HandleInvoke(HInvoke * invoke)3577 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
3578 InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
3579 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3580 }
3581
VisitInvokeVirtual(HInvokeVirtual * invoke)3582 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3583 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3584 if (intrinsic.TryDispatch(invoke)) {
3585 return;
3586 }
3587
3588 HandleInvoke(invoke);
3589 }
3590
VisitInvokeVirtual(HInvokeVirtual * invoke)3591 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3592 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3593 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6);
3594 return;
3595 }
3596
3597 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3598 DCHECK(!codegen_->IsLeafMethod());
3599
3600 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7);
3601 }
3602
VisitInvokeInterface(HInvokeInterface * invoke)3603 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3604 HandleInvoke(invoke);
3605 // Add the hidden argument.
3606 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3607 // We cannot request r12 as it's blocked by the register allocator.
3608 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
3609 }
3610 }
3611
MaybeGenerateInlineCacheCheck(HInstruction * instruction,vixl32::Register klass)3612 void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3613 vixl32::Register klass) {
3614 DCHECK_EQ(r0.GetCode(), klass.GetCode());
3615 // We know the destination of an intrinsic, so there is no need to record
3616 // inline caches for it.
3617 if (!instruction->GetLocations()->Intrinsified() &&
3618 GetGraph()->IsCompilingBaseline() &&
3619 !Runtime::Current()->IsAotCompiler()) {
3620 DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
3621 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3622 DCHECK(info != nullptr);
3623 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
3624 uint32_t address = reinterpret_cast32<uint32_t>(cache);
3625 vixl32::Label done;
3626 UseScratchRegisterScope temps(GetVIXLAssembler());
3627 temps.Exclude(ip);
3628 __ Mov(r4, address);
3629 __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
3630 // Fast path for a monomorphic cache.
3631 __ Cmp(klass, ip);
3632 __ B(eq, &done, /* is_far_target= */ false);
3633 InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
3634 __ Bind(&done);
3635 }
3636 }
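// The emitted fast path compares the receiver's class against the first inline cache entry and
// only calls the runtime on a miss. A conceptual sketch with hypothetical types (the real
// InlineCache layout and the update entrypoint live in the runtime, not here):
struct SketchInlineCache {
  void* classes[1];  // The real cache holds several entries; only the first is checked here.
};
inline void SketchUpdateInlineCache(SketchInlineCache* cache,
                                    void* klass,
                                    void (*update_inline_cache)()) {
  if (cache->classes[0] == klass) {
    return;  // Monomorphic hit: nothing to record.
  }
  update_inline_cache();  // Slow path: kQuickUpdateInlineCache.
}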
3637
VisitInvokeInterface(HInvokeInterface * invoke)3638 void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3639 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3640 LocationSummary* locations = invoke->GetLocations();
3641 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3642 Location receiver = locations->InAt(0);
3643 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3644
3645 DCHECK(!receiver.IsStackSlot());
3646
3647 // Ensure the pc position is recorded immediately after the `ldr` instruction.
3648 {
3649 ExactAssemblyScope aas(GetVIXLAssembler(),
3650 vixl32::kMaxInstructionSizeInBytes,
3651 CodeBufferCheckScope::kMaximumSize);
3652 // /* HeapReference<Class> */ temp = receiver->klass_
3653 __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset));
3654 codegen_->MaybeRecordImplicitNullCheck(invoke);
3655 }
3656 // Instead of simply (possibly) unpoisoning `temp` here, we should
3657 // emit a read barrier for the previous class reference load.
3658 // However, this is not required in practice, as this is an
3659 // intermediate/temporary reference and because the current
3660 // concurrent copying collector keeps the from-space memory
3661 // intact/accessible until the end of the marking phase (though the
3662 // concurrent copying collector may not do so in the future).
3663 GetAssembler()->MaybeUnpoisonHeapReference(temp);
3664
3665 // If we're compiling baseline, update the inline cache.
3666 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3667
3668 GetAssembler()->LoadFromOffset(kLoadWord,
3669 temp,
3670 temp,
3671 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
3672
3673 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3674 invoke->GetImtIndex(), kArmPointerSize));
3675 // temp = temp->GetImtEntryAt(method_offset);
3676 GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
3677 uint32_t entry_point =
3678 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
3679 // LR = temp->GetEntryPoint();
3680 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
3681
3682 {
3683 // Set the hidden argument (in r12). This is done here, right before the BLX, to prevent
3684 // other instructions from clobbering it, as they might use r12 as a scratch register.
3685 Location hidden_reg = Location::RegisterLocation(r12.GetCode());
3686 // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
3687 // so it checks if the application is using them (by passing them to the macro assembler
3688 // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
3689 // what is available, and is the opposite of the standard usage: Instead of requesting a
3690 // temporary location, it imposes an external constraint (i.e. a specific register is reserved
3691 // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
3692 // (to materialize the constant), since the destination register becomes available for such use
3693 // internally for the duration of the macro instruction.
3694 UseScratchRegisterScope temps(GetVIXLAssembler());
3695 temps.Exclude(RegisterFrom(hidden_reg));
3696 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3697 Location current_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
3698 if (current_method.IsStackSlot()) {
3699 GetAssembler()->LoadFromOffset(
3700 kLoadWord, RegisterFrom(hidden_reg), sp, current_method.GetStackIndex());
3701 } else {
3702 __ Mov(RegisterFrom(hidden_reg), RegisterFrom(current_method));
3703 }
3704 } else if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3705 // We pass the method from the IMT in case of a conflict. This will ensure
3706 // we go into the runtime to resolve the actual method.
3707 CHECK_NE(temp.GetCode(), lr.GetCode());
3708 __ Mov(RegisterFrom(hidden_reg), temp);
3709 } else {
3710 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3711 }
3712 }
3713 {
3714 // Ensure the pc position is recorded immediately after the `blx` instruction.
3715 // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used.
3716 ExactAssemblyScope aas(GetVIXLAssembler(),
3717 vixl32::k16BitT32InstructionSizeInBytes,
3718 CodeBufferCheckScope::kExactSize);
3719 // LR(); i.e. call the entry point loaded into LR above.
3720 __ blx(lr);
3721 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3722 DCHECK(!codegen_->IsLeafMethod());
3723 }
3724
3725 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8);
3726 }
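// The interface dispatch above is a chain of dependent loads followed by an indirect call with
// the conflict-resolution ("hidden") argument in r12. A conceptual sketch with hypothetical
// struct names (the real layouts are mirror::Object, mirror::Class, ImTable and ArtMethod):
struct SketchMethod {
  void (*entry_point)(SketchMethod* hidden);
};
struct SketchClass {
  SketchMethod** imt;
};
struct SketchObject {
  SketchClass* klass;
};
inline void SketchInterfaceDispatch(SketchObject* receiver, size_t imt_index, SketchMethod* hidden) {
  SketchClass* klass = receiver->klass;          // temp = receiver->klass_
  SketchMethod* method = klass->imt[imt_index];  // temp = temp->GetImtEntryAt(method_offset)
  method->entry_point(hidden);                   // LR = temp->GetEntryPoint(); BLX LR (hidden in r12)
}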
3727
VisitInvokePolymorphic(HInvokePolymorphic * invoke)3728 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3729 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3730 if (intrinsic.TryDispatch(invoke)) {
3731 return;
3732 }
3733 HandleInvoke(invoke);
3734 }
3735
VisitInvokePolymorphic(HInvokePolymorphic * invoke)3736 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3737 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3738 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9);
3739 return;
3740 }
3741 codegen_->GenerateInvokePolymorphicCall(invoke);
3742 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10);
3743 }
3744
VisitInvokeCustom(HInvokeCustom * invoke)3745 void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3746 HandleInvoke(invoke);
3747 }
3748
VisitInvokeCustom(HInvokeCustom * invoke)3749 void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3750 codegen_->GenerateInvokeCustomCall(invoke);
3751 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11);
3752 }
3753
VisitNeg(HNeg * neg)3754 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
3755 LocationSummary* locations =
3756 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3757 switch (neg->GetResultType()) {
3758 case DataType::Type::kInt32: {
3759 locations->SetInAt(0, Location::RequiresRegister());
3760 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3761 break;
3762 }
3763 case DataType::Type::kInt64: {
3764 locations->SetInAt(0, Location::RequiresRegister());
3765 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3766 break;
3767 }
3768
3769 case DataType::Type::kFloat32:
3770 case DataType::Type::kFloat64:
3771 locations->SetInAt(0, Location::RequiresFpuRegister());
3772 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3773 break;
3774
3775 default:
3776 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3777 }
3778 }
3779
VisitNeg(HNeg * neg)3780 void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
3781 LocationSummary* locations = neg->GetLocations();
3782 Location out = locations->Out();
3783 Location in = locations->InAt(0);
3784 switch (neg->GetResultType()) {
3785 case DataType::Type::kInt32:
3786 __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
3787 break;
3788
3789 case DataType::Type::kInt64:
3790 // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
3791 __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
3792 // We cannot emit an RSC (Reverse Subtract with Carry)
3793 // instruction here, as it does not exist in the Thumb-2
3794 // instruction set. We use the following approach,
3795 // based on SBC and SUB, instead.
3796 //
3797 // out.hi = -C
3798 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
3799 // out.hi = out.hi - in.hi
3800 __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
3801 break;
3802
3803 case DataType::Type::kFloat32:
3804 case DataType::Type::kFloat64:
3805 __ Vneg(OutputVRegister(neg), InputVRegister(neg));
3806 break;
3807
3808 default:
3809 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3810 }
3811 }
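// The 64-bit negation above splits -x into 32-bit halves: the low half is negated with RSBS
// (which leaves the "no borrow" carry flag), and the high half is rebuilt from that carry with
// SBC and SUB. An illustrative check of the identity in portable C++ (not backend code):
inline uint64_t NegateViaHalves(uint64_t x) {
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  uint32_t out_lo = 0u - lo;              // RSBS out.lo, in.lo, 0: carry set iff in.lo == 0.
  uint32_t carry = (lo == 0u) ? 1u : 0u;
  uint32_t out_hi = carry - 1u;           // SBC out.hi, out.hi, out.hi == -(1 - C) == C - 1.
  out_hi -= hi;                           // SUB out.hi, out.hi, in.hi.
  return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
}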
3812
VisitTypeConversion(HTypeConversion * conversion)3813 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3814 DataType::Type result_type = conversion->GetResultType();
3815 DataType::Type input_type = conversion->GetInputType();
3816 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3817 << input_type << " -> " << result_type;
3818
3819 // The float-to-long, double-to-long and long-to-float type conversions
3820 // rely on a call to the runtime.
3821 LocationSummary::CallKind call_kind =
3822 (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3823 && result_type == DataType::Type::kInt64)
3824 || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32))
3825 ? LocationSummary::kCallOnMainOnly
3826 : LocationSummary::kNoCall;
3827 LocationSummary* locations =
3828 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3829
3830 switch (result_type) {
3831 case DataType::Type::kUint8:
3832 case DataType::Type::kInt8:
3833 case DataType::Type::kUint16:
3834 case DataType::Type::kInt16:
3835 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3836 locations->SetInAt(0, Location::RequiresRegister());
3837 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3838 break;
3839
3840 case DataType::Type::kInt32:
3841 switch (input_type) {
3842 case DataType::Type::kInt64:
3843 locations->SetInAt(0, Location::Any());
3844 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3845 break;
3846
3847 case DataType::Type::kFloat32:
3848 locations->SetInAt(0, Location::RequiresFpuRegister());
3849 locations->SetOut(Location::RequiresRegister());
3850 locations->AddTemp(Location::RequiresFpuRegister());
3851 break;
3852
3853 case DataType::Type::kFloat64:
3854 locations->SetInAt(0, Location::RequiresFpuRegister());
3855 locations->SetOut(Location::RequiresRegister());
3856 locations->AddTemp(Location::RequiresFpuRegister());
3857 break;
3858
3859 default:
3860 LOG(FATAL) << "Unexpected type conversion from " << input_type
3861 << " to " << result_type;
3862 }
3863 break;
3864
3865 case DataType::Type::kInt64:
3866 switch (input_type) {
3867 case DataType::Type::kBool:
3868 case DataType::Type::kUint8:
3869 case DataType::Type::kInt8:
3870 case DataType::Type::kUint16:
3871 case DataType::Type::kInt16:
3872 case DataType::Type::kInt32:
3873 locations->SetInAt(0, Location::RequiresRegister());
3874 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3875 break;
3876
3877 case DataType::Type::kFloat32: {
3878 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3879 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3880 locations->SetOut(LocationFrom(r0, r1));
3881 break;
3882 }
3883
3884 case DataType::Type::kFloat64: {
3885 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3886 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
3887 calling_convention.GetFpuRegisterAt(1)));
3888 locations->SetOut(LocationFrom(r0, r1));
3889 break;
3890 }
3891
3892 default:
3893 LOG(FATAL) << "Unexpected type conversion from " << input_type
3894 << " to " << result_type;
3895 }
3896 break;
3897
3898 case DataType::Type::kFloat32:
3899 switch (input_type) {
3900 case DataType::Type::kBool:
3901 case DataType::Type::kUint8:
3902 case DataType::Type::kInt8:
3903 case DataType::Type::kUint16:
3904 case DataType::Type::kInt16:
3905 case DataType::Type::kInt32:
3906 locations->SetInAt(0, Location::RequiresRegister());
3907 locations->SetOut(Location::RequiresFpuRegister());
3908 break;
3909
3910 case DataType::Type::kInt64: {
3911 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3912 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
3913 calling_convention.GetRegisterAt(1)));
3914 locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3915 break;
3916 }
3917
3918 case DataType::Type::kFloat64:
3919 locations->SetInAt(0, Location::RequiresFpuRegister());
3920 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3921 break;
3922
3923 default:
3924 LOG(FATAL) << "Unexpected type conversion from " << input_type
3925 << " to " << result_type;
3926 }
3927 break;
3928
3929 case DataType::Type::kFloat64:
3930 switch (input_type) {
3931 case DataType::Type::kBool:
3932 case DataType::Type::kUint8:
3933 case DataType::Type::kInt8:
3934 case DataType::Type::kUint16:
3935 case DataType::Type::kInt16:
3936 case DataType::Type::kInt32:
3937 locations->SetInAt(0, Location::RequiresRegister());
3938 locations->SetOut(Location::RequiresFpuRegister());
3939 break;
3940
3941 case DataType::Type::kInt64:
3942 locations->SetInAt(0, Location::RequiresRegister());
3943 locations->SetOut(Location::RequiresFpuRegister());
3944 locations->AddTemp(Location::RequiresFpuRegister());
3945 locations->AddTemp(Location::RequiresFpuRegister());
3946 break;
3947
3948 case DataType::Type::kFloat32:
3949 locations->SetInAt(0, Location::RequiresFpuRegister());
3950 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3951 break;
3952
3953 default:
3954 LOG(FATAL) << "Unexpected type conversion from " << input_type
3955 << " to " << result_type;
3956 }
3957 break;
3958
3959 default:
3960 LOG(FATAL) << "Unexpected type conversion from " << input_type
3961 << " to " << result_type;
3962 }
3963 }
3964
VisitTypeConversion(HTypeConversion * conversion)3965 void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3966 LocationSummary* locations = conversion->GetLocations();
3967 Location out = locations->Out();
3968 Location in = locations->InAt(0);
3969 DataType::Type result_type = conversion->GetResultType();
3970 DataType::Type input_type = conversion->GetInputType();
3971 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3972 << input_type << " -> " << result_type;
3973 switch (result_type) {
3974 case DataType::Type::kUint8:
3975 switch (input_type) {
3976 case DataType::Type::kInt8:
3977 case DataType::Type::kUint16:
3978 case DataType::Type::kInt16:
3979 case DataType::Type::kInt32:
3980 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
3981 break;
3982 case DataType::Type::kInt64:
3983 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
3984 break;
3985
3986 default:
3987 LOG(FATAL) << "Unexpected type conversion from " << input_type
3988 << " to " << result_type;
3989 }
3990 break;
3991
3992 case DataType::Type::kInt8:
3993 switch (input_type) {
3994 case DataType::Type::kUint8:
3995 case DataType::Type::kUint16:
3996 case DataType::Type::kInt16:
3997 case DataType::Type::kInt32:
3998 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
3999 break;
4000 case DataType::Type::kInt64:
4001 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4002 break;
4003
4004 default:
4005 LOG(FATAL) << "Unexpected type conversion from " << input_type
4006 << " to " << result_type;
4007 }
4008 break;
4009
4010 case DataType::Type::kUint16:
4011 switch (input_type) {
4012 case DataType::Type::kInt8:
4013 case DataType::Type::kInt16:
4014 case DataType::Type::kInt32:
4015 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4016 break;
4017 case DataType::Type::kInt64:
4018 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4019 break;
4020
4021 default:
4022 LOG(FATAL) << "Unexpected type conversion from " << input_type
4023 << " to " << result_type;
4024 }
4025 break;
4026
4027 case DataType::Type::kInt16:
4028 switch (input_type) {
4029 case DataType::Type::kUint16:
4030 case DataType::Type::kInt32:
4031 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4032 break;
4033 case DataType::Type::kInt64:
4034 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4035 break;
4036
4037 default:
4038 LOG(FATAL) << "Unexpected type conversion from " << input_type
4039 << " to " << result_type;
4040 }
4041 break;
4042
4043 case DataType::Type::kInt32:
4044 switch (input_type) {
4045 case DataType::Type::kInt64:
4046 DCHECK(out.IsRegister());
4047 if (in.IsRegisterPair()) {
4048 __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
4049 } else if (in.IsDoubleStackSlot()) {
4050 GetAssembler()->LoadFromOffset(kLoadWord,
4051 OutputRegister(conversion),
4052 sp,
4053 in.GetStackIndex());
4054 } else {
4055 DCHECK(in.IsConstant());
4056 DCHECK(in.GetConstant()->IsLongConstant());
4057 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
4058 __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
4059 }
4060 break;
4061
4062 case DataType::Type::kFloat32: {
4063 vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
4064 __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
4065 __ Vmov(OutputRegister(conversion), temp);
4066 break;
4067 }
4068
4069 case DataType::Type::kFloat64: {
4070 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4071 __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
4072 __ Vmov(OutputRegister(conversion), temp_s);
4073 break;
4074 }
4075
4076 default:
4077 LOG(FATAL) << "Unexpected type conversion from " << input_type
4078 << " to " << result_type;
4079 }
4080 break;
4081
4082 case DataType::Type::kInt64:
4083 switch (input_type) {
4084 case DataType::Type::kBool:
4085 case DataType::Type::kUint8:
4086 case DataType::Type::kInt8:
4087 case DataType::Type::kUint16:
4088 case DataType::Type::kInt16:
4089 case DataType::Type::kInt32:
4090 DCHECK(out.IsRegisterPair());
4091 DCHECK(in.IsRegister());
4092 __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
4093 // Sign extension.
4094 __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
4095 break;
4096
4097 case DataType::Type::kFloat32:
4098 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
4099 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
4100 break;
4101
4102 case DataType::Type::kFloat64:
4103 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
4104 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
4105 break;
4106
4107 default:
4108 LOG(FATAL) << "Unexpected type conversion from " << input_type
4109 << " to " << result_type;
4110 }
4111 break;
4112
4113 case DataType::Type::kFloat32:
4114 switch (input_type) {
4115 case DataType::Type::kBool:
4116 case DataType::Type::kUint8:
4117 case DataType::Type::kInt8:
4118 case DataType::Type::kUint16:
4119 case DataType::Type::kInt16:
4120 case DataType::Type::kInt32:
4121 __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
4122 __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
4123 break;
4124
4125 case DataType::Type::kInt64:
4126 codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc());
4127 CheckEntrypointTypes<kQuickL2f, float, int64_t>();
4128 break;
4129
4130 case DataType::Type::kFloat64:
4131 __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
4132 break;
4133
4134 default:
4135 LOG(FATAL) << "Unexpected type conversion from " << input_type
4136 << " to " << result_type;
4137 }
4138 break;
4139
4140 case DataType::Type::kFloat64:
4141 switch (input_type) {
4142 case DataType::Type::kBool:
4143 case DataType::Type::kUint8:
4144 case DataType::Type::kInt8:
4145 case DataType::Type::kUint16:
4146 case DataType::Type::kInt16:
4147 case DataType::Type::kInt32:
4148 __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
4149 __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
4150 break;
4151
4152 case DataType::Type::kInt64: {
4153 vixl32::Register low = LowRegisterFrom(in);
4154 vixl32::Register high = HighRegisterFrom(in);
4155 vixl32::SRegister out_s = LowSRegisterFrom(out);
4156 vixl32::DRegister out_d = DRegisterFrom(out);
4157 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4158 vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
4159 vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
4160
4161 // temp_d = int-to-double(high)
4162 __ Vmov(temp_s, high);
4163 __ Vcvt(F64, S32, temp_d, temp_s);
4164 // constant_d = k2Pow32EncodingForDouble
4165 __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
4166 // out_d = unsigned-to-double(low)
4167 __ Vmov(out_s, low);
4168 __ Vcvt(F64, U32, out_d, out_s);
4169 // out_d += temp_d * constant_d
4170 __ Vmla(F64, out_d, temp_d, constant_d);
4171 break;
4172 }
4173
4174 case DataType::Type::kFloat32:
4175 __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
4176 break;
4177
4178 default:
4179 LOG(FATAL) << "Unexpected type conversion from " << input_type
4180 << " to " << result_type;
4181 }
4182 break;
4183
4184 default:
4185 LOG(FATAL) << "Unexpected type conversion from " << input_type
4186 << " to " << result_type;
4187 }
4188 }
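// The long-to-double path above rebuilds the value as double(high) * 2^32 + double(unsigned low),
// with the multiply-accumulate done in double precision (k2Pow32EncodingForDouble is the bit
// pattern of 2^32). An illustrative check of that identity in portable C++ (not backend code):
inline double Int64ToDoubleViaHalves(int64_t value) {
  uint32_t low = static_cast<uint32_t>(value);
  int32_t high = static_cast<int32_t>(value >> 32);
  double out = static_cast<double>(low);    // VCVT.F64.U32: exact for any 32-bit value.
  double temp = static_cast<double>(high);  // VCVT.F64.S32: exact as well.
  out += temp * 4294967296.0;               // VMLA with the constant 2^32.
  return out;
}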
4189
VisitAdd(HAdd * add)4190 void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) {
4191 LocationSummary* locations =
4192 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
4193 switch (add->GetResultType()) {
4194 case DataType::Type::kInt32: {
4195 locations->SetInAt(0, Location::RequiresRegister());
4196 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
4197 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4198 break;
4199 }
4200
4201 case DataType::Type::kInt64: {
4202 locations->SetInAt(0, Location::RequiresRegister());
4203 locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
4204 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4205 break;
4206 }
4207
4208 case DataType::Type::kFloat32:
4209 case DataType::Type::kFloat64: {
4210 locations->SetInAt(0, Location::RequiresFpuRegister());
4211 locations->SetInAt(1, Location::RequiresFpuRegister());
4212 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4213 break;
4214 }
4215
4216 default:
4217 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4218 }
4219 }
4220
VisitAdd(HAdd * add)4221 void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) {
4222 LocationSummary* locations = add->GetLocations();
4223 Location out = locations->Out();
4224 Location first = locations->InAt(0);
4225 Location second = locations->InAt(1);
4226
4227 switch (add->GetResultType()) {
4228 case DataType::Type::kInt32: {
4229 __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1));
4230 break;
4231 }
4232
4233 case DataType::Type::kInt64: {
4234 if (second.IsConstant()) {
4235 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4236 GenerateAddLongConst(out, first, value);
4237 } else {
4238 DCHECK(second.IsRegisterPair());
4239 __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4240 __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4241 }
4242 break;
4243 }
4244
4245 case DataType::Type::kFloat32:
4246 case DataType::Type::kFloat64:
4247 __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
4248 break;
4249
4250 default:
4251 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4252 }
4253 }
4254
4255 void LocationsBuilderARMVIXL::VisitSub(HSub* sub) {
4256 LocationSummary* locations =
4257 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
4258 switch (sub->GetResultType()) {
4259 case DataType::Type::kInt32: {
4260 locations->SetInAt(0, Location::RequiresRegister());
4261 locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
4262 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4263 break;
4264 }
4265
4266 case DataType::Type::kInt64: {
4267 locations->SetInAt(0, Location::RequiresRegister());
4268 locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
4269 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4270 break;
4271 }
4272 case DataType::Type::kFloat32:
4273 case DataType::Type::kFloat64: {
4274 locations->SetInAt(0, Location::RequiresFpuRegister());
4275 locations->SetInAt(1, Location::RequiresFpuRegister());
4276 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4277 break;
4278 }
4279 default:
4280 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4281 }
4282 }
4283
4284 void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) {
4285 LocationSummary* locations = sub->GetLocations();
4286 Location out = locations->Out();
4287 Location first = locations->InAt(0);
4288 Location second = locations->InAt(1);
4289 switch (sub->GetResultType()) {
4290 case DataType::Type::kInt32: {
4291 __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
4292 break;
4293 }
4294
4295 case DataType::Type::kInt64: {
4296 if (second.IsConstant()) {
4297 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4298 GenerateAddLongConst(out, first, -value);
4299 } else {
4300 DCHECK(second.IsRegisterPair());
4301 __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4302 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4303 }
4304 break;
4305 }
4306
4307 case DataType::Type::kFloat32:
4308 case DataType::Type::kFloat64:
4309 __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
4310 break;
4311
4312 default:
4313 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4314 }
4315 }
4316
4317 void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
4318 LocationSummary* locations =
4319 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4320 switch (mul->GetResultType()) {
4321 case DataType::Type::kInt32:
4322 case DataType::Type::kInt64: {
4323 locations->SetInAt(0, Location::RequiresRegister());
4324 locations->SetInAt(1, Location::RequiresRegister());
4325 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4326 break;
4327 }
4328
4329 case DataType::Type::kFloat32:
4330 case DataType::Type::kFloat64: {
4331 locations->SetInAt(0, Location::RequiresFpuRegister());
4332 locations->SetInAt(1, Location::RequiresFpuRegister());
4333 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4334 break;
4335 }
4336
4337 default:
4338 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4339 }
4340 }
4341
4342 void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
4343 LocationSummary* locations = mul->GetLocations();
4344 Location out = locations->Out();
4345 Location first = locations->InAt(0);
4346 Location second = locations->InAt(1);
4347 switch (mul->GetResultType()) {
4348 case DataType::Type::kInt32: {
4349 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4350 break;
4351 }
4352 case DataType::Type::kInt64: {
4353 vixl32::Register out_hi = HighRegisterFrom(out);
4354 vixl32::Register out_lo = LowRegisterFrom(out);
4355 vixl32::Register in1_hi = HighRegisterFrom(first);
4356 vixl32::Register in1_lo = LowRegisterFrom(first);
4357 vixl32::Register in2_hi = HighRegisterFrom(second);
4358 vixl32::Register in2_lo = LowRegisterFrom(second);
4359
4360 // Extra checks to protect against problems caused by the existence of the R1_R2 pair.
4361 // The algorithm is wrong if out.hi is either in1.lo or in2.lo:
4362 // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2);
4363 DCHECK(!out_hi.Is(in1_lo));
4364 DCHECK(!out_hi.Is(in2_lo));
4365
4366 // input: in1 - 64 bits, in2 - 64 bits
4367 // output: out
4368 // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
4369 // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
4370 // parts: out.lo = (in1.lo * in2.lo)[31:0]
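      // For example, in1 = 0x0000000100000002 and in2 = 0x0000000300000004:
      //   out.hi = 2 * 3 + 1 * 4 + 0 = 0xA, out.lo = (2 * 4)[31:0] = 8,
      //   i.e. out = 0x0000000A00000008 == (in1 * in2) mod 2^64.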
4371
4372 UseScratchRegisterScope temps(GetVIXLAssembler());
4373 vixl32::Register temp = temps.Acquire();
4374 // temp <- in1.lo * in2.hi
4375 __ Mul(temp, in1_lo, in2_hi);
4376 // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo
4377 __ Mla(out_hi, in1_hi, in2_lo, temp);
4378 // out.lo <- (in1.lo * in2.lo)[31:0];
4379 __ Umull(out_lo, temp, in1_lo, in2_lo);
4380 // out.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
4381 __ Add(out_hi, out_hi, temp);
4382 break;
4383 }
4384
4385 case DataType::Type::kFloat32:
4386 case DataType::Type::kFloat64:
4387 __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
4388 break;
4389
4390 default:
4391 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4392 }
4393 }
4394
4395 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4396 DCHECK(instruction->IsDiv() || instruction->IsRem());
4397 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4398
4399 Location second = instruction->GetLocations()->InAt(1);
4400 DCHECK(second.IsConstant());
4401
4402 vixl32::Register out = OutputRegister(instruction);
4403 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4404 int32_t imm = Int32ConstantFrom(second);
4405 DCHECK(imm == 1 || imm == -1);
4406
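  // A remainder by +/-1 is always 0; a division by 1 is the identity and by -1 a negation.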
4407 if (instruction->IsRem()) {
4408 __ Mov(out, 0);
4409 } else {
4410 if (imm == 1) {
4411 __ Mov(out, dividend);
4412 } else {
4413 __ Rsb(out, dividend, 0);
4414 }
4415 }
4416 }
4417
4418 void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
4419 DCHECK(instruction->IsDiv() || instruction->IsRem());
4420 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4421
4422 LocationSummary* locations = instruction->GetLocations();
4423 Location second = locations->InAt(1);
4424 DCHECK(second.IsConstant());
4425
4426 vixl32::Register out = OutputRegister(instruction);
4427 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4428 int32_t imm = Int32ConstantFrom(second);
4429 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4430 int ctz_imm = CTZ(abs_imm);
4431
4432 auto generate_div_code = [this, imm, ctz_imm](vixl32::Register out, vixl32::Register in) {
4433 __ Asr(out, in, ctz_imm);
4434 if (imm < 0) {
4435 __ Rsb(out, out, 0);
4436 }
4437 };
4438
4439 if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
4440 // No need to adjust the result for non-negative dividends or the INT32_MIN dividend.
4441 // NOTE: The generated code for HDiv/HRem correctly works for the INT32_MIN dividend:
4442 // imm == 2
4443 // HDiv
4444 // add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
4445 // asr out, out(0x80000001), #1 => out = 0xc0000000
4446 // This is the same as 'asr out, dividend(0x80000000), #1'
4447 //
4448 // imm > 2
4449 // HDiv
4450 // asr out, dividend(0x80000000), #31 => out = -1
4451 // add out, dividend(0x80000000), out(-1), lsr #(32 - ctz_imm) => out = 0b10..01..1,
4452 // where the number of the rightmost 1s is ctz_imm.
4453 // asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
4454 // leftmost 1s is ctz_imm + 1.
4455 // This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
4456 //
4457 // imm == INT32_MIN
4458 // HDiv
4459 // asr out, dividend(0x80000000), #31 => out = -1
4460 // add out, dividend(0x80000000), out(-1), lsr #1 => out = 0xc0000000
4461 // asr out, out(0xc0000000), #31 => out = -1
4462 // rsb out, out(-1), #0 => out = 1
4463 // This is the same as
4464 // asr out, dividend(0x80000000), #31
4465 // rsb out, out, #0
4466 //
4467 //
4468 // INT32_MIN % imm must be 0 for any imm that is a power of 2. 'and' and 'ubfx' work only
4469 // with bits 0..30 of the dividend. For INT32_MIN those bits are all zeros, so 'and' and
4470 // 'ubfx' always produce zero.
4471 if (instruction->IsDiv()) {
4472 generate_div_code(out, dividend);
4473 } else {
4474 if (GetVIXLAssembler()->IsModifiedImmediate(abs_imm - 1)) {
4475 __ And(out, dividend, abs_imm - 1);
4476 } else {
4477 __ Ubfx(out, dividend, 0, ctz_imm);
4478 }
4479 return;
4480 }
4481 } else {
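    // The dividend may be negative: to round the quotient towards zero, add (2^ctz_imm - 1)
    // to negative dividends before the arithmetic shift. The addend is built from the
    // replicated sign bit of the dividend.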
4482 vixl32::Register add_right_input = dividend;
4483 if (ctz_imm > 1) {
4484 __ Asr(out, dividend, 31);
4485 add_right_input = out;
4486 }
4487 __ Add(out, dividend, Operand(add_right_input, vixl32::LSR, 32 - ctz_imm));
4488
4489 if (instruction->IsDiv()) {
4490 generate_div_code(out, out);
4491 } else {
4492 __ Bfc(out, 0, ctz_imm);
4493 __ Sub(out, dividend, out);
4494 }
4495 }
4496 }
4497
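// Generates an Int32 division or remainder by an arbitrary constant using the "magic number"
// technique (see Hacker's Delight, chapter 10): the quotient is derived from the high 32 bits
// of dividend * magic, corrected by +/- dividend depending on the signs of the magic constant
// and the divisor, an arithmetic shift, and +1 for a negative intermediate result. For example,
// for a divisor of 3 the magic constant works out to 0x55555556 with a shift of 0, so the
// generated sequence for an unknown-sign dividend is roughly:
//   mov   temp1, #0x55555556
//   smull temp2, temp1, dividend, temp1   // temp1 = high 32 bits of dividend * magic
//   sub   out, temp1, temp1, asr #31      // quotient = temp1 + (temp1 < 0 ? 1 : 0)
// For HRem the quotient is then multiplied back and subtracted from the dividend with Mls.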
4498 void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4499 DCHECK(instruction->IsDiv() || instruction->IsRem());
4500 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4501
4502 LocationSummary* locations = instruction->GetLocations();
4503 Location second = locations->InAt(1);
4504 DCHECK(second.IsConstant());
4505
4506 vixl32::Register out = OutputRegister(instruction);
4507 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4508 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
4509 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
4510 int32_t imm = Int32ConstantFrom(second);
4511
4512 int64_t magic;
4513 int shift;
4514 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4515
4516 auto generate_unsigned_div_code = [this, magic, shift](vixl32::Register out,
4517 vixl32::Register dividend,
4518 vixl32::Register temp1,
4519 vixl32::Register temp2) {
4520 // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4521 __ Mov(temp1, static_cast<int32_t>(magic));
4522 if (magic > 0 && shift == 0) {
4523 __ Smull(temp2, out, dividend, temp1);
4524 } else {
4525 __ Smull(temp2, temp1, dividend, temp1);
4526 if (magic < 0) {
4527 // The negative magic M = static_cast<int>(m) means that the multiplier m is greater
4528 // than INT32_MAX. In such a case shift is never 0.
4529 // Proof:
4530 // m = (2^p + d - 2^p % d) / d, where p = 32 + shift, d > 2
4531 //
4532 // If shift == 0, m = (2^32 + d - 2^32 % d) / d =
4533 // = (2^32 + d - (2^32 - (2^32 / d) * d)) / d =
4534 // = (d + (2^32 / d) * d) / d = 1 + (2^32 / d), here '/' is the integer division.
4535 //
4536 // 1 + (2^32 / d) is decreasing when d is increasing.
4537 // The maximum is 1 431 655 766, when d == 3. This value is less than INT32_MAX.
4538 // The minimum is 3, when d = 2^31 - 1.
4539 // So for all values of d in [3, INT32_MAX], m with p == 32 is in [3, INT32_MAX) and
4540 // is never negative.
4541 __ Add(temp1, temp1, dividend);
4542 }
4543 DCHECK_NE(shift, 0);
4544 __ Lsr(out, temp1, shift);
4545 }
4546 };
4547
4548 if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
4549 // No need to adjust the result for a non-negative dividend and a positive divisor.
4550 if (instruction->IsDiv()) {
4551 generate_unsigned_div_code(out, dividend, temp1, temp2);
4552 } else {
4553 generate_unsigned_div_code(temp1, dividend, temp1, temp2);
4554 __ Mov(temp2, imm);
4555 __ Mls(out, temp1, temp2, dividend);
4556 }
4557 } else {
4558 // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4559 __ Mov(temp1, static_cast<int32_t>(magic));
4560 __ Smull(temp2, temp1, dividend, temp1);
4561
4562 if (imm > 0 && magic < 0) {
4563 __ Add(temp1, temp1, dividend);
4564 } else if (imm < 0 && magic > 0) {
4565 __ Sub(temp1, temp1, dividend);
4566 }
4567
4568 if (shift != 0) {
4569 __ Asr(temp1, temp1, shift);
4570 }
4571
4572 if (instruction->IsDiv()) {
4573 __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4574 } else {
4575 __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4576 // TODO: Strength reduction for mls.
4577 __ Mov(temp2, imm);
4578 __ Mls(out, temp1, temp2, dividend);
4579 }
4580 }
4581 }
4582
4583 void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
4584 HBinaryOperation* instruction) {
4585 DCHECK(instruction->IsDiv() || instruction->IsRem());
4586 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4587
4588 Location second = instruction->GetLocations()->InAt(1);
4589 DCHECK(second.IsConstant());
4590
4591 int32_t imm = Int32ConstantFrom(second);
4592 if (imm == 0) {
4593 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4594 } else if (imm == 1 || imm == -1) {
4595 DivRemOneOrMinusOne(instruction);
4596 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4597 DivRemByPowerOfTwo(instruction);
4598 } else {
4599 DCHECK(imm <= -2 || imm >= 2);
4600 GenerateDivRemWithAnyConstant(instruction);
4601 }
4602 }
4603
4604 void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
4605 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4606 if (div->GetResultType() == DataType::Type::kInt64) {
4607 // pLdiv runtime call.
4608 call_kind = LocationSummary::kCallOnMainOnly;
4609 } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) {
4610 // sdiv will be replaced by another instruction sequence.
4611 } else if (div->GetResultType() == DataType::Type::kInt32 &&
4612 !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4613 // pIdivmod runtime call.
4614 call_kind = LocationSummary::kCallOnMainOnly;
4615 }
4616
4617 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4618
4619 switch (div->GetResultType()) {
4620 case DataType::Type::kInt32: {
4621 HInstruction* divisor = div->InputAt(1);
4622 if (divisor->IsConstant()) {
4623 locations->SetInAt(0, Location::RequiresRegister());
4624 locations->SetInAt(1, Location::ConstantLocation(divisor));
4625 int32_t value = Int32ConstantFrom(divisor);
4626 Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4627 if (value == 1 || value == 0 || value == -1) {
4628 // No temp register required.
4629 } else if (IsPowerOfTwo(AbsOrMin(value)) &&
4630 value != 2 &&
4631 value != -2 &&
4632 !HasNonNegativeOrMinIntInputAt(div, 0)) {
4633 // The "out" register is used as a temporary, so it overlaps with the inputs.
4634 out_overlaps = Location::kOutputOverlap;
4635 } else {
4636 locations->AddRegisterTemps(2);
4637 }
4638 locations->SetOut(Location::RequiresRegister(), out_overlaps);
4639 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4640 locations->SetInAt(0, Location::RequiresRegister());
4641 locations->SetInAt(1, Location::RequiresRegister());
4642 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4643 } else {
4644 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4645 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4646 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4647 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4648 // we only need the former.
4649 locations->SetOut(LocationFrom(r0));
4650 }
4651 break;
4652 }
4653 case DataType::Type::kInt64: {
4654 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4655 locations->SetInAt(0, LocationFrom(
4656 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4657 locations->SetInAt(1, LocationFrom(
4658 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4659 locations->SetOut(LocationFrom(r0, r1));
4660 break;
4661 }
4662 case DataType::Type::kFloat32:
4663 case DataType::Type::kFloat64: {
4664 locations->SetInAt(0, Location::RequiresFpuRegister());
4665 locations->SetInAt(1, Location::RequiresFpuRegister());
4666 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4667 break;
4668 }
4669
4670 default:
4671 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4672 }
4673 }
4674
4675 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
4676 Location lhs = div->GetLocations()->InAt(0);
4677 Location rhs = div->GetLocations()->InAt(1);
4678
4679 switch (div->GetResultType()) {
4680 case DataType::Type::kInt32: {
4681 if (rhs.IsConstant()) {
4682 GenerateDivRemConstantIntegral(div);
4683 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4684 __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
4685 } else {
4686 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4687 DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
4688 DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
4689 DCHECK(r0.Is(OutputRegister(div)));
4690
4691 codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
4692 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4693 }
4694 break;
4695 }
4696
4697 case DataType::Type::kInt64: {
4698 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4699 DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
4700 DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
4701 DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
4702 DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
4703 DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
4704 DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
4705
4706 codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
4707 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4708 break;
4709 }
4710
4711 case DataType::Type::kFloat32:
4712 case DataType::Type::kFloat64:
4713 __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
4714 break;
4715
4716 default:
4717 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4718 }
4719 }
4720
4721 void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
4722 DataType::Type type = rem->GetResultType();
4723
4724 // Most remainders are implemented in the runtime.
4725 LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
4726 if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) {
4727 // sdiv will be replaced by another instruction sequence.
4728 call_kind = LocationSummary::kNoCall;
4729 } else if ((rem->GetResultType() == DataType::Type::kInt32)
4730 && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4731 // We have a hardware divide instruction for int, so do it with three instructions.
4732 call_kind = LocationSummary::kNoCall;
4733 }
4734
4735 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4736
4737 switch (type) {
4738 case DataType::Type::kInt32: {
4739 HInstruction* divisor = rem->InputAt(1);
4740 if (divisor->IsConstant()) {
4741 locations->SetInAt(0, Location::RequiresRegister());
4742 locations->SetInAt(1, Location::ConstantLocation(divisor));
4743 int32_t value = Int32ConstantFrom(divisor);
4744 Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4745 if (value == 1 || value == 0 || value == -1) {
4746 // No temp register required.
4747 } else if (IsPowerOfTwo(AbsOrMin(value)) && !HasNonNegativeOrMinIntInputAt(rem, 0)) {
4748 // The "out" register is used as a temporary, so it overlaps with the inputs.
4749 out_overlaps = Location::kOutputOverlap;
4750 } else {
4751 locations->AddRegisterTemps(2);
4752 }
4753 locations->SetOut(Location::RequiresRegister(), out_overlaps);
4754 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4755 locations->SetInAt(0, Location::RequiresRegister());
4756 locations->SetInAt(1, Location::RequiresRegister());
4757 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4758 locations->AddTemp(Location::RequiresRegister());
4759 } else {
4760 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4761 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4762 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4763 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4764 // we only need the latter.
4765 locations->SetOut(LocationFrom(r1));
4766 }
4767 break;
4768 }
4769 case DataType::Type::kInt64: {
4770 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4771 locations->SetInAt(0, LocationFrom(
4772 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4773 locations->SetInAt(1, LocationFrom(
4774 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4775 // The runtime helper puts the output in R2,R3.
4776 locations->SetOut(LocationFrom(r2, r3));
4777 break;
4778 }
4779 case DataType::Type::kFloat32: {
4780 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4781 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4782 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
4783 locations->SetOut(LocationFrom(s0));
4784 break;
4785 }
4786
4787 case DataType::Type::kFloat64: {
4788 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4789 locations->SetInAt(0, LocationFrom(
4790 calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
4791 locations->SetInAt(1, LocationFrom(
4792 calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
4793 locations->SetOut(LocationFrom(s0, s1));
4794 break;
4795 }
4796
4797 default:
4798 LOG(FATAL) << "Unexpected rem type " << type;
4799 }
4800 }
4801
4802 void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
4803 LocationSummary* locations = rem->GetLocations();
4804 Location second = locations->InAt(1);
4805
4806 DataType::Type type = rem->GetResultType();
4807 switch (type) {
4808 case DataType::Type::kInt32: {
4809 vixl32::Register reg1 = InputRegisterAt(rem, 0);
4810 vixl32::Register out_reg = OutputRegister(rem);
4811 if (second.IsConstant()) {
4812 GenerateDivRemConstantIntegral(rem);
4813 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4814 vixl32::Register reg2 = RegisterFrom(second);
4815 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
4816
4817 // temp = reg1 / reg2 (integer division)
4818 // dest = reg1 - temp * reg2
4819 __ Sdiv(temp, reg1, reg2);
4820 __ Mls(out_reg, temp, reg2, reg1);
4821 } else {
4822 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4823 DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
4824 DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
4825 DCHECK(out_reg.Is(r1));
4826
4827 codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
4828 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4829 }
4830 break;
4831 }
4832
4833 case DataType::Type::kInt64: {
4834 codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
4835 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4836 break;
4837 }
4838
4839 case DataType::Type::kFloat32: {
4840 codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
4841 CheckEntrypointTypes<kQuickFmodf, float, float, float>();
4842 break;
4843 }
4844
4845 case DataType::Type::kFloat64: {
4846 codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
4847 CheckEntrypointTypes<kQuickFmod, double, double, double>();
4848 break;
4849 }
4850
4851 default:
4852 LOG(FATAL) << "Unexpected rem type " << type;
4853 }
4854 }
4855
4856 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4857 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4858 switch (minmax->GetResultType()) {
4859 case DataType::Type::kInt32:
4860 locations->SetInAt(0, Location::RequiresRegister());
4861 locations->SetInAt(1, Location::RequiresRegister());
4862 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4863 break;
4864 case DataType::Type::kInt64:
4865 locations->SetInAt(0, Location::RequiresRegister());
4866 locations->SetInAt(1, Location::RequiresRegister());
4867 locations->SetOut(Location::SameAsFirstInput());
4868 break;
4869 case DataType::Type::kFloat32:
4870 locations->SetInAt(0, Location::RequiresFpuRegister());
4871 locations->SetInAt(1, Location::RequiresFpuRegister());
4872 locations->SetOut(Location::SameAsFirstInput());
4873 locations->AddTemp(Location::RequiresRegister());
4874 break;
4875 case DataType::Type::kFloat64:
4876 locations->SetInAt(0, Location::RequiresFpuRegister());
4877 locations->SetInAt(1, Location::RequiresFpuRegister());
4878 locations->SetOut(Location::SameAsFirstInput());
4879 break;
4880 default:
4881 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4882 }
4883 }
4884
4885 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
4886 Location op1_loc = locations->InAt(0);
4887 Location op2_loc = locations->InAt(1);
4888 Location out_loc = locations->Out();
4889
4890 vixl32::Register op1 = RegisterFrom(op1_loc);
4891 vixl32::Register op2 = RegisterFrom(op2_loc);
4892 vixl32::Register out = RegisterFrom(out_loc);
4893
4894 __ Cmp(op1, op2);
4895
4896 {
4897 ExactAssemblyScope aas(GetVIXLAssembler(),
4898 3 * kMaxInstructionSizeInBytes,
4899 CodeBufferCheckScope::kMaximumSize);
4900
4901 __ ite(is_min ? lt : gt);
4902 __ mov(is_min ? lt : gt, out, op1);
4903 __ mov(is_min ? ge : le, out, op2);
4904 }
4905 }
4906
4907 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
4908 Location op1_loc = locations->InAt(0);
4909 Location op2_loc = locations->InAt(1);
4910 Location out_loc = locations->Out();
4911
4912 // Optimization: don't generate any code if inputs are the same.
4913 if (op1_loc.Equals(op2_loc)) {
4914 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
4915 return;
4916 }
4917
4918 vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
4919 vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
4920 vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
4921 vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
4922 vixl32::Register out_lo = LowRegisterFrom(out_loc);
4923 vixl32::Register out_hi = HighRegisterFrom(out_loc);
4924 UseScratchRegisterScope temps(GetVIXLAssembler());
4925 const vixl32::Register temp = temps.Acquire();
4926
4927 DCHECK(op1_lo.Is(out_lo));
4928 DCHECK(op1_hi.Is(out_hi));
4929
4930 // Compare op1 >= op2, or op1 < op2.
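  // Cmp subtracts the low words (setting the carry), and Sbcs subtracts the high words with
  // borrow into a scratch register, so the flags reflect the full 64-bit signed difference.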
4931 __ Cmp(out_lo, op2_lo);
4932 __ Sbcs(temp, out_hi, op2_hi);
4933
4934 // Now GE/LT condition code is correct for the long comparison.
4935 {
4936 vixl32::ConditionType cond = is_min ? ge : lt;
4937 ExactAssemblyScope it_scope(GetVIXLAssembler(),
4938 3 * kMaxInstructionSizeInBytes,
4939 CodeBufferCheckScope::kMaximumSize);
4940 __ itt(cond);
4941 __ mov(cond, out_lo, op2_lo);
4942 __ mov(cond, out_hi, op2_hi);
4943 }
4944 }
4945
4946 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
4947 LocationSummary* locations = minmax->GetLocations();
4948 Location op1_loc = locations->InAt(0);
4949 Location op2_loc = locations->InAt(1);
4950 Location out_loc = locations->Out();
4951
4952 // Optimization: don't generate any code if inputs are the same.
4953 if (op1_loc.Equals(op2_loc)) {
4954 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
4955 return;
4956 }
4957
4958 vixl32::SRegister op1 = SRegisterFrom(op1_loc);
4959 vixl32::SRegister op2 = SRegisterFrom(op2_loc);
4960 vixl32::SRegister out = SRegisterFrom(out_loc);
4961
4962 UseScratchRegisterScope temps(GetVIXLAssembler());
4963 const vixl32::Register temp1 = temps.Acquire();
4964 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
4965 vixl32::Label nan, done;
4966 vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
4967
4968 DCHECK(op1.Is(out));
4969
4970 __ Vcmp(op1, op2);
4971 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
4972 __ B(vs, &nan, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
4973
4974 // op1 <> op2
4975 vixl32::ConditionType cond = is_min ? gt : lt;
4976 {
4977 ExactAssemblyScope it_scope(GetVIXLAssembler(),
4978 2 * kMaxInstructionSizeInBytes,
4979 CodeBufferCheckScope::kMaximumSize);
4980 __ it(cond);
4981 __ vmov(cond, F32, out, op2);
4982 }
4983 // For <> (not equal), the min/max calculation is already done.
4984 __ B(ne, final_label, /* is_far_target= */ false);
4985
4986 // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
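  // Equal non-zero inputs share the same bit pattern, so OR/AND is a no-op there; for +0.0 vs
  // -0.0 only the sign bit differs: OR yields -0.0 (the min), AND yields +0.0 (the max).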
4987 __ Vmov(temp1, op1);
4988 __ Vmov(temp2, op2);
4989 if (is_min) {
4990 __ Orr(temp1, temp1, temp2);
4991 } else {
4992 __ And(temp1, temp1, temp2);
4993 }
4994 __ Vmov(out, temp1);
4995 __ B(final_label);
4996
4997 // handle NaN input.
4998 __ Bind(&nan);
4999 __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
5000 __ Vmov(out, temp1);
5001
5002 if (done.IsReferenced()) {
5003 __ Bind(&done);
5004 }
5005 }
5006
5007 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
5008 LocationSummary* locations = minmax->GetLocations();
5009 Location op1_loc = locations->InAt(0);
5010 Location op2_loc = locations->InAt(1);
5011 Location out_loc = locations->Out();
5012
5013 // Optimization: don't generate any code if inputs are the same.
5014 if (op1_loc.Equals(op2_loc)) {
5015 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
5016 return;
5017 }
5018
5019 vixl32::DRegister op1 = DRegisterFrom(op1_loc);
5020 vixl32::DRegister op2 = DRegisterFrom(op2_loc);
5021 vixl32::DRegister out = DRegisterFrom(out_loc);
5022 vixl32::Label handle_nan_eq, done;
5023 vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5024
5025 DCHECK(op1.Is(out));
5026
5027 __ Vcmp(op1, op2);
5028 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5029 __ B(vs, &handle_nan_eq, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
5030
5031 // op1 <> op2
5032 vixl32::ConditionType cond = is_min ? gt : lt;
5033 {
5034 ExactAssemblyScope it_scope(GetVIXLAssembler(),
5035 2 * kMaxInstructionSizeInBytes,
5036 CodeBufferCheckScope::kMaximumSize);
5037 __ it(cond);
5038 __ vmov(cond, F64, out, op2);
5039 }
5040 // For <> (not equal), the min/max calculation is already done.
5041 __ B(ne, final_label, /* is_far_target= */ false);
5042
5043 // handle op1 == op2, max(+0.0,-0.0).
5044 if (!is_min) {
5045 __ Vand(F64, out, op1, op2);
5046 __ B(final_label);
5047 }
5048
5049 // handle op1 == op2, min(+0.0,-0.0), NaN input.
5050 __ Bind(&handle_nan_eq);
5051 __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
5052
5053 if (done.IsReferenced()) {
5054 __ Bind(&done);
5055 }
5056 }
5057
5058 void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
5059 DataType::Type type = minmax->GetResultType();
5060 switch (type) {
5061 case DataType::Type::kInt32:
5062 GenerateMinMaxInt(minmax->GetLocations(), is_min);
5063 break;
5064 case DataType::Type::kInt64:
5065 GenerateMinMaxLong(minmax->GetLocations(), is_min);
5066 break;
5067 case DataType::Type::kFloat32:
5068 GenerateMinMaxFloat(minmax, is_min);
5069 break;
5070 case DataType::Type::kFloat64:
5071 GenerateMinMaxDouble(minmax, is_min);
5072 break;
5073 default:
5074 LOG(FATAL) << "Unexpected type for HMinMax " << type;
5075 }
5076 }
5077
5078 void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
5079 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
5080 }
5081
5082 void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
5083 GenerateMinMax(min, /*is_min*/ true);
5084 }
5085
5086 void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
5087 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
5088 }
5089
5090 void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
5091 GenerateMinMax(max, /*is_min*/ false);
5092 }
5093
5094 void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
5095 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5096 switch (abs->GetResultType()) {
5097 case DataType::Type::kInt32:
5098 case DataType::Type::kInt64:
5099 locations->SetInAt(0, Location::RequiresRegister());
5100 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5101 locations->AddTemp(Location::RequiresRegister());
5102 break;
5103 case DataType::Type::kFloat32:
5104 case DataType::Type::kFloat64:
5105 locations->SetInAt(0, Location::RequiresFpuRegister());
5106 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5107 break;
5108 default:
5109 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5110 }
5111 }
5112
5113 void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
5114 LocationSummary* locations = abs->GetLocations();
5115 switch (abs->GetResultType()) {
5116 case DataType::Type::kInt32: {
5117 vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
5118 vixl32::Register out_reg = RegisterFrom(locations->Out());
5119 vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
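      // abs(x) = (x + mask) ^ mask, where mask = x >> 31 (all ones for a negative x, else 0).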
5120 __ Asr(mask, in_reg, 31);
5121 __ Add(out_reg, in_reg, mask);
5122 __ Eor(out_reg, out_reg, mask);
5123 break;
5124 }
5125 case DataType::Type::kInt64: {
5126 Location in = locations->InAt(0);
5127 vixl32::Register in_reg_lo = LowRegisterFrom(in);
5128 vixl32::Register in_reg_hi = HighRegisterFrom(in);
5129 Location output = locations->Out();
5130 vixl32::Register out_reg_lo = LowRegisterFrom(output);
5131 vixl32::Register out_reg_hi = HighRegisterFrom(output);
5132 DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
5133 vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
5134 __ Asr(mask, in_reg_hi, 31);
5135 __ Adds(out_reg_lo, in_reg_lo, mask);
5136 __ Adc(out_reg_hi, in_reg_hi, mask);
5137 __ Eor(out_reg_lo, out_reg_lo, mask);
5138 __ Eor(out_reg_hi, out_reg_hi, mask);
5139 break;
5140 }
5141 case DataType::Type::kFloat32:
5142 case DataType::Type::kFloat64:
5143 __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
5144 break;
5145 default:
5146 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5147 }
5148 }
5149
5150 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5151 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5152 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5153 }
5154
5155 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5156 DivZeroCheckSlowPathARMVIXL* slow_path =
5157 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction);
5158 codegen_->AddSlowPath(slow_path);
5159
5160 LocationSummary* locations = instruction->GetLocations();
5161 Location value = locations->InAt(0);
5162
5163 switch (instruction->GetType()) {
5164 case DataType::Type::kBool:
5165 case DataType::Type::kUint8:
5166 case DataType::Type::kInt8:
5167 case DataType::Type::kUint16:
5168 case DataType::Type::kInt16:
5169 case DataType::Type::kInt32: {
5170 if (value.IsRegister()) {
5171 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
5172 } else {
5173 DCHECK(value.IsConstant()) << value;
5174 if (Int32ConstantFrom(value) == 0) {
5175 __ B(slow_path->GetEntryLabel());
5176 }
5177 }
5178 break;
5179 }
5180 case DataType::Type::kInt64: {
5181 if (value.IsRegisterPair()) {
5182 UseScratchRegisterScope temps(GetVIXLAssembler());
5183 vixl32::Register temp = temps.Acquire();
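        // The 64-bit value is zero iff the OR of its two halves is zero.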
5184 __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
5185 __ B(eq, slow_path->GetEntryLabel());
5186 } else {
5187 DCHECK(value.IsConstant()) << value;
5188 if (Int64ConstantFrom(value) == 0) {
5189 __ B(slow_path->GetEntryLabel());
5190 }
5191 }
5192 break;
5193 }
5194 default:
5195 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
5196 }
5197 }
5198
5199 void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HRor* ror) {
5200 LocationSummary* locations = ror->GetLocations();
5201 vixl32::Register in = InputRegisterAt(ror, 0);
5202 Location rhs = locations->InAt(1);
5203 vixl32::Register out = OutputRegister(ror);
5204
5205 if (rhs.IsConstant()) {
5206 // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
5207 // so map all rotations to a +ve. equivalent in that range.
5208 // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
5209 uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
5210 if (rot) {
5211 // Rotate, mapping left rotations to right equivalents if necessary.
5212 // (e.g. left by 2 bits == right by 30.)
5213 __ Ror(out, in, rot);
5214 } else if (!out.Is(in)) {
5215 __ Mov(out, in);
5216 }
5217 } else {
5218 __ Ror(out, in, RegisterFrom(rhs));
5219 }
5220 }
5221
5222 // Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
5223 // rotates by swapping input regs (effectively rotating by the first 32-bits of
5224 // a larger rotation) or flipping direction (thus treating larger right/left
5225 // rotations as sub-word sized rotations in the other direction) as appropriate.
5226 void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
5227 LocationSummary* locations = ror->GetLocations();
5228 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
5229 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
5230 Location rhs = locations->InAt(1);
5231 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
5232 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
5233
5234 if (rhs.IsConstant()) {
5235 uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
5236 // Map all rotations to +ve. equivalents on the interval [0,63].
5237 rot &= kMaxLongShiftDistance;
5238 // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
5239 // logic below to a simple pair of binary orr.
5240 // (e.g. 34 bits == in_reg swap + 2 bits right.)
5241 if (rot >= kArmBitsPerWord) {
5242 rot -= kArmBitsPerWord;
5243 std::swap(in_reg_hi, in_reg_lo);
5244 }
5245 // Rotate, or mov to out for zero or word size rotations.
5246 if (rot != 0u) {
5247 __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot));
5248 __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
5249 __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot));
5250 __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
5251 } else {
5252 __ Mov(out_reg_lo, in_reg_lo);
5253 __ Mov(out_reg_hi, in_reg_hi);
5254 }
5255 } else {
5256 vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
5257 vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
5258 vixl32::Label end;
5259 vixl32::Label shift_by_32_plus_shift_right;
5260 vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end);
5261
5262 __ And(shift_right, RegisterFrom(rhs), 0x1F);
5263 __ Lsrs(shift_left, RegisterFrom(rhs), 6);
5264 __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
5265 __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false);
5266
5267 // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
5268 // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
5269 __ Lsl(out_reg_hi, in_reg_hi, shift_left);
5270 __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5271 __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5272 __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5273 __ Lsr(shift_left, in_reg_hi, shift_right);
5274 __ Add(out_reg_lo, out_reg_lo, shift_left);
5275 __ B(final_label);
5276
5277 __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
5278 // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
5279 // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
5280 __ Lsr(out_reg_hi, in_reg_hi, shift_right);
5281 __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5282 __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5283 __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5284 __ Lsl(shift_right, in_reg_hi, shift_left);
5285 __ Add(out_reg_lo, out_reg_lo, shift_right);
5286
5287 if (end.IsReferenced()) {
5288 __ Bind(&end);
5289 }
5290 }
5291 }
5292
5293 void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
5294 LocationSummary* locations =
5295 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
5296 HInstruction* shift = ror->InputAt(1);
5297 switch (ror->GetResultType()) {
5298 case DataType::Type::kInt32: {
5299 locations->SetInAt(0, Location::RequiresRegister());
5300 locations->SetInAt(1, Location::RegisterOrConstant(shift));
5301 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5302 break;
5303 }
5304 case DataType::Type::kInt64: {
5305 locations->SetInAt(0, Location::RequiresRegister());
5306 if (shift->IsConstant()) {
5307 locations->SetInAt(1, Location::ConstantLocation(shift));
5308 } else {
5309 locations->SetInAt(1, Location::RequiresRegister());
5310 locations->AddTemp(Location::RequiresRegister());
5311 locations->AddTemp(Location::RequiresRegister());
5312 }
5313 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5314 break;
5315 }
5316 default:
5317 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
5318 }
5319 }
5320
5321 void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
5322 DataType::Type type = ror->GetResultType();
5323 switch (type) {
5324 case DataType::Type::kInt32: {
5325 HandleIntegerRotate(ror);
5326 break;
5327 }
5328 case DataType::Type::kInt64: {
5329 HandleLongRotate(ror);
5330 break;
5331 }
5332 default:
5333 LOG(FATAL) << "Unexpected operation type " << type;
5334 UNREACHABLE();
5335 }
5336 }
5337
5338 void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
5339 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5340
5341 LocationSummary* locations =
5342 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
5343
5344 HInstruction* shift = op->InputAt(1);
5345 switch (op->GetResultType()) {
5346 case DataType::Type::kInt32: {
5347 locations->SetInAt(0, Location::RequiresRegister());
5348 if (shift->IsConstant()) {
5349 locations->SetInAt(1, Location::ConstantLocation(shift));
5350 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5351 } else {
5352 locations->SetInAt(1, Location::RequiresRegister());
5353 // Make the output overlap, as it will be used to hold the masked
5354 // second input.
5355 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5356 }
5357 break;
5358 }
5359 case DataType::Type::kInt64: {
5360 locations->SetInAt(0, Location::RequiresRegister());
5361 if (shift->IsConstant()) {
5362 locations->SetInAt(1, Location::ConstantLocation(shift));
5363 // For simplicity, use kOutputOverlap even though we only require that low registers
5364 // don't clash with high registers, which the register allocator currently guarantees.
5365 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5366 } else {
5367 locations->SetInAt(1, Location::RequiresRegister());
5368 locations->AddTemp(Location::RequiresRegister());
5369 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5370 }
5371 break;
5372 }
5373 default:
5374 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5375 }
5376 }
5377
5378 void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
5379 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5380
5381 LocationSummary* locations = op->GetLocations();
5382 Location out = locations->Out();
5383 Location first = locations->InAt(0);
5384 Location second = locations->InAt(1);
5385
5386 DataType::Type type = op->GetResultType();
5387 switch (type) {
5388 case DataType::Type::kInt32: {
5389 vixl32::Register out_reg = OutputRegister(op);
5390 vixl32::Register first_reg = InputRegisterAt(op, 0);
5391 if (second.IsRegister()) {
5392 vixl32::Register second_reg = RegisterFrom(second);
5393 // ARM doesn't mask the shift count so we need to do it ourselves.
5394 __ And(out_reg, second_reg, kMaxIntShiftDistance);
5395 if (op->IsShl()) {
5396 __ Lsl(out_reg, first_reg, out_reg);
5397 } else if (op->IsShr()) {
5398 __ Asr(out_reg, first_reg, out_reg);
5399 } else {
5400 __ Lsr(out_reg, first_reg, out_reg);
5401 }
5402 } else {
5403 int32_t cst = Int32ConstantFrom(second);
5404 uint32_t shift_value = cst & kMaxIntShiftDistance;
5405 if (shift_value == 0) { // ARM does not support shifting with 0 immediate.
5406 __ Mov(out_reg, first_reg);
5407 } else if (op->IsShl()) {
5408 __ Lsl(out_reg, first_reg, shift_value);
5409 } else if (op->IsShr()) {
5410 __ Asr(out_reg, first_reg, shift_value);
5411 } else {
5412 __ Lsr(out_reg, first_reg, shift_value);
5413 }
5414 }
5415 break;
5416 }
5417 case DataType::Type::kInt64: {
5418 vixl32::Register o_h = HighRegisterFrom(out);
5419 vixl32::Register o_l = LowRegisterFrom(out);
5420
5421 vixl32::Register high = HighRegisterFrom(first);
5422 vixl32::Register low = LowRegisterFrom(first);
5423
5424 if (second.IsRegister()) {
5425 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5426
5427 vixl32::Register second_reg = RegisterFrom(second);
5428
5429 if (op->IsShl()) {
5430 __ And(o_l, second_reg, kMaxLongShiftDistance);
5431 // Shift the high part
5432 __ Lsl(o_h, high, o_l);
5433 // Shift the low part and `or` what overflowed into the high part
5434 __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord));
5435 __ Lsr(temp, low, temp);
5436 __ Orr(o_h, o_h, temp);
5437 // If the shift is > 32 bits, override the high part
5438 __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
5439 {
5440 ExactAssemblyScope guard(GetVIXLAssembler(),
5441 2 * vixl32::kMaxInstructionSizeInBytes,
5442 CodeBufferCheckScope::kMaximumSize);
5443 __ it(pl);
5444 __ lsl(pl, o_h, low, temp);
5445 }
5446 // Shift the low part
5447 __ Lsl(o_l, low, o_l);
5448 } else if (op->IsShr()) {
5449 __ And(o_h, second_reg, kMaxLongShiftDistance);
5450 // Shift the low part
5451 __ Lsr(o_l, low, o_h);
5452 // Shift the high part and `or` what underflowed into the low part
5453 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5454 __ Lsl(temp, high, temp);
5455 __ Orr(o_l, o_l, temp);
5456 // If the shift is > 32 bits, override the low part
5457 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5458 {
5459 ExactAssemblyScope guard(GetVIXLAssembler(),
5460 2 * vixl32::kMaxInstructionSizeInBytes,
5461 CodeBufferCheckScope::kMaximumSize);
5462 __ it(pl);
5463 __ asr(pl, o_l, high, temp);
5464 }
5465 // Shift the high part
5466 __ Asr(o_h, high, o_h);
5467 } else {
5468 __ And(o_h, second_reg, kMaxLongShiftDistance);
5469 // same as Shr except we use `Lsr`s and not `Asr`s
5470 __ Lsr(o_l, low, o_h);
5471 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5472 __ Lsl(temp, high, temp);
5473 __ Orr(o_l, o_l, temp);
5474 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5475 {
5476 ExactAssemblyScope guard(GetVIXLAssembler(),
5477 2 * vixl32::kMaxInstructionSizeInBytes,
5478 CodeBufferCheckScope::kMaximumSize);
5479 __ it(pl);
5480 __ lsr(pl, o_l, high, temp);
5481 }
5482 __ Lsr(o_h, high, o_h);
5483 }
5484 } else {
5485 // Register allocator doesn't create partial overlap.
5486 DCHECK(!o_l.Is(high));
5487 DCHECK(!o_h.Is(low));
5488 int32_t cst = Int32ConstantFrom(second);
5489 uint32_t shift_value = cst & kMaxLongShiftDistance;
5490 if (shift_value > 32) {
5491 if (op->IsShl()) {
5492 __ Lsl(o_h, low, shift_value - 32);
5493 __ Mov(o_l, 0);
5494 } else if (op->IsShr()) {
5495 __ Asr(o_l, high, shift_value - 32);
5496 __ Asr(o_h, high, 31);
5497 } else {
5498 __ Lsr(o_l, high, shift_value - 32);
5499 __ Mov(o_h, 0);
5500 }
5501 } else if (shift_value == 32) {
5502 if (op->IsShl()) {
5503 __ Mov(o_h, low);
5504 __ Mov(o_l, 0);
5505 } else if (op->IsShr()) {
5506 __ Mov(o_l, high);
5507 __ Asr(o_h, high, 31);
5508 } else {
5509 __ Mov(o_l, high);
5510 __ Mov(o_h, 0);
5511 }
5512 } else if (shift_value == 1) {
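          // A one-bit shift of the 64-bit value goes through the carry flag: Lsls/Asrs/Lsrs
          // set C to the bit shifted out and Adc/Rrx shift it into the other half.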
5513 if (op->IsShl()) {
5514 __ Lsls(o_l, low, 1);
5515 __ Adc(o_h, high, high);
5516 } else if (op->IsShr()) {
5517 __ Asrs(o_h, high, 1);
5518 __ Rrx(o_l, low);
5519 } else {
5520 __ Lsrs(o_h, high, 1);
5521 __ Rrx(o_l, low);
5522 }
5523 } else if (shift_value == 0) {
5524 __ Mov(o_l, low);
5525 __ Mov(o_h, high);
5526 } else {
5527 DCHECK(0 < shift_value && shift_value < 32) << shift_value;
5528 if (op->IsShl()) {
5529 __ Lsl(o_h, high, shift_value);
5530 __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
5531 __ Lsl(o_l, low, shift_value);
5532 } else if (op->IsShr()) {
5533 __ Lsr(o_l, low, shift_value);
5534 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5535 __ Asr(o_h, high, shift_value);
5536 } else {
5537 __ Lsr(o_l, low, shift_value);
5538 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5539 __ Lsr(o_h, high, shift_value);
5540 }
5541 }
5542 }
5543 break;
5544 }
5545 default:
5546 LOG(FATAL) << "Unexpected operation type " << type;
5547 UNREACHABLE();
5548 }
5549 }
5550
5551 void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
5552 HandleShift(shl);
5553 }
5554
5555 void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
5556 HandleShift(shl);
5557 }
5558
5559 void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
5560 HandleShift(shr);
5561 }
5562
5563 void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
5564 HandleShift(shr);
5565 }
5566
5567 void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
5568 HandleShift(ushr);
5569 }
5570
5571 void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
5572 HandleShift(ushr);
5573 }
5574
5575 void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5576 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5577 instruction, LocationSummary::kCallOnMainOnly);
5578 InvokeRuntimeCallingConventionARMVIXL calling_convention;
5579 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5580 locations->SetOut(LocationFrom(r0));
5581 }
5582
5583 void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5584 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5585 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5586 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12);
5587 }
5588
5589 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
5590 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5591 instruction, LocationSummary::kCallOnMainOnly);
5592 InvokeRuntimeCallingConventionARMVIXL calling_convention;
5593 locations->SetOut(LocationFrom(r0));
5594 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5595 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5596 }
5597
5598 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
5599 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5600 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5601 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5602 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5603 DCHECK(!codegen_->IsLeafMethod());
5604 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13);
5605 }
5606
5607 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
5608 LocationSummary* locations =
5609 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5610 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5611 if (location.IsStackSlot()) {
5612 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5613 } else if (location.IsDoubleStackSlot()) {
5614 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5615 }
5616 locations->SetOut(location);
5617 }
5618
5619 void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
5620 HParameterValue* instruction ATTRIBUTE_UNUSED) {
5621 // Nothing to do, the parameter is already at its location.
5622 }
5623
5624 void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
5625 LocationSummary* locations =
5626 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5627 locations->SetOut(LocationFrom(kMethodRegister));
5628 }
5629
5630 void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
5631 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5632 // Nothing to do, the method is already at its location.
5633 }
5634
5635 void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
5636 LocationSummary* locations =
5637 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5638 locations->SetInAt(0, Location::RequiresRegister());
5639 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5640 }
5641
5642 void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
5643 LocationSummary* locations = not_->GetLocations();
5644 Location out = locations->Out();
5645 Location in = locations->InAt(0);
5646 switch (not_->GetResultType()) {
5647 case DataType::Type::kInt32:
5648 __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
5649 break;
5650
5651 case DataType::Type::kInt64:
5652 __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
5653 __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
5654 break;
5655
5656 default:
5657 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5658 }
5659 }
5660
5661 void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5662 LocationSummary* locations =
5663 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5664 locations->SetInAt(0, Location::RequiresRegister());
5665 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5666 }
5667
5668 void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5669 __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);
5670 }
5671
5672 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
5673 LocationSummary* locations =
5674 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5675 switch (compare->InputAt(0)->GetType()) {
5676 case DataType::Type::kBool:
5677 case DataType::Type::kUint8:
5678 case DataType::Type::kInt8:
5679 case DataType::Type::kUint16:
5680 case DataType::Type::kInt16:
5681 case DataType::Type::kInt32:
5682 case DataType::Type::kInt64: {
5683 locations->SetInAt(0, Location::RequiresRegister());
5684 locations->SetInAt(1, Location::RequiresRegister());
5685 // Output overlaps because it is written before doing the low comparison.
5686 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5687 break;
5688 }
5689 case DataType::Type::kFloat32:
5690 case DataType::Type::kFloat64: {
5691 locations->SetInAt(0, Location::RequiresFpuRegister());
5692 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
5693 locations->SetOut(Location::RequiresRegister());
5694 break;
5695 }
5696 default:
5697 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5698 }
5699 }
5700
5701 void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
5702 LocationSummary* locations = compare->GetLocations();
5703 vixl32::Register out = OutputRegister(compare);
5704 Location left = locations->InAt(0);
5705 Location right = locations->InAt(1);
5706
5707 vixl32::Label less, greater, done;
5708 vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
5709 DataType::Type type = compare->InputAt(0)->GetType();
5710 vixl32::Condition less_cond = vixl32::Condition::None();
5711 switch (type) {
5712 case DataType::Type::kBool:
5713 case DataType::Type::kUint8:
5714 case DataType::Type::kInt8:
5715 case DataType::Type::kUint16:
5716 case DataType::Type::kInt16:
5717 case DataType::Type::kInt32: {
5718 // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
5719 __ Mov(out, 0);
5720 __ Cmp(RegisterFrom(left), RegisterFrom(right)); // Signed compare.
5721 less_cond = lt;
5722 break;
5723 }
5724 case DataType::Type::kInt64: {
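      // Compare the high words first (signed); only when they are equal does the
      // unsigned comparison of the low words decide the result.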
5725 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare.
5726 __ B(lt, &less, /* is_far_target= */ false);
5727 __ B(gt, &greater, /* is_far_target= */ false);
5728 // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
5729 __ Mov(out, 0);
5730 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare.
5731 less_cond = lo;
5732 break;
5733 }
5734 case DataType::Type::kFloat32:
5735 case DataType::Type::kFloat64: {
5736 __ Mov(out, 0);
5737 GenerateVcmp(compare, codegen_);
5738 // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
5739 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5740 less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
5741 break;
5742 }
5743 default:
5744 LOG(FATAL) << "Unexpected compare type " << type;
5745 UNREACHABLE();
5746 }
5747
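  // Shared epilogue: `out` holds 0 and the flags reflect the last comparison.
  // Equal keeps 0, `less_cond` selects -1, and anything else falls through to 1.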
5748 __ B(eq, final_label, /* is_far_target= */ false);
5749 __ B(less_cond, &less, /* is_far_target= */ false);
5750
5751 __ Bind(&greater);
5752 __ Mov(out, 1);
5753 __ B(final_label);
5754
5755 __ Bind(&less);
5756 __ Mov(out, -1);
5757
5758 if (done.IsReferenced()) {
5759 __ Bind(&done);
5760 }
5761 }
5762
5763 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
5764 LocationSummary* locations =
5765 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5766 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5767 locations->SetInAt(i, Location::Any());
5768 }
5769 locations->SetOut(Location::Any());
5770 }
5771
5772 void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5773 LOG(FATAL) << "Unreachable";
5774 }
5775
5776 void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
5777 // TODO (ported from quick): revisit ARM barrier kinds.
5778 DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings.
5779 switch (kind) {
5780 case MemBarrierKind::kAnyStore:
5781 case MemBarrierKind::kLoadAny:
5782 case MemBarrierKind::kAnyAny: {
5783 flavor = DmbOptions::ISH;
5784 break;
5785 }
5786 case MemBarrierKind::kStoreStore: {
5787 flavor = DmbOptions::ISHST;
5788 break;
5789 }
5790 default:
5791 LOG(FATAL) << "Unexpected memory barrier " << kind;
5792 }
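  // ISH is a full barrier within the inner shareable domain; ISHST orders only stores
  // and is sufficient for kStoreStore.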
5793 __ Dmb(flavor);
5794 }
5795
5796 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
5797 uint32_t offset,
5798 vixl32::Register out_lo,
5799 vixl32::Register out_hi) {
5800 UseScratchRegisterScope temps(GetVIXLAssembler());
5801 if (offset != 0) {
5802 vixl32::Register temp = temps.Acquire();
5803 __ Add(temp, addr, offset);
5804 addr = temp;
5805 }
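  // A single LDREXD is enough for an atomic 64-bit load: the instruction reads the
  // doubleword single-copy atomically even without a matching STREXD.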
5806 __ Ldrexd(out_lo, out_hi, MemOperand(addr));
5807 }
5808
5809 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
5810 uint32_t offset,
5811 vixl32::Register value_lo,
5812 vixl32::Register value_hi,
5813 vixl32::Register temp1,
5814 vixl32::Register temp2,
5815 HInstruction* instruction) {
5816 UseScratchRegisterScope temps(GetVIXLAssembler());
5817 vixl32::Label fail;
5818 if (offset != 0) {
5819 vixl32::Register temp = temps.Acquire();
5820 __ Add(temp, addr, offset);
5821 addr = temp;
5822 }
5823 __ Bind(&fail);
5824 {
5825 // Ensure the pc position is recorded immediately after the `ldrexd` instruction.
5826 ExactAssemblyScope aas(GetVIXLAssembler(),
5827 vixl32::kMaxInstructionSizeInBytes,
5828 CodeBufferCheckScope::kMaximumSize);
5829 // We need a load followed by a store. (The address used in a STREX instruction must
5830 // be the same as the address in the most recently executed LDREX instruction.)
5831 __ ldrexd(temp1, temp2, MemOperand(addr));
5832 codegen_->MaybeRecordImplicitNullCheck(instruction);
5833 }
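  // STREXD writes 0 to `temp1` on success and a non-zero value if the exclusive monitor
  // was lost; in that case retry the whole LDREXD/STREXD sequence.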
5834 __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
5835 __ CompareAndBranchIfNonZero(temp1, &fail);
5836 }
5837
5838 void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction,
5839 const FieldInfo& field_info,
5840 WriteBarrierKind write_barrier_kind) {
5841 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5842
5843 LocationSummary* locations =
5844 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5845 locations->SetInAt(0, Location::RequiresRegister());
5846
5847 DataType::Type field_type = field_info.GetFieldType();
5848 if (DataType::IsFloatingPointType(field_type)) {
5849 locations->SetInAt(1, Location::RequiresFpuRegister());
5850 } else {
5851 locations->SetInAt(1, Location::RequiresRegister());
5852 }
5853
5854 bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64;
5855 bool generate_volatile = field_info.IsVolatile()
5856 && is_wide
5857 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5858 bool needs_write_barrier =
5859 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5860 // Temporary registers for the write barrier.
5861 // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
5862 if (needs_write_barrier) {
5863 if (write_barrier_kind != WriteBarrierKind::kDontEmit) {
5864 locations->AddTemp(Location::RequiresRegister());
5865 locations->AddTemp(Location::RequiresRegister());
5866 } else if (kPoisonHeapReferences) {
5867 locations->AddTemp(Location::RequiresRegister());
5868 }
5869 } else if (generate_volatile) {
5870 // The ARM encoding has some additional constraints for ldrexd/strexd:
5871 // - registers need to be consecutive
5872 // - the first register should be even but not R14.
5873 // We don't test for ARM yet, and the assertion makes sure that we
5874 // revisit this if we ever enable ARM encoding.
5875 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
5876
5877 locations->AddTemp(Location::RequiresRegister());
5878 locations->AddTemp(Location::RequiresRegister());
5879 if (field_type == DataType::Type::kFloat64) {
5880 // For doubles we need two more registers to copy the value.
5881 locations->AddTemp(LocationFrom(r2));
5882 locations->AddTemp(LocationFrom(r3));
5883 }
5884 }
5885 }
5886
5887 void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
5888 const FieldInfo& field_info,
5889 bool value_can_be_null,
5890 WriteBarrierKind write_barrier_kind) {
5891 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5892
5893 LocationSummary* locations = instruction->GetLocations();
5894 vixl32::Register base = InputRegisterAt(instruction, 0);
5895 Location value = locations->InAt(1);
5896 std::optional<vixl::aarch32::Label> pred_is_null;
5897
5898 bool is_predicated =
5899 instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
5900 bool is_volatile = field_info.IsVolatile();
5901 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5902 DataType::Type field_type = field_info.GetFieldType();
5903 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5904 bool needs_write_barrier =
5905 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5906
5907 if (is_predicated) {
5908 pred_is_null.emplace();
5909 __ CompareAndBranchIfZero(base, &*pred_is_null, /* is_far_target= */ false);
5910 }
5911
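  // A volatile store is bracketed by barriers: an any-store barrier before the store and a
  // full barrier after it (emitted near the end of this function).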
5912 if (is_volatile) {
5913 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5914 }
5915
5916 switch (field_type) {
5917 case DataType::Type::kBool:
5918 case DataType::Type::kUint8:
5919 case DataType::Type::kInt8:
5920 case DataType::Type::kUint16:
5921 case DataType::Type::kInt16:
5922 case DataType::Type::kInt32: {
5923 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5924 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5925 StoreOperandType operand_type = GetStoreOperandType(field_type);
5926 GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset);
5927 codegen_->MaybeRecordImplicitNullCheck(instruction);
5928 break;
5929 }
5930
5931 case DataType::Type::kReference: {
5932 vixl32::Register value_reg = RegisterFrom(value);
5933 if (kPoisonHeapReferences && needs_write_barrier) {
5934 // Note that in the case where `value` is a null reference,
5935 // we do not enter this block, as a null reference does not
5936 // need poisoning.
5937 DCHECK_EQ(field_type, DataType::Type::kReference);
5938 value_reg = RegisterFrom(locations->GetTemp(0));
5939 __ Mov(value_reg, RegisterFrom(value));
5940 GetAssembler()->PoisonHeapReference(value_reg);
5941 }
5942 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5943 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5944 GetAssembler()->StoreToOffset(kStoreWord, value_reg, base, offset);
5945 codegen_->MaybeRecordImplicitNullCheck(instruction);
5946 break;
5947 }
5948
5949 case DataType::Type::kInt64: {
5950 if (is_volatile && !atomic_ldrd_strd) {
5951 GenerateWideAtomicStore(base,
5952 offset,
5953 LowRegisterFrom(value),
5954 HighRegisterFrom(value),
5955 RegisterFrom(locations->GetTemp(0)),
5956 RegisterFrom(locations->GetTemp(1)),
5957 instruction);
5958 } else {
5959 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5960 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5961 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
5962 codegen_->MaybeRecordImplicitNullCheck(instruction);
5963 }
5964 break;
5965 }
5966
5967 case DataType::Type::kFloat32: {
5968 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5969 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5970 GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
5971 codegen_->MaybeRecordImplicitNullCheck(instruction);
5972 break;
5973 }
5974
5975 case DataType::Type::kFloat64: {
5976 vixl32::DRegister value_reg = DRegisterFrom(value);
5977 if (is_volatile && !atomic_ldrd_strd) {
5978 vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
5979 vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
5980
5981 __ Vmov(value_reg_lo, value_reg_hi, value_reg);
5982
5983 GenerateWideAtomicStore(base,
5984 offset,
5985 value_reg_lo,
5986 value_reg_hi,
5987 RegisterFrom(locations->GetTemp(2)),
5988 RegisterFrom(locations->GetTemp(3)),
5989 instruction);
5990 } else {
5991 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5992 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5993 GetAssembler()->StoreDToOffset(value_reg, base, offset);
5994 codegen_->MaybeRecordImplicitNullCheck(instruction);
5995 }
5996 break;
5997 }
5998
5999 case DataType::Type::kUint32:
6000 case DataType::Type::kUint64:
6001 case DataType::Type::kVoid:
6002 LOG(FATAL) << "Unreachable type " << field_type;
6003 UNREACHABLE();
6004 }
6005
6006 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) &&
6007 write_barrier_kind != WriteBarrierKind::kDontEmit) {
6008 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6009 vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6010 codegen_->MarkGCCard(
6011 temp,
6012 card,
6013 base,
6014 RegisterFrom(value),
6015 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
6016 }
6017
6018 if (is_volatile) {
6019 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6020 }
6021
6022 if (is_predicated) {
6023 __ Bind(&*pred_is_null);
6024 }
6025 }
6026
6027 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
6028 const FieldInfo& field_info) {
6029 DCHECK(instruction->IsInstanceFieldGet() ||
6030 instruction->IsStaticFieldGet() ||
6031 instruction->IsPredicatedInstanceFieldGet());
6032
6033 bool object_field_get_with_read_barrier =
6034 gUseReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference);
6035 bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
6036 LocationSummary* locations =
6037 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6038 object_field_get_with_read_barrier
6039 ? LocationSummary::kCallOnSlowPath
6040 : LocationSummary::kNoCall);
6041 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6042 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6043 }
6044 // Input for object receiver.
6045 locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
6046
6047 bool volatile_for_double = field_info.IsVolatile()
6048 && (field_info.GetFieldType() == DataType::Type::kFloat64)
6049 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6050 // The output overlaps in case of volatile long: we don't want the
6051 // code generated by GenerateWideAtomicLoad to overwrite the
6052 // object's location. Likewise, in the case of an object field get
6053 // with read barriers enabled, we do not want the load to overwrite
6054 // the object's location, as we need it to emit the read barrier.
6055 bool overlap =
6056 (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) ||
6057 object_field_get_with_read_barrier;
6058
6059 if (DataType::IsFloatingPointType(instruction->GetType())) {
6060 if (is_predicated) {
6061 locations->SetInAt(0, Location::RequiresFpuRegister());
6062 locations->SetOut(Location::SameAsFirstInput());
6063 } else {
6064 locations->SetOut(Location::RequiresFpuRegister());
6065 }
6066 } else {
6067 if (is_predicated) {
6068 locations->SetInAt(0, Location::RequiresRegister());
6069 locations->SetOut(Location::SameAsFirstInput());
6070 } else {
6071 locations->SetOut(Location::RequiresRegister(),
6072 (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
6073 }
6074 }
6075 if (volatile_for_double) {
6076 // The ARM encoding has some additional constraints for ldrexd/strexd:
6077 // - registers need to be consecutive
6078 // - the first register should be even but not R14.
6079 // We don't test for ARM yet, and the assertion makes sure that we
6080 // revisit this if we ever enable ARM encoding.
6081 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6082 locations->AddTemp(Location::RequiresRegister());
6083 locations->AddTemp(Location::RequiresRegister());
6084 } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6085 // We need a temporary register for the read barrier load in
6086 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6087 // only if the offset is too big.
6088 if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
6089 locations->AddTemp(Location::RequiresRegister());
6090 }
6091 }
6092 }
6093
6094 Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
6095 DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
6096 if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
6097 (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
6098 return Location::ConstantLocation(input);
6099 } else {
6100 return Location::RequiresFpuRegister();
6101 }
6102 }
6103
6104 Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
6105 Opcode opcode) {
6106 DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
6107 if (constant->IsConstant() &&
6108 CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
6109 return Location::ConstantLocation(constant);
6110 }
6111 return Location::RequiresRegister();
6112 }
6113
6114 static bool CanEncode32BitConstantAsImmediate(
6115 CodeGeneratorARMVIXL* codegen,
6116 uint32_t value,
6117 Opcode opcode,
6118 vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) {
6119 ArmVIXLAssembler* assembler = codegen->GetAssembler();
6120 if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) {
6121 return true;
6122 }
6123 Opcode neg_opcode = kNoOperand;
6124 uint32_t neg_value = 0;
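  // If the constant itself is not encodable, try the complementary instruction: AND/BIC,
  // ORR/ORN and MOV/MVN take the bitwise complement, ADD/SUB take the negation, and
  // ADC/SBC take the complement because ADC rd, rn, imm computes the same result as
  // SBC rd, rn, ~imm under the carry semantics.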
6125 switch (opcode) {
6126 case AND: neg_opcode = BIC; neg_value = ~value; break;
6127 case ORR: neg_opcode = ORN; neg_value = ~value; break;
6128 case ADD: neg_opcode = SUB; neg_value = -value; break;
6129 case ADC: neg_opcode = SBC; neg_value = ~value; break;
6130 case SUB: neg_opcode = ADD; neg_value = -value; break;
6131 case SBC: neg_opcode = ADC; neg_value = ~value; break;
6132 case MOV: neg_opcode = MVN; neg_value = ~value; break;
6133 default:
6134 return false;
6135 }
6136
6137 if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) {
6138 return true;
6139 }
6140
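  // Last resort: an AND with a contiguous low-bit mask (value + 1 is a power of two) can be
  // emitted without an immediate encoding, e.g. as a bitfield extract.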
6141 return opcode == AND && IsPowerOfTwo(value + 1);
6142 }
6143
6144 bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) {
6145 uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
6146 if (DataType::Is64BitType(input_cst->GetType())) {
6147 Opcode high_opcode = opcode;
6148 vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare;
6149 switch (opcode) {
6150 case SUB:
6151 // Flip the operation to an ADD.
6152 value = -value;
6153 opcode = ADD;
6154 FALLTHROUGH_INTENDED;
6155 case ADD:
6156 if (Low32Bits(value) == 0u) {
6157 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode);
6158 }
6159 high_opcode = ADC;
6160 low_flags_update = vixl32::FlagsUpdate::SetFlags;
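        // The low half must set the flags so that the ADC emitted for the high half
        // consumes its carry.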
6161 break;
6162 default:
6163 break;
6164 }
6165 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) &&
6166 CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update);
6167 } else {
6168 return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode);
6169 }
6170 }
6171
6172 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
6173 const FieldInfo& field_info) {
6174 DCHECK(instruction->IsInstanceFieldGet() ||
6175 instruction->IsStaticFieldGet() ||
6176 instruction->IsPredicatedInstanceFieldGet());
6177
6178 LocationSummary* locations = instruction->GetLocations();
6179 uint32_t receiver_input = instruction->IsPredicatedInstanceFieldGet() ? 1 : 0;
6180 vixl32::Register base = InputRegisterAt(instruction, receiver_input);
6181 Location out = locations->Out();
6182 bool is_volatile = field_info.IsVolatile();
6183 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6184 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6185 DataType::Type load_type = instruction->GetType();
6186 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6187
6188 switch (load_type) {
6189 case DataType::Type::kBool:
6190 case DataType::Type::kUint8:
6191 case DataType::Type::kInt8:
6192 case DataType::Type::kUint16:
6193 case DataType::Type::kInt16:
6194 case DataType::Type::kInt32: {
6195 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6196 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6197 LoadOperandType operand_type = GetLoadOperandType(load_type);
6198 GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
6199 codegen_->MaybeRecordImplicitNullCheck(instruction);
6200 break;
6201 }
6202
6203 case DataType::Type::kReference: {
6204 // /* HeapReference<Object> */ out = *(base + offset)
6205 if (gUseReadBarrier && kUseBakerReadBarrier) {
6206 Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6207 // Note that a potential implicit null check is handled in this
6208 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
6209 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6210 instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true);
6211 if (is_volatile) {
6212 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6213 }
6214 } else {
6215 {
6216 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6217 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6218 GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
6219 codegen_->MaybeRecordImplicitNullCheck(instruction);
6220 }
6221 if (is_volatile) {
6222 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6223 }
6224 // If read barriers are enabled, emit read barriers other than
6225 // Baker's using a slow path (and also unpoison the loaded
6226 // reference, if heap poisoning is enabled).
6227 codegen_->MaybeGenerateReadBarrierSlow(
6228 instruction, out, out, locations->InAt(receiver_input), offset);
6229 }
6230 break;
6231 }
6232
6233 case DataType::Type::kInt64: {
6234 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6235 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6236 if (is_volatile && !atomic_ldrd_strd) {
6237 GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
6238 } else {
6239 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
6240 }
6241 codegen_->MaybeRecordImplicitNullCheck(instruction);
6242 break;
6243 }
6244
6245 case DataType::Type::kFloat32: {
6246 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6247 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6248 GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
6249 codegen_->MaybeRecordImplicitNullCheck(instruction);
6250 break;
6251 }
6252
6253 case DataType::Type::kFloat64: {
6254 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6255 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6256 vixl32::DRegister out_dreg = DRegisterFrom(out);
6257 if (is_volatile && !atomic_ldrd_strd) {
6258 vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
6259 vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
6260 GenerateWideAtomicLoad(base, offset, lo, hi);
6261 codegen_->MaybeRecordImplicitNullCheck(instruction);
6262 __ Vmov(out_dreg, lo, hi);
6263 } else {
6264 GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
6265 codegen_->MaybeRecordImplicitNullCheck(instruction);
6266 }
6267 break;
6268 }
6269
6270 case DataType::Type::kUint32:
6271 case DataType::Type::kUint64:
6272 case DataType::Type::kVoid:
6273 LOG(FATAL) << "Unreachable type " << load_type;
6274 UNREACHABLE();
6275 }
6276
6277 if (is_volatile) {
6278 if (load_type == DataType::Type::kReference) {
6279 // Memory barriers, in the case of references, are also handled
6280 // in the previous switch statement.
6281 } else {
6282 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6283 }
6284 }
6285 }
6286
6287 void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6288 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6289 }
6290
6291 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6292 HandleFieldSet(instruction,
6293 instruction->GetFieldInfo(),
6294 instruction->GetValueCanBeNull(),
6295 instruction->GetWriteBarrierKind());
6296 }
6297
6298 void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6299 HandleFieldGet(instruction, instruction->GetFieldInfo());
6300 }
6301
6302 void LocationsBuilderARMVIXL::VisitPredicatedInstanceFieldGet(
6303 HPredicatedInstanceFieldGet* instruction) {
6304 HandleFieldGet(instruction, instruction->GetFieldInfo());
6305 }
6306
6307 void InstructionCodeGeneratorARMVIXL::VisitPredicatedInstanceFieldGet(
6308 HPredicatedInstanceFieldGet* instruction) {
6309 vixl::aarch32::Label finish;
6310 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 1), &finish, false);
6311 HandleFieldGet(instruction, instruction->GetFieldInfo());
6312 __ Bind(&finish);
6313 }
6314
6315 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6316 HandleFieldGet(instruction, instruction->GetFieldInfo());
6317 }
6318
6319 void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6320 HandleFieldGet(instruction, instruction->GetFieldInfo());
6321 }
6322
6323 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6324 HandleFieldGet(instruction, instruction->GetFieldInfo());
6325 }
6326
6327 void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6328 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6329 }
6330
6331 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6332 HandleFieldSet(instruction,
6333 instruction->GetFieldInfo(),
6334 instruction->GetValueCanBeNull(),
6335 instruction->GetWriteBarrierKind());
6336 }
6337
6338 void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6339 codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0));
6340 }
6341
6342 void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6343 __ Mov(r0, instruction->GetFormat()->GetValue());
6344 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6345 }
6346
6347 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
6348 HUnresolvedInstanceFieldGet* instruction) {
6349 FieldAccessCallingConventionARMVIXL calling_convention;
6350 codegen_->CreateUnresolvedFieldLocationSummary(
6351 instruction, instruction->GetFieldType(), calling_convention);
6352 }
6353
6354 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
6355 HUnresolvedInstanceFieldGet* instruction) {
6356 FieldAccessCallingConventionARMVIXL calling_convention;
6357 codegen_->GenerateUnresolvedFieldAccess(instruction,
6358 instruction->GetFieldType(),
6359 instruction->GetFieldIndex(),
6360 instruction->GetDexPc(),
6361 calling_convention);
6362 }
6363
6364 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
6365 HUnresolvedInstanceFieldSet* instruction) {
6366 FieldAccessCallingConventionARMVIXL calling_convention;
6367 codegen_->CreateUnresolvedFieldLocationSummary(
6368 instruction, instruction->GetFieldType(), calling_convention);
6369 }
6370
6371 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
6372 HUnresolvedInstanceFieldSet* instruction) {
6373 FieldAccessCallingConventionARMVIXL calling_convention;
6374 codegen_->GenerateUnresolvedFieldAccess(instruction,
6375 instruction->GetFieldType(),
6376 instruction->GetFieldIndex(),
6377 instruction->GetDexPc(),
6378 calling_convention);
6379 }
6380
6381 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
6382 HUnresolvedStaticFieldGet* instruction) {
6383 FieldAccessCallingConventionARMVIXL calling_convention;
6384 codegen_->CreateUnresolvedFieldLocationSummary(
6385 instruction, instruction->GetFieldType(), calling_convention);
6386 }
6387
6388 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
6389 HUnresolvedStaticFieldGet* instruction) {
6390 FieldAccessCallingConventionARMVIXL calling_convention;
6391 codegen_->GenerateUnresolvedFieldAccess(instruction,
6392 instruction->GetFieldType(),
6393 instruction->GetFieldIndex(),
6394 instruction->GetDexPc(),
6395 calling_convention);
6396 }
6397
6398 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
6399 HUnresolvedStaticFieldSet* instruction) {
6400 FieldAccessCallingConventionARMVIXL calling_convention;
6401 codegen_->CreateUnresolvedFieldLocationSummary(
6402 instruction, instruction->GetFieldType(), calling_convention);
6403 }
6404
6405 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
6406 HUnresolvedStaticFieldSet* instruction) {
6407 FieldAccessCallingConventionARMVIXL calling_convention;
6408 codegen_->GenerateUnresolvedFieldAccess(instruction,
6409 instruction->GetFieldType(),
6410 instruction->GetFieldIndex(),
6411 instruction->GetDexPc(),
6412 calling_convention);
6413 }
6414
6415 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6416 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6417 locations->SetInAt(0, Location::RequiresRegister());
6418 }
6419
6420 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
6421 if (CanMoveNullCheckToUser(instruction)) {
6422 return;
6423 }
6424
6425 UseScratchRegisterScope temps(GetVIXLAssembler());
6426 // Ensure the pc position is recorded immediately after the `ldr` instruction.
6427 ExactAssemblyScope aas(GetVIXLAssembler(),
6428 vixl32::kMaxInstructionSizeInBytes,
6429 CodeBufferCheckScope::kMaximumSize);
6430 __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
6431 RecordPcInfo(instruction, instruction->GetDexPc());
6432 }
6433
6434 void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
6435 NullCheckSlowPathARMVIXL* slow_path =
6436 new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction);
6437 AddSlowPath(slow_path);
6438 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
6439 }
6440
6441 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6442 codegen_->GenerateNullCheck(instruction);
6443 }
6444
6445 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type,
6446 Location out_loc,
6447 vixl32::Register base,
6448 vixl32::Register reg_index,
6449 vixl32::Condition cond) {
6450 uint32_t shift_count = DataType::SizeShift(type);
6451 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6452
6453 switch (type) {
6454 case DataType::Type::kBool:
6455 case DataType::Type::kUint8:
6456 __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
6457 break;
6458 case DataType::Type::kInt8:
6459 __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
6460 break;
6461 case DataType::Type::kUint16:
6462 __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
6463 break;
6464 case DataType::Type::kInt16:
6465 __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
6466 break;
6467 case DataType::Type::kReference:
6468 case DataType::Type::kInt32:
6469 __ Ldr(cond, RegisterFrom(out_loc), mem_address);
6470 break;
6471 // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
6472 case DataType::Type::kInt64:
6473 case DataType::Type::kFloat32:
6474 case DataType::Type::kFloat64:
6475 default:
6476 LOG(FATAL) << "Unreachable type " << type;
6477 UNREACHABLE();
6478 }
6479 }
6480
6481 void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
6482 Location loc,
6483 vixl32::Register base,
6484 vixl32::Register reg_index,
6485 vixl32::Condition cond) {
6486 uint32_t shift_count = DataType::SizeShift(type);
6487 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6488
6489 switch (type) {
6490 case DataType::Type::kBool:
6491 case DataType::Type::kUint8:
6492 case DataType::Type::kInt8:
6493 __ Strb(cond, RegisterFrom(loc), mem_address);
6494 break;
6495 case DataType::Type::kUint16:
6496 case DataType::Type::kInt16:
6497 __ Strh(cond, RegisterFrom(loc), mem_address);
6498 break;
6499 case DataType::Type::kReference:
6500 case DataType::Type::kInt32:
6501 __ Str(cond, RegisterFrom(loc), mem_address);
6502 break;
6503 // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
6504 case DataType::Type::kInt64:
6505 case DataType::Type::kFloat32:
6506 case DataType::Type::kFloat64:
6507 default:
6508 LOG(FATAL) << "Unreachable type " << type;
6509 UNREACHABLE();
6510 }
6511 }
6512
6513 void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6514 bool object_array_get_with_read_barrier =
6515 gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
6516 LocationSummary* locations =
6517 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6518 object_array_get_with_read_barrier
6519 ? LocationSummary::kCallOnSlowPath
6520 : LocationSummary::kNoCall);
6521 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6522 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6523 }
6524 locations->SetInAt(0, Location::RequiresRegister());
6525 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6526 if (DataType::IsFloatingPointType(instruction->GetType())) {
6527 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6528 } else {
6529 // The output overlaps in the case of an object array get with
6530 // read barriers enabled: we do not want the move to overwrite the
6531 // array's location, as we need it to emit the read barrier.
6532 locations->SetOut(
6533 Location::RequiresRegister(),
6534 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
6535 }
6536 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6537 if (instruction->GetIndex()->IsConstant()) {
6538 // Array loads with constant index are treated as field loads.
6539 // We need a temporary register for the read barrier load in
6540 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6541 // only if the offset is too big.
6542 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
6543 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
6544 offset += index << DataType::SizeShift(DataType::Type::kReference);
6545 if (offset >= kReferenceLoadMinFarOffset) {
6546 locations->AddTemp(Location::RequiresRegister());
6547 }
6548 } else {
6549 // We need a non-scratch temporary for the array data pointer in
6550 // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
6551 locations->AddTemp(Location::RequiresRegister());
6552 }
6553 } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6554 // Also need a temporary for String compression feature.
6555 locations->AddTemp(Location::RequiresRegister());
6556 }
6557 }
6558
6559 void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6560 LocationSummary* locations = instruction->GetLocations();
6561 Location obj_loc = locations->InAt(0);
6562 vixl32::Register obj = InputRegisterAt(instruction, 0);
6563 Location index = locations->InAt(1);
6564 Location out_loc = locations->Out();
6565 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6566 DataType::Type type = instruction->GetType();
6567 const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
6568 instruction->IsStringCharAt();
6569 HInstruction* array_instr = instruction->GetArray();
6570 bool has_intermediate_address = array_instr->IsIntermediateAddress();
6571
6572 switch (type) {
6573 case DataType::Type::kBool:
6574 case DataType::Type::kUint8:
6575 case DataType::Type::kInt8:
6576 case DataType::Type::kUint16:
6577 case DataType::Type::kInt16:
6578 case DataType::Type::kInt32: {
6579 vixl32::Register length;
6580 if (maybe_compressed_char_at) {
6581 length = RegisterFrom(locations->GetTemp(0));
6582 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6583 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6584 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6585 GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
6586 codegen_->MaybeRecordImplicitNullCheck(instruction);
6587 }
6588 if (index.IsConstant()) {
6589 int32_t const_index = Int32ConstantFrom(index);
6590 if (maybe_compressed_char_at) {
6591 vixl32::Label uncompressed_load, done;
6592 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6593 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
6594 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6595 "Expecting 0=compressed, 1=uncompressed");
6596 __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6597 GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
6598 RegisterFrom(out_loc),
6599 obj,
6600 data_offset + const_index);
6601 __ B(final_label);
6602 __ Bind(&uncompressed_load);
6603 GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16),
6604 RegisterFrom(out_loc),
6605 obj,
6606 data_offset + (const_index << 1));
6607 if (done.IsReferenced()) {
6608 __ Bind(&done);
6609 }
6610 } else {
6611 uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type));
6612
6613 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6614 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6615 LoadOperandType load_type = GetLoadOperandType(type);
6616 GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
6617 codegen_->MaybeRecordImplicitNullCheck(instruction);
6618 }
6619 } else {
6620 UseScratchRegisterScope temps(GetVIXLAssembler());
6621 vixl32::Register temp = temps.Acquire();
6622
6623 if (has_intermediate_address) {
6624 // We do not need to compute the intermediate address from the array: the
6625 // input instruction has done it already. See the comment in
6626 // `TryExtractArrayAccessAddress()`.
6627 if (kIsDebugBuild) {
6628 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6629 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6630 }
6631 temp = obj;
6632 } else {
6633 __ Add(temp, obj, data_offset);
6634 }
6635 if (maybe_compressed_char_at) {
6636 vixl32::Label uncompressed_load, done;
6637 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6638 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
6639 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6640 "Expecting 0=compressed, 1=uncompressed");
6641 __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6642 __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
6643 __ B(final_label);
6644 __ Bind(&uncompressed_load);
6645 __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
6646 if (done.IsReferenced()) {
6647 __ Bind(&done);
6648 }
6649 } else {
6650 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6651 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6652 codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6653 codegen_->MaybeRecordImplicitNullCheck(instruction);
6654 }
6655 }
6656 break;
6657 }
6658
6659 case DataType::Type::kReference: {
6660 // The read barrier instrumentation of object ArrayGet
6661 // instructions does not support the HIntermediateAddress
6662 // instruction.
6663 DCHECK(!(has_intermediate_address && gUseReadBarrier));
6664
6665 static_assert(
6666 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6667 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6668 // /* HeapReference<Object> */ out =
6669 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6670 if (gUseReadBarrier && kUseBakerReadBarrier) {
6671 // Note that a potential implicit null check is handled in this
6672 // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
6673 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
6674 if (index.IsConstant()) {
6675 // Array load with a constant index can be treated as a field load.
6676 Location maybe_temp =
6677 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6678 data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
6679 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6680 out_loc,
6681 obj,
6682 data_offset,
6683 maybe_temp,
6684 /* needs_null_check= */ false);
6685 } else {
6686 Location temp = locations->GetTemp(0);
6687 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6688 out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false);
6689 }
6690 } else {
6691 vixl32::Register out = OutputRegister(instruction);
6692 if (index.IsConstant()) {
6693 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6694 {
6695 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6696 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6697 GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
6698 codegen_->MaybeRecordImplicitNullCheck(instruction);
6699 }
6700 // If read barriers are enabled, emit read barriers other than
6701 // Baker's using a slow path (and also unpoison the loaded
6702 // reference, if heap poisoning is enabled).
6703 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6704 } else {
6705 UseScratchRegisterScope temps(GetVIXLAssembler());
6706 vixl32::Register temp = temps.Acquire();
6707
6708 if (has_intermediate_address) {
6709 // We do not need to compute the intermediate address from the array: the
6710 // input instruction has done it already. See the comment in
6711 // `TryExtractArrayAccessAddress()`.
6712 if (kIsDebugBuild) {
6713 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6714 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6715 }
6716 temp = obj;
6717 } else {
6718 __ Add(temp, obj, data_offset);
6719 }
6720 {
6721 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6722 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6723 codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6724 temps.Close();
6725 codegen_->MaybeRecordImplicitNullCheck(instruction);
6726 }
6727 // If read barriers are enabled, emit read barriers other than
6728 // Baker's using a slow path (and also unpoison the loaded
6729 // reference, if heap poisoning is enabled).
6730 codegen_->MaybeGenerateReadBarrierSlow(
6731 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6732 }
6733 }
6734 break;
6735 }
6736
6737 case DataType::Type::kInt64: {
6738 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6739 // As two macro instructions can be emitted the max size is doubled.
6740 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6741 if (index.IsConstant()) {
6742 size_t offset =
6743 (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6744 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
6745 } else {
6746 UseScratchRegisterScope temps(GetVIXLAssembler());
6747 vixl32::Register temp = temps.Acquire();
6748 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6749 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
6750 }
6751 codegen_->MaybeRecordImplicitNullCheck(instruction);
6752 break;
6753 }
6754
6755 case DataType::Type::kFloat32: {
6756 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6757 // As two macro instructions can be emitted the max size is doubled.
6758 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6759 vixl32::SRegister out = SRegisterFrom(out_loc);
6760 if (index.IsConstant()) {
6761 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6762 GetAssembler()->LoadSFromOffset(out, obj, offset);
6763 } else {
6764 UseScratchRegisterScope temps(GetVIXLAssembler());
6765 vixl32::Register temp = temps.Acquire();
6766 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6767 GetAssembler()->LoadSFromOffset(out, temp, data_offset);
6768 }
6769 codegen_->MaybeRecordImplicitNullCheck(instruction);
6770 break;
6771 }
6772
6773 case DataType::Type::kFloat64: {
6774 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6775 // As two macro instructions can be emitted the max size is doubled.
6776 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6777 if (index.IsConstant()) {
6778 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6779 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
6780 } else {
6781 UseScratchRegisterScope temps(GetVIXLAssembler());
6782 vixl32::Register temp = temps.Acquire();
6783 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6784 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
6785 }
6786 codegen_->MaybeRecordImplicitNullCheck(instruction);
6787 break;
6788 }
6789
6790 case DataType::Type::kUint32:
6791 case DataType::Type::kUint64:
6792 case DataType::Type::kVoid:
6793 LOG(FATAL) << "Unreachable type " << type;
6794 UNREACHABLE();
6795 }
6796 }
6797
6798 void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
6799 DataType::Type value_type = instruction->GetComponentType();
6800
6801 bool needs_write_barrier =
6802 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6803 bool needs_type_check = instruction->NeedsTypeCheck();
6804
6805 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6806 instruction,
6807 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6808
6809 locations->SetInAt(0, Location::RequiresRegister());
6810 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6811 if (DataType::IsFloatingPointType(value_type)) {
6812 locations->SetInAt(2, Location::RequiresFpuRegister());
6813 } else {
6814 locations->SetInAt(2, Location::RequiresRegister());
6815 }
6816 if (needs_write_barrier) {
6817 // Temporary registers for the write barrier or register poisoning.
6818 // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
6819 // InstructionCodeGeneratorARMVIXL::VisitArraySet.
6820 locations->AddTemp(Location::RequiresRegister());
6821 locations->AddTemp(Location::RequiresRegister());
6822 }
6823 }
6824
6825 void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
6826 LocationSummary* locations = instruction->GetLocations();
6827 vixl32::Register array = InputRegisterAt(instruction, 0);
6828 Location index = locations->InAt(1);
6829 DataType::Type value_type = instruction->GetComponentType();
6830 bool needs_type_check = instruction->NeedsTypeCheck();
6831 bool needs_write_barrier =
6832 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6833 uint32_t data_offset =
6834 mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
6835 Location value_loc = locations->InAt(2);
6836 HInstruction* array_instr = instruction->GetArray();
6837 bool has_intermediate_address = array_instr->IsIntermediateAddress();
6838
6839 switch (value_type) {
6840 case DataType::Type::kBool:
6841 case DataType::Type::kUint8:
6842 case DataType::Type::kInt8:
6843 case DataType::Type::kUint16:
6844 case DataType::Type::kInt16:
6845 case DataType::Type::kInt32: {
6846 if (index.IsConstant()) {
6847 int32_t const_index = Int32ConstantFrom(index);
6848 uint32_t full_offset =
6849 data_offset + (const_index << DataType::SizeShift(value_type));
6850 StoreOperandType store_type = GetStoreOperandType(value_type);
6851 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6852 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6853 GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
6854 codegen_->MaybeRecordImplicitNullCheck(instruction);
6855 } else {
6856 UseScratchRegisterScope temps(GetVIXLAssembler());
6857 vixl32::Register temp = temps.Acquire();
6858
6859 if (has_intermediate_address) {
6860 // We do not need to compute the intermediate address from the array: the
6861 // input instruction has done it already. See the comment in
6862 // `TryExtractArrayAccessAddress()`.
6863 if (kIsDebugBuild) {
6864 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6865 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6866 }
6867 temp = array;
6868 } else {
6869 __ Add(temp, array, data_offset);
6870 }
6871 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6872 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6873 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6874 codegen_->MaybeRecordImplicitNullCheck(instruction);
6875 }
6876 break;
6877 }
6878
6879 case DataType::Type::kReference: {
6880 vixl32::Register value = RegisterFrom(value_loc);
6881 // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
6882 // See the comment in instruction_simplifier_shared.cc.
6883 DCHECK(!has_intermediate_address);
6884
6885 if (instruction->InputAt(2)->IsNullConstant()) {
6886 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6887 // As two macro instructions can be emitted the max size is doubled.
6888 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6889 // Just setting null.
6890 if (index.IsConstant()) {
6891 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6892 GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
6893 } else {
6894 DCHECK(index.IsRegister()) << index;
6895 UseScratchRegisterScope temps(GetVIXLAssembler());
6896 vixl32::Register temp = temps.Acquire();
6897 __ Add(temp, array, data_offset);
6898 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6899 }
6900 codegen_->MaybeRecordImplicitNullCheck(instruction);
6901 DCHECK(!needs_write_barrier);
6902 DCHECK(!needs_type_check);
6903 break;
6904 }
6905
6906 DCHECK(needs_write_barrier);
6907 Location temp1_loc = locations->GetTemp(0);
6908 vixl32::Register temp1 = RegisterFrom(temp1_loc);
6909 Location temp2_loc = locations->GetTemp(1);
6910 vixl32::Register temp2 = RegisterFrom(temp2_loc);
6911
6912 bool can_value_be_null = instruction->GetValueCanBeNull();
6913 vixl32::Label do_store;
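      // Storing a null reference needs neither the type check nor heap reference
      // poisoning, so branch directly to the store.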
6914 if (can_value_be_null) {
6915 __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false);
6916 }
6917
6918 SlowPathCodeARMVIXL* slow_path = nullptr;
6919 if (needs_type_check) {
6920 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
6921 codegen_->AddSlowPath(slow_path);
6922
6923 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6924 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6925 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6926
6927 // Note that when read barriers are enabled, the type checks
6928 // are performed without read barriers. This is fine, even in
6929 // the case where a class object is in the from-space after
6930 // the flip, as a comparison involving such a type would not
6931 // produce a false positive; it may of course produce a false
6932 // negative, in which case we would take the ArraySet slow
6933 // path.
6934
6935 {
6936 // Ensure we record the pc position immediately after the `ldr` instruction.
6937 ExactAssemblyScope aas(GetVIXLAssembler(),
6938 vixl32::kMaxInstructionSizeInBytes,
6939 CodeBufferCheckScope::kMaximumSize);
6940 // /* HeapReference<Class> */ temp1 = array->klass_
6941 __ ldr(temp1, MemOperand(array, class_offset));
6942 codegen_->MaybeRecordImplicitNullCheck(instruction);
6943 }
6944 GetAssembler()->MaybeUnpoisonHeapReference(temp1);
6945
6946 // /* HeapReference<Class> */ temp1 = temp1->component_type_
6947 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
6948 // /* HeapReference<Class> */ temp2 = value->klass_
6949 GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
6950 // If heap poisoning is enabled, no need to unpoison `temp1`
6951 // nor `temp2`, as we are comparing two poisoned references.
6952 __ Cmp(temp1, temp2);
6953
6954 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6955 vixl32::Label do_put;
6956 __ B(eq, &do_put, /* is_far_target= */ false);
6957 // If heap poisoning is enabled, the `temp1` reference has
6958 // not been unpoisoned yet; unpoison it now.
6959 GetAssembler()->MaybeUnpoisonHeapReference(temp1);
6960
6961 // /* HeapReference<Class> */ temp1 = temp1->super_class_
6962 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
6963 // If heap poisoning is enabled, no need to unpoison
6964 // `temp1`, as we are comparing against null below.
6965 __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
6966 __ Bind(&do_put);
6967 } else {
6968 __ B(ne, slow_path->GetEntryLabel());
6969 }
6970 }
6971
6972 if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
6973 DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
6974 << " Already null checked so we shouldn't do it again.";
6975 codegen_->MarkGCCard(temp1, temp2, array, value, /* emit_null_check= */ false);
6976 }
6977
6978 if (can_value_be_null) {
6979 DCHECK(do_store.IsReferenced());
6980 __ Bind(&do_store);
6981 }
6982
6983 vixl32::Register source = value;
6984 if (kPoisonHeapReferences) {
6985 // Note that in the case where `value` is a null reference,
6986 // we do not enter this block, as a null reference does not
6987 // need poisoning.
6988 DCHECK_EQ(value_type, DataType::Type::kReference);
6989 __ Mov(temp1, value);
6990 GetAssembler()->PoisonHeapReference(temp1);
6991 source = temp1;
6992 }
6993
6994 {
6995 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6996 // As two macro instructions can be emitted the max size is doubled.
6997 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6998 if (index.IsConstant()) {
6999 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7000 GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
7001 } else {
7002 DCHECK(index.IsRegister()) << index;
7003
7004 UseScratchRegisterScope temps(GetVIXLAssembler());
7005 vixl32::Register temp = temps.Acquire();
7006 __ Add(temp, array, data_offset);
7007 codegen_->StoreToShiftedRegOffset(value_type,
7008 LocationFrom(source),
7009 temp,
7010 RegisterFrom(index));
7011 }
7012
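// If a type check was emitted above, the implicit null check was already recorded at the
// `klass_` load; record one here only when that path may have been skipped (a possibly null
// value) or when no type check was emitted at all.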
7013 if (can_value_be_null || !needs_type_check) {
7014 codegen_->MaybeRecordImplicitNullCheck(instruction);
7015 }
7016 }
7017
7018 if (slow_path != nullptr) {
7019 __ Bind(slow_path->GetExitLabel());
7020 }
7021
7022 break;
7023 }
7024
7025 case DataType::Type::kInt64: {
7026 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7027 // As two macro instructions can be emitted the max size is doubled.
7028 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7029 Location value = locations->InAt(2);
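// The 64-bit value is held in a core register pair laid out as consecutive registers (see
// ExpectedPairLayout), so the word-pair store is given only the low register of the pair.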
7030 if (index.IsConstant()) {
7031 size_t offset =
7032 (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7033 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
7034 } else {
7035 UseScratchRegisterScope temps(GetVIXLAssembler());
7036 vixl32::Register temp = temps.Acquire();
7037 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7038 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
7039 }
7040 codegen_->MaybeRecordImplicitNullCheck(instruction);
7041 break;
7042 }
7043
7044 case DataType::Type::kFloat32: {
7045 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7046 // As two macro instructions can be emitted the max size is doubled.
7047 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7048 Location value = locations->InAt(2);
7049 DCHECK(value.IsFpuRegister());
7050 if (index.IsConstant()) {
7051 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7052 GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
7053 } else {
7054 UseScratchRegisterScope temps(GetVIXLAssembler());
7055 vixl32::Register temp = temps.Acquire();
7056 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
7057 GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
7058 }
7059 codegen_->MaybeRecordImplicitNullCheck(instruction);
7060 break;
7061 }
7062
7063 case DataType::Type::kFloat64: {
7064 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7065 // As two macro instructions can be emitted the max size is doubled.
7066 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7067 Location value = locations->InAt(2);
7068 DCHECK(value.IsFpuRegisterPair());
7069 if (index.IsConstant()) {
7070 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7071 GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
7072 } else {
7073 UseScratchRegisterScope temps(GetVIXLAssembler());
7074 vixl32::Register temp = temps.Acquire();
7075 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7076 GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
7077 }
7078 codegen_->MaybeRecordImplicitNullCheck(instruction);
7079 break;
7080 }
7081
7082 case DataType::Type::kUint32:
7083 case DataType::Type::kUint64:
7084 case DataType::Type::kVoid:
7085 LOG(FATAL) << "Unreachable type " << value_type;
7086 UNREACHABLE();
7087 }
7088 }
7089
7090 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7091 LocationSummary* locations =
7092 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7093 locations->SetInAt(0, Location::RequiresRegister());
7094 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7095 }
7096
7097 void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7098 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
7099 vixl32::Register obj = InputRegisterAt(instruction, 0);
7100 vixl32::Register out = OutputRegister(instruction);
7101 {
7102 ExactAssemblyScope aas(GetVIXLAssembler(),
7103 vixl32::kMaxInstructionSizeInBytes,
7104 CodeBufferCheckScope::kMaximumSize);
7105 __ ldr(out, MemOperand(obj, offset));
7106 codegen_->MaybeRecordImplicitNullCheck(instruction);
7107 }
7108 // Mask out compression flag from String's array length.
7109 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
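// The low bit of the count field holds the compression flag; the length is stored in the
// upper bits, so a logical shift right by one recovers it.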
7110 __ Lsr(out, out, 1u);
7111 }
7112 }
7113
7114 void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7115 LocationSummary* locations =
7116 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7117
7118 locations->SetInAt(0, Location::RequiresRegister());
7119 locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
7120 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7121 }
7122
7123 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7124 vixl32::Register out = OutputRegister(instruction);
7125 vixl32::Register first = InputRegisterAt(instruction, 0);
7126 Location second = instruction->GetLocations()->InAt(1);
7127
7128 if (second.IsRegister()) {
7129 __ Add(out, first, RegisterFrom(second));
7130 } else {
7131 __ Add(out, first, Int32ConstantFrom(second));
7132 }
7133 }
7134
7135 void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
7136 HIntermediateAddressIndex* instruction) {
7137 LOG(FATAL) << "Unreachable " << instruction->GetId();
7138 }
7139
7140 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
7141 HIntermediateAddressIndex* instruction) {
7142 LOG(FATAL) << "Unreachable " << instruction->GetId();
7143 }
7144
7145 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7146 RegisterSet caller_saves = RegisterSet::Empty();
7147 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7148 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
7149 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
7150 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
7151
7152 HInstruction* index = instruction->InputAt(0);
7153 HInstruction* length = instruction->InputAt(1);
7154 // If both index and length are constants we can statically check the bounds. But if at least
7155 // one of them is not encodable, ArmEncodableConstantOrRegister would create a
7156 // Location::RequiresRegister(), which is not what we want here. Instead we create constant
7157 // locations for both inputs.
7158 bool both_const = index->IsConstant() && length->IsConstant();
7159 locations->SetInAt(0, both_const
7160 ? Location::ConstantLocation(index)
7161 : ArmEncodableConstantOrRegister(index, CMP));
7162 locations->SetInAt(1, both_const
7163 ? Location::ConstantLocation(length)
7164 : ArmEncodableConstantOrRegister(length, CMP));
7165 }
7166
7167 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7168 LocationSummary* locations = instruction->GetLocations();
7169 Location index_loc = locations->InAt(0);
7170 Location length_loc = locations->InAt(1);
7171
7172 if (length_loc.IsConstant()) {
7173 int32_t length = Int32ConstantFrom(length_loc);
7174 if (index_loc.IsConstant()) {
7175 // BCE will remove the bounds check if we are guaranteed to pass.
7176 int32_t index = Int32ConstantFrom(index_loc);
7177 if (index < 0 || index >= length) {
7178 SlowPathCodeARMVIXL* slow_path =
7179 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7180 codegen_->AddSlowPath(slow_path);
7181 __ B(slow_path->GetEntryLabel());
7182 } else {
7183 // Some optimization after BCE may have generated this, and we should not
7184 // generate a bounds check if the index is known to be within bounds.
7185 }
7186 return;
7187 }
7188
7189 SlowPathCodeARMVIXL* slow_path =
7190 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7191 __ Cmp(RegisterFrom(index_loc), length);
7192 codegen_->AddSlowPath(slow_path);
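// The comparison and branch are unsigned, i.e. take the slow path if
// static_cast<uint32_t>(index) >= static_cast<uint32_t>(length), which also catches a
// negative index.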
7193 __ B(hs, slow_path->GetEntryLabel());
7194 } else {
7195 SlowPathCodeARMVIXL* slow_path =
7196 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7197 __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
7198 codegen_->AddSlowPath(slow_path);
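// Branch if length <= index (unsigned), which also catches a negative index.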
7199 __ B(ls, slow_path->GetEntryLabel());
7200 }
7201 }
7202
7203 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
7204 vixl32::Register card,
7205 vixl32::Register object,
7206 vixl32::Register value,
7207 bool emit_null_check) {
7208 vixl32::Label is_null;
7209 if (emit_null_check) {
7210 __ CompareAndBranchIfZero(value, &is_null, /* is_far_target= */ false);
7211 }
7212 // Load the address of the card table into `card`.
7213 GetAssembler()->LoadFromOffset(
7214 kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7215 // Calculate the offset (in the card table) of the card corresponding to
7216 // `object`.
7217 __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7218 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
7219 // `object`'s card.
7220 //
7221 // Register `card` contains the address of the card table. Note that the card
7222 // table's base is biased during its creation so that it always starts at an
7223 // address whose least-significant byte is equal to `kCardDirty` (see
7224 // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
7225 // below writes the `kCardDirty` (byte) value into the `object`'s card
7226 // (located at `card + object >> kCardShift`).
7227 //
7228 // This dual use of the value in register `card` (1. to calculate the location
7229 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
7230 // (no need to explicitly load `kCardDirty` as an immediate value).
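// In effect, this performs: card_table[object >> kCardShift] = kCardDirty;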
7231 __ Strb(card, MemOperand(card, temp));
7232 if (emit_null_check) {
7233 __ Bind(&is_null);
7234 }
7235 }
7236
7237 void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
7238 LOG(FATAL) << "Unreachable";
7239 }
7240
7241 void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
7242 if (instruction->GetNext()->IsSuspendCheck() &&
7243 instruction->GetBlock()->GetLoopInformation() != nullptr) {
7244 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
7245 // The back edge will generate the suspend check.
7246 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
7247 }
7248
7249 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
7250 }
7251
7252 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7253 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7254 instruction, LocationSummary::kCallOnSlowPath);
7255 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7256 }
7257
7258 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7259 HBasicBlock* block = instruction->GetBlock();
7260 if (block->GetLoopInformation() != nullptr) {
7261 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7262 // The back edge will generate the suspend check.
7263 return;
7264 }
7265 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7266 // The goto will generate the suspend check.
7267 return;
7268 }
7269 GenerateSuspendCheck(instruction, nullptr);
7270 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14);
7271 }
7272
7273 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
7274 HBasicBlock* successor) {
7275 SuspendCheckSlowPathARMVIXL* slow_path =
7276 down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
7277 if (slow_path == nullptr) {
7278 slow_path =
7279 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor);
7280 instruction->SetSlowPath(slow_path);
7281 codegen_->AddSlowPath(slow_path);
7282 if (successor != nullptr) {
7283 DCHECK(successor->IsLoopHeader());
7284 }
7285 } else {
7286 DCHECK_EQ(slow_path->GetSuccessor(), successor);
7287 }
7288
7289 UseScratchRegisterScope temps(GetVIXLAssembler());
7290 vixl32::Register temp = temps.Acquire();
7291 GetAssembler()->LoadFromOffset(
7292 kLoadWord, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
7293 __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
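// If a suspend or checkpoint request is pending, take the slow path; otherwise fall through,
// or branch to the known successor when this check is emitted on a back edge.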
7294 if (successor == nullptr) {
7295 __ B(ne, slow_path->GetEntryLabel());
7296 __ Bind(slow_path->GetReturnLabel());
7297 } else {
7298 __ B(eq, codegen_->GetLabelOf(successor));
7299 __ B(slow_path->GetEntryLabel());
7300 }
7301 }
7302
7303 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
7304 return codegen_->GetAssembler();
7305 }
7306
7307 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
7308 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7309 MoveOperands* move = moves_[index];
7310 Location source = move->GetSource();
7311 Location destination = move->GetDestination();
7312
7313 if (source.IsRegister()) {
7314 if (destination.IsRegister()) {
7315 __ Mov(RegisterFrom(destination), RegisterFrom(source));
7316 } else if (destination.IsFpuRegister()) {
7317 __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
7318 } else {
7319 DCHECK(destination.IsStackSlot());
7320 GetAssembler()->StoreToOffset(kStoreWord,
7321 RegisterFrom(source),
7322 sp,
7323 destination.GetStackIndex());
7324 }
7325 } else if (source.IsStackSlot()) {
7326 if (destination.IsRegister()) {
7327 GetAssembler()->LoadFromOffset(kLoadWord,
7328 RegisterFrom(destination),
7329 sp,
7330 source.GetStackIndex());
7331 } else if (destination.IsFpuRegister()) {
7332 GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
7333 } else {
7334 DCHECK(destination.IsStackSlot());
7335 vixl32::Register temp = temps.Acquire();
7336 GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
7337 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7338 }
7339 } else if (source.IsFpuRegister()) {
7340 if (destination.IsRegister()) {
7341 __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
7342 } else if (destination.IsFpuRegister()) {
7343 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
7344 } else {
7345 DCHECK(destination.IsStackSlot());
7346 GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
7347 }
7348 } else if (source.IsDoubleStackSlot()) {
7349 if (destination.IsDoubleStackSlot()) {
7350 vixl32::DRegister temp = temps.AcquireD();
7351 GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
7352 GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
7353 } else if (destination.IsRegisterPair()) {
7354 DCHECK(ExpectedPairLayout(destination));
7355 GetAssembler()->LoadFromOffset(
7356 kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
7357 } else {
7358 DCHECK(destination.IsFpuRegisterPair()) << destination;
7359 GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
7360 }
7361 } else if (source.IsRegisterPair()) {
7362 if (destination.IsRegisterPair()) {
7363 __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
7364 __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
7365 } else if (destination.IsFpuRegisterPair()) {
7366 __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
7367 } else {
7368 DCHECK(destination.IsDoubleStackSlot()) << destination;
7369 DCHECK(ExpectedPairLayout(source));
7370 GetAssembler()->StoreToOffset(kStoreWordPair,
7371 LowRegisterFrom(source),
7372 sp,
7373 destination.GetStackIndex());
7374 }
7375 } else if (source.IsFpuRegisterPair()) {
7376 if (destination.IsRegisterPair()) {
7377 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
7378 } else if (destination.IsFpuRegisterPair()) {
7379 __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
7380 } else {
7381 DCHECK(destination.IsDoubleStackSlot()) << destination;
7382 GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
7383 }
7384 } else {
7385 DCHECK(source.IsConstant()) << source;
7386 HConstant* constant = source.GetConstant();
7387 if (constant->IsIntConstant() || constant->IsNullConstant()) {
7388 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7389 if (destination.IsRegister()) {
7390 __ Mov(RegisterFrom(destination), value);
7391 } else {
7392 DCHECK(destination.IsStackSlot());
7393 vixl32::Register temp = temps.Acquire();
7394 __ Mov(temp, value);
7395 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7396 }
7397 } else if (constant->IsLongConstant()) {
7398 int64_t value = Int64ConstantFrom(source);
7399 if (destination.IsRegisterPair()) {
7400 __ Mov(LowRegisterFrom(destination), Low32Bits(value));
7401 __ Mov(HighRegisterFrom(destination), High32Bits(value));
7402 } else {
7403 DCHECK(destination.IsDoubleStackSlot()) << destination;
7404 vixl32::Register temp = temps.Acquire();
7405 __ Mov(temp, Low32Bits(value));
7406 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7407 __ Mov(temp, High32Bits(value));
7408 GetAssembler()->StoreToOffset(kStoreWord,
7409 temp,
7410 sp,
7411 destination.GetHighStackIndex(kArmWordSize));
7412 }
7413 } else if (constant->IsDoubleConstant()) {
7414 double value = constant->AsDoubleConstant()->GetValue();
7415 if (destination.IsFpuRegisterPair()) {
7416 __ Vmov(DRegisterFrom(destination), value);
7417 } else {
7418 DCHECK(destination.IsDoubleStackSlot()) << destination;
7419 uint64_t int_value = bit_cast<uint64_t, double>(value);
7420 vixl32::Register temp = temps.Acquire();
7421 __ Mov(temp, Low32Bits(int_value));
7422 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7423 __ Mov(temp, High32Bits(int_value));
7424 GetAssembler()->StoreToOffset(kStoreWord,
7425 temp,
7426 sp,
7427 destination.GetHighStackIndex(kArmWordSize));
7428 }
7429 } else {
7430 DCHECK(constant->IsFloatConstant()) << constant->DebugName();
7431 float value = constant->AsFloatConstant()->GetValue();
7432 if (destination.IsFpuRegister()) {
7433 __ Vmov(SRegisterFrom(destination), value);
7434 } else {
7435 DCHECK(destination.IsStackSlot());
7436 vixl32::Register temp = temps.Acquire();
7437 __ Mov(temp, bit_cast<int32_t, float>(value));
7438 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7439 }
7440 }
7441 }
7442 }
7443
7444 void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
7445 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7446 vixl32::Register temp = temps.Acquire();
7447 __ Mov(temp, reg);
7448 GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
7449 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7450 }
7451
7452 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
7453 // TODO(VIXL32): Double check the performance of this implementation.
7454 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7455 vixl32::Register temp1 = temps.Acquire();
7456 ScratchRegisterScope ensure_scratch(
7457 this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
7458 vixl32::Register temp2(ensure_scratch.GetRegister());
7459
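// If the extra scratch register had to be spilled, the push moved `sp` down by one word, so
// adjust the stack offsets accordingly.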
7460 int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
7461 GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
7462 GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
7463 GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
7464 GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
7465 }
7466
7467 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
7468 MoveOperands* move = moves_[index];
7469 Location source = move->GetSource();
7470 Location destination = move->GetDestination();
7471 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7472
7473 if (source.IsRegister() && destination.IsRegister()) {
7474 vixl32::Register temp = temps.Acquire();
7475 DCHECK(!RegisterFrom(source).Is(temp));
7476 DCHECK(!RegisterFrom(destination).Is(temp));
7477 __ Mov(temp, RegisterFrom(destination));
7478 __ Mov(RegisterFrom(destination), RegisterFrom(source));
7479 __ Mov(RegisterFrom(source), temp);
7480 } else if (source.IsRegister() && destination.IsStackSlot()) {
7481 Exchange(RegisterFrom(source), destination.GetStackIndex());
7482 } else if (source.IsStackSlot() && destination.IsRegister()) {
7483 Exchange(RegisterFrom(destination), source.GetStackIndex());
7484 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7485 Exchange(source.GetStackIndex(), destination.GetStackIndex());
7486 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7487 vixl32::Register temp = temps.Acquire();
7488 __ Vmov(temp, SRegisterFrom(source));
7489 __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
7490 __ Vmov(SRegisterFrom(destination), temp);
7491 } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
7492 vixl32::DRegister temp = temps.AcquireD();
7493 __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
7494 __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
7495 __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
7496 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
7497 } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
7498 vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
7499 int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
7500 DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
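// Swap by moving the register pair into a D register, loading the pair from the stack slot,
// and then storing the saved D register back to that slot.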
7501 vixl32::DRegister temp = temps.AcquireD();
7502 __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
7503 GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
7504 GetAssembler()->StoreDToOffset(temp, sp, mem);
7505 } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
7506 vixl32::DRegister first = DRegisterFrom(source);
7507 vixl32::DRegister second = DRegisterFrom(destination);
7508 vixl32::DRegister temp = temps.AcquireD();
7509 __ Vmov(temp, first);
7510 __ Vmov(first, second);
7511 __ Vmov(second, temp);
7512 } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
7513 vixl32::DRegister reg = source.IsFpuRegisterPair()
7514 ? DRegisterFrom(source)
7515 : DRegisterFrom(destination);
7516 int mem = source.IsFpuRegisterPair()
7517 ? destination.GetStackIndex()
7518 : source.GetStackIndex();
7519 vixl32::DRegister temp = temps.AcquireD();
7520 __ Vmov(temp, reg);
7521 GetAssembler()->LoadDFromOffset(reg, sp, mem);
7522 GetAssembler()->StoreDToOffset(temp, sp, mem);
7523 } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
7524 vixl32::SRegister reg = source.IsFpuRegister()
7525 ? SRegisterFrom(source)
7526 : SRegisterFrom(destination);
7527 int mem = source.IsFpuRegister()
7528 ? destination.GetStackIndex()
7529 : source.GetStackIndex();
7530 vixl32::Register temp = temps.Acquire();
7531 __ Vmov(temp, reg);
7532 GetAssembler()->LoadSFromOffset(reg, sp, mem);
7533 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7534 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
7535 vixl32::DRegister temp1 = temps.AcquireD();
7536 vixl32::DRegister temp2 = temps.AcquireD();
7537 __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
7538 __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
7539 __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
7540 __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
7541 } else {
7542 LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
7543 }
7544 }
7545
7546 void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
7547 __ Push(vixl32::Register(reg));
7548 }
7549
7550 void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
7551 __ Pop(vixl32::Register(reg));
7552 }
7553
7554 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
7555 HLoadClass::LoadKind desired_class_load_kind) {
7556 switch (desired_class_load_kind) {
7557 case HLoadClass::LoadKind::kInvalid:
7558 LOG(FATAL) << "UNREACHABLE";
7559 UNREACHABLE();
7560 case HLoadClass::LoadKind::kReferrersClass:
7561 break;
7562 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7563 case HLoadClass::LoadKind::kBootImageRelRo:
7564 case HLoadClass::LoadKind::kBssEntry:
7565 case HLoadClass::LoadKind::kBssEntryPublic:
7566 case HLoadClass::LoadKind::kBssEntryPackage:
7567 DCHECK(!GetCompilerOptions().IsJitCompiler());
7568 break;
7569 case HLoadClass::LoadKind::kJitBootImageAddress:
7570 case HLoadClass::LoadKind::kJitTableAddress:
7571 DCHECK(GetCompilerOptions().IsJitCompiler());
7572 break;
7573 case HLoadClass::LoadKind::kRuntimeCall:
7574 break;
7575 }
7576 return desired_class_load_kind;
7577 }
7578
7579 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
7580 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7581 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7582 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7583 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7584 cls,
7585 LocationFrom(calling_convention.GetRegisterAt(0)),
7586 LocationFrom(r0));
7587 DCHECK(calling_convention.GetRegisterAt(0).Is(r0));
7588 return;
7589 }
7590 DCHECK_EQ(cls->NeedsAccessCheck(),
7591 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7592 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7593
7594 const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
7595 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7596 ? LocationSummary::kCallOnSlowPath
7597 : LocationSummary::kNoCall;
7598 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7599 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7600 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7601 }
7602
7603 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
7604 locations->SetInAt(0, Location::RequiresRegister());
7605 }
7606 locations->SetOut(Location::RequiresRegister());
7607 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
7608 if (!gUseReadBarrier || kUseBakerReadBarrier) {
7609 // Rely on the type resolution or initialization and marking to save everything we need.
7610 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7611 } else {
7612 // For non-Baker read barrier we have a temp-clobbering call.
7613 }
7614 }
7615 }
7616
7617 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7618 // move.
7619 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7620 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7621 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7622 codegen_->GenerateLoadClassRuntimeCall(cls);
7623 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15);
7624 return;
7625 }
7626 DCHECK_EQ(cls->NeedsAccessCheck(),
7627 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7628 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7629
7630 LocationSummary* locations = cls->GetLocations();
7631 Location out_loc = locations->Out();
7632 vixl32::Register out = OutputRegister(cls);
7633
7634 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
7635 ? kWithoutReadBarrier
7636 : gCompilerReadBarrierOption;
7637 bool generate_null_check = false;
7638 switch (load_kind) {
7639 case HLoadClass::LoadKind::kReferrersClass: {
7640 DCHECK(!cls->CanCallRuntime());
7641 DCHECK(!cls->MustGenerateClinitCheck());
7642 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7643 vixl32::Register current_method = InputRegisterAt(cls, 0);
7644 codegen_->GenerateGcRootFieldLoad(cls,
7645 out_loc,
7646 current_method,
7647 ArtMethod::DeclaringClassOffset().Int32Value(),
7648 read_barrier_option);
7649 break;
7650 }
7651 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7652 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7653 codegen_->GetCompilerOptions().IsBootImageExtension());
7654 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7655 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7656 codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7657 codegen_->EmitMovwMovtPlaceholder(labels, out);
7658 break;
7659 }
7660 case HLoadClass::LoadKind::kBootImageRelRo: {
7661 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7662 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
7663 codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
7664 break;
7665 }
7666 case HLoadClass::LoadKind::kBssEntry:
7667 case HLoadClass::LoadKind::kBssEntryPublic:
7668 case HLoadClass::LoadKind::kBssEntryPackage: {
7669 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls);
7670 codegen_->EmitMovwMovtPlaceholder(labels, out);
7671 // All aligned loads are implicitly atomic consume operations on ARM.
7672 codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7673 generate_null_check = true;
7674 break;
7675 }
7676 case HLoadClass::LoadKind::kJitBootImageAddress: {
7677 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7678 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7679 DCHECK_NE(address, 0u);
7680 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7681 break;
7682 }
7683 case HLoadClass::LoadKind::kJitTableAddress: {
7684 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
7685 cls->GetTypeIndex(),
7686 cls->GetClass()));
7687 // /* GcRoot<mirror::Class> */ out = *out
7688 codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7689 break;
7690 }
7691 case HLoadClass::LoadKind::kRuntimeCall:
7692 case HLoadClass::LoadKind::kInvalid:
7693 LOG(FATAL) << "UNREACHABLE";
7694 UNREACHABLE();
7695 }
7696
7697 if (generate_null_check || cls->MustGenerateClinitCheck()) {
7698 DCHECK(cls->CanCallRuntime());
7699 LoadClassSlowPathARMVIXL* slow_path =
7700 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls);
7701 codegen_->AddSlowPath(slow_path);
7702 if (generate_null_check) {
7703 __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7704 }
7705 if (cls->MustGenerateClinitCheck()) {
7706 GenerateClassInitializationCheck(slow_path, out);
7707 } else {
7708 __ Bind(slow_path->GetExitLabel());
7709 }
7710 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16);
7711 }
7712 }
7713
7714 void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7715 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7716 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7717 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7718 }
7719
7720 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7721 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7722 }
7723
7724 void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7725 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7726 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7727 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7728 }
7729
7730 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7731 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7732 }
7733
7734 void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7735 LocationSummary* locations =
7736 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7737 locations->SetInAt(0, Location::RequiresRegister());
7738 if (check->HasUses()) {
7739 locations->SetOut(Location::SameAsFirstInput());
7740 }
7741 // Rely on the type initialization to save everything we need.
7742 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7743 }
7744
7745 void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7746 // We assume the class is not null.
7747 LoadClassSlowPathARMVIXL* slow_path =
7748 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check);
7749 codegen_->AddSlowPath(slow_path);
7750 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
7751 }
7752
7753 void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
7754 LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
7755 UseScratchRegisterScope temps(GetVIXLAssembler());
7756 vixl32::Register temp = temps.Acquire();
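// The class counts as initialized only if its status byte is at least the shifted
// "visibly initialized" value; otherwise branch to the slow path.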
7757 __ Ldrb(temp, MemOperand(class_reg, status_byte_offset));
7758 __ Cmp(temp, shifted_visibly_initialized_value);
7759 __ B(lo, slow_path->GetEntryLabel());
7760 __ Bind(slow_path->GetExitLabel());
7761 }
7762
7763 void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
7764 HTypeCheckInstruction* check,
7765 vixl32::Register temp,
7766 vixl32::FlagsUpdate flags_update) {
7767 uint32_t path_to_root = check->GetBitstringPathToRoot();
7768 uint32_t mask = check->GetBitstringMask();
7769 DCHECK(IsPowerOfTwo(mask + 1));
7770 size_t mask_bits = WhichPowerOf2(mask + 1);
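// Conceptually this computes (status & mask) == path_to_root, with the bitstring stored in
// the low `mask_bits` bits of the 32-bit class status word; the branches below only select
// the most compact instruction sequence for the given `path_to_root` and register.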
7771
7772 // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
7773 // the Z flag for BNE. This is indicated by the `flags_update` parameter.
7774 if (mask_bits == 16u) {
7775 // Load only the bitstring part of the status word.
7776 __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7777 // Check if the bitstring bits are equal to `path_to_root`.
7778 if (flags_update == SetFlags) {
7779 __ Cmp(temp, path_to_root);
7780 } else {
7781 __ Sub(temp, temp, path_to_root);
7782 }
7783 } else {
7784 // /* uint32_t */ temp = temp->status_
7785 __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7786 if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
7787 // Compare the bitstring bits using SUB.
7788 __ Sub(temp, temp, path_to_root);
7789 // Shift out bits that do not contribute to the comparison.
7790 __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7791 } else if (IsUint<16>(path_to_root)) {
7792 if (temp.IsLow()) {
7793 // Note: Optimized for size but contains one more dependent instruction than necessary.
7794 // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
7795 // macro assembler would use the high reg IP for the constant by default.
7796 // Compare the bitstring bits using SUB.
7797 __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2
7798 __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3
7799 // Shift out bits that do not contribute to the comparison.
7800 __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7801 } else {
7802 // Extract the bitstring bits.
7803 __ Ubfx(temp, temp, 0, mask_bits);
7804 // Check if the bitstring bits are equal to `path_to_root`.
7805 if (flags_update == SetFlags) {
7806 __ Cmp(temp, path_to_root);
7807 } else {
7808 __ Sub(temp, temp, path_to_root);
7809 }
7810 }
7811 } else {
7812 // Shift out bits that do not contribute to the comparison.
7813 __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7814 // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
7815 if (flags_update == SetFlags) {
7816 __ Cmp(temp, path_to_root << (32u - mask_bits));
7817 } else {
7818 __ Sub(temp, temp, path_to_root << (32u - mask_bits));
7819 }
7820 }
7821 }
7822 }
7823
7824 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
7825 HLoadString::LoadKind desired_string_load_kind) {
7826 switch (desired_string_load_kind) {
7827 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7828 case HLoadString::LoadKind::kBootImageRelRo:
7829 case HLoadString::LoadKind::kBssEntry:
7830 DCHECK(!GetCompilerOptions().IsJitCompiler());
7831 break;
7832 case HLoadString::LoadKind::kJitBootImageAddress:
7833 case HLoadString::LoadKind::kJitTableAddress:
7834 DCHECK(GetCompilerOptions().IsJitCompiler());
7835 break;
7836 case HLoadString::LoadKind::kRuntimeCall:
7837 break;
7838 }
7839 return desired_string_load_kind;
7840 }
7841
7842 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
7843 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7844 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7845 HLoadString::LoadKind load_kind = load->GetLoadKind();
7846 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7847 locations->SetOut(LocationFrom(r0));
7848 } else {
7849 locations->SetOut(Location::RequiresRegister());
7850 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7851 if (!gUseReadBarrier || kUseBakerReadBarrier) {
7852 // Rely on the pResolveString and marking to save everything we need, including temps.
7853 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7854 } else {
7855 // For non-Baker read barrier we have a temp-clobbering call.
7856 }
7857 }
7858 }
7859 }
7860
7861 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7862 // move.
7863 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7864 LocationSummary* locations = load->GetLocations();
7865 Location out_loc = locations->Out();
7866 vixl32::Register out = OutputRegister(load);
7867 HLoadString::LoadKind load_kind = load->GetLoadKind();
7868
7869 switch (load_kind) {
7870 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7871 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7872 codegen_->GetCompilerOptions().IsBootImageExtension());
7873 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7874 codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
7875 codegen_->EmitMovwMovtPlaceholder(labels, out);
7876 return;
7877 }
7878 case HLoadString::LoadKind::kBootImageRelRo: {
7879 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7880 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
7881 codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
7882 return;
7883 }
7884 case HLoadString::LoadKind::kBssEntry: {
7885 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7886 codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
7887 codegen_->EmitMovwMovtPlaceholder(labels, out);
7888 // All aligned loads are implicitly atomic consume operations on ARM.
7889 codegen_->GenerateGcRootFieldLoad(
7890 load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
7891 LoadStringSlowPathARMVIXL* slow_path =
7892 new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
7893 codegen_->AddSlowPath(slow_path);
7894 __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7895 __ Bind(slow_path->GetExitLabel());
7896 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17);
7897 return;
7898 }
7899 case HLoadString::LoadKind::kJitBootImageAddress: {
7900 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7901 DCHECK_NE(address, 0u);
7902 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7903 return;
7904 }
7905 case HLoadString::LoadKind::kJitTableAddress: {
7906 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
7907 load->GetStringIndex(),
7908 load->GetString()));
7909 // /* GcRoot<mirror::String> */ out = *out
7910 codegen_->GenerateGcRootFieldLoad(
7911 load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
7912 return;
7913 }
7914 default:
7915 break;
7916 }
7917
7918 // TODO: Re-add the compiler code to do string dex cache lookup again.
7919 DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
7920 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7921 __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
7922 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7923 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7924 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18);
7925 }
7926
7927 static int32_t GetExceptionTlsOffset() {
7928 return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
7929 }
7930
7931 void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
7932 LocationSummary* locations =
7933 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7934 locations->SetOut(Location::RequiresRegister());
7935 }
7936
7937 void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
7938 vixl32::Register out = OutputRegister(load);
7939 GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
7940 }
7941
7942
7943 void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
7944 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7945 }
7946
7947 void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7948 UseScratchRegisterScope temps(GetVIXLAssembler());
7949 vixl32::Register temp = temps.Acquire();
7950 __ Mov(temp, 0);
7951 GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
7952 }
7953
7954 void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
7955 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7956 instruction, LocationSummary::kCallOnMainOnly);
7957 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7958 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
7959 }
7960
7961 void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
7962 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7963 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7964 }
7965
7966 // Temp is used for read barrier.
7967 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7968 if (gUseReadBarrier &&
7969 (kUseBakerReadBarrier ||
7970 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7971 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7972 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7973 return 1;
7974 }
7975 return 0;
7976 }
7977
7978 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
7979 // interface pointer, one for loading the current interface.
7980 // The other checks have one temp for loading the object's class.
7981 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7982 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7983 return 3;
7984 }
7985 return 1 + NumberOfInstanceOfTemps(type_check_kind);
7986 }
7987
7988 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
7989 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7990 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7991 bool baker_read_barrier_slow_path = false;
7992 switch (type_check_kind) {
7993 case TypeCheckKind::kExactCheck:
7994 case TypeCheckKind::kAbstractClassCheck:
7995 case TypeCheckKind::kClassHierarchyCheck:
7996 case TypeCheckKind::kArrayObjectCheck: {
7997 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7998 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7999 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
8000 break;
8001 }
8002 case TypeCheckKind::kArrayCheck:
8003 case TypeCheckKind::kUnresolvedCheck:
8004 case TypeCheckKind::kInterfaceCheck:
8005 call_kind = LocationSummary::kCallOnSlowPath;
8006 break;
8007 case TypeCheckKind::kBitstringCheck:
8008 break;
8009 }
8010
8011 LocationSummary* locations =
8012 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8013 if (baker_read_barrier_slow_path) {
8014 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
8015 }
8016 locations->SetInAt(0, Location::RequiresRegister());
8017 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8018 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8019 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8020 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8021 } else {
8022 locations->SetInAt(1, Location::RequiresRegister());
8023 }
8024 // The "out" register is used as a temporary, so it overlaps with the inputs.
8025 // Note that TypeCheckSlowPathARM uses this register too.
8026 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
8027 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
8028 }
8029
8030 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8031 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8032 LocationSummary* locations = instruction->GetLocations();
8033 Location obj_loc = locations->InAt(0);
8034 vixl32::Register obj = InputRegisterAt(instruction, 0);
8035 vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8036 ? vixl32::Register()
8037 : InputRegisterAt(instruction, 1);
8038 Location out_loc = locations->Out();
8039 vixl32::Register out = OutputRegister(instruction);
8040 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
8041 DCHECK_LE(num_temps, 1u);
8042 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
8043 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8044 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8045 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8046 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8047 vixl32::Label done;
8048 vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
8049 SlowPathCodeARMVIXL* slow_path = nullptr;
8050
8051 // Return 0 if `obj` is null.
8052 // Avoid the null check if we know `obj` is not null.
8053 if (instruction->MustDoNullCheck()) {
8054 DCHECK(!out.Is(obj));
8055 __ Mov(out, 0);
8056 __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8057 }
8058
8059 switch (type_check_kind) {
8060 case TypeCheckKind::kExactCheck: {
8061 ReadBarrierOption read_barrier_option =
8062 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
8063 // /* HeapReference<Class> */ out = obj->klass_
8064 GenerateReferenceLoadTwoRegisters(instruction,
8065 out_loc,
8066 obj_loc,
8067 class_offset,
8068 maybe_temp_loc,
8069 read_barrier_option);
8070 // Classes must be equal for the instanceof to succeed.
8071 __ Cmp(out, cls);
8072 // We speculatively set the result to false without changing the condition
8073 // flags, which allows us to avoid some branching later.
8074 __ Mov(LeaveFlags, out, 0);
8075
8076 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8077 // we check that the output is in a low register, so that a 16-bit MOV
8078 // encoding can be used.
8079 if (out.IsLow()) {
8080 // We use the scope because of the IT block that follows.
8081 ExactAssemblyScope guard(GetVIXLAssembler(),
8082 2 * vixl32::k16BitT32InstructionSizeInBytes,
8083 CodeBufferCheckScope::kExactSize);
8084
8085 __ it(eq);
8086 __ mov(eq, out, 1);
8087 } else {
8088 __ B(ne, final_label, /* is_far_target= */ false);
8089 __ Mov(out, 1);
8090 }
8091
8092 break;
8093 }
8094
8095 case TypeCheckKind::kAbstractClassCheck: {
8096 ReadBarrierOption read_barrier_option =
8097 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
8098 // /* HeapReference<Class> */ out = obj->klass_
8099 GenerateReferenceLoadTwoRegisters(instruction,
8100 out_loc,
8101 obj_loc,
8102 class_offset,
8103 maybe_temp_loc,
8104 read_barrier_option);
8105 // If the class is abstract, we eagerly fetch the super class of the
8106 // object to avoid doing a comparison we know will fail.
8107 vixl32::Label loop;
8108 __ Bind(&loop);
8109 // /* HeapReference<Class> */ out = out->super_class_
8110 GenerateReferenceLoadOneRegister(instruction,
8111 out_loc,
8112 super_offset,
8113 maybe_temp_loc,
8114 read_barrier_option);
8115 // If `out` is null, we use it for the result, and jump to the final label.
8116 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8117 __ Cmp(out, cls);
8118 __ B(ne, &loop, /* is_far_target= */ false);
8119 __ Mov(out, 1);
8120 break;
8121 }
8122
8123 case TypeCheckKind::kClassHierarchyCheck: {
8124 ReadBarrierOption read_barrier_option =
8125 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
8126 // /* HeapReference<Class> */ out = obj->klass_
8127 GenerateReferenceLoadTwoRegisters(instruction,
8128 out_loc,
8129 obj_loc,
8130 class_offset,
8131 maybe_temp_loc,
8132 read_barrier_option);
8133 // Walk over the class hierarchy to find a match.
8134 vixl32::Label loop, success;
8135 __ Bind(&loop);
8136 __ Cmp(out, cls);
8137 __ B(eq, &success, /* is_far_target= */ false);
8138 // /* HeapReference<Class> */ out = out->super_class_
8139 GenerateReferenceLoadOneRegister(instruction,
8140 out_loc,
8141 super_offset,
8142 maybe_temp_loc,
8143 read_barrier_option);
8144 // This is essentially a null check, but it sets the condition flags to the
8145 // proper value for the code that follows the loop, i.e. not `eq`.
8146 __ Cmp(out, 1);
8147 __ B(hs, &loop, /* is_far_target= */ false);
8148
8149 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8150 // we check that the output is in a low register, so that a 16-bit MOV
8151 // encoding can be used.
8152 if (out.IsLow()) {
8153 // If `out` is null, we use it for the result, and the condition flags
8154 // have already been set to `ne`, so the IT block that comes afterwards
8155 // (and which handles the successful case) turns into a NOP (instead of
8156 // overwriting `out`).
8157 __ Bind(&success);
8158
8159 // We use the scope because of the IT block that follows.
8160 ExactAssemblyScope guard(GetVIXLAssembler(),
8161 2 * vixl32::k16BitT32InstructionSizeInBytes,
8162 CodeBufferCheckScope::kExactSize);
8163
8164 // There is only one branch to the `success` label (which is bound to this
8165 // IT block), and it has the same condition, `eq`, so in that case the MOV
8166 // is executed.
8167 __ it(eq);
8168 __ mov(eq, out, 1);
8169 } else {
8170 // If `out` is null, we use it for the result, and jump to the final label.
8171 __ B(final_label);
8172 __ Bind(&success);
8173 __ Mov(out, 1);
8174 }
8175
8176 break;
8177 }
8178
8179 case TypeCheckKind::kArrayObjectCheck: {
8180 ReadBarrierOption read_barrier_option =
8181 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
8182 // /* HeapReference<Class> */ out = obj->klass_
8183 GenerateReferenceLoadTwoRegisters(instruction,
8184 out_loc,
8185 obj_loc,
8186 class_offset,
8187 maybe_temp_loc,
8188 read_barrier_option);
8189 // Do an exact check.
8190 vixl32::Label exact_check;
8191 __ Cmp(out, cls);
8192 __ B(eq, &exact_check, /* is_far_target= */ false);
8193 // Otherwise, we need to check that the object's class is a non-primitive array.
8194 // /* HeapReference<Class> */ out = out->component_type_
8195 GenerateReferenceLoadOneRegister(instruction,
8196 out_loc,
8197 component_offset,
8198 maybe_temp_loc,
8199 read_barrier_option);
8200 // If `out` is null, we use it for the result, and jump to the final label.
8201 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8202 GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
8203 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8204 __ Cmp(out, 0);
8205 // We speculatively set the result to false without changing the condition
8206 // flags, which allows us to avoid some branching later.
8207 __ Mov(LeaveFlags, out, 0);
8208
8209 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8210 // we check that the output is in a low register, so that a 16-bit MOV
8211 // encoding can be used.
8212 if (out.IsLow()) {
8213 __ Bind(&exact_check);
8214
8215 // We use the scope because of the IT block that follows.
8216 ExactAssemblyScope guard(GetVIXLAssembler(),
8217 2 * vixl32::k16BitT32InstructionSizeInBytes,
8218 CodeBufferCheckScope::kExactSize);
8219
8220 __ it(eq);
8221 __ mov(eq, out, 1);
8222 } else {
8223 __ B(ne, final_label, /* is_far_target= */ false);
8224 __ Bind(&exact_check);
8225 __ Mov(out, 1);
8226 }
8227
8228 break;
8229 }
8230
8231 case TypeCheckKind::kArrayCheck: {
8232 // No read barrier since the slow path will retry upon failure.
8233 // /* HeapReference<Class> */ out = obj->klass_
8234 GenerateReferenceLoadTwoRegisters(instruction,
8235 out_loc,
8236 obj_loc,
8237 class_offset,
8238 maybe_temp_loc,
8239 kWithoutReadBarrier);
8240 __ Cmp(out, cls);
8241 DCHECK(locations->OnlyCallsOnSlowPath());
8242 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8243 instruction, /* is_fatal= */ false);
8244 codegen_->AddSlowPath(slow_path);
8245 __ B(ne, slow_path->GetEntryLabel());
8246 __ Mov(out, 1);
8247 break;
8248 }
8249
8250 case TypeCheckKind::kUnresolvedCheck:
8251 case TypeCheckKind::kInterfaceCheck: {
8252 // Note that we indeed only call on slow path, but we always go
8253 // into the slow path for the unresolved and interface check
8254 // cases.
8255 //
8256 // We cannot directly call the InstanceofNonTrivial runtime
8257 // entry point without resorting to a type checking slow path
8258 // here (i.e. by calling InvokeRuntime directly), as it would
8259 // require assigning fixed registers for the inputs of this
8260 // HInstanceOf instruction (following the runtime calling
8261 // convention), which might be cluttered by the potential first
8262 // read barrier emission at the beginning of this method.
8263 //
8264 // TODO: Introduce a new runtime entry point taking the object
8265 // to test (instead of its class) as argument, and let it deal
8266 // with the read barrier issues. This will let us refactor this
8267 // case of the `switch` code as it was previously (with a direct
8268 // call to the runtime not using a type checking slow path).
8269 // This should also be beneficial for the other cases above.
8270 DCHECK(locations->OnlyCallsOnSlowPath());
8271 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8272 instruction, /* is_fatal= */ false);
8273 codegen_->AddSlowPath(slow_path);
8274 __ B(slow_path->GetEntryLabel());
8275 break;
8276 }
8277
8278 case TypeCheckKind::kBitstringCheck: {
8279 // /* HeapReference<Class> */ temp = obj->klass_
8280 GenerateReferenceLoadTwoRegisters(instruction,
8281 out_loc,
8282 obj_loc,
8283 class_offset,
8284 maybe_temp_loc,
8285 kWithoutReadBarrier);
8286
8287 GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
8288 // If `out` is a low reg and we would have another low reg temp, we could
8289 // optimize this as RSBS+ADC, see GenerateConditionWithZero().
8290 //
8291 // Also, in some cases when `out` is a low reg and we're loading a constant to IP
8292 // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
8293 // would be the same and we would have fewer direct data dependencies.
8294 codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR
8295 break;
8296 }
8297 }
8298
8299 if (done.IsReferenced()) {
8300 __ Bind(&done);
8301 }
8302
8303 if (slow_path != nullptr) {
8304 __ Bind(slow_path->GetExitLabel());
8305 }
8306 }
8307
8308 void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8309 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8310 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
8311 LocationSummary* locations =
8312 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8313 locations->SetInAt(0, Location::RequiresRegister());
8314 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8315 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8316 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8317 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8318 } else {
8319 locations->SetInAt(1, Location::RequiresRegister());
8320 }
8321 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
8322 }
8323
8324 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8325 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8326 LocationSummary* locations = instruction->GetLocations();
8327 Location obj_loc = locations->InAt(0);
8328 vixl32::Register obj = InputRegisterAt(instruction, 0);
8329 vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8330 ? vixl32::Register()
8331 : InputRegisterAt(instruction, 1);
8332 Location temp_loc = locations->GetTemp(0);
8333 vixl32::Register temp = RegisterFrom(temp_loc);
8334 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
8335 DCHECK_LE(num_temps, 3u);
8336 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8337 Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
8338 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8339 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8340 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8341 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8342 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8343 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8344 const uint32_t object_array_data_offset =
8345 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8346
8347 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
8348 SlowPathCodeARMVIXL* type_check_slow_path =
8349 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8350 instruction, is_type_check_slow_path_fatal);
8351 codegen_->AddSlowPath(type_check_slow_path);
8352
8353 vixl32::Label done;
8354 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
8355 // Avoid null check if we know obj is not null.
8356 if (instruction->MustDoNullCheck()) {
8357 __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8358 }
8359
8360 switch (type_check_kind) {
8361 case TypeCheckKind::kExactCheck:
8362 case TypeCheckKind::kArrayCheck: {
8363 // /* HeapReference<Class> */ temp = obj->klass_
8364 GenerateReferenceLoadTwoRegisters(instruction,
8365 temp_loc,
8366 obj_loc,
8367 class_offset,
8368 maybe_temp2_loc,
8369 kWithoutReadBarrier);
8370
8371 __ Cmp(temp, cls);
8372 // Jump to slow path for throwing the exception or doing a
8373 // more involved array check.
8374 __ B(ne, type_check_slow_path->GetEntryLabel());
8375 break;
8376 }
8377
8378 case TypeCheckKind::kAbstractClassCheck: {
8379 // /* HeapReference<Class> */ temp = obj->klass_
8380 GenerateReferenceLoadTwoRegisters(instruction,
8381 temp_loc,
8382 obj_loc,
8383 class_offset,
8384 maybe_temp2_loc,
8385 kWithoutReadBarrier);
8386
8387 // If the class is abstract, we eagerly fetch the super class of the
8388 // object to avoid doing a comparison we know will fail.
8389 vixl32::Label loop;
8390 __ Bind(&loop);
8391 // /* HeapReference<Class> */ temp = temp->super_class_
8392 GenerateReferenceLoadOneRegister(instruction,
8393 temp_loc,
8394 super_offset,
8395 maybe_temp2_loc,
8396 kWithoutReadBarrier);
8397
8398 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8399 // exception.
8400 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8401
8402 // Otherwise, compare the classes.
8403 __ Cmp(temp, cls);
8404 __ B(ne, &loop, /* is_far_target= */ false);
8405 break;
8406 }
8407
8408 case TypeCheckKind::kClassHierarchyCheck: {
8409 // /* HeapReference<Class> */ temp = obj->klass_
8410 GenerateReferenceLoadTwoRegisters(instruction,
8411 temp_loc,
8412 obj_loc,
8413 class_offset,
8414 maybe_temp2_loc,
8415 kWithoutReadBarrier);
8416
8417 // Walk over the class hierarchy to find a match.
8418 vixl32::Label loop;
8419 __ Bind(&loop);
8420 __ Cmp(temp, cls);
8421 __ B(eq, final_label, /* is_far_target= */ false);
8422
8423 // /* HeapReference<Class> */ temp = temp->super_class_
8424 GenerateReferenceLoadOneRegister(instruction,
8425 temp_loc,
8426 super_offset,
8427 maybe_temp2_loc,
8428 kWithoutReadBarrier);
8429
8430 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8431 // exception.
8432 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8433 // Otherwise, jump to the beginning of the loop.
8434 __ B(&loop);
8435 break;
8436 }
8437
8438 case TypeCheckKind::kArrayObjectCheck: {
8439 // /* HeapReference<Class> */ temp = obj->klass_
8440 GenerateReferenceLoadTwoRegisters(instruction,
8441 temp_loc,
8442 obj_loc,
8443 class_offset,
8444 maybe_temp2_loc,
8445 kWithoutReadBarrier);
8446
8447 // Do an exact check.
8448 __ Cmp(temp, cls);
8449 __ B(eq, final_label, /* is_far_target= */ false);
8450
8451 // Otherwise, we need to check that the object's class is a non-primitive array.
8452 // /* HeapReference<Class> */ temp = temp->component_type_
8453 GenerateReferenceLoadOneRegister(instruction,
8454 temp_loc,
8455 component_offset,
8456 maybe_temp2_loc,
8457 kWithoutReadBarrier);
8458 // If the component type is null, jump to the slow path to throw the exception.
8459 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8460 // Otherwise, the object is indeed an array; further check that its
8461 // component type is not a primitive type.
8462 GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
8463 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8464 __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
8465 break;
8466 }
8467
8468 case TypeCheckKind::kUnresolvedCheck:
8469 // We always go into the type check slow path for the unresolved check case.
8470 // We cannot directly call the CheckCast runtime entry point
8471 // without resorting to a type checking slow path here (i.e. by
8472 // calling InvokeRuntime directly), as it would require
8473 // assigning fixed registers for the inputs of this HCheckCast
8474 // instruction (following the runtime calling convention), which
8475 // might be cluttered by the potential first read barrier
8476 // emission at the beginning of this method.
8477
8478 __ B(type_check_slow_path->GetEntryLabel());
8479 break;
8480
8481 case TypeCheckKind::kInterfaceCheck: {
8482 // Avoid read barriers to improve the performance of the fast path. We cannot get
8483 // false positives by doing this.
8484 // /* HeapReference<Class> */ temp = obj->klass_
8485 GenerateReferenceLoadTwoRegisters(instruction,
8486 temp_loc,
8487 obj_loc,
8488 class_offset,
8489 maybe_temp2_loc,
8490 kWithoutReadBarrier);
8491
8492 // /* HeapReference<Class> */ temp = temp->iftable_
8493 GenerateReferenceLoadTwoRegisters(instruction,
8494 temp_loc,
8495 temp_loc,
8496 iftable_offset,
8497 maybe_temp2_loc,
8498 kWithoutReadBarrier);
8499 // Iftable is never null.
8500 __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
8501 // Loop through the iftable and check if any class matches.
8502 vixl32::Label start_loop;
8503 __ Bind(&start_loop);
8504 __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
8505 type_check_slow_path->GetEntryLabel());
8506 __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
8507 GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
8508 // Go to next interface.
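// Each iftable entry occupies two reference slots (the interface class followed by
// its method array), hence the stride of 2 * kHeapReferenceSize and the decrement by 2.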
8509 __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
8510 __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
8511 // Compare the classes and continue the loop if they do not match.
8512 __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
8513 __ B(ne, &start_loop, /* is_far_target= */ false);
8514 break;
8515 }
8516
8517 case TypeCheckKind::kBitstringCheck: {
8518 // /* HeapReference<Class> */ temp = obj->klass_
8519 GenerateReferenceLoadTwoRegisters(instruction,
8520 temp_loc,
8521 obj_loc,
8522 class_offset,
8523 maybe_temp2_loc,
8524 kWithoutReadBarrier);
8525
8526 GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
8527 __ B(ne, type_check_slow_path->GetEntryLabel());
8528 break;
8529 }
8530 }
8531 if (done.IsReferenced()) {
8532 __ Bind(&done);
8533 }
8534
8535 __ Bind(type_check_slow_path->GetExitLabel());
8536 }
8537
8538 void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8539 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8540 instruction, LocationSummary::kCallOnMainOnly);
8541 InvokeRuntimeCallingConventionARMVIXL calling_convention;
8542 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8543 }
8544
8545 void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8546 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
8547 instruction,
8548 instruction->GetDexPc());
8549 if (instruction->IsEnter()) {
8550 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8551 } else {
8552 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8553 }
8554 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 19);
8555 }
8556
8557 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
8558 HandleBitwiseOperation(instruction, AND);
8559 }
8560
8561 void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
8562 HandleBitwiseOperation(instruction, ORR);
8563 }
8564
8565 void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
8566 HandleBitwiseOperation(instruction, EOR);
8567 }
8568
8569 void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
8570 LocationSummary* locations =
8571 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8572 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8573 || instruction->GetResultType() == DataType::Type::kInt64);
8574 // Note: GVN reorders commutative operations to have the constant on the right hand side.
8575 locations->SetInAt(0, Location::RequiresRegister());
8576 locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
8577 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8578 }
8579
8580 void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
8581 HandleBitwiseOperation(instruction);
8582 }
8583
8584 void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
8585 HandleBitwiseOperation(instruction);
8586 }
8587
8588 void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
8589 HandleBitwiseOperation(instruction);
8590 }
8591
8592 void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8593 LocationSummary* locations =
8594 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8595 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8596 || instruction->GetResultType() == DataType::Type::kInt64);
8597
8598 locations->SetInAt(0, Location::RequiresRegister());
8599 locations->SetInAt(1, Location::RequiresRegister());
8600 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8601 }
8602
8603 void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8604 LocationSummary* locations = instruction->GetLocations();
8605 Location first = locations->InAt(0);
8606 Location second = locations->InAt(1);
8607 Location out = locations->Out();
8608
8609 if (instruction->GetResultType() == DataType::Type::kInt32) {
8610 vixl32::Register first_reg = RegisterFrom(first);
8611 vixl32::Register second_reg = RegisterFrom(second);
8612 vixl32::Register out_reg = RegisterFrom(out);
8613
8614 switch (instruction->GetOpKind()) {
8615 case HInstruction::kAnd:
8616 __ Bic(out_reg, first_reg, second_reg);
8617 break;
8618 case HInstruction::kOr:
8619 __ Orn(out_reg, first_reg, second_reg);
8620 break;
8621 // There is no EON on arm.
8622 case HInstruction::kXor:
8623 default:
8624 LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8625 UNREACHABLE();
8626 }
8627 return;
8628
8629 } else {
8630 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8631 vixl32::Register first_low = LowRegisterFrom(first);
8632 vixl32::Register first_high = HighRegisterFrom(first);
8633 vixl32::Register second_low = LowRegisterFrom(second);
8634 vixl32::Register second_high = HighRegisterFrom(second);
8635 vixl32::Register out_low = LowRegisterFrom(out);
8636 vixl32::Register out_high = HighRegisterFrom(out);
8637
8638 switch (instruction->GetOpKind()) {
8639 case HInstruction::kAnd:
8640 __ Bic(out_low, first_low, second_low);
8641 __ Bic(out_high, first_high, second_high);
8642 break;
8643 case HInstruction::kOr:
8644 __ Orn(out_low, first_low, second_low);
8645 __ Orn(out_high, first_high, second_high);
8646 break;
8647 // There is no EON on arm.
8648 case HInstruction::kXor:
8649 default:
8650 LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8651 UNREACHABLE();
8652 }
8653 }
8654 }
8655
8656 void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
8657 HDataProcWithShifterOp* instruction) {
8658 DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
8659 instruction->GetType() == DataType::Type::kInt64);
8660 LocationSummary* locations =
8661 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8662 const bool overlap = instruction->GetType() == DataType::Type::kInt64 &&
8663 HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
8664
8665 locations->SetInAt(0, Location::RequiresRegister());
8666 locations->SetInAt(1, Location::RequiresRegister());
8667 locations->SetOut(Location::RequiresRegister(),
8668 overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
8669 }
8670
8671 void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
8672 HDataProcWithShifterOp* instruction) {
8673 const LocationSummary* const locations = instruction->GetLocations();
8674 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
8675 const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
8676
8677 if (instruction->GetType() == DataType::Type::kInt32) {
8678 const vixl32::Register first = InputRegisterAt(instruction, 0);
8679 const vixl32::Register output = OutputRegister(instruction);
8680 const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64
8681 ? LowRegisterFrom(locations->InAt(1))
8682 : InputRegisterAt(instruction, 1);
8683
8684 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8685 DCHECK_EQ(kind, HInstruction::kAdd);
8686
8687 switch (op_kind) {
8688 case HDataProcWithShifterOp::kUXTB:
8689 __ Uxtab(output, first, second);
8690 break;
8691 case HDataProcWithShifterOp::kUXTH:
8692 __ Uxtah(output, first, second);
8693 break;
8694 case HDataProcWithShifterOp::kSXTB:
8695 __ Sxtab(output, first, second);
8696 break;
8697 case HDataProcWithShifterOp::kSXTH:
8698 __ Sxtah(output, first, second);
8699 break;
8700 default:
8701 LOG(FATAL) << "Unexpected operation kind: " << op_kind;
8702 UNREACHABLE();
8703 }
8704 } else {
8705 GenerateDataProcInstruction(kind,
8706 output,
8707 first,
8708 Operand(second,
8709 ShiftFromOpKind(op_kind),
8710 instruction->GetShiftAmount()),
8711 codegen_);
8712 }
8713 } else {
8714 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
8715
8716 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8717 const vixl32::Register second = InputRegisterAt(instruction, 1);
8718
8719 DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
8720 GenerateDataProc(kind,
8721 locations->Out(),
8722 locations->InAt(0),
8723 second,
8724 Operand(second, ShiftType::ASR, 31),
8725 codegen_);
8726 } else {
8727 GenerateLongDataProc(instruction, codegen_);
8728 }
8729 }
8730 }
8731
8732 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8733 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
8734 vixl32::Register first,
8735 uint32_t value) {
8736 // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
8737 if (value == 0xffffffffu) {
8738 if (!out.Is(first)) {
8739 __ Mov(out, first);
8740 }
8741 return;
8742 }
8743 if (value == 0u) {
8744 __ Mov(out, 0);
8745 return;
8746 }
8747 if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
8748 __ And(out, first, value);
8749 } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
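// BIC with the bitwise-negated immediate computes first & ~(~value), i.e. first & value.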
8750 __ Bic(out, first, ~value);
8751 } else {
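// The remaining case is a contiguous low-bit mask (value == 2^n - 1, checked below), which
// an unsigned bit-field extract of the n low bits implements directly.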
8752 DCHECK(IsPowerOfTwo(value + 1));
8753 __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
8754 }
8755 }
8756
8757 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8758 void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
8759 vixl32::Register first,
8760 uint32_t value) {
8761 // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
8762 if (value == 0u) {
8763 if (!out.Is(first)) {
8764 __ Mov(out, first);
8765 }
8766 return;
8767 }
8768 if (value == 0xffffffffu) {
8769 __ Mvn(out, 0);
8770 return;
8771 }
8772 if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
8773 __ Orr(out, first, value);
8774 } else {
8775 DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
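// ORN with the bitwise-negated immediate computes first | ~(~value), i.e. first | value.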
8776 __ Orn(out, first, ~value);
8777 }
8778 }
8779
8780 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8781 void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
8782 vixl32::Register first,
8783 uint32_t value) {
8784 // Optimize the special case for individual halves of `xor-long` (`xor` is simplified earlier).
8785 if (value == 0u) {
8786 if (!out.Is(first)) {
8787 __ Mov(out, first);
8788 }
8789 return;
8790 }
8791 __ Eor(out, first, value);
8792 }
8793
8794 void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
8795 Location first,
8796 uint64_t value) {
8797 vixl32::Register out_low = LowRegisterFrom(out);
8798 vixl32::Register out_high = HighRegisterFrom(out);
8799 vixl32::Register first_low = LowRegisterFrom(first);
8800 vixl32::Register first_high = HighRegisterFrom(first);
8801 uint32_t value_low = Low32Bits(value);
8802 uint32_t value_high = High32Bits(value);
8803 if (value_low == 0u) {
8804 if (!out_low.Is(first_low)) {
8805 __ Mov(out_low, first_low);
8806 }
8807 __ Add(out_high, first_high, value_high);
8808 return;
8809 }
8810 __ Adds(out_low, first_low, value_low);
8811 if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) {
8812 __ Adc(out_high, first_high, value_high);
8813 } else {
8814 DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high));
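// SBC with the bitwise-negated immediate is equivalent to ADC with the original value:
// first - ~value_high - (1 - C) == first + value_high + C (mod 2^32).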
8815 __ Sbc(out_high, first_high, ~value_high);
8816 }
8817 }
8818
8819 void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
8820 LocationSummary* locations = instruction->GetLocations();
8821 Location first = locations->InAt(0);
8822 Location second = locations->InAt(1);
8823 Location out = locations->Out();
8824
8825 if (second.IsConstant()) {
8826 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
8827 uint32_t value_low = Low32Bits(value);
8828 if (instruction->GetResultType() == DataType::Type::kInt32) {
8829 vixl32::Register first_reg = InputRegisterAt(instruction, 0);
8830 vixl32::Register out_reg = OutputRegister(instruction);
8831 if (instruction->IsAnd()) {
8832 GenerateAndConst(out_reg, first_reg, value_low);
8833 } else if (instruction->IsOr()) {
8834 GenerateOrrConst(out_reg, first_reg, value_low);
8835 } else {
8836 DCHECK(instruction->IsXor());
8837 GenerateEorConst(out_reg, first_reg, value_low);
8838 }
8839 } else {
8840 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8841 uint32_t value_high = High32Bits(value);
8842 vixl32::Register first_low = LowRegisterFrom(first);
8843 vixl32::Register first_high = HighRegisterFrom(first);
8844 vixl32::Register out_low = LowRegisterFrom(out);
8845 vixl32::Register out_high = HighRegisterFrom(out);
8846 if (instruction->IsAnd()) {
8847 GenerateAndConst(out_low, first_low, value_low);
8848 GenerateAndConst(out_high, first_high, value_high);
8849 } else if (instruction->IsOr()) {
8850 GenerateOrrConst(out_low, first_low, value_low);
8851 GenerateOrrConst(out_high, first_high, value_high);
8852 } else {
8853 DCHECK(instruction->IsXor());
8854 GenerateEorConst(out_low, first_low, value_low);
8855 GenerateEorConst(out_high, first_high, value_high);
8856 }
8857 }
8858 return;
8859 }
8860
8861 if (instruction->GetResultType() == DataType::Type::kInt32) {
8862 vixl32::Register first_reg = InputRegisterAt(instruction, 0);
8863 vixl32::Register second_reg = InputRegisterAt(instruction, 1);
8864 vixl32::Register out_reg = OutputRegister(instruction);
8865 if (instruction->IsAnd()) {
8866 __ And(out_reg, first_reg, second_reg);
8867 } else if (instruction->IsOr()) {
8868 __ Orr(out_reg, first_reg, second_reg);
8869 } else {
8870 DCHECK(instruction->IsXor());
8871 __ Eor(out_reg, first_reg, second_reg);
8872 }
8873 } else {
8874 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8875 vixl32::Register first_low = LowRegisterFrom(first);
8876 vixl32::Register first_high = HighRegisterFrom(first);
8877 vixl32::Register second_low = LowRegisterFrom(second);
8878 vixl32::Register second_high = HighRegisterFrom(second);
8879 vixl32::Register out_low = LowRegisterFrom(out);
8880 vixl32::Register out_high = HighRegisterFrom(out);
8881 if (instruction->IsAnd()) {
8882 __ And(out_low, first_low, second_low);
8883 __ And(out_high, first_high, second_high);
8884 } else if (instruction->IsOr()) {
8885 __ Orr(out_low, first_low, second_low);
8886 __ Orr(out_high, first_high, second_high);
8887 } else {
8888 DCHECK(instruction->IsXor());
8889 __ Eor(out_low, first_low, second_low);
8890 __ Eor(out_high, first_high, second_high);
8891 }
8892 }
8893 }
8894
8895 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
8896 HInstruction* instruction,
8897 Location out,
8898 uint32_t offset,
8899 Location maybe_temp,
8900 ReadBarrierOption read_barrier_option) {
8901 vixl32::Register out_reg = RegisterFrom(out);
8902 if (read_barrier_option == kWithReadBarrier) {
8903 CHECK(gUseReadBarrier);
8904 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8905 if (kUseBakerReadBarrier) {
8906 // Load with fast path based Baker's read barrier.
8907 // /* HeapReference<Object> */ out = *(out + offset)
8908 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8909 instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false);
8910 } else {
8911 // Load with slow path based read barrier.
8912 // Save the value of `out` into `maybe_temp` before overwriting it
8913 // in the following move operation, as we will need it for the
8914 // read barrier below.
8915 __ Mov(RegisterFrom(maybe_temp), out_reg);
8916 // /* HeapReference<Object> */ out = *(out + offset)
8917 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
8918 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8919 }
8920 } else {
8921 // Plain load with no read barrier.
8922 // /* HeapReference<Object> */ out = *(out + offset)
8923 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
8924 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
8925 }
8926 }
8927
8928 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
8929 HInstruction* instruction,
8930 Location out,
8931 Location obj,
8932 uint32_t offset,
8933 Location maybe_temp,
8934 ReadBarrierOption read_barrier_option) {
8935 vixl32::Register out_reg = RegisterFrom(out);
8936 vixl32::Register obj_reg = RegisterFrom(obj);
8937 if (read_barrier_option == kWithReadBarrier) {
8938 CHECK(gUseReadBarrier);
8939 if (kUseBakerReadBarrier) {
8940 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8941 // Load with fast path based Baker's read barrier.
8942 // /* HeapReference<Object> */ out = *(obj + offset)
8943 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8944 instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false);
8945 } else {
8946 // Load with slow path based read barrier.
8947 // /* HeapReference<Object> */ out = *(obj + offset)
8948 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
8949 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8950 }
8951 } else {
8952 // Plain load with no read barrier.
8953 // /* HeapReference<Object> */ out = *(obj + offset)
8954 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
8955 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
8956 }
8957 }
8958
8959 void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
8960 HInstruction* instruction,
8961 Location root,
8962 vixl32::Register obj,
8963 uint32_t offset,
8964 ReadBarrierOption read_barrier_option) {
8965 vixl32::Register root_reg = RegisterFrom(root);
8966 if (read_barrier_option == kWithReadBarrier) {
8967 DCHECK(gUseReadBarrier);
8968 if (kUseBakerReadBarrier) {
8969 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8970 // Baker's read barriers are used.
8971
8972 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
8973 // the Marking Register) to decide whether we need to enter
8974 // the slow path to mark the GC root.
8975 //
8976 // We use shared thunks for the slow path; shared within the method
8977 // for JIT, across methods for AOT. That thunk checks the reference
8978 // and jumps to the entrypoint if needed.
8979 //
8980 // lr = &return_address;
8981 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
8982 // if (mr) { // Thread::Current()->GetIsGcMarking()
8983 // goto gc_root_thunk<root_reg>(lr)
8984 // }
8985 // return_address:
8986
8987 UseScratchRegisterScope temps(GetVIXLAssembler());
8988 temps.Exclude(ip);
8989 bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
8990 uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
8991
8992 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
8993 size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
8994 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
8995 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
8996 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
8997 vixl32::Label return_address;
8998 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
8999 __ cmp(mr, Operand(0));
9000 // Currently the offset is always within range. If that changes,
9001 // we shall have to split the load the same way as for fields.
9002 DCHECK_LT(offset, kReferenceLoadMinFarOffset);
9003 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9004 __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
9005 EmitBakerReadBarrierBne(custom_data);
9006 __ bind(&return_address);
9007 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9008 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9009 : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
9010 } else {
9011 // GC root loaded through a slow path for read barriers other
9012 // than Baker's.
9013 // /* GcRoot<mirror::Object>* */ root = obj + offset
9014 __ Add(root_reg, obj, offset);
9015 // /* mirror::Object* */ root = root->Read()
9016 GenerateReadBarrierForRootSlow(instruction, root, root);
9017 }
9018 } else {
9019 // Plain GC root load with no read barrier.
9020 // /* GcRoot<mirror::Object> */ root = *(obj + offset)
9021 GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
9022 // Note that GC roots are not affected by heap poisoning, thus we
9023 // do not have to unpoison `root_reg` here.
9024 }
9025 MaybeGenerateMarkingRegisterCheck(/* code= */ 20);
9026 }
9027
9028 void CodeGeneratorARMVIXL::GenerateIntrinsicCasMoveWithBakerReadBarrier(
9029 vixl::aarch32::Register marked_old_value,
9030 vixl::aarch32::Register old_value) {
9031 DCHECK(gUseReadBarrier);
9032 DCHECK(kUseBakerReadBarrier);
9033
9034 // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
9035 // For low registers, we can reuse the GC root narrow entrypoint, for high registers
9036 // we use a specialized entrypoint because the register bits are 8-11 instead of 12-15.
9037 bool narrow_mov = marked_old_value.IsLow();
9038 uint32_t custom_data = narrow_mov
9039 ? EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode(), /*narrow=*/ true)
9040 : EncodeBakerReadBarrierIntrinsicCasData(marked_old_value.GetCode());
9041
9042 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* MOV */ (narrow_mov ? 1u : 0u);
9043 size_t wide_instructions = /* ADR+CMP+MOV+BNE */ 4u - narrow_instructions;
9044 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9045 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9046 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9047 vixl32::Label return_address;
9048 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9049 __ cmp(mr, Operand(0));
9050 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9051 __ mov(EncodingSize(narrow_mov ? Narrow : Wide), marked_old_value, old_value);
9052 EmitBakerReadBarrierBne(custom_data);
9053 __ bind(&return_address);
9054 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9055 narrow_mov
9056 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9057 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET);
9058 }
9059
9060 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9061 Location ref,
9062 vixl32::Register obj,
9063 const vixl32::MemOperand& src,
9064 bool needs_null_check) {
9065 DCHECK(gUseReadBarrier);
9066 DCHECK(kUseBakerReadBarrier);
9067
9068 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9069 // Marking Register) to decide whether we need to enter the slow
9070 // path to mark the reference. Then, in the slow path, check the
9071 // gray bit in the lock word of the reference's holder (`obj`) to
9072 // decide whether to mark `ref` or not.
9073 //
9074 // We use shared thunks for the slow path; shared within the method
9075 // for JIT, across methods for AOT. That thunk checks the holder
9076 // and jumps to the entrypoint if needed. If the holder is not gray,
9077 // it creates a fake dependency and returns to the LDR instruction.
9078 //
9079 // lr = &gray_return_address;
9080 // if (mr) { // Thread::Current()->GetIsGcMarking()
9081 // goto field_thunk<holder_reg, base_reg>(lr)
9082 // }
9083 // not_gray_return_address:
9084 // // Original reference load. If the offset is too large to fit
9085 // // into LDR, we use an adjusted base register here.
9086 // HeapReference<mirror::Object> reference = *(obj+offset);
9087 // gray_return_address:
9088
9089 DCHECK(src.GetAddrMode() == vixl32::Offset);
9090 DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
9091 vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9092 bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
9093
9094 UseScratchRegisterScope temps(GetVIXLAssembler());
9095 temps.Exclude(ip);
9096 uint32_t custom_data =
9097 EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
9098
9099 {
9100 size_t narrow_instructions =
9101 /* CMP */ (mr.IsLow() ? 1u : 0u) +
9102 /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
9103 size_t wide_instructions =
9104 /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9105 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9106 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9107 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9108 vixl32::Label return_address;
9109 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9110 __ cmp(mr, Operand(0));
9111 EmitBakerReadBarrierBne(custom_data);
9112 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9113 __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
9114 if (needs_null_check) {
9115 MaybeRecordImplicitNullCheck(instruction);
9116 }
9117 // Note: We need a specific width for the unpoisoning NEG.
9118 if (kPoisonHeapReferences) {
9119 if (narrow) {
9120 // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
9121 __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
9122 } else {
9123 __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9124 }
9125 }
9126 __ bind(&return_address);
9127 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9128 narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
9129 : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
9130 }
9131 MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip));
9132 }
9133
9134 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9135 Location ref,
9136 vixl32::Register obj,
9137 uint32_t offset,
9138 Location maybe_temp,
9139 bool needs_null_check) {
9140 DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
9141 vixl32::Register base = obj;
9142 if (offset >= kReferenceLoadMinFarOffset) {
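// Split a far offset: add the aligned high bits to a temp base register so that the
// remaining low offset stays below kReferenceLoadMinFarOffset, keeping the LDR immediate
// in the range handled by the Baker read barrier thunk.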
9143 base = RegisterFrom(maybe_temp);
9144 static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
9145 __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
9146 offset &= (kReferenceLoadMinFarOffset - 1u);
9147 }
9148 GenerateFieldLoadWithBakerReadBarrier(
9149 instruction, ref, obj, MemOperand(base, offset), needs_null_check);
9150 }
9151
9152 void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
9153 vixl32::Register obj,
9154 uint32_t data_offset,
9155 Location index,
9156 Location temp,
9157 bool needs_null_check) {
9158 DCHECK(gUseReadBarrier);
9159 DCHECK(kUseBakerReadBarrier);
9160
9161 static_assert(
9162 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
9163 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
9164 ScaleFactor scale_factor = TIMES_4;
9165
9166 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9167 // Marking Register) to decide whether we need to enter the slow
9168 // path to mark the reference. Then, in the slow path, check the
9169 // gray bit in the lock word of the reference's holder (`obj`) to
9170 // decide whether to mark `ref` or not.
9171 //
9172 // We use shared thunks for the slow path; shared within the method
9173 // for JIT, across methods for AOT. That thunk checks the holder
9174 // and jumps to the entrypoint if needed. If the holder is not gray,
9175 // it creates a fake dependency and returns to the LDR instruction.
9176 //
9177 // lr = &gray_return_address;
9178 // if (mr) { // Thread::Current()->GetIsGcMarking()
9179 // goto array_thunk<base_reg>(lr)
9180 // }
9181 // not_gray_return_address:
9182 // // Original reference load. If the offset is too large to fit
9183 // // into LDR, we use an adjusted base register here.
9184 // HeapReference<mirror::Object> reference = data[index];
9185 // gray_return_address:
9186
9187 DCHECK(index.IsValid());
9188 vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
9189 vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9190 vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
9191
9192 UseScratchRegisterScope temps(GetVIXLAssembler());
9193 temps.Exclude(ip);
9194 uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
9195
9196 __ Add(data_reg, obj, Operand(data_offset));
9197 {
9198 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
9199 size_t wide_instructions =
9200 /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9201 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9202 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9203 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9204 vixl32::Label return_address;
9205 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9206 __ cmp(mr, Operand(0));
9207 EmitBakerReadBarrierBne(custom_data);
9208 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9209 __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
9210 DCHECK(!needs_null_check); // The thunk cannot handle the null check.
9211 // Note: We need a Wide NEG for the unpoisoning.
9212 if (kPoisonHeapReferences) {
9213 __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9214 }
9215 __ bind(&return_address);
9216 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9217 BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
9218 }
9219 MaybeGenerateMarkingRegisterCheck(/* code= */ 22, /* temp_loc= */ LocationFrom(ip));
9220 }
9221
9222 void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
9223 // The following condition is a compile-time one, so it does not have a run-time cost.
9224 if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) {
9225 // The following condition is a run-time one; it is executed after the
9226 // previous compile-time test, to avoid penalizing non-debug builds.
9227 if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
9228 UseScratchRegisterScope temps(GetVIXLAssembler());
9229 vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
9230 GetAssembler()->GenerateMarkingRegisterCheck(temp,
9231 kMarkingRegisterCheckBreakCodeBaseCode + code);
9232 }
9233 }
9234 }
9235
9236 SlowPathCodeARMVIXL* CodeGeneratorARMVIXL::AddReadBarrierSlowPath(HInstruction* instruction,
9237 Location out,
9238 Location ref,
9239 Location obj,
9240 uint32_t offset,
9241 Location index) {
9242 SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator())
9243 ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index);
9244 AddSlowPath(slow_path);
9245 return slow_path;
9246 }
9247
9248 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
9249 Location out,
9250 Location ref,
9251 Location obj,
9252 uint32_t offset,
9253 Location index) {
9254 DCHECK(gUseReadBarrier);
9255
9256 // Insert a slow path based read barrier *after* the reference load.
9257 //
9258 // If heap poisoning is enabled, the unpoisoning of the loaded
9259 // reference will be carried out by the runtime within the slow
9260 // path.
9261 //
9262 // Note that `ref` currently does not get unpoisoned (when heap
9263 // poisoning is enabled), which is alright as the `ref` argument is
9264 // not used by the artReadBarrierSlow entry point.
9265 //
9266 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
9267 SlowPathCodeARMVIXL* slow_path =
9268 AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
9269
9270 __ B(slow_path->GetEntryLabel());
9271 __ Bind(slow_path->GetExitLabel());
9272 }
9273
9274 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
9275 Location out,
9276 Location ref,
9277 Location obj,
9278 uint32_t offset,
9279 Location index) {
9280 if (gUseReadBarrier) {
9281 // Baker's read barriers shall be handled by the fast path
9282 // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
9283 DCHECK(!kUseBakerReadBarrier);
9284 // If heap poisoning is enabled, unpoisoning will be taken care of
9285 // by the runtime within the slow path.
9286 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
9287 } else if (kPoisonHeapReferences) {
9288 GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
9289 }
9290 }
9291
9292 void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
9293 Location out,
9294 Location root) {
9295 DCHECK(gUseReadBarrier);
9296
9297 // Insert a slow path based read barrier *after* the GC root load.
9298 //
9299 // Note that GC roots are not affected by heap poisoning, so we do
9300 // not need to do anything special for this here.
9301 SlowPathCodeARMVIXL* slow_path =
9302 new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root);
9303 AddSlowPath(slow_path);
9304
9305 __ B(slow_path->GetEntryLabel());
9306 __ Bind(slow_path->GetExitLabel());
9307 }
9308
9309 // Check if the desired_dispatch_info is supported. If it is, return it,
9310 // otherwise return a fall-back info that should be used instead.
9311 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
9312 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
9313 ArtMethod* method) {
9314 if (method->IsIntrinsic() &&
9315 desired_dispatch_info.code_ptr_location == CodePtrLocation::kCallCriticalNative) {
9316 // As a work-around for soft-float native ABI interfering with type checks, we are
9317 // inserting fake calls to Float.floatToRawIntBits() or Double.doubleToRawLongBits()
9318 // when a float or double argument is passed in core registers but we cannot do that
9319 // for actual intrinsic implementations that expect them in FP registers. Therefore
9320 // we do not use `kCallCriticalNative` for intrinsics with FP arguments; if they are
9321 // properly intrinsified, the dispatch type does not matter anyway.
9322 ScopedObjectAccess soa(Thread::Current());
9323 uint32_t shorty_len;
9324 const char* shorty = method->GetShorty(&shorty_len);
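// shorty[0] is the return type; only the argument types (shorty[1] onwards) are checked.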
9325 for (uint32_t i = 1; i != shorty_len; ++i) {
9326 if (shorty[i] == 'D' || shorty[i] == 'F') {
9327 HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
9328 dispatch_info.code_ptr_location = CodePtrLocation::kCallArtMethod;
9329 return dispatch_info;
9330 }
9331 }
9332 }
9333 return desired_dispatch_info;
9334 }
9335
9336
9337 void CodeGeneratorARMVIXL::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
9338 switch (load_kind) {
9339 case MethodLoadKind::kBootImageLinkTimePcRelative: {
9340 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9341 PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
9342 vixl32::Register temp_reg = RegisterFrom(temp);
9343 EmitMovwMovtPlaceholder(labels, temp_reg);
9344 break;
9345 }
9346 case MethodLoadKind::kBootImageRelRo: {
9347 uint32_t boot_image_offset = GetBootImageOffset(invoke);
9348 LoadBootImageRelRoEntry(RegisterFrom(temp), boot_image_offset);
9349 break;
9350 }
9351 case MethodLoadKind::kBssEntry: {
9352 PcRelativePatchInfo* labels = NewMethodBssEntryPatch(invoke->GetMethodReference());
9353 vixl32::Register temp_reg = RegisterFrom(temp);
9354 EmitMovwMovtPlaceholder(labels, temp_reg);
9355 // All aligned loads are implicitly atomic consume operations on ARM.
9356 GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9357 break;
9358 }
9359 case MethodLoadKind::kJitDirectAddress: {
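// The resolved ArtMethod* is known at JIT compile time and is not relocated, so it can
// be embedded directly as an immediate.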
9360 __ Mov(RegisterFrom(temp), Operand::From(invoke->GetResolvedMethod()));
9361 break;
9362 }
9363 case MethodLoadKind::kRuntimeCall: {
9364 // Test situation, don't do anything.
9365 break;
9366 }
9367 default: {
9368 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
9369 UNREACHABLE();
9370 }
9371 }
9372 }
9373
9374 void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
9375 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
9376 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
9377 switch (invoke->GetMethodLoadKind()) {
9378 case MethodLoadKind::kStringInit: {
9379 uint32_t offset =
9380 GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
9381 // temp = thread->string_init_entrypoint
9382 GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
9383 break;
9384 }
9385 case MethodLoadKind::kRecursive: {
9386 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
9387 break;
9388 }
9389 case MethodLoadKind::kRuntimeCall: {
9390 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
9391 return; // No code pointer retrieval; the runtime performs the call directly.
9392 }
9393 case MethodLoadKind::kBootImageLinkTimePcRelative:
9394 // Note: Unlike arm64, x86 and x86-64, we do not avoid the materialization of method
9395 // pointer for kCallCriticalNative because it would not save us an instruction from
9396 // the current sequence MOVW+MOVT+ADD(pc)+LDR+BL. The ADD(pc) separates the patched
9397 // offset instructions MOVW+MOVT from the entrypoint load, so they cannot be fused.
9398 FALLTHROUGH_INTENDED;
9399 default: {
9400 LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
9401 break;
9402 }
9403 }
9404
9405 auto call_code_pointer_member = [&](MemberOffset offset) {
9406 // LR = callee_method->member;
9407 GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
9408 {
9409 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9410 // blx in T32 has only a 16-bit encoding, which is why a stricter check is used for the scope.
9411 ExactAssemblyScope aas(GetVIXLAssembler(),
9412 vixl32::k16BitT32InstructionSizeInBytes,
9413 CodeBufferCheckScope::kExactSize);
9414 // LR()
9415 __ blx(lr);
9416 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9417 }
9418 };
9419 switch (invoke->GetCodePtrLocation()) {
9420 case CodePtrLocation::kCallSelf:
9421 {
9422 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
9423 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9424 ExactAssemblyScope aas(GetVIXLAssembler(),
9425 vixl32::k32BitT32InstructionSizeInBytes,
9426 CodeBufferCheckScope::kMaximumSize);
9427 __ bl(GetFrameEntryLabel());
9428 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9429 }
9430 break;
9431 case CodePtrLocation::kCallCriticalNative: {
9432 size_t out_frame_size =
9433 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
9434 kAapcsStackAlignment,
9435 GetCriticalNativeDirectCallFrameSize>(invoke);
9436 call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
9437 // Move the result when needed due to native and managed ABI mismatch.
9438 switch (invoke->GetType()) {
9439 case DataType::Type::kFloat32:
9440 __ Vmov(s0, r0);
9441 break;
9442 case DataType::Type::kFloat64:
9443 __ Vmov(d0, r0, r1);
9444 break;
9445 case DataType::Type::kBool:
9446 case DataType::Type::kInt8:
9447 case DataType::Type::kUint16:
9448 case DataType::Type::kInt16:
9449 case DataType::Type::kInt32:
9450 case DataType::Type::kInt64:
9451 case DataType::Type::kVoid:
9452 break;
9453 default:
9454 DCHECK(false) << invoke->GetType();
9455 break;
9456 }
9457 if (out_frame_size != 0u) {
9458 DecreaseFrame(out_frame_size);
9459 }
9460 break;
9461 }
9462 case CodePtrLocation::kCallArtMethod:
9463 call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
9464 break;
9465 }
9466
9467 DCHECK(!IsLeafMethod());
9468 }
9469
9470 void CodeGeneratorARMVIXL::GenerateVirtualCall(
9471 HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
9472 vixl32::Register temp = RegisterFrom(temp_location);
9473 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9474 invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
9475
9476 // Use the calling convention instead of the location of the receiver, as
9477 // intrinsics may have put the receiver in a different register. In the intrinsics
9478 // slow path, the arguments have been moved to the right place, so here we are
9479 // guaranteed that the receiver is the first register of the calling convention.
9480 InvokeDexCallingConventionARMVIXL calling_convention;
9481 vixl32::Register receiver = calling_convention.GetRegisterAt(0);
9482 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
9483 {
9484 // Make sure the pc is recorded immediately after the `ldr` instruction.
9485 ExactAssemblyScope aas(GetVIXLAssembler(),
9486 vixl32::kMaxInstructionSizeInBytes,
9487 CodeBufferCheckScope::kMaximumSize);
9488 // /* HeapReference<Class> */ temp = receiver->klass_
9489 __ ldr(temp, MemOperand(receiver, class_offset));
9490 MaybeRecordImplicitNullCheck(invoke);
9491 }
9492 // Instead of simply (possibly) unpoisoning `temp` here, we should
9493 // emit a read barrier for the previous class reference load.
9494 // However this is not required in practice, as this is an
9495 // intermediate/temporary reference and because the current
9496 // concurrent copying collector keeps the from-space memory
9497 // intact/accessible until the end of the marking phase (though the
9498 // concurrent copying collector may not do so in the future).
9499 GetAssembler()->MaybeUnpoisonHeapReference(temp);
9500
9501 // If we're compiling baseline, update the inline cache.
9502 MaybeGenerateInlineCacheCheck(invoke, temp);
9503
9504 // temp = temp->GetMethodAt(method_offset);
9505 uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
9506 kArmPointerSize).Int32Value();
9507 GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
9508 // LR = temp->GetEntryPoint();
9509 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
9510 {
9511 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9512 // blx in T32 has only a 16-bit encoding, which is why a stricter size check is used for the scope.
9513 ExactAssemblyScope aas(GetVIXLAssembler(),
9514 vixl32::k16BitT32InstructionSizeInBytes,
9515 CodeBufferCheckScope::kExactSize);
9516 // LR();
9517 __ blx(lr);
9518 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9519 }
9520 }
9521
NewBootImageIntrinsicPatch(uint32_t intrinsic_data)9522 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
9523 uint32_t intrinsic_data) {
9524 return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_other_patches_);
9525 }
9526
NewBootImageRelRoPatch(uint32_t boot_image_offset)9527 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
9528 uint32_t boot_image_offset) {
9529 return NewPcRelativePatch(/* dex_file= */ nullptr,
9530 boot_image_offset,
9531 &boot_image_other_patches_);
9532 }
9533
NewBootImageMethodPatch(MethodReference target_method)9534 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
9535 MethodReference target_method) {
9536 return NewPcRelativePatch(
9537 target_method.dex_file, target_method.index, &boot_image_method_patches_);
9538 }
9539
NewMethodBssEntryPatch(MethodReference target_method)9540 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
9541 MethodReference target_method) {
9542 return NewPcRelativePatch(
9543 target_method.dex_file, target_method.index, &method_bss_entry_patches_);
9544 }
9545
NewBootImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index)9546 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch(
9547 const DexFile& dex_file, dex::TypeIndex type_index) {
9548 return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_);
9549 }
9550
NewTypeBssEntryPatch(HLoadClass * load_class)9551 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
9552 HLoadClass* load_class) {
9553 const DexFile& dex_file = load_class->GetDexFile();
9554 dex::TypeIndex type_index = load_class->GetTypeIndex();
9555 ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
9556 switch (load_class->GetLoadKind()) {
9557 case HLoadClass::LoadKind::kBssEntry:
9558 patches = &type_bss_entry_patches_;
9559 break;
9560 case HLoadClass::LoadKind::kBssEntryPublic:
9561 patches = &public_type_bss_entry_patches_;
9562 break;
9563 case HLoadClass::LoadKind::kBssEntryPackage:
9564 patches = &package_type_bss_entry_patches_;
9565 break;
9566 default:
9567 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
9568 UNREACHABLE();
9569 }
9570 return NewPcRelativePatch(&dex_file, type_index.index_, patches);
9571 }
9572
NewBootImageStringPatch(const DexFile & dex_file,dex::StringIndex string_index)9573 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch(
9574 const DexFile& dex_file, dex::StringIndex string_index) {
9575 return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_);
9576 }
9577
NewStringBssEntryPatch(const DexFile & dex_file,dex::StringIndex string_index)9578 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
9579 const DexFile& dex_file, dex::StringIndex string_index) {
9580 return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_);
9581 }
9582
NewPcRelativePatch(const DexFile * dex_file,uint32_t offset_or_index,ArenaDeque<PcRelativePatchInfo> * patches)9583 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
9584 const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
9585 patches->emplace_back(dex_file, offset_or_index);
9586 return &patches->back();
9587 }
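// Note: returning a pointer into `patches` is safe here because ArenaDeque, like std::deque, is
// understood not to invalidate references to existing elements when a new element is appended.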
9588
EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset)9589 void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) {
9590 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
9591 DCHECK(!GetCompilerOptions().IsJitCompiler());
9592 call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
9593 vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label;
9594 __ bind(bl_label);
9595 vixl32::Label placeholder_label;
9596 __ bl(&placeholder_label); // Placeholder, patched at link-time.
9597 __ bind(&placeholder_label);
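// The BL above initially targets the placeholder label bound right after it; at link time it is
// expected to be redirected to the entrypoint trampoline emitted for kCallEntrypoint patches
// (see EmitThunkCode()).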
9598 }
9599
EmitBakerReadBarrierBne(uint32_t custom_data)9600 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
9601 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
9602 if (GetCompilerOptions().IsJitCompiler()) {
9603 auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
9604 vixl::aarch32::Label* slow_path_entry = &it->second.label;
9605 __ b(ne, EncodingSize(Wide), slow_path_entry);
9606 } else {
9607 baker_read_barrier_patches_.emplace_back(custom_data);
9608 vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
9609 __ bind(patch_label);
9610 vixl32::Label placeholder_label;
9611 __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time.
9612 __ bind(&placeholder_label);
9613 }
9614 }
9615
DeduplicateBootImageAddressLiteral(uint32_t address)9616 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
9617 return DeduplicateUint32Literal(address, &uint32_literals_);
9618 }
9619
DeduplicateJitStringLiteral(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)9620 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
9621 const DexFile& dex_file,
9622 dex::StringIndex string_index,
9623 Handle<mirror::String> handle) {
9624 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
9625 return jit_string_patches_.GetOrCreate(
9626 StringReference(&dex_file, string_index),
9627 [this]() {
9628 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9629 });
9630 }
9631
DeduplicateJitClassLiteral(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)9632 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
9633 dex::TypeIndex type_index,
9634 Handle<mirror::Class> handle) {
9635 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
9636 return jit_class_patches_.GetOrCreate(
9637 TypeReference(&dex_file, type_index),
9638 [this]() {
9639 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9640 });
9641 }
9642
LoadBootImageRelRoEntry(vixl32::Register reg,uint32_t boot_image_offset)9643 void CodeGeneratorARMVIXL::LoadBootImageRelRoEntry(vixl32::Register reg,
9644 uint32_t boot_image_offset) {
9645 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
9646 EmitMovwMovtPlaceholder(labels, reg);
9647 __ Ldr(reg, MemOperand(reg, /*offset=*/ 0));
9648 }
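// Sketch of the sequence above: the MOVW/MOVT/ADD-PC placeholder is patched to produce the address
// of the corresponding boot-image rel.ro entry, and the LDR then reads the 32-bit boot image
// address stored there. (This matches the DataBimgRelRoPatch emitted for these labels in
// EmitLinkerPatches().)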
9649
LoadBootImageAddress(vixl32::Register reg,uint32_t boot_image_reference)9650 void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
9651 uint32_t boot_image_reference) {
9652 if (GetCompilerOptions().IsBootImage()) {
9653 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9654 NewBootImageIntrinsicPatch(boot_image_reference);
9655 EmitMovwMovtPlaceholder(labels, reg);
9656 } else if (GetCompilerOptions().GetCompilePic()) {
9657 LoadBootImageRelRoEntry(reg, boot_image_reference);
9658 } else {
9659 DCHECK(GetCompilerOptions().IsJitCompiler());
9660 gc::Heap* heap = Runtime::Current()->GetHeap();
9661 DCHECK(!heap->GetBootImageSpaces().empty());
9662 uintptr_t address =
9663 reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
9664 __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
9665 }
9666 }
9667
LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,TypeReference target_type)9668 void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,
9669 TypeReference target_type) {
9670 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
9671 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9672 PcRelativePatchInfo* labels =
9673 NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
9674 EmitMovwMovtPlaceholder(labels, reg);
9675 }
9676
LoadIntrinsicDeclaringClass(vixl32::Register reg,HInvoke * invoke)9677 void CodeGeneratorARMVIXL::LoadIntrinsicDeclaringClass(vixl32::Register reg, HInvoke* invoke) {
9678 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
9679 if (GetCompilerOptions().IsBootImage()) {
9680 MethodReference target_method = invoke->GetResolvedMethodReference();
9681 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
9682 LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
9683 } else {
9684 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
9685 LoadBootImageAddress(reg, boot_image_offset);
9686 }
9687 }
9688
LoadClassRootForIntrinsic(vixl::aarch32::Register reg,ClassRoot class_root)9689 void CodeGeneratorARMVIXL::LoadClassRootForIntrinsic(vixl::aarch32::Register reg,
9690 ClassRoot class_root) {
9691 if (GetCompilerOptions().IsBootImage()) {
9692 ScopedObjectAccess soa(Thread::Current());
9693 ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
9694 TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
9695 LoadTypeForBootImageIntrinsic(reg, target_type);
9696 } else {
9697 uint32_t boot_image_offset = GetBootImageOffset(class_root);
9698 LoadBootImageAddress(reg, boot_image_offset);
9699 }
9700 }
9701
9702 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)9703 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
9704 const ArenaDeque<PcRelativePatchInfo>& infos,
9705 ArenaVector<linker::LinkerPatch>* linker_patches) {
9706 for (const PcRelativePatchInfo& info : infos) {
9707 const DexFile* dex_file = info.target_dex_file;
9708 size_t offset_or_index = info.offset_or_index;
9709 DCHECK(info.add_pc_label.IsBound());
9710 uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
9711 // Add MOVW patch.
9712 DCHECK(info.movw_label.IsBound());
9713 uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
9714 linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index));
9715 // Add MOVT patch.
9716 DCHECK(info.movt_label.IsBound());
9717 uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
9718 linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index));
9719 }
9720 }
9721
9722 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)9723 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
9724 const DexFile* target_dex_file,
9725 uint32_t pc_insn_offset,
9726 uint32_t boot_image_offset) {
9727 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
9728 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
9729 }
9730
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)9731 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
9732 DCHECK(linker_patches->empty());
9733 size_t size =
9734 /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() +
9735 /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
9736 /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() +
9737 /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
9738 /* MOVW+MOVT for each entry */ 2u * public_type_bss_entry_patches_.size() +
9739 /* MOVW+MOVT for each entry */ 2u * package_type_bss_entry_patches_.size() +
9740 /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
9741 /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
9742 /* MOVW+MOVT for each entry */ 2u * boot_image_other_patches_.size() +
9743 call_entrypoint_patches_.size() +
9744 baker_read_barrier_patches_.size();
9745 linker_patches->reserve(size);
9746 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
9747 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
9748 boot_image_method_patches_, linker_patches);
9749 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
9750 boot_image_type_patches_, linker_patches);
9751 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
9752 boot_image_string_patches_, linker_patches);
9753 } else {
9754 DCHECK(boot_image_method_patches_.empty());
9755 DCHECK(boot_image_type_patches_.empty());
9756 DCHECK(boot_image_string_patches_.empty());
9757 }
9758 if (GetCompilerOptions().IsBootImage()) {
9759 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
9760 boot_image_other_patches_, linker_patches);
9761 } else {
9762 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
9763 boot_image_other_patches_, linker_patches);
9764 }
9765 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
9766 method_bss_entry_patches_, linker_patches);
9767 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
9768 type_bss_entry_patches_, linker_patches);
9769 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
9770 public_type_bss_entry_patches_, linker_patches);
9771 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
9772 package_type_bss_entry_patches_, linker_patches);
9773 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
9774 string_bss_entry_patches_, linker_patches);
9775 for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) {
9776 DCHECK(info.target_dex_file == nullptr);
9777 linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
9778 info.label.GetLocation(), info.offset_or_index));
9779 }
9780 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
9781 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
9782 info.label.GetLocation(), info.custom_data));
9783 }
9784 DCHECK_EQ(size, linker_patches->size());
9785 }
9786
NeedsThunkCode(const linker::LinkerPatch & patch) const9787 bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
9788 return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
9789 patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
9790 patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
9791 }
9792
EmitThunkCode(const linker::LinkerPatch & patch,ArenaVector<uint8_t> * code,std::string * debug_name)9793 void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
9794 /*out*/ ArenaVector<uint8_t>* code,
9795 /*out*/ std::string* debug_name) {
9796 arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
9797 switch (patch.GetType()) {
9798 case linker::LinkerPatch::Type::kCallRelative: {
9799 // The thunk just uses the entry point in the ArtMethod. This works even for calls
9800 // to the generic JNI and interpreter trampolines.
9801 MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
9802 assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value());
9803 assembler.GetVIXLAssembler()->Bkpt(0);
9804 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
9805 *debug_name = "MethodCallThunk";
9806 }
9807 break;
9808 }
9809 case linker::LinkerPatch::Type::kCallEntrypoint: {
9810 assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());
9811 assembler.GetVIXLAssembler()->Bkpt(0);
9812 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
9813 *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset());
9814 }
9815 break;
9816 }
9817 case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
9818 DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
9819 CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
9820 break;
9821 }
9822 default:
9823 LOG(FATAL) << "Unexpected patch type " << patch.GetType();
9824 UNREACHABLE();
9825 }
9826
9827 // Ensure we emit the literal pool if any.
9828 assembler.FinalizeCode();
9829 code->resize(assembler.CodeSize());
9830 MemoryRegion code_region(code->data(), code->size());
9831 assembler.FinalizeInstructions(code_region);
9832 }
9833
DeduplicateUint32Literal(uint32_t value,Uint32ToLiteralMap * map)9834 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
9835 uint32_t value,
9836 Uint32ToLiteralMap* map) {
9837 return map->GetOrCreate(
9838 value,
9839 [this, value]() {
9840 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value);
9841 });
9842 }
9843
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)9844 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
9845 LocationSummary* locations =
9846 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
9847 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
9848 Location::RequiresRegister());
9849 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
9850 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
9851 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
9852 }
9853
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)9854 void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
9855 vixl32::Register res = OutputRegister(instr);
9856 vixl32::Register accumulator =
9857 InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
9858 vixl32::Register mul_left =
9859 InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
9860 vixl32::Register mul_right =
9861 InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
9862
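// MLA computes res = accumulator + mul_left * mul_right, while MLS computes
// res = accumulator - mul_left * mul_right.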
9863 if (instr->GetOpKind() == HInstruction::kAdd) {
9864 __ Mla(res, mul_left, mul_right, accumulator);
9865 } else {
9866 __ Mls(res, mul_left, mul_right, accumulator);
9867 }
9868 }
9869
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)9870 void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
9871 // Nothing to do, this should be removed during prepare for register allocator.
9872 LOG(FATAL) << "Unreachable";
9873 }
9874
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)9875 void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
9876 // Nothing to do, this should be removed during prepare for register allocator.
9877 LOG(FATAL) << "Unreachable";
9878 }
9879
9880 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)9881 void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
9882 LocationSummary* locations =
9883 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
9884 locations->SetInAt(0, Location::RequiresRegister());
9885 if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
9886 codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
9887 locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base.
9888 if (switch_instr->GetStartValue() != 0) {
9889 locations->AddTemp(Location::RequiresRegister()); // We need a temp for the bias.
9890 }
9891 }
9892 }
9893
9894 // TODO(VIXL): Investigate and reach parity with the old ARM codegen.
VisitPackedSwitch(HPackedSwitch * switch_instr)9895 void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
9896 int32_t lower_bound = switch_instr->GetStartValue();
9897 uint32_t num_entries = switch_instr->GetNumEntries();
9898 LocationSummary* locations = switch_instr->GetLocations();
9899 vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
9900 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
9901
9902 if (num_entries <= kPackedSwitchCompareJumpThreshold ||
9903 !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
9904 // Create a series of compare/jumps.
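// For example (a worked sketch): with lower_bound == 10 and num_entries == 5, the biased value
// `value - 10` is first tested against 0, then the loop below handles {1, 2} and {3, 4} by
// subtracting 2 per iteration and branching on the lo/eq flags, and any other value falls
// through to the default block.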
9905 UseScratchRegisterScope temps(GetVIXLAssembler());
9906 vixl32::Register temp_reg = temps.Acquire();
9907 // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
9908 // store the immediate, because IP is also the destination register. For the other
9909 // AddConstantSetFlags() and GenerateCompareWithImmediate() calls, the immediate values are
9910 // constant and can be encoded in the instruction without making use of the IP register.
9911 __ Adds(temp_reg, value_reg, -lower_bound);
9912
9913 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
9914 // Jump to successors[0] if value == lower_bound.
9915 __ B(eq, codegen_->GetLabelOf(successors[0]));
9916 int32_t last_index = 0;
9917 for (; num_entries - last_index > 2; last_index += 2) {
9918 __ Adds(temp_reg, temp_reg, -2);
9919 // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
9920 __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
9921 // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
9922 __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
9923 }
9924 if (num_entries - last_index == 2) {
9925 // Handle the last remaining case value.
9926 __ Cmp(temp_reg, 1);
9927 __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
9928 }
9929
9930 // And the default for any other value.
9931 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
9932 __ B(codegen_->GetLabelOf(default_block));
9933 }
9934 } else {
9935 // Create a table lookup.
9936 vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
9937
9938 JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
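// Each jump table entry is a 32-bit value that, added to `table_base` below, yields the address
// branched to via BX; the entries are expected to be fixed up after code generation to the offset
// of each case's target block from the table start (with the Thumb bit accounted for).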
9939
9940 // Remove the bias.
9941 vixl32::Register key_reg;
9942 if (lower_bound != 0) {
9943 key_reg = RegisterFrom(locations->GetTemp(1));
9944 __ Sub(key_reg, value_reg, lower_bound);
9945 } else {
9946 key_reg = value_reg;
9947 }
9948
9949 // Check whether the value is in the table; jump to the default block if not.
9950 __ Cmp(key_reg, num_entries - 1);
9951 __ B(hi, codegen_->GetLabelOf(default_block));
9952
9953 UseScratchRegisterScope temps(GetVIXLAssembler());
9954 vixl32::Register jump_offset = temps.Acquire();
9955
9956 // Load jump offset from the table.
9957 {
9958 const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
9959 ExactAssemblyScope aas(GetVIXLAssembler(),
9960 (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
9961 CodeBufferCheckScope::kMaximumSize);
9962 __ adr(table_base, jump_table->GetTableStartLabel());
9963 __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
9964
9965 // Jump to the target block by branching to table_base (PC-relative) + offset.
9966 vixl32::Register target_address = table_base;
9967 __ add(target_address, table_base, jump_offset);
9968 __ bx(target_address);
9969
9970 jump_table->EmitTable(codegen_);
9971 }
9972 }
9973 }
9974
9975 // Copy the result of a call into the given target.
MoveFromReturnRegister(Location trg,DataType::Type type)9976 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) {
9977 if (!trg.IsValid()) {
9978 DCHECK_EQ(type, DataType::Type::kVoid);
9979 return;
9980 }
9981
9982 DCHECK_NE(type, DataType::Type::kVoid);
9983
9984 Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
9985 if (return_loc.Equals(trg)) {
9986 return;
9987 }
9988
9989 // Let the parallel move resolver take care of all of this.
9990 HParallelMove parallel_move(GetGraph()->GetAllocator());
9991 parallel_move.AddMove(return_loc, trg, type, nullptr);
9992 GetMoveResolver()->EmitNativeCode(&parallel_move);
9993 }
9994
VisitClassTableGet(HClassTableGet * instruction)9995 void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
9996 LocationSummary* locations =
9997 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
9998 locations->SetInAt(0, Location::RequiresRegister());
9999 locations->SetOut(Location::RequiresRegister());
10000 }
10001
VisitClassTableGet(HClassTableGet * instruction)10002 void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10003 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
10004 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
10005 instruction->GetIndex(), kArmPointerSize).SizeValue();
10006 GetAssembler()->LoadFromOffset(kLoadWord,
10007 OutputRegister(instruction),
10008 InputRegisterAt(instruction, 0),
10009 method_offset);
10010 } else {
10011 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
10012 instruction->GetIndex(), kArmPointerSize));
10013 GetAssembler()->LoadFromOffset(kLoadWord,
10014 OutputRegister(instruction),
10015 InputRegisterAt(instruction, 0),
10016 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
10017 GetAssembler()->LoadFromOffset(kLoadWord,
10018 OutputRegister(instruction),
10019 OutputRegister(instruction),
10020 method_offset);
10021 }
10022 }
10023
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,VIXLUInt32Literal * literal,uint64_t index_in_table)10024 static void PatchJitRootUse(uint8_t* code,
10025 const uint8_t* roots_data,
10026 VIXLUInt32Literal* literal,
10027 uint64_t index_in_table) {
10028 DCHECK(literal->IsBound());
10029 uint32_t literal_offset = literal->GetLocation();
10030 uintptr_t address =
10031 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
10032 uint8_t* data = code + literal_offset;
10033 reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
10034 }
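// The 32-bit literal emitted by DeduplicateJitStringLiteral()/DeduplicateJitClassLiteral() is
// overwritten here with the address of the root's slot in the JIT roots table, so the code that
// loads the literal ends up holding the address of that GC root entry.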
10035
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)10036 void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
10037 for (const auto& entry : jit_string_patches_) {
10038 const StringReference& string_reference = entry.first;
10039 VIXLUInt32Literal* table_entry_literal = entry.second;
10040 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
10041 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10042 }
10043 for (const auto& entry : jit_class_patches_) {
10044 const TypeReference& type_reference = entry.first;
10045 VIXLUInt32Literal* table_entry_literal = entry.second;
10046 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
10047 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10048 }
10049 }
10050
EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo * labels,vixl32::Register out)10051 void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
10052 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
10053 vixl32::Register out) {
10054 ExactAssemblyScope aas(GetVIXLAssembler(),
10055 3 * vixl32::kMaxInstructionSizeInBytes,
10056 CodeBufferCheckScope::kMaximumSize);
10057 // TODO(VIXL): Think about using mov instead of movw.
10058 __ bind(&labels->movw_label);
10059 __ movw(out, /* operand= */ 0u);
10060 __ bind(&labels->movt_label);
10061 __ movt(out, /* operand= */ 0u);
10062 __ bind(&labels->add_pc_label);
10063 __ add(out, out, pc);
10064 }
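// Schematically, the placeholder sequence emitted above is:
//   movw out, #0   ; patched with the low 16 bits of (target - (add_pc_label + 4))
//   movt out, #0   ; patched with the high 16 bits of (target - (add_pc_label + 4))
//   add  out, out, pc
// so that, with the Thumb PC reading as the ADD's address + 4, `out` holds the target address
// afterwards. (The exact bias is handled by the relative patcher; this is an editorial sketch.)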
10065
10066 #undef __
10067 #undef QUICK_ENTRY_POINT
10068 #undef TODO_VIXL32
10069
10070 #define __ assembler.GetVIXLAssembler()->
10071
EmitGrayCheckAndFastPath(ArmVIXLAssembler & assembler,vixl32::Register base_reg,vixl32::MemOperand & lock_word,vixl32::Label * slow_path,int32_t raw_ldr_offset,vixl32::Label * throw_npe=nullptr)10072 static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
10073 vixl32::Register base_reg,
10074 vixl32::MemOperand& lock_word,
10075 vixl32::Label* slow_path,
10076 int32_t raw_ldr_offset,
10077 vixl32::Label* throw_npe = nullptr) {
10078 // Load the lock word containing the rb_state.
10079 __ Ldr(ip, lock_word);
10080 // Given the numeric representation, it's enough to check the low bit of the rb_state.
10081 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
10082 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
10083 __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
10084 __ B(ne, slow_path, /* is_far_target= */ false);
10085 // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
10086 if (throw_npe != nullptr) {
10087 __ Bind(throw_npe);
10088 }
10089 __ Add(lr, lr, raw_ldr_offset);
10090 // Introduce a dependency on the lock_word including rb_state,
10091 // to prevent load-load reordering, and without using
10092 // a memory barrier (which would be more expensive).
10093 __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
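// (LSR #32 yields zero on AArch32, so the ADD leaves `base_reg` unchanged while making it
// data-dependent on the just-loaded lock word.)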
10094 __ Bx(lr); // And return back to the function.
10095 // Note: The fake dependency is unnecessary for the slow path.
10096 }
10097
10098 // Load the read barrier introspection entrypoint in register `entrypoint`
LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler & assembler)10099 static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
10100 // The register where the read barrier introspection entrypoint is loaded
10101 // is the marking register. We clobber it here and the entrypoint restores it to 1.
10102 vixl32::Register entrypoint = mr;
10103 // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
10104 DCHECK_EQ(ip.GetCode(), 12u);
10105 const int32_t entry_point_offset =
10106 Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
10107 __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
10108 return entrypoint;
10109 }
10110
CompileBakerReadBarrierThunk(ArmVIXLAssembler & assembler,uint32_t encoded_data,std::string * debug_name)10111 void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
10112 uint32_t encoded_data,
10113 /*out*/ std::string* debug_name) {
10114 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
10115 switch (kind) {
10116 case BakerReadBarrierKind::kField: {
10117 vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10118 CheckValidReg(base_reg.GetCode());
10119 vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
10120 CheckValidReg(holder_reg.GetCode());
10121 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10122 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10123 temps.Exclude(ip);
10124 // In the case of a field load, if `base_reg` differs from
10125 // `holder_reg`, the offset was too large and we must have emitted (during the construction
10126 // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
10127 // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
10128 // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
10129 // not necessarily do that check before going to the thunk.
10130 vixl32::Label throw_npe_label;
10131 vixl32::Label* throw_npe = nullptr;
10132 if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
10133 throw_npe = &throw_npe_label;
10134 __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false);
10135 }
10136 // Check if the holder is gray and, if not, add fake dependency to the base register
10137 // and return to the LDR instruction to load the reference. Otherwise, use introspection
10138 // to load the reference and call the entrypoint that performs further checks on the
10139 // reference and marks it if needed.
10140 vixl32::Label slow_path;
10141 MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
10142 const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
10143 ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
10144 : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
10145 EmitGrayCheckAndFastPath(
10146 assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
10147 __ Bind(&slow_path);
10148 const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10149 raw_ldr_offset;
10150 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10151 if (width == BakerReadBarrierWidth::kWide) {
10152 MemOperand ldr_half_address(lr, ldr_offset + 2);
10153 __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
10154 __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
10155 __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
10156 } else {
10157 MemOperand ldr_address(lr, ldr_offset);
10158 __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
10159 __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
10160 ep_reg, // for narrow LDR.
10161 Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
10162 __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
10163 __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
10164 }
10165 // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
10166 __ Bx(ep_reg); // Jump to the entrypoint.
10167 break;
10168 }
10169 case BakerReadBarrierKind::kArray: {
10170 vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10171 CheckValidReg(base_reg.GetCode());
10172 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10173 BakerReadBarrierSecondRegField::Decode(encoded_data));
10174 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10175 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10176 temps.Exclude(ip);
10177 vixl32::Label slow_path;
10178 int32_t data_offset =
10179 mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
10180 MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
10181 DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
10182 const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
10183 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
10184 __ Bind(&slow_path);
10185 const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10186 raw_ldr_offset;
10187 MemOperand ldr_address(lr, ldr_offset + 2);
10188 __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
10189 // i.e. Rm+32 because the scale in imm2 is 2.
10190 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10191 __ Bfi(ep_reg, ip, 3, 6); // Insert ip into the entrypoint address to create
10192 // a switch case target based on the index register.
10193 __ Mov(ip, base_reg); // Move the base register to ip.
10194 __ Bx(ep_reg); // Jump to the entrypoint's array switch case.
10195 break;
10196 }
10197 case BakerReadBarrierKind::kGcRoot:
10198 case BakerReadBarrierKind::kIntrinsicCas: {
10199 // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
10200 // and it does not have a forwarding address), call the correct introspection entrypoint;
10201 // otherwise return the reference (or the extracted forwarding address).
10202 // There is no gray bit check for GC roots.
10203 vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10204 CheckValidReg(root_reg.GetCode());
10205 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10206 BakerReadBarrierSecondRegField::Decode(encoded_data));
10207 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10208 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10209 temps.Exclude(ip);
10210 vixl32::Label return_label, not_marked, forwarding_address;
10211 __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false);
10212 MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
10213 __ Ldr(ip, lock_word);
10214 __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
10215 __ B(eq, &not_marked);
10216 __ Bind(&return_label);
10217 __ Bx(lr);
10218 __ Bind(&not_marked);
10219 static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
10220 "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
10221 " the highest bits and the 'forwarding address' state to have all bits set");
10222 __ Cmp(ip, Operand(0xc0000000));
10223 __ B(hs, &forwarding_address);
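// (With the lock word state in the top two bits and the forwarding-address state being 3, a lock
// word in that state is an unsigned value >= 0xc0000000, which the CMP/BHS pair above detects.)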
10224 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10225 // Adjust the art_quick_read_barrier_mark_introspection address
10226 // in kBakerCcEntrypointRegister to one of
10227 // art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},intrinsic_cas}.
10228 if (kind == BakerReadBarrierKind::kIntrinsicCas) {
10229 DCHECK(width == BakerReadBarrierWidth::kWide);
10230 DCHECK(!root_reg.IsLow());
10231 }
10232 int32_t entrypoint_offset =
10233 (kind == BakerReadBarrierKind::kGcRoot)
10234 ? (width == BakerReadBarrierWidth::kWide)
10235 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
10236 : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
10237 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET;
10238 __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
10239 __ Mov(ip, root_reg);
10240 __ Bx(ep_reg);
10241 __ Bind(&forwarding_address);
10242 __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
10243 __ Bx(lr);
10244 break;
10245 }
10246 default:
10247 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
10248 UNREACHABLE();
10249 }
10250
10251 // For JIT, the slow path is considered part of the compiled method,
10252 // so JIT should pass null as `debug_name`.
10253 DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
10254 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10255 std::ostringstream oss;
10256 oss << "BakerReadBarrierThunk";
10257 switch (kind) {
10258 case BakerReadBarrierKind::kField:
10259 oss << "Field";
10260 if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10261 oss << "Wide";
10262 }
10263 oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
10264 << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
10265 break;
10266 case BakerReadBarrierKind::kArray:
10267 oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10268 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10269 BakerReadBarrierSecondRegField::Decode(encoded_data));
10270 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10271 break;
10272 case BakerReadBarrierKind::kGcRoot:
10273 oss << "GcRoot";
10274 if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10275 oss << "Wide";
10276 }
10277 oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10278 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10279 BakerReadBarrierSecondRegField::Decode(encoded_data));
10280 break;
10281 case BakerReadBarrierKind::kIntrinsicCas:
10282 oss << "IntrinsicCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10283 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10284 BakerReadBarrierSecondRegField::Decode(encoded_data));
10285 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10286 break;
10287 }
10288 *debug_name = oss.str();
10289 }
10290 }
10291
10292 #undef __
10293
10294 } // namespace arm
10295 } // namespace art
10296