/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm_vixl.h"

#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "arch/arm/jni_frame_arm.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "base/globals.h"
#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_arm_vixl.h"
#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
#include "profiling_info_builder.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "trace.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

namespace art HIDDEN {
namespace arm {

namespace vixl32 = vixl::aarch32;
using namespace vixl32;  // NOLINT(build/namespaces)

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegister;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegister;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::Int64ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OperandFrom;
using helpers::OutputRegister;
using helpers::OutputSRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::Uint64ConstantFrom;

using vixl::EmissionCheckScope;
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

using RegisterList = vixl32::RegisterList;

static bool ExpectedPairLayout(Location location) {
  // We expect this for both core and fpu register pairs.
  return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
}
// Use a local definition to prevent copying mistakes.
static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
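// The number of packed switch cases at or below which a chain of compares and jumps is emitted
// instead of a table-based switch.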
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;

// Using a base helps identify when we hit Marking Register check breakpoints.
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;

#ifdef __
#error "ARM Codegen VIXL macro-assembler macro already defined."
#endif

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()

// Marker for code that is yet to be, and must be, implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "

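// Whether a narrow (16-bit) Thumb LDR encoding can be used: both registers must be low and the
// immediate offset must be small.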
static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
  return rt.IsLow() && rn.IsLow() && offset < 32u;
}

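// Helper that emits a wide ADR to `label` and, once the label is bound, patches the encoded
// offset in its destructor to set the Thumb mode bit of the computed address.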
class EmitAdrCode {
 public:
  EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
      : assembler_(assembler), rd_(rd), label_(label) {
    DCHECK(!assembler->AllowMacroInstructions());  // In ExactAssemblyScope.
    adr_location_ = assembler->GetCursorOffset();
    assembler->adr(EncodingSize(Wide), rd, label);
  }

  ~EmitAdrCode() {
    DCHECK(label_->IsBound());
    // The ADR emitted by the assembler does not set the Thumb mode bit we need.
    // TODO: Maybe extend VIXL to allow ADR for return address?
    uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
    // Expecting ADR encoding T3 with `(offset & 1) == 0`.
    DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u);  // Check bits 24-31, except 26.
    DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu);  // Check bits 16-23.
    DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode());  // Check bits 8-11 and 15.
    DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u);  // Check bit 0, i.e. the `offset & 1`.
    // Add the Thumb mode bit.
    raw_adr[2] |= 0x01u;
  }

 private:
  ArmVIXLMacroAssembler* const assembler_;
  vixl32::Register rd_;
  vixl32::Label* const label_;
  int32_t adr_location_;
};

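// Builds the set of caller-save registers to preserve for slow paths that use the SaveEverything
// calling convention with one core register argument and a reference output.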
static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
  // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
  // that the kPrimNot result register is the same as the first argument register.
  return caller_saves;
}

// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers;
// for each live D register they treat the two corresponding S registers as live.
//
// The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build
// from a list of contiguous S registers a list of contiguous D registers (handling the first/last
// S register corner cases) and save/restore this new list treating them as D registers, thereby:
// - decreasing code size,
// - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is
//   restored and then used in regular non-slow-path code as a D register.
//
// For the following example (v means the S register is live):
//   D names: |    D0   |    D1   |    D2   |    D3   | ...
//   S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
//   Live?    |    | v  | v  | v  | v  | v  | v  |    | ...
//
// S1 and S6 will be saved/restored independently; the D register list (D1, D2) will be processed
// as D registers.
//
// TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
// for lists of floating-point registers.
static size_t SaveContiguousSRegisterList(size_t first,
                                          size_t last,
                                          CodeGenerator* codegen,
                                          size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool save_last = false;
  if (last % 2 == 0) {
    save_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;

    if (number_of_d_regs == 1) {
      __ Vstr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (save_last) {
    __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}

static size_t RestoreContiguousSRegisterList(size_t first,
                                             size_t last,
                                             CodeGenerator* codegen,
                                             size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool restore_last = false;
  if (last % 2 == 0) {
    restore_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;
    if (number_of_d_regs == 1) {
      __ Vldr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (restore_last) {
    __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}

static LoadOperandType GetLoadOperandType(DataType::Type type) {
  switch (type) {
    case DataType::Type::kReference:
      return kLoadWord;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
      return kLoadUnsignedByte;
    case DataType::Type::kInt8:
      return kLoadSignedByte;
    case DataType::Type::kUint16:
      return kLoadUnsignedHalfword;
    case DataType::Type::kInt16:
      return kLoadSignedHalfword;
    case DataType::Type::kInt32:
      return kLoadWord;
    case DataType::Type::kInt64:
      return kLoadWordPair;
    case DataType::Type::kFloat32:
      return kLoadSWord;
    case DataType::Type::kFloat64:
      return kLoadDWord;
    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}

void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  orig_offset = stack_offset;
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  stack_offset = orig_offset;
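  // Save the live FP registers one contiguous range of S registers at a time, combining pairs
  // into D register stores where possible (see SaveContiguousSRegisterList above).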
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}

void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    stack_offset += kArmWordSize;
  }

  // TODO(VIXL): Check the coherency of stack_offset after this with a test.
  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
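  // Restore the live FP registers one contiguous range of S registers at a time, mirroring
  // SaveLiveRegisters() above.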
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}

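// Slow path calling the runtime to throw a NullPointerException for HNullCheck.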
class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
};

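// Slow path calling the runtime entrypoint that throws on division by zero for HDivZeroCheck.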
class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
};

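// Slow path calling the kQuickTestSuspend entrypoint for HSuspendCheck, then branching either
// back past the check or to the given successor block.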
class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCodeARMVIXL(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm_codegen->GetLabelOf(successor_));
    }
  }

  vixl32::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }

 private:
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl32::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
};

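// Slow path calling the runtime to throw an out-of-bounds exception for HBoundsCheck.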
class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kInt32,
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    arm_codegen->InvokeRuntime(entrypoint, instruction_, this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
};

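// Slow path resolving and/or initializing a class for HLoadClass and HClinitCheck.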
class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
      : SlowPathCodeARMVIXL(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()) ||
             arm_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
             ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
                             &cls_->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
      if (cls_->NeedsAccessCheck()) {
        CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
        arm_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, this);
      } else {
        CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
        arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, this);
      }
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
    }
    if (must_do_clinit) {
      arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    }
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
};

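// Slow path resolving a string for an HLoadString with a kBssEntry load kind.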
class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(instruction_->IsLoadString());
    DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
    arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();

    arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
};

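// Slow path for HInstanceOf and HCheckCast, calling the corresponding runtime entrypoint.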
class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;

    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
};

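// Slow path calling the kQuickDeoptimize entrypoint for HDeoptimize.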
class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));

    arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
};

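// Slow path for HArraySet, storing the reference via the kQuickAputObject runtime entrypoint.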
class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
                                             Location out,
                                             Location ref,
                                             Location obj,
                                             uint32_t offset,
                                             Location index)
      : SlowPathCodeARMVIXL(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ LoadFromOffset(kLoadWord, out, out, offset);
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register reg_out = RegisterFrom(out_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        vixl32::Register index_reg = RegisterFrom(index_);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
        if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
          // art::arm::ArmVIXLMacroAssembler::Add below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg, index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, TIMES_4);
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ Add(index_reg, index_reg, offset_);
      } else {
        // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset to an object field within an
        // object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        HInvoke* invoke = instruction_->AsInvoke();
        DCHECK(IsUnsafeGetReference(invoke) ||
               IsVarHandleGet(invoke) ||
               IsVarHandleCASFamily(invoke))
            << invoke->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        // Though UnsafeGet's offset location is a register pair, we only pass the low
        // part (high part is irrelevant for 32-bit addresses) to the slow path.
        // For VarHandle intrinsics, the index is always just a register.
        DCHECK(index_.IsRegister());
        index = index_;
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ Mov(calling_convention.GetRegisterAt(2), offset_);
    }
    arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    arm_codegen->Move32(out_, LocationFrom(r0));

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
  }

 private:
  vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    uint32_t ref = RegisterFrom(ref_).GetCode();
    uint32_t obj = RegisterFrom(obj_).GetCode();
    for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return vixl32::Register(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
      : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register reg_out = RegisterFrom(out_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
    DCHECK(instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_);
    arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    arm_codegen->Move32(out_, LocationFrom(r0));

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
};

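// Slow path calling the method entry or exit hook entrypoints for instrumentation.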
class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit MethodEntryExitHooksSlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    QuickEntrypointEnum entry_point =
        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);
    if (instruction_->IsMethodExitHook()) {
      // Load frame size to pass to the exit hooks
      __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
    }
    arm_codegen->InvokeRuntime(entry_point, instruction_, this);
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "MethodEntryExitHooksSlowPath";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL);
};

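// Slow path resetting the baseline hotness counter in the ProfilingInfo and calling the
// kQuickCompileOptimized entrypoint.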
class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  CompileOptimizedSlowPathARMVIXL(HSuspendCheck* suspend_check,
                                  vixl32::Register profiling_info)
      : SlowPathCodeARMVIXL(suspend_check),
        profiling_info_(profiling_info) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    uint32_t entry_point_offset =
        GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
    __ Bind(GetEntryLabel());
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
    vixl32::Register tmp = temps.Acquire();
    __ Mov(tmp, ProfilingInfo::GetOptimizeThreshold());
    __ Strh(tmp,
            MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
    __ Ldr(lr, MemOperand(tr, entry_point_offset));
    // Note: we don't record the call here (and therefore don't generate a stack
    // map), as the entrypoint should never be suspended.
    __ Blx(lr);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "CompileOptimizedSlowPath";
  }

 private:
  vixl32::Register profiling_info_;

  DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARMVIXL);
};

inline vixl32::Condition ARMCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    case kCondLT: return lt;
    case kCondLE: return le;
    case kCondGT: return gt;
    case kCondGE: return ge;
    case kCondB: return lo;
    case kCondBE: return ls;
    case kCondA: return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps signed condition to unsigned condition.
inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    // Signed to unsigned.
    case kCondLT: return lo;
    case kCondLE: return ls;
    case kCondGT: return hi;
    case kCondGE: return hs;
    // Unsigned remain unchanged.
    case kCondB: return lo;
    case kCondBE: return ls;
    case kCondA: return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
  // The ARM condition codes can express all the necessary branches, see the
  // "Meaning (floating-point)" column in Table A8-1 of the ARMv7 reference manual.
  // There is no dex instruction or HIR that would need the missing conditions
  // "equal or unordered" or "not equal".
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne /* unordered */;
    case kCondLT: return gt_bias ? cc : lt /* unordered */;
    case kCondLE: return gt_bias ? ls : le /* unordered */;
    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
  switch (op_kind) {
    case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
    case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
    case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
    default:
      LOG(FATAL) << "Unexpected op kind " << op_kind;
      UNREACHABLE();
  }
}

void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << vixl32::Register(reg);
}

void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << vixl32::SRegister(reg);
}

const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
  return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
}

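// Returns a bit mask with one bit set for each S register in the given contiguous register list.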
static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
  uint32_t mask = 0;
  for (uint32_t i = regs.GetFirstSRegister().GetCode();
       i <= regs.GetLastSRegister().GetCode();
       ++i) {
    mask |= (1 << i);
  }
  return mask;
}

// Saves the register in the stack. Returns the size taken on stack.
size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index,
                                              [[maybe_unused]] uint32_t reg_id) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

// Restores the register from the stack. Returns the size taken on stack.
size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index,
                                                 [[maybe_unused]] uint32_t reg_id) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
                                                       [[maybe_unused]] uint32_t reg_id) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
                                                          [[maybe_unused]] uint32_t reg_id) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

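// Emits a single 32-bit data-processing instruction of the given kind; a zero immediate second
// operand is folded into a plain move (or a clear, for AND).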
static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
                                        vixl32::Register out,
                                        vixl32::Register first,
                                        const Operand& second,
                                        CodeGeneratorARMVIXL* codegen) {
  if (second.IsImmediate() && second.GetImmediate() == 0) {
    const Operand in = kind == HInstruction::kAnd
        ? Operand(0)
        : Operand(first);

    __ Mov(out, in);
  } else {
    switch (kind) {
      case HInstruction::kAdd:
        __ Add(out, first, second);
        break;
      case HInstruction::kAnd:
        __ And(out, first, second);
        break;
      case HInstruction::kOr:
        __ Orr(out, first, second);
        break;
      case HInstruction::kSub:
        __ Sub(out, first, second);
        break;
      case HInstruction::kXor:
        __ Eor(out, first, second);
        break;
      default:
        LOG(FATAL) << "Unexpected instruction kind: " << kind;
        UNREACHABLE();
    }
  }
}

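// Emits a 64-bit data-processing operation on register pairs: ADDS/ADC or SUBS/SBC for additions
// and subtractions, independent per-half operations otherwise.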
static void GenerateDataProc(HInstruction::InstructionKind kind,
                             const Location& out,
                             const Location& first,
                             const Operand& second_lo,
                             const Operand& second_hi,
                             CodeGeneratorARMVIXL* codegen) {
  const vixl32::Register first_hi = HighRegisterFrom(first);
  const vixl32::Register first_lo = LowRegisterFrom(first);
  const vixl32::Register out_hi = HighRegisterFrom(out);
  const vixl32::Register out_lo = LowRegisterFrom(out);

  if (kind == HInstruction::kAdd) {
    __ Adds(out_lo, first_lo, second_lo);
    __ Adc(out_hi, first_hi, second_hi);
  } else if (kind == HInstruction::kSub) {
    __ Subs(out_lo, first_lo, second_lo);
    __ Sbc(out_hi, first_hi, second_hi);
  } else {
    GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
    GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
  }
}

static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
  return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
}

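// Emits a 64-bit data-processing operation (HDataProcWithShifterOp) where the right-hand side is
// a 64-bit register pair shifted by a constant amount.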
static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
                                 CodeGeneratorARMVIXL* codegen) {
  DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));

  const LocationSummary* const locations = instruction->GetLocations();
  const uint32_t shift_value = instruction->GetShiftAmount();
  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
  const Location first = locations->InAt(0);
  const Location second = locations->InAt(1);
  const Location out = locations->Out();
  const vixl32::Register first_hi = HighRegisterFrom(first);
  const vixl32::Register first_lo = LowRegisterFrom(first);
  const vixl32::Register out_hi = HighRegisterFrom(out);
  const vixl32::Register out_lo = LowRegisterFrom(out);
  const vixl32::Register second_hi = HighRegisterFrom(second);
  const vixl32::Register second_lo = LowRegisterFrom(second);
  const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());

  if (shift_value >= 32) {
    if (shift == ShiftType::LSL) {
      GenerateDataProcInstruction(kind,
                                  out_hi,
                                  first_hi,
                                  Operand(second_lo, ShiftType::LSL, shift_value - 32),
                                  codegen);
      GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
    } else if (shift == ShiftType::ASR) {
      GenerateDataProc(kind,
                       out,
                       first,
                       GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
                       Operand(second_hi, ShiftType::ASR, 31),
                       codegen);
    } else {
      DCHECK_EQ(shift, ShiftType::LSR);
      GenerateDataProc(kind,
                       out,
                       first,
                       GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
                       0,
                       codegen);
    }
  } else {
    DCHECK_GT(shift_value, 1U);
    DCHECK_LT(shift_value, 32U);

    UseScratchRegisterScope temps(codegen->GetVIXLAssembler());

    if (shift == ShiftType::LSL) {
      // We are not doing this for HInstruction::kAdd because the output will require
      // Location::kOutputOverlap; not applicable to other cases.
      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
        GenerateDataProcInstruction(kind,
                                    out_hi,
                                    first_hi,
                                    Operand(second_hi, ShiftType::LSL, shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_hi,
                                    out_hi,
                                    Operand(second_lo, ShiftType::LSR, 32 - shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_lo,
                                    first_lo,
                                    Operand(second_lo, ShiftType::LSL, shift_value),
                                    codegen);
      } else {
        const vixl32::Register temp = temps.Acquire();

        __ Lsl(temp, second_hi, shift_value);
        __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
        GenerateDataProc(kind,
                         out,
                         first,
                         Operand(second_lo, ShiftType::LSL, shift_value),
                         temp,
                         codegen);
      }
    } else {
      DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);

      // We are not doing this for HInstruction::kAdd because the output will require
      // Location::kOutputOverlap; not applicable to other cases.
      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
        GenerateDataProcInstruction(kind,
                                    out_lo,
                                    first_lo,
                                    Operand(second_lo, ShiftType::LSR, shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_lo,
                                    out_lo,
                                    Operand(second_hi, ShiftType::LSL, 32 - shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_hi,
                                    first_hi,
                                    Operand(second_hi, shift, shift_value),
                                    codegen);
      } else {
        const vixl32::Register temp = temps.Acquire();

        __ Lsr(temp, second_lo, shift_value);
        __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
        GenerateDataProc(kind,
                         out,
                         first,
                         temp,
                         Operand(second_hi, shift, shift_value),
                         codegen);
      }
    }
  }
}

static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
  const Location rhs_loc = instruction->GetLocations()->InAt(1);
  if (rhs_loc.IsConstant()) {
    // 0.0 is the only immediate that can be encoded directly in
    // a VCMP instruction.
    //
    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
    // specify that in a floating-point comparison, positive zero
    // and negative zero are considered equal, so we can use the
    // literal 0.0 for both cases here.
    //
    // Note however that some methods (Float.equal, Float.compare,
    // Float.compareTo, Double.equal, Double.compare,
    // Double.compareTo, Math.max, Math.min, StrictMath.max,
    // StrictMath.min) consider 0.0 to be (strictly) greater than
    // -0.0. So if we ever translate calls to these methods into a
    // HCompare instruction, we must handle the -0.0 case with
    // care here.
    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());

    const DataType::Type type = instruction->InputAt(0)->GetType();

    if (type == DataType::Type::kFloat32) {
      __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
    } else {
      DCHECK_EQ(type, DataType::Type::kFloat64);
      __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
    }
  } else {
    __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
  }
}

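// Rewrites comparisons against +1 or -1 as equivalent comparisons against 0 where possible,
// e.g. unsigned `x < 1` iff `x == 0` and `x > -1` iff `x >= 0`, updating `condition` and
// `opposite` accordingly, and returns the (possibly adjusted) constant.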
static int64_t AdjustConstantForCondition(int64_t value,
                                          IfCondition* condition,
                                          IfCondition* opposite) {
  if (value == 1) {
    if (*condition == kCondB) {
      value = 0;
      *condition = kCondEQ;
      *opposite = kCondNE;
    } else if (*condition == kCondAE) {
      value = 0;
      *condition = kCondNE;
      *opposite = kCondEQ;
    }
  } else if (value == -1) {
    if (*condition == kCondGT) {
      value = 0;
      *condition = kCondGE;
      *opposite = kCondLT;
    } else if (*condition == kCondLE) {
      value = 0;
      *condition = kCondLT;
      *opposite = kCondGE;
    }
  }

  return value;
}

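// Emits a test of a 64-bit value held in a register pair against a constant and returns the pair
// of condition codes (condition, opposite) to branch on; `invert` swaps the condition with its
// opposite before the test is emitted.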
GenerateLongTestConstant(HCondition * condition,bool invert,CodeGeneratorARMVIXL * codegen)1349 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
1350 HCondition* condition,
1351 bool invert,
1352 CodeGeneratorARMVIXL* codegen) {
1353 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1354
1355 const LocationSummary* const locations = condition->GetLocations();
1356 IfCondition cond = condition->GetCondition();
1357 IfCondition opposite = condition->GetOppositeCondition();
1358
1359 if (invert) {
1360 std::swap(cond, opposite);
1361 }
1362
1363 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1364 const Location left = locations->InAt(0);
1365 const Location right = locations->InAt(1);
1366
1367 DCHECK(right.IsConstant());
1368
1369 const vixl32::Register left_high = HighRegisterFrom(left);
1370 const vixl32::Register left_low = LowRegisterFrom(left);
1371 int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
1372 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1373
1374 // Comparisons against 0 are common enough to deserve special attention.
1375 if (value == 0) {
1376 switch (cond) {
1377 case kCondNE:
1378 // x > 0 iff x != 0 when the comparison is unsigned.
1379 case kCondA:
1380 ret = std::make_pair(ne, eq);
1381 FALLTHROUGH_INTENDED;
1382 case kCondEQ:
1383 // x <= 0 iff x == 0 when the comparison is unsigned.
1384 case kCondBE:
1385 __ Orrs(temps.Acquire(), left_low, left_high);
1386 return ret;
1387 case kCondLT:
1388 case kCondGE:
1389 __ Cmp(left_high, 0);
1390 return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1391 // Trivially true or false.
1392 case kCondB:
1393 ret = std::make_pair(ne, eq);
1394 FALLTHROUGH_INTENDED;
1395 case kCondAE:
1396 __ Cmp(left_low, left_low);
1397 return ret;
1398 default:
1399 break;
1400 }
1401 }
1402
1403 switch (cond) {
1404 case kCondEQ:
1405 case kCondNE:
1406 case kCondB:
1407 case kCondBE:
1408 case kCondA:
1409 case kCondAE: {
1410 const uint32_t value_low = Low32Bits(value);
1411 Operand operand_low(value_low);
1412
1413 __ Cmp(left_high, High32Bits(value));
1414
1415 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
1416 // we must ensure that the operands corresponding to the least significant
1417 // halves of the inputs fit into a 16-bit CMP encoding.
1418 if (!left_low.IsLow() || !IsUint<8>(value_low)) {
1419 operand_low = Operand(temps.Acquire());
1420 __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
1421 }
1422
1423 // We use the scope because of the IT block that follows.
1424 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1425 2 * vixl32::k16BitT32InstructionSizeInBytes,
1426 CodeBufferCheckScope::kExactSize);
1427
1428 __ it(eq);
1429 __ cmp(eq, left_low, operand_low);
1430 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1431 break;
1432 }
1433 case kCondLE:
1434 case kCondGT:
1435 // Trivially true or false.
1436 if (value == std::numeric_limits<int64_t>::max()) {
1437 __ Cmp(left_low, left_low);
1438 ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
1439 break;
1440 }
1441
1442 if (cond == kCondLE) {
1443 DCHECK_EQ(opposite, kCondGT);
1444 cond = kCondLT;
1445 opposite = kCondGE;
1446 } else {
1447 DCHECK_EQ(cond, kCondGT);
1448 DCHECK_EQ(opposite, kCondLE);
1449 cond = kCondGE;
1450 opposite = kCondLT;
1451 }
1452
1453 value++;
1454 FALLTHROUGH_INTENDED;
1455 case kCondGE:
1456 case kCondLT: {
1457 __ Cmp(left_low, Low32Bits(value));
1458 __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
1459 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1460 break;
1461 }
1462 default:
1463 LOG(FATAL) << "Unreachable";
1464 UNREACHABLE();
1465 }
1466
1467 return ret;
1468 }
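
// Illustrative sketch (assumption: the long input lives in r0/r1 as lo/hi; registers and the
// constant are chosen only for illustration). For an equality test against 0x0000001200000034
// the code above has roughly this shape:
//   cmp   r1, #0x12          ; compare the high words first
//   it    eq
//   cmpeq r0, #0x34          ; compare the low words only if the high words matched
// and for a signed ordered test (kCondLT/kCondGE) it uses the subtract-with-carry trick:
//   cmp   r0, #<C_lo>
//   sbcs  ip, r1, #<C_hi>    ; the flags now reflect the full 64-bit signed comparison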
1469
1470 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
1471 HCondition* condition,
1472 bool invert,
1473 CodeGeneratorARMVIXL* codegen) {
1474 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1475
1476 const LocationSummary* const locations = condition->GetLocations();
1477 IfCondition cond = condition->GetCondition();
1478 IfCondition opposite = condition->GetOppositeCondition();
1479
1480 if (invert) {
1481 std::swap(cond, opposite);
1482 }
1483
1484 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1485 Location left = locations->InAt(0);
1486 Location right = locations->InAt(1);
1487
1488 DCHECK(right.IsRegisterPair());
1489
1490 switch (cond) {
1491 case kCondEQ:
1492 case kCondNE:
1493 case kCondB:
1494 case kCondBE:
1495 case kCondA:
1496 case kCondAE: {
1497 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
1498
1499 // We use the scope because of the IT block that follows.
1500 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1501 2 * vixl32::k16BitT32InstructionSizeInBytes,
1502 CodeBufferCheckScope::kExactSize);
1503
1504 __ it(eq);
1505 __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
1506 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1507 break;
1508 }
1509 case kCondLE:
1510 case kCondGT:
1511 if (cond == kCondLE) {
1512 DCHECK_EQ(opposite, kCondGT);
1513 cond = kCondGE;
1514 opposite = kCondLT;
1515 } else {
1516 DCHECK_EQ(cond, kCondGT);
1517 DCHECK_EQ(opposite, kCondLE);
1518 cond = kCondLT;
1519 opposite = kCondGE;
1520 }
1521
1522 std::swap(left, right);
1523 FALLTHROUGH_INTENDED;
1524 case kCondGE:
1525 case kCondLT: {
1526 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1527
1528 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
1529 __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
1530 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1531 break;
1532 }
1533 default:
1534 LOG(FATAL) << "Unreachable";
1535 UNREACHABLE();
1536 }
1537
1538 return ret;
1539 }
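
// Illustrative note (not from the original source): for the signed orderings this helper relies
// on the identity a <= b <=> b >= a, which is why kCondLE/kCondGT swap the operands and fall
// through to the kCondGE/kCondLT code. For `a > b` it therefore emits, roughly:
//   cmp  b_lo, a_lo
//   sbcs ip, b_hi, a_hi      ; flags encode the signed result of the 64-bit b - a
// and returns the pair (lt, ge), i.e. "b < a" and its opposite "b >= a".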
1540
1541 static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
1542 bool invert,
1543 CodeGeneratorARMVIXL* codegen) {
1544 const DataType::Type type = condition->GetLeft()->GetType();
1545 IfCondition cond = condition->GetCondition();
1546 IfCondition opposite = condition->GetOppositeCondition();
1547 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1548
1549 if (invert) {
1550 std::swap(cond, opposite);
1551 }
1552
1553 if (type == DataType::Type::kInt64) {
1554 ret = condition->GetLocations()->InAt(1).IsConstant()
1555 ? GenerateLongTestConstant(condition, invert, codegen)
1556 : GenerateLongTest(condition, invert, codegen);
1557 } else if (DataType::IsFloatingPointType(type)) {
1558 GenerateVcmp(condition, codegen);
1559 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
1560 ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
1561 ARMFPCondition(opposite, condition->IsGtBias()));
1562 } else {
1563 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1564 __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
1565 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1566 }
1567
1568 return ret;
1569 }
1570
1571 static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1572 const vixl32::Register out = OutputRegister(cond);
1573 const auto condition = GenerateTest(cond, false, codegen);
1574
1575 __ Mov(LeaveFlags, out, 0);
1576
1577 if (out.IsLow()) {
1578 // We use the scope because of the IT block that follows.
1579 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1580 2 * vixl32::k16BitT32InstructionSizeInBytes,
1581 CodeBufferCheckScope::kExactSize);
1582
1583 __ it(condition.first);
1584 __ mov(condition.first, out, 1);
1585 } else {
1586 vixl32::Label done_label;
1587 vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
1588
1589 __ B(condition.second, final_label, /* is_far_target= */ false);
1590 __ Mov(out, 1);
1591
1592 if (done_label.IsReferenced()) {
1593 __ Bind(&done_label);
1594 }
1595 }
1596 }
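
// Illustrative note (not from the original source): when `out` is a low register the generic
// materialization above is fully branchless, roughly:
//   mov  out, #0             ; LeaveFlags: must not disturb the flags set by GenerateTest()
//   it   <cond>
//   mov<cond> out, #1
// Otherwise a short forward branch over `mov out, #1` is used instead, because the 16-bit
// MOV-immediate encoding needed inside an IT block only exists for low registers.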
1597
1598 static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1599 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1600
1601 const LocationSummary* const locations = cond->GetLocations();
1602 IfCondition condition = cond->GetCondition();
1603 const vixl32::Register out = OutputRegister(cond);
1604 const Location left = locations->InAt(0);
1605 const Location right = locations->InAt(1);
1606 vixl32::Register left_high = HighRegisterFrom(left);
1607 vixl32::Register left_low = LowRegisterFrom(left);
1608 vixl32::Register temp;
1609 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1610
1611 if (right.IsConstant()) {
1612 IfCondition opposite = cond->GetOppositeCondition();
1613 const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
1614 &condition,
1615 &opposite);
1616 Operand right_high = High32Bits(value);
1617 Operand right_low = Low32Bits(value);
1618
1619 // The output uses Location::kNoOutputOverlap.
1620 if (out.Is(left_high)) {
1621 std::swap(left_low, left_high);
1622 std::swap(right_low, right_high);
1623 }
1624
1625 __ Sub(out, left_low, right_low);
1626 temp = temps.Acquire();
1627 __ Sub(temp, left_high, right_high);
1628 } else {
1629 DCHECK(right.IsRegisterPair());
1630 temp = temps.Acquire();
1631 __ Sub(temp, left_high, HighRegisterFrom(right));
1632 __ Sub(out, left_low, LowRegisterFrom(right));
1633 }
1634
1635 // Need to check after calling AdjustConstantForCondition().
1636 DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
1637
1638 if (condition == kCondNE && out.IsLow()) {
1639 __ Orrs(out, out, temp);
1640
1641 // We use the scope because of the IT block that follows.
1642 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1643 2 * vixl32::k16BitT32InstructionSizeInBytes,
1644 CodeBufferCheckScope::kExactSize);
1645
1646 __ it(ne);
1647 __ mov(ne, out, 1);
1648 } else {
1649 __ Orr(out, out, temp);
1650 codegen->GenerateConditionWithZero(condition, out, out, temp);
1651 }
1652 }
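
// Illustrative sketch (assumption: comparing (r0,r1) != (r2,r3) into a low register r4; the
// register choice is only for illustration). The register-pair path above has the shape:
//   sub  ip, r1, r3          ; high-word difference
//   sub  r4, r0, r2          ; low-word difference
//   orrs r4, r4, ip          ; Z is set iff both differences are zero
//   it   ne
//   movne r4, #1
// i.e. 64-bit (in)equality is reduced to "is the OR of the two word differences zero?".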
1653
1654 static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1655 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1656
1657 const LocationSummary* const locations = cond->GetLocations();
1658 IfCondition condition = cond->GetCondition();
1659 const vixl32::Register out = OutputRegister(cond);
1660 const Location left = locations->InAt(0);
1661 const Location right = locations->InAt(1);
1662
1663 if (right.IsConstant()) {
1664 IfCondition opposite = cond->GetOppositeCondition();
1665
1666 // Comparisons against 0 are common enough to deserve special attention.
1667 if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
1668 switch (condition) {
1669 case kCondNE:
1670 case kCondA:
1671 if (out.IsLow()) {
1672 // We only care if both input registers are 0 or not.
1673 __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
1674
1675 // We use the scope because of the IT block that follows.
1676 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1677 2 * vixl32::k16BitT32InstructionSizeInBytes,
1678 CodeBufferCheckScope::kExactSize);
1679
1680 __ it(ne);
1681 __ mov(ne, out, 1);
1682 return;
1683 }
1684
1685 FALLTHROUGH_INTENDED;
1686 case kCondEQ:
1687 case kCondBE:
1688 // We only care if both input registers are 0 or not.
1689 __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
1690 codegen->GenerateConditionWithZero(condition, out, out);
1691 return;
1692 case kCondLT:
1693 case kCondGE:
1694 // We only care about the sign bit.
1695 FALLTHROUGH_INTENDED;
1696 case kCondAE:
1697 case kCondB:
1698 codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
1699 return;
1700 case kCondLE:
1701 case kCondGT:
1702 default:
1703 break;
1704 }
1705 }
1706 }
1707
1708 // If `out` is a low register, then the GenerateConditionGeneric()
1709 // function generates a shorter code sequence that is still branchless.
1710 if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
1711 GenerateEqualLong(cond, codegen);
1712 return;
1713 }
1714
1715 GenerateConditionGeneric(cond, codegen);
1716 }
1717
1718 static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
1719 CodeGeneratorARMVIXL* codegen) {
1720 const DataType::Type type = cond->GetLeft()->GetType();
1721
1722 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1723
1724 if (type == DataType::Type::kInt64) {
1725 GenerateConditionLong(cond, codegen);
1726 return;
1727 }
1728
1729 IfCondition condition = cond->GetCondition();
1730 vixl32::Register in = InputRegisterAt(cond, 0);
1731 const vixl32::Register out = OutputRegister(cond);
1732 const Location right = cond->GetLocations()->InAt(1);
1733 int64_t value;
1734
1735 if (right.IsConstant()) {
1736 IfCondition opposite = cond->GetOppositeCondition();
1737
1738 value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
1739
1740 // Comparisons against 0 are common enough to deserve special attention.
1741 if (value == 0) {
1742 switch (condition) {
1743 case kCondNE:
1744 case kCondA:
1745 if (out.IsLow() && out.Is(in)) {
1746 __ Cmp(out, 0);
1747
1748 // We use the scope because of the IT block that follows.
1749 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1750 2 * vixl32::k16BitT32InstructionSizeInBytes,
1751 CodeBufferCheckScope::kExactSize);
1752
1753 __ it(ne);
1754 __ mov(ne, out, 1);
1755 return;
1756 }
1757
1758 FALLTHROUGH_INTENDED;
1759 case kCondEQ:
1760 case kCondBE:
1761 case kCondLT:
1762 case kCondGE:
1763 case kCondAE:
1764 case kCondB:
1765 codegen->GenerateConditionWithZero(condition, out, in);
1766 return;
1767 case kCondLE:
1768 case kCondGT:
1769 default:
1770 break;
1771 }
1772 }
1773 }
1774
1775 if (condition == kCondEQ || condition == kCondNE) {
1776 Operand operand(0);
1777
1778 if (right.IsConstant()) {
1779 operand = Operand::From(value);
1780 } else if (out.Is(RegisterFrom(right))) {
1781 // Avoid 32-bit instructions if possible.
1782 operand = InputOperandAt(cond, 0);
1783 in = RegisterFrom(right);
1784 } else {
1785 operand = InputOperandAt(cond, 1);
1786 }
1787
1788 if (condition == kCondNE && out.IsLow()) {
1789 __ Subs(out, in, operand);
1790
1791 // We use the scope because of the IT block that follows.
1792 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1793 2 * vixl32::k16BitT32InstructionSizeInBytes,
1794 CodeBufferCheckScope::kExactSize);
1795
1796 __ it(ne);
1797 __ mov(ne, out, 1);
1798 } else {
1799 __ Sub(out, in, operand);
1800 codegen->GenerateConditionWithZero(condition, out, out);
1801 }
1802
1803 return;
1804 }
1805
1806 GenerateConditionGeneric(cond, codegen);
1807 }
1808
1809 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
1810 const DataType::Type type = constant->GetType();
1811 bool ret = false;
1812
1813 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1814
1815 if (type == DataType::Type::kInt64) {
1816 const uint64_t value = Uint64ConstantFrom(constant);
1817
1818 ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
1819 } else {
1820 ret = IsUint<8>(Int32ConstantFrom(constant));
1821 }
1822
1823 return ret;
1824 }
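
// Illustrative note (not from the original source): the 8-bit limit matches the immediate range
// of the 16-bit MOV/CMP (T1) encodings used for conditional moves below. For example, the
// 64-bit constant 0x0000001200000034 qualifies (both halves fit in 8 bits), while
// 0x0000000100000100 does not (its low word, 0x100, needs more than 8 bits).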
1825
1826 static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
1827 DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
1828
1829 if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
1830 return Location::ConstantLocation(constant);
1831 }
1832
1833 return Location::RequiresRegister();
1834 }
1835
1836 static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
1837 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
1838 // we check that we are not dealing with floating-point output (there is no
1839 // 16-bit VMOV encoding).
1840 if (!out.IsRegister() && !out.IsRegisterPair()) {
1841 return false;
1842 }
1843
1844 // For constants, we also check that the output is in one or two low registers,
1845 // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
1846 // MOV encoding can be used.
1847 if (src.IsConstant()) {
1848 if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
1849 return false;
1850 }
1851
1852 if (out.IsRegister()) {
1853 if (!RegisterFrom(out).IsLow()) {
1854 return false;
1855 }
1856 } else {
1857 DCHECK(out.IsRegisterPair());
1858
1859 if (!HighRegisterFrom(out).IsLow()) {
1860 return false;
1861 }
1862 }
1863 }
1864
1865 return true;
1866 }
1867
1868 #undef __
1869
1870 vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
1871 vixl32::Label* final_label) {
1872 DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
1873 DCHECK_IMPLIES(instruction->IsInvoke(), !instruction->GetLocations()->CanCall());
1874
1875 const HBasicBlock* const block = instruction->GetBlock();
1876 const HLoopInformation* const info = block->GetLoopInformation();
1877 HInstruction* const next = instruction->GetNext();
1878
1879 // Avoid a branch to a branch.
1880 if (next->IsGoto() && (info == nullptr ||
1881 !info->IsBackEdge(*block) ||
1882 !info->HasSuspendCheck())) {
1883 final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
1884 }
1885
1886 return final_label;
1887 }
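
// Illustrative note (not from the original source): if the instruction being materialized is
// immediately followed by an unconditional HGoto, branching straight to the goto's successor
// avoids emitting `b .Lnext; .Lnext: b .Lsuccessor`, i.e. a branch whose only purpose is to
// reach another branch. Back edges with a pending suspend check are excluded because the
// suspend-check code at the goto must still run.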
1888
1889 namespace detail {
1890
1891 // Mark which intrinsics we don't have handcrafted code for.
1892 template <Intrinsics T>
1893 struct IsUnimplemented {
1894 bool is_unimplemented = false;
1895 };
1896
1897 #define TRUE_OVERRIDE(Name) \
1898 template <> \
1899 struct IsUnimplemented<Intrinsics::k##Name> { \
1900 bool is_unimplemented = true; \
1901 };
1902 UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
1903 #undef TRUE_OVERRIDE
1904
1905 static constexpr bool kIsIntrinsicUnimplemented[] = {
1906 false, // kNone
1907 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1908 IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1909 ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1910 #undef IS_UNIMPLEMENTED
1911 };
1912
1913 } // namespace detail
1914
1915 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
1916 const CompilerOptions& compiler_options,
1917 OptimizingCompilerStats* stats)
1918 : CodeGenerator(graph,
1919 kNumberOfCoreRegisters,
1920 kNumberOfSRegisters,
1921 kNumberOfRegisterPairs,
1922 kCoreCalleeSaves.GetList(),
1923 ComputeSRegisterListMask(kFpuCalleeSaves),
1924 compiler_options,
1925 stats,
1926 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1927 block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1928 jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1929 location_builder_(graph, this),
1930 instruction_visitor_(graph, this),
1931 move_resolver_(graph->GetAllocator(), this),
1932 assembler_(graph->GetAllocator()),
1933 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1934 app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1935 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1936 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1937 app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1938 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1939 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1940 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1941 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1942 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1943 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1944 call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1945 baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1946 uint32_literals_(std::less<uint32_t>(),
1947 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1948 jit_string_patches_(StringReferenceValueComparator(),
1949 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1950 jit_class_patches_(TypeReferenceValueComparator(),
1951 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1952 jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1953 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1954 // Always save the LR register to mimic Quick.
1955 AddAllocatedRegister(Location::RegisterLocation(LR));
1956 // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
1957 // S0-S31, which alias to D0-D15.
1958 GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
1959 GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
1960 }
1961
1962 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
1963 uint32_t num_entries = switch_instr_->GetNumEntries();
1964 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1965
1966 // We are about to use the assembler to place literals directly. Make sure we have enough
1967 // underlying code buffer and we have generated a jump table of the right size, using
1968 // codegen->GetVIXLAssembler()->GetBuffer().Align();
1969 ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
1970 num_entries * sizeof(int32_t),
1971 CodeBufferCheckScope::kMaximumSize);
1972 // TODO(VIXL): Check that using lower case bind is fine here.
1973 codegen->GetVIXLAssembler()->bind(&table_start_);
1974 for (uint32_t i = 0; i < num_entries; i++) {
1975 codegen->GetVIXLAssembler()->place(bb_addresses_[i].get());
1976 }
1977 }
1978
1979 void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
1980 uint32_t num_entries = switch_instr_->GetNumEntries();
1981 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1982
1983 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
1984 for (uint32_t i = 0; i < num_entries; i++) {
1985 vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
1986 DCHECK(target_label->IsBound());
1987 int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
1988 // When doing BX to an address, the lowest bit must be set to 1 to stay in T32 (Thumb) state.
1989 if (codegen->GetVIXLAssembler()->IsUsingT32()) {
1990 jump_offset++;
1991 }
1992 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
1993 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
1994
1995 bb_addresses_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer());
1996 }
1997 }
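
// Illustrative note (not from the original source): each jump-table slot holds the byte offset
// from the table start to the target block's bound label. Under T32 the offset is made odd
// (bit 0 set) so that the address eventually computed from it keeps the Thumb interworking bit
// for a BX-style branch; e.g. a target 0x40 bytes after the table is stored as 0x41.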
1998
1999 void CodeGeneratorARMVIXL::FixJumpTables() {
2000 for (auto&& jump_table : jump_tables_) {
2001 jump_table->FixTable(this);
2002 }
2003 }
2004
2005 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
2006
2007 void CodeGeneratorARMVIXL::Finalize() {
2008 FixJumpTables();
2009
2010 // Emit JIT baker read barrier slow paths.
2011 DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
2012 for (auto& entry : jit_baker_read_barrier_slow_paths_) {
2013 uint32_t encoded_data = entry.first;
2014 vixl::aarch32::Label* slow_path_entry = &entry.second.label;
2015 __ Bind(slow_path_entry);
2016 CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
2017 }
2018
2019 GetAssembler()->FinalizeCode();
2020 CodeGenerator::Finalize();
2021
2022 // Verify Baker read barrier linker patches.
2023 if (kIsDebugBuild) {
2024 ArrayRef<const uint8_t> code(GetCode());
2025 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
2026 DCHECK(info.label.IsBound());
2027 uint32_t literal_offset = info.label.GetLocation();
2028 DCHECK_ALIGNED(literal_offset, 2u);
2029
2030 auto GetInsn16 = [&code](uint32_t offset) {
2031 DCHECK_ALIGNED(offset, 2u);
2032 return (static_cast<uint32_t>(code[offset + 0]) << 0) +
2033 (static_cast<uint32_t>(code[offset + 1]) << 8);
2034 };
2035 auto GetInsn32 = [=](uint32_t offset) {
2036 return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
2037 };
2038
2039 uint32_t encoded_data = info.custom_data;
2040 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
2041 // Check that the instruction adjacent to the label matches the expected LDR.
// For the GC root and intrinsic CAS kinds, the checked instruction precedes the label.
2042 switch (kind) {
2043 case BakerReadBarrierKind::kField: {
2044 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2045 if (width == BakerReadBarrierWidth::kWide) {
2046 DCHECK_GE(code.size() - literal_offset, 8u);
2047 uint32_t next_insn = GetInsn32(literal_offset + 4u);
2048 // LDR (immediate), encoding T3, with correct base_reg.
2049 CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
2050 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2051 CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
2052 } else {
2053 DCHECK_GE(code.size() - literal_offset, 6u);
2054 uint32_t next_insn = GetInsn16(literal_offset + 4u);
2055 // LDR (immediate), encoding T1, with correct base_reg.
2056 CheckValidReg(next_insn & 0x7u); // Check destination register.
2057 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2058 CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
2059 }
2060 break;
2061 }
2062 case BakerReadBarrierKind::kArray: {
2063 DCHECK_GE(code.size() - literal_offset, 8u);
2064 uint32_t next_insn = GetInsn32(literal_offset + 4u);
2065 // LDR (register), encoding T2, with correct base_reg and an LSL #2 shift of the index register.
2066 CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
2067 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2068 CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
2069 CheckValidReg(next_insn & 0xf); // Check index register.
2070 break;
2071 }
2072 case BakerReadBarrierKind::kGcRoot: {
2073 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2074 if (width == BakerReadBarrierWidth::kWide) {
2075 DCHECK_GE(literal_offset, 4u);
2076 uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2077 // LDR (immediate), encoding T3, with correct root_reg.
2078 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2079 CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
2080 } else {
2081 DCHECK_GE(literal_offset, 2u);
2082 uint32_t prev_insn = GetInsn16(literal_offset - 2u);
2083 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2084 // Usually LDR (immediate), encoding T1, with correct root_reg but we may have
2085 // a `MOV marked, old_value` for intrinsic CAS where `marked` is a low register.
2086 if ((prev_insn & 0xff87u) != (0x4600 | root_reg)) {
2087 CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
2088 }
2089 }
2090 break;
2091 }
2092 case BakerReadBarrierKind::kIntrinsicCas: {
2093 DCHECK_GE(literal_offset, 4u);
2094 uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2095 // MOV (register), encoding T3, with correct root_reg.
2096 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2097 DCHECK_GE(root_reg, 8u); // Used only for high registers.
2098 CHECK_EQ(prev_insn & 0xfffffff0u, 0xea4f0000u | (root_reg << 8));
2099 break;
2100 }
2101 default:
2102 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
2103 UNREACHABLE();
2104 }
2105 }
2106 }
2107 }
2108
2109 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
2110 // Stack register, LR and PC are always reserved.
2111 blocked_core_registers_[SP] = true;
2112 blocked_core_registers_[LR] = true;
2113 blocked_core_registers_[PC] = true;
2114
2115 // TODO: We don't need to reserve marking-register for userfaultfd GC. But
2116 // that would require some work in the assembler code as the right GC is
2117 // chosen at load-time and not compile time.
2118 if (kReserveMarkingRegister) {
2119 // Reserve marking register.
2120 blocked_core_registers_[MR] = true;
2121 }
2122
2123 // Reserve thread register.
2124 blocked_core_registers_[TR] = true;
2125
2126 // Reserve temp register.
2127 blocked_core_registers_[IP] = true;
2128
2129 if (GetGraph()->IsDebuggable()) {
2130 // Stubs do not save callee-save floating point registers. If the graph
2131 // is debuggable, we need to deal with these registers differently. For
2132 // now, just block them.
2133 for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
2134 i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
2135 ++i) {
2136 blocked_fpu_registers_[i] = true;
2137 }
2138 }
2139 }
2140
2141 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
2142 CodeGeneratorARMVIXL* codegen)
2143 : InstructionCodeGenerator(graph, codegen),
2144 assembler_(codegen->GetAssembler()),
2145 codegen_(codegen) {}
2146
2147 void CodeGeneratorARMVIXL::ComputeSpillMask() {
2148 core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
2149 DCHECK_NE(core_spill_mask_ & (1u << kLrCode), 0u)
2150 << "At least the return address register must be saved";
2151 // 16-bit PUSH/POP (T1) can save/restore just the LR/PC.
2152 DCHECK(GetVIXLAssembler()->IsUsingT32());
2153 fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
2154 // We use vpush and vpop for saving and restoring floating point registers, which take
2155 // a SRegister and the number of registers to save/restore after that SRegister. We
2156 // therefore update the `fpu_spill_mask_` to also contain the registers in that range that
2157 // were not allocated.
2158 if (fpu_spill_mask_ != 0) {
2159 uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
2160 uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
2161 for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
2162 fpu_spill_mask_ |= (1 << i);
2163 }
2164 }
2165 }
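
// Worked example (illustrative, not from the original source): if the allocator spills only
// s5 and s7, fpu_spill_mask_ starts as 0b1010'0000; the loop above also sets bit 6, giving
// 0b1110'0000, so a single `vpush {s5-s7}` / `vpop {s5-s7}` pair can be used even though s6
// is not live across the frame.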
2166
2167 void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) {
2168 LocationSummary* locations = new (GetGraph()->GetAllocator())
2169 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2170 locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType()));
2171 // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2172 // compute the address to store the timestamp counter.
2173 locations->AddRegisterTemps(3);
2174 }
2175
2176 void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) {
2177 LocationSummary* locations = instruction->GetLocations();
2178 vixl32::Register addr = RegisterFrom(locations->GetTemp(0));
2179 vixl32::Register value = RegisterFrom(locations->GetTemp(1));
2180 vixl32::Register tmp = RegisterFrom(locations->GetTemp(2));
2181
2182 SlowPathCodeARMVIXL* slow_path =
2183 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
2184 codegen_->AddSlowPath(slow_path);
2185
2186 if (instruction->IsMethodExitHook()) {
2187 // Check whether the caller needs a deoptimization. Strictly speaking it would be sufficient
2188 // to check whether the CheckCallerForDeopt bit is set, but it is faster to check for any
2189 // non-zero value. The kCHA bit isn't used in debuggable runtimes since CHA optimization is
2190 // disabled there. The other bit is used when this method itself requires a deoptimization
2191 // due to redefinition, so it is safe to just check for a non-zero value here.
2192 GetAssembler()->LoadFromOffset(
2193 kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
2194 __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel());
2195 }
2196
2197 MemberOffset offset = instruction->IsMethodExitHook() ?
2198 instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
2199 instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
2200 uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
2201 __ Mov(addr, address + offset.Int32Value());
2202 __ Ldrb(value, MemOperand(addr, 0));
2203 __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners);
2204 // Check if there are any trace method entry / exit listeners. If not, continue.
2205 __ B(lt, slow_path->GetExitLabel());
2206 // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
2207 // If yes, just take the slow path.
2208 __ B(gt, slow_path->GetEntryLabel());
2209
2210 // Check if there is space in the buffer to store a new entry; if not, take the slow path.
2211 uint32_t trace_buffer_curr_entry_offset =
2212 Thread::TraceBufferCurrPtrOffset<kArmPointerSize>().Int32Value();
2213 vixl32::Register curr_entry = value;
2214 vixl32::Register init_entry = addr;
2215 __ Ldr(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));
2216 __ Subs(curr_entry, curr_entry, static_cast<uint32_t>(kNumEntriesForWallClock * sizeof(void*)));
2217 __ Ldr(init_entry, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue()));
2218 __ Cmp(curr_entry, init_entry);
2219 __ B(lt, slow_path->GetEntryLabel());
2220
2221 // Update the index in the `Thread`.
2222 __ Str(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));
2223
2224 // Record method pointer and trace action.
2225 __ Ldr(tmp, MemOperand(sp, 0));
2226 // Use the last two bits to encode the trace method action. For MethodEntry the action is 0,
2227 // so there is no need to set the bits; they are already 0.
2228 if (instruction->IsMethodExitHook()) {
2229 DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
2230 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
2231 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
2232 __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
2233 }
2234 __ Str(tmp, MemOperand(curr_entry, kMethodOffsetInBytes));
2235
2236 vixl32::Register tmp1 = init_entry;
2237 // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34.
2238 __ Mrrc(/* lower 32-bit */ tmp,
2239 /* higher 32-bit */ tmp1,
2240 /* coproc= */ 15,
2241 /* opc1= */ 1,
2242 /* crm= */ 14);
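  // Illustrative note (not from the original source, and stated as an assumption): this MRRC
  // encoding (coproc 15, opc1 1, CRm c14) appears to be the AArch32 accessor for the 64-bit
  // virtual counter CNTVCT, i.e. roughly `mrrc p15, 1, <lo>, <hi>, c14`, which provides the
  // timestamp stored into the trace entry below.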
2243 static_assert(kHighTimestampOffsetInBytes ==
2244 kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize));
2245 __ Strd(tmp, tmp1, MemOperand(curr_entry, kTimestampOffsetInBytes));
2246 __ Bind(slow_path->GetExitLabel());
2247 }
2248
2249 void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instruction) {
2250 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2251 DCHECK(codegen_->RequiresCurrentMethod());
2252 GenerateMethodEntryExitHook(instruction);
2253 }
2254
2255 void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
2256 LocationSummary* locations = new (GetGraph()->GetAllocator())
2257 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2258 // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2259 // compute the address to store the timestamp counter.
2260 locations->AddRegisterTemps(3);
2261 }
2262
2263 void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) {
2264 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2265 DCHECK(codegen_->RequiresCurrentMethod());
2266 GenerateMethodEntryExitHook(instruction);
2267 }
2268
2269 void CodeGeneratorARMVIXL::MaybeIncrementHotness(HSuspendCheck* suspend_check,
2270 bool is_frame_entry) {
2271 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
2272 UseScratchRegisterScope temps(GetVIXLAssembler());
2273 vixl32::Register temp = temps.Acquire();
2274 static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
2275 if (!is_frame_entry) {
2276 __ Push(vixl32::Register(kMethodRegister));
2277 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2278 GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
2279 }
2280 // Load with zero extend to clear the high bits for integer overflow check.
2281 __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2282 vixl::aarch32::Label done;
2283 DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
2284 __ CompareAndBranchIfZero(temp, &done, /* is_far_target= */ false);
2285 __ Add(temp, temp, -1);
2286 __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2287 __ Bind(&done);
2288 if (!is_frame_entry) {
2289 __ Pop(vixl32::Register(kMethodRegister));
2290 GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2291 }
2292 }
2293
2294 if (GetGraph()->IsCompilingBaseline() &&
2295 GetGraph()->IsUsefulOptimizing() &&
2296 !Runtime::Current()->IsAotCompiler()) {
2297 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2298 DCHECK(info != nullptr);
2299 DCHECK(!HasEmptyFrame());
2300 uint32_t address = reinterpret_cast32<uint32_t>(info);
2301 UseScratchRegisterScope temps(GetVIXLAssembler());
2302 vixl32::Register tmp = temps.Acquire();
2303 SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(
2304 suspend_check, /* profiling_info= */ lr);
2305 AddSlowPath(slow_path);
2306 __ Mov(lr, address);
2307 __ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2308 __ Adds(tmp, tmp, -1);
2309 __ B(cc, slow_path->GetEntryLabel());
2310 __ Strh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2311 __ Bind(slow_path->GetExitLabel());
2312 }
2313 }
2314
2315 void CodeGeneratorARMVIXL::GenerateFrameEntry() {
2316 bool skip_overflow_check =
2317 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
2318 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
2319
2320 // Check if we need to generate the clinit check. We will jump to the
2321 // resolution stub if the class is not initialized and the executing thread is
2322 // not the thread initializing it.
2323 // We do this before constructing the frame to get the correct stack trace if
2324 // an exception is thrown.
2325 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
2326 UseScratchRegisterScope temps(GetVIXLAssembler());
2327 vixl32::Label resolution;
2328 vixl32::Label memory_barrier;
2329
2330 // Check if we're visibly initialized.
2331
2332 vixl32::Register temp1 = temps.Acquire();
2333 // Use r4 as other temporary register.
2334 DCHECK(!blocked_core_registers_[R4]);
2335 DCHECK(!kCoreCalleeSaves.Includes(r4));
2336 vixl32::Register temp2 = r4;
2337 for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2338 DCHECK(!reg.Is(r4));
2339 }
2340
2341 // We don't emit a read barrier here to save on code size. We rely on the
2342 // resolution trampoline to do a suspend check before re-entering this code.
2343 __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
2344 __ Ldrb(temp2, MemOperand(temp1, kClassStatusByteOffset));
2345 __ Cmp(temp2, kShiftedVisiblyInitializedValue);
2346 __ B(cs, &frame_entry_label_);
2347
2348 // Check if we're initialized and jump to code that does a memory barrier if
2349 // so.
2350 __ Cmp(temp2, kShiftedInitializedValue);
2351 __ B(cs, &memory_barrier);
2352
2353 // Check if we're initializing and the thread initializing is the one
2354 // executing the code.
2355 __ Cmp(temp2, kShiftedInitializingValue);
2356 __ B(lo, &resolution);
2357
2358 __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
2359 __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
2360 __ Cmp(temp1, temp2);
2361 __ B(eq, &frame_entry_label_);
2362 __ Bind(&resolution);
2363
2364 // Jump to the resolution stub.
2365 ThreadOffset32 entrypoint_offset =
2366 GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
2367 __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
2368 __ Bx(temp1);
2369
2370 __ Bind(&memory_barrier);
2371 GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2372 }
2373
2374 __ Bind(&frame_entry_label_);
2375
2376 if (HasEmptyFrame()) {
2377 // Ensure that the CFI opcode list is not empty.
2378 GetAssembler()->cfi().Nop();
2379 MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2380 return;
2381 }
2382
2383 // Make sure the frame size isn't unreasonably large.
2384 DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
2385
2386 if (!skip_overflow_check) {
2387 // Using r4 instead of IP saves 2 bytes.
2388 UseScratchRegisterScope temps(GetVIXLAssembler());
2389 vixl32::Register temp;
2390 // TODO: Remove this check when R4 is made a callee-save register
2391 // in ART compiled code (b/72801708). Currently we need to make
2392 // sure r4 is not blocked, e.g. in special purpose
2393 // TestCodeGeneratorARMVIXL; also asserting that r4 is available
2394 // here.
2395 if (!blocked_core_registers_[R4]) {
2396 for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2397 DCHECK(!reg.Is(r4));
2398 }
2399 DCHECK(!kCoreCalleeSaves.Includes(r4));
2400 temp = r4;
2401 } else {
2402 temp = temps.Acquire();
2403 }
2404 __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
2405 // The load must immediately precede RecordPcInfo.
2406 ExactAssemblyScope aas(GetVIXLAssembler(),
2407 vixl32::kMaxInstructionSizeInBytes,
2408 CodeBufferCheckScope::kMaximumSize);
2409 __ ldr(temp, MemOperand(temp));
2410 RecordPcInfoForFrameOrBlockEntry();
2411 }
2412
2413 uint32_t frame_size = GetFrameSize();
2414 uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2415 uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2416 if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2417 core_spills_offset <= 3u * kArmWordSize) {
2418 // Do a single PUSH for core registers including the method and up to two
2419 // filler registers. Then store the single FP spill if any.
2420 // (The worst case is when the method is not required and we actually
2421 // store 3 extra registers but they are stored in the same properly
2422 // aligned 16-byte chunk where we're already writing anyway.)
2423 DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2424 uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize);
2425 DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(core_spill_mask_));
2426 __ Push(RegisterList(core_spill_mask_ | extra_regs));
2427 GetAssembler()->cfi().AdjustCFAOffset(frame_size);
2428 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2429 core_spills_offset,
2430 core_spill_mask_,
2431 kArmWordSize);
2432 if (fpu_spill_mask_ != 0u) {
2433 DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2434 vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2435 GetAssembler()->StoreSToOffset(sreg, sp, fp_spills_offset);
2436 GetAssembler()->cfi().RelOffset(DWARFReg(sreg), /*offset=*/ fp_spills_offset);
2437 }
2438 } else {
2439 __ Push(RegisterList(core_spill_mask_));
2440 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
2441 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2442 /*offset=*/ 0,
2443 core_spill_mask_,
2444 kArmWordSize);
2445 if (fpu_spill_mask_ != 0) {
2446 uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2447
2448 // Check that list is contiguous.
2449 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2450
2451 __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2452 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
2453 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0),
2454 /*offset=*/ 0,
2455 fpu_spill_mask_,
2456 kArmWordSize);
2457 }
2458
2459 // Adjust SP and save the current method if we need it. Note that we do
2460 // not save the method in HCurrentMethod, as the instruction might have
2461 // been removed in the SSA graph.
2462 if (RequiresCurrentMethod() && fp_spills_offset <= 3 * kArmWordSize) {
2463 DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2464 __ Push(RegisterList(MaxInt<uint32_t>(fp_spills_offset / kArmWordSize)));
2465 GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset);
2466 } else {
2467 IncreaseFrame(fp_spills_offset);
2468 if (RequiresCurrentMethod()) {
2469 GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2470 }
2471 }
2472 }
2473
2474 if (GetGraph()->HasShouldDeoptimizeFlag()) {
2475 UseScratchRegisterScope temps(GetVIXLAssembler());
2476 vixl32::Register temp = temps.Acquire();
2477 // Initialize should_deoptimize flag to 0.
2478 __ Mov(temp, 0);
2479 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
2480 }
2481
2482 MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2483 MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
2484 }
2485
2486 void CodeGeneratorARMVIXL::GenerateFrameExit() {
2487 if (HasEmptyFrame()) {
2488 __ Bx(lr);
2489 return;
2490 }
2491
2492 // Pop LR into PC to return.
2493 DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
2494 uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
2495
2496 uint32_t frame_size = GetFrameSize();
2497 uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2498 uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2499 if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2500 // r4 is blocked by TestCodeGeneratorARMVIXL used by some tests.
2501 core_spills_offset <= (blocked_core_registers_[r4.GetCode()] ? 2u : 3u) * kArmWordSize) {
2502 // Load the FP spill if any and then do a single POP including the method
2503 // and up to two filler registers. If we have no FP spills, this also has
2504 // the advantage that we do not need to emit CFI directives.
2505 if (fpu_spill_mask_ != 0u) {
2506 DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2507 vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2508 GetAssembler()->cfi().RememberState();
2509 GetAssembler()->LoadSFromOffset(sreg, sp, fp_spills_offset);
2510 GetAssembler()->cfi().Restore(DWARFReg(sreg));
2511 }
2512 // Clobber registers r2-r4 as they are caller-save in ART managed ABI and
2513 // never hold the return value.
2514 uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize) << r2.GetCode();
2515 DCHECK_EQ(extra_regs & kCoreCalleeSaves.GetList(), 0u);
2516 DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(pop_mask));
2517 __ Pop(RegisterList(pop_mask | extra_regs));
2518 if (fpu_spill_mask_ != 0u) {
2519 GetAssembler()->cfi().RestoreState();
2520 }
2521 } else {
2522 GetAssembler()->cfi().RememberState();
2523 DecreaseFrame(fp_spills_offset);
2524 if (fpu_spill_mask_ != 0) {
2525 uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2526
2527 // Check that list is contiguous.
2528 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2529
2530 __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2531 GetAssembler()->cfi().AdjustCFAOffset(
2532 -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
2533 GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
2534 }
2535 __ Pop(RegisterList(pop_mask));
2536 GetAssembler()->cfi().RestoreState();
2537 GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
2538 }
2539 }
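
// Illustrative example (assumptions: a method that spills r5, r6 and LR, no FP spills, and a
// core_spills_offset of 3 words so the single-POP fast path applies; all values chosen only
// for illustration). The epilogue above then reduces to
//   pop {r2, r3, r4, r5, r6, pc}
// where r2-r4 are popped purely as fillers to release the frame slots below the spills, and
// LR is restored directly into PC to return.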
2540
2541 void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
2542 __ Bind(GetLabelOf(block));
2543 }
2544
2545 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2546 switch (type) {
2547 case DataType::Type::kReference:
2548 case DataType::Type::kBool:
2549 case DataType::Type::kUint8:
2550 case DataType::Type::kInt8:
2551 case DataType::Type::kUint16:
2552 case DataType::Type::kInt16:
2553 case DataType::Type::kInt32: {
2554 uint32_t index = gp_index_++;
2555 uint32_t stack_index = stack_index_++;
2556 if (index < calling_convention.GetNumberOfRegisters()) {
2557 return LocationFrom(calling_convention.GetRegisterAt(index));
2558 } else {
2559 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2560 }
2561 }
2562
2563 case DataType::Type::kInt64: {
2564 uint32_t index = gp_index_;
2565 uint32_t stack_index = stack_index_;
2566 gp_index_ += 2;
2567 stack_index_ += 2;
2568 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2569 if (calling_convention.GetRegisterAt(index).Is(r1)) {
2570 // Skip R1, and use R2_R3 instead.
2571 gp_index_++;
2572 index++;
2573 }
2574 }
2575 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2576 DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
2577 calling_convention.GetRegisterAt(index + 1).GetCode());
2578
2579 return LocationFrom(calling_convention.GetRegisterAt(index),
2580 calling_convention.GetRegisterAt(index + 1));
2581 } else {
2582 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2583 }
2584 }
2585
2586 case DataType::Type::kFloat32: {
2587 uint32_t stack_index = stack_index_++;
2588 if (float_index_ % 2 == 0) {
2589 float_index_ = std::max(double_index_, float_index_);
2590 }
2591 if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
2592 return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
2593 } else {
2594 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2595 }
2596 }
2597
2598 case DataType::Type::kFloat64: {
2599 double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
2600 uint32_t stack_index = stack_index_;
2601 stack_index_ += 2;
2602 if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
2603 uint32_t index = double_index_;
2604 double_index_ += 2;
2605 Location result = LocationFrom(
2606 calling_convention.GetFpuRegisterAt(index),
2607 calling_convention.GetFpuRegisterAt(index + 1));
2608 DCHECK(ExpectedPairLayout(result));
2609 return result;
2610 } else {
2611 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2612 }
2613 }
2614
2615 case DataType::Type::kUint32:
2616 case DataType::Type::kUint64:
2617 case DataType::Type::kVoid:
2618 LOG(FATAL) << "Unexpected parameter type " << type;
2619 UNREACHABLE();
2620 }
2621 return Location::NoLocation();
2622 }
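
// Worked examples (illustrative, not from the original source):
//   (int, long, float, double) -> r1, r2/r3, s0, s2/s3
//     (r0 always carries the ArtMethod*; the double starts at an even S register)
//   (long, int)                -> r2/r3, stack slot
//     (the long skips r1 so that it occupies an aligned register pair)
// Arguments that do not fit in registers are passed in the stack slots computed from
// GetStackOffsetOf(stack_index).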
2623
2624 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
2625 switch (type) {
2626 case DataType::Type::kReference:
2627 case DataType::Type::kBool:
2628 case DataType::Type::kUint8:
2629 case DataType::Type::kInt8:
2630 case DataType::Type::kUint16:
2631 case DataType::Type::kInt16:
2632 case DataType::Type::kUint32:
2633 case DataType::Type::kInt32: {
2634 return LocationFrom(r0);
2635 }
2636
2637 case DataType::Type::kFloat32: {
2638 return LocationFrom(s0);
2639 }
2640
2641 case DataType::Type::kUint64:
2642 case DataType::Type::kInt64: {
2643 return LocationFrom(r0, r1);
2644 }
2645
2646 case DataType::Type::kFloat64: {
2647 return LocationFrom(s0, s1);
2648 }
2649
2650 case DataType::Type::kVoid:
2651 return Location::NoLocation();
2652 }
2653
2654 UNREACHABLE();
2655 }
2656
2657 Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2658 return LocationFrom(kMethodRegister);
2659 }
2660
2661 Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2662 DCHECK_NE(type, DataType::Type::kReference);
2663
2664 // Native ABI uses the same registers as managed, except that the method register r0
2665 // is a normal argument.
2666 Location location = Location::NoLocation();
2667 if (DataType::Is64BitType(type)) {
2668 gpr_index_ = RoundUp(gpr_index_, 2u);
2669 stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
2670 if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2671 location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
2672 kParameterCoreRegistersVIXL[gpr_index_]);
2673 gpr_index_ += 2u;
2674 }
2675 } else {
2676 if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2677 location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
2678 ++gpr_index_;
2679 }
2680 }
2681 if (location.IsInvalid()) {
2682 if (DataType::Is64BitType(type)) {
2683 location = Location::DoubleStackSlot(stack_offset_);
2684 stack_offset_ += 2 * kFramePointerSize;
2685 } else {
2686 location = Location::StackSlot(stack_offset_);
2687 stack_offset_ += kFramePointerSize;
2688 }
2689
2690 if (for_register_allocation_) {
2691 location = Location::Any();
2692 }
2693 }
2694 return location;
2695 }
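
// Worked example (illustrative, not from the original source): for a @CriticalNative method
// with signature (int, long, int) the AAPCS-style assignment above yields
//   int  -> r0          (no ArtMethod* slot in the native ABI; r0 is a plain argument)
//   long -> r2/r3       (the GPR index is rounded up so the 64-bit value gets an even pair)
//   int  -> stack slot  (r1 was skipped by the alignment and no register remains)
// When used only to constrain the register allocator (for_register_allocation_), the stack
// locations are relaxed to Location::Any().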
2696
2697 Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
2698 const {
2699 // We perform conversion to the managed ABI return register after the call if needed.
2700 InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
2701 return dex_calling_convention.GetReturnLocation(type);
2702 }
2703
2704 Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2705 // Pass the method in the hidden argument R4.
2706 return Location::RegisterLocation(R4);
2707 }
2708
2709 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
2710 if (source.Equals(destination)) {
2711 return;
2712 }
2713 if (destination.IsRegister()) {
2714 if (source.IsRegister()) {
2715 __ Mov(RegisterFrom(destination), RegisterFrom(source));
2716 } else if (source.IsFpuRegister()) {
2717 __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
2718 } else {
2719 GetAssembler()->LoadFromOffset(kLoadWord,
2720 RegisterFrom(destination),
2721 sp,
2722 source.GetStackIndex());
2723 }
2724 } else if (destination.IsFpuRegister()) {
2725 if (source.IsRegister()) {
2726 __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
2727 } else if (source.IsFpuRegister()) {
2728 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
2729 } else {
2730 GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
2731 }
2732 } else {
2733 DCHECK(destination.IsStackSlot()) << destination;
2734 if (source.IsRegister()) {
2735 GetAssembler()->StoreToOffset(kStoreWord,
2736 RegisterFrom(source),
2737 sp,
2738 destination.GetStackIndex());
2739 } else if (source.IsFpuRegister()) {
2740 GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
2741 } else {
2742 DCHECK(source.IsStackSlot()) << source;
2743 UseScratchRegisterScope temps(GetVIXLAssembler());
2744 vixl32::Register temp = temps.Acquire();
2745 GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
2746 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
2747 }
2748 }
2749 }
2750
2751 void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
2752 DCHECK(location.IsRegister());
2753 __ Mov(RegisterFrom(location), value);
2754 }
2755
2756 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
2757 // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
2758 // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
2759 HParallelMove move(GetGraph()->GetAllocator());
2760 move.AddMove(src, dst, dst_type, nullptr);
2761 GetMoveResolver()->EmitNativeCode(&move);
2762 }
2763
2764 void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
2765 if (location.IsRegister()) {
2766 locations->AddTemp(location);
2767 } else if (location.IsRegisterPair()) {
2768 locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
2769 locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
2770 } else {
2771 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2772 }
2773 }
2774
2775 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
2776 HInstruction* instruction,
2777 SlowPathCode* slow_path) {
2778 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2779
2780 ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint);
2781 // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2782 // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2783 // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2784 if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2785 __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2786 // Ensure the pc position is recorded immediately after the `blx` instruction.
2787 // In T32, blx has only a 16-bit encoding, which is why a stricter check for the scope is used.
2788 ExactAssemblyScope aas(GetVIXLAssembler(),
2789 vixl32::k16BitT32InstructionSizeInBytes,
2790 CodeBufferCheckScope::kExactSize);
2791 __ blx(lr);
2792 if (EntrypointRequiresStackMap(entrypoint)) {
2793 RecordPcInfo(instruction, slow_path);
2794 }
2795 } else {
2796 // Ensure the pc position is recorded immediately after the `bl` instruction.
2797 ExactAssemblyScope aas(GetVIXLAssembler(),
2798 vixl32::k32BitT32InstructionSizeInBytes,
2799 CodeBufferCheckScope::kExactSize);
2800 EmitEntrypointThunkCall(entrypoint_offset);
2801 if (EntrypointRequiresStackMap(entrypoint)) {
2802 RecordPcInfo(instruction, slow_path);
2803 }
2804 }
2805 }
2806
2807 void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2808 HInstruction* instruction,
2809 SlowPathCode* slow_path) {
2810 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2811 __ Ldr(lr, MemOperand(tr, entry_point_offset));
2812 __ Blx(lr);
2813 }
2814
2815 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2816 if (successor->IsExitBlock()) {
2817 DCHECK(got->GetPrevious()->AlwaysThrows());
2818 return; // no code needed
2819 }
2820
2821 HBasicBlock* block = got->GetBlock();
2822 HInstruction* previous = got->GetPrevious();
2823 HLoopInformation* info = block->GetLoopInformation();
2824
2825 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2826 codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2827 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2828 return;
2829 }
2830 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2831 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2832 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
2833 }
2834 if (!codegen_->GoesToNextBlock(block, successor)) {
2835 __ B(codegen_->GetLabelOf(successor));
2836 }
2837 }
2838
2839 void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
2840 got->SetLocations(nullptr);
2841 }
2842
2843 void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
2844 HandleGoto(got, got->GetSuccessor());
2845 }
2846
2847 void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2848 try_boundary->SetLocations(nullptr);
2849 }
2850
2851 void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2852 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2853 if (!successor->IsExitBlock()) {
2854 HandleGoto(try_boundary, successor);
2855 }
2856 }
2857
2858 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
2859 exit->SetLocations(nullptr);
2860 }
2861
2862 void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {}
2863
2864 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
2865 vixl32::Label* true_target,
2866 vixl32::Label* false_target,
2867 bool is_far_target) {
2868 if (true_target == false_target) {
2869 DCHECK(true_target != nullptr);
2870 __ B(true_target);
2871 return;
2872 }
2873
2874 vixl32::Label* non_fallthrough_target;
2875 bool invert;
2876 bool emit_both_branches;
2877
2878 if (true_target == nullptr) {
2879 // The true target is fallthrough.
2880 DCHECK(false_target != nullptr);
2881 non_fallthrough_target = false_target;
2882 invert = true;
2883 emit_both_branches = false;
2884 } else {
2885 non_fallthrough_target = true_target;
2886 invert = false;
2887 // Either the false target is fallthrough, or there is no fallthrough
2888 // and both branches must be emitted.
2889 emit_both_branches = (false_target != nullptr);
2890 }
2891
2892 const auto cond = GenerateTest(condition, invert, codegen_);
2893
2894 __ B(cond.first, non_fallthrough_target, is_far_target);
2895
2896 if (emit_both_branches) {
2897 // No target falls through; we need to branch.
2898 __ B(false_target);
2899 }
2900 }
2901
2902 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
2903 size_t condition_input_index,
2904 vixl32::Label* true_target,
2905 vixl32::Label* false_target,
2906 bool far_target) {
2907 HInstruction* cond = instruction->InputAt(condition_input_index);
2908
2909 if (true_target == nullptr && false_target == nullptr) {
2910 // Nothing to do. The code always falls through.
2911 return;
2912 } else if (cond->IsIntConstant()) {
2913 // Constant condition, statically compared against "true" (integer value 1).
2914 if (cond->AsIntConstant()->IsTrue()) {
2915 if (true_target != nullptr) {
2916 __ B(true_target);
2917 }
2918 } else {
2919 DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond);
2920 if (false_target != nullptr) {
2921 __ B(false_target);
2922 }
2923 }
2924 return;
2925 }
2926
2927 // The following code generates these patterns:
2928 // (1) true_target == nullptr && false_target != nullptr
2929 // - opposite condition true => branch to false_target
2930 // (2) true_target != nullptr && false_target == nullptr
2931 // - condition true => branch to true_target
2932 // (3) true_target != nullptr && false_target != nullptr
2933 // - condition true => branch to true_target
2934 // - branch to false_target
2935 if (IsBooleanValueOrMaterializedCondition(cond)) {
2936 // Condition has been materialized, compare the output to 0.
2937 if (kIsDebugBuild) {
2938 Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
2939 DCHECK(cond_val.IsRegister());
2940 }
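// CompareAndBranchIfZero/IfNonZero are macro-assembler helpers; they can typically use a
// single CBZ/CBNZ when the register is low and the target is near, while `far_target`
// signals that a longer CMP plus conditional branch sequence may be needed.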
2941 if (true_target == nullptr) {
2942 __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
2943 false_target,
2944 far_target);
2945 } else {
2946 __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
2947 true_target,
2948 far_target);
2949 }
2950 } else {
2951 // Condition has not been materialized. Use its inputs as the comparison and
2952 // its condition as the branch condition.
2953 HCondition* condition = cond->AsCondition();
2954
2955 // If this is a long or FP comparison that has been folded into
2956 // the HCondition, generate the comparison directly.
2957 DataType::Type type = condition->InputAt(0)->GetType();
2958 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2959 GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
2960 return;
2961 }
2962
2963 vixl32::Label* non_fallthrough_target;
2964 vixl32::Condition arm_cond = vixl32::Condition::None();
2965 const vixl32::Register left = InputRegisterAt(cond, 0);
2966 const Operand right = InputOperandAt(cond, 1);
2967
2968 if (true_target == nullptr) {
2969 arm_cond = ARMCondition(condition->GetOppositeCondition());
2970 non_fallthrough_target = false_target;
2971 } else {
2972 arm_cond = ARMCondition(condition->GetCondition());
2973 non_fallthrough_target = true_target;
2974 }
2975
2976 if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
2977 if (arm_cond.Is(eq)) {
2978 __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
2979 } else {
2980 DCHECK(arm_cond.Is(ne));
2981 __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
2982 }
2983 } else {
2984 __ Cmp(left, right);
2985 __ B(arm_cond, non_fallthrough_target, far_target);
2986 }
2987 }
2988
2989 // If neither branch falls through (case 3), the conditional branch to `true_target`
2990 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2991 if (true_target != nullptr && false_target != nullptr) {
2992 __ B(false_target);
2993 }
2994 }
2995
2996 void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
2997 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2998 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2999 locations->SetInAt(0, Location::RequiresRegister());
3000 if (GetGraph()->IsCompilingBaseline() &&
3001 codegen_->GetCompilerOptions().ProfileBranches() &&
3002 !Runtime::Current()->IsAotCompiler()) {
3003 locations->AddTemp(Location::RequiresRegister());
3004 }
3005 }
3006 }
3007
3008 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
3009 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3010 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3011 vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
3012 nullptr : codegen_->GetLabelOf(true_successor);
3013 vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
3014 nullptr : codegen_->GetLabelOf(false_successor);
3015 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3016 if (GetGraph()->IsCompilingBaseline() &&
3017 codegen_->GetCompilerOptions().ProfileBranches() &&
3018 !Runtime::Current()->IsAotCompiler()) {
3019 DCHECK(if_instr->InputAt(0)->IsCondition());
3020 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3021 DCHECK(info != nullptr);
3022 BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
3023 // Currently, not all If branches are profiled.
3024 if (cache != nullptr) {
3025 uint32_t address =
3026 reinterpret_cast32<uint32_t>(cache) + BranchCache::FalseOffset().Int32Value();
3027 static_assert(
3028 BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
3029 "Unexpected offsets for BranchCache");
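// The false and true counters are adjacent 16-bit slots, so the counter for the taken arm
// lives at cache + FalseOffset + (condition << 1). The update below saturates: Uxth keeps
// the low 16 bits, and if the increment wraps around to zero the store is skipped so the
// counter stays at 0xFFFF.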
3030 vixl32::Label done;
3031 UseScratchRegisterScope temps(GetVIXLAssembler());
3032 vixl32::Register temp = temps.Acquire();
3033 vixl32::Register counter = RegisterFrom(if_instr->GetLocations()->GetTemp(0));
3034 vixl32::Register condition = InputRegisterAt(if_instr, 0);
3035 __ Mov(temp, address);
3036 __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
3037 __ Adds(counter, counter, 1);
3038 __ Uxth(counter, counter);
3039 __ CompareAndBranchIfZero(counter, &done);
3040 __ Strh(counter, MemOperand(temp, condition, LSL, 1));
3041 __ Bind(&done);
3042 }
3043 }
3044 }
3045 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3046 }
3047
3048 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3049 LocationSummary* locations = new (GetGraph()->GetAllocator())
3050 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3051 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3052 RegisterSet caller_saves = RegisterSet::Empty();
3053 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
3054 locations->SetCustomSlowPathCallerSaves(caller_saves);
3055 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3056 locations->SetInAt(0, Location::RequiresRegister());
3057 }
3058 }
3059
3060 void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3061 SlowPathCodeARMVIXL* slow_path =
3062 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
3063 GenerateTestAndBranch(deoptimize,
3064 /* condition_input_index= */ 0,
3065 slow_path->GetEntryLabel(),
3066 /* false_target= */ nullptr);
3067 }
3068
3069 void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3070 LocationSummary* locations = new (GetGraph()->GetAllocator())
3071 LocationSummary(flag, LocationSummary::kNoCall);
3072 locations->SetOut(Location::RequiresRegister());
3073 }
3074
3075 void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3076 GetAssembler()->LoadFromOffset(kLoadWord,
3077 OutputRegister(flag),
3078 sp,
3079 codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
3080 }
3081
3082 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
3083 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3084 const bool is_floating_point = DataType::IsFloatingPointType(select->GetType());
3085
3086 if (is_floating_point) {
3087 locations->SetInAt(0, Location::RequiresFpuRegister());
3088 locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
3089 } else {
3090 locations->SetInAt(0, Location::RequiresRegister());
3091 locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
3092 }
3093
3094 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3095 locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
3096 // The code generator handles overlap with the values, but not with the condition.
3097 locations->SetOut(Location::SameAsFirstInput());
3098 } else if (is_floating_point) {
3099 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3100 } else {
3101 if (!locations->InAt(1).IsConstant()) {
3102 locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
3103 }
3104
3105 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3106 }
3107 }
3108
3109 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
3110 HInstruction* const condition = select->GetCondition();
3111 const LocationSummary* const locations = select->GetLocations();
3112 const DataType::Type type = select->GetType();
3113 const Location first = locations->InAt(0);
3114 const Location out = locations->Out();
3115 const Location second = locations->InAt(1);
3116
3117 // In the unlucky case where the output of this instruction overlaps
3118 // with an input of an "emitted-at-use-site" condition, and the output
3119 // of this instruction is not one of its inputs, we need to fall back
3120 // to branches instead of conditional ARM instructions.
3121 bool output_overlaps_with_condition_inputs =
3122 !IsBooleanValueOrMaterializedCondition(condition) &&
3123 !out.Equals(first) &&
3124 !out.Equals(second) &&
3125 (condition->GetLocations()->InAt(0).Equals(out) ||
3126 condition->GetLocations()->InAt(1).Equals(out));
3127 DCHECK_IMPLIES(output_overlaps_with_condition_inputs, condition->IsCondition());
3128 Location src;
3129
3130 if (condition->IsIntConstant()) {
3131 if (condition->AsIntConstant()->IsFalse()) {
3132 src = first;
3133 } else {
3134 src = second;
3135 }
3136
3137 codegen_->MoveLocation(out, src, type);
3138 return;
3139 }
3140
3141 if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
3142 bool invert = false;
3143
3144 if (out.Equals(second)) {
3145 src = first;
3146 invert = true;
3147 } else if (out.Equals(first)) {
3148 src = second;
3149 } else if (second.IsConstant()) {
3150 DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
3151 src = second;
3152 } else if (first.IsConstant()) {
3153 DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
3154 src = first;
3155 invert = true;
3156 } else {
3157 src = second;
3158 }
3159
3160 if (CanGenerateConditionalMove(out, src)) {
3161 if (!out.Equals(first) && !out.Equals(second)) {
3162 codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
3163 }
3164
3165 std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
3166
3167 if (IsBooleanValueOrMaterializedCondition(condition)) {
3168 __ Cmp(InputRegisterAt(select, 2), 0);
3169 cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
3170 } else {
3171 cond = GenerateTest(condition->AsCondition(), invert, codegen_);
3172 }
3173
3174 const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
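// The exact-size scope below asserts that every `it`/`mov` emitted here uses a 16-bit T32
// encoding: one it/mov pair (2 halfwords) for a single register, two pairs (4 halfwords)
// for a register pair.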
3175 // We use the scope because of the IT block that follows.
3176 ExactAssemblyScope guard(GetVIXLAssembler(),
3177 instr_count * vixl32::k16BitT32InstructionSizeInBytes,
3178 CodeBufferCheckScope::kExactSize);
3179
3180 if (out.IsRegister()) {
3181 __ it(cond.first);
3182 __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
3183 } else {
3184 DCHECK(out.IsRegisterPair());
3185
3186 Operand operand_high(0);
3187 Operand operand_low(0);
3188
3189 if (src.IsConstant()) {
3190 const int64_t value = Int64ConstantFrom(src);
3191
3192 operand_high = High32Bits(value);
3193 operand_low = Low32Bits(value);
3194 } else {
3195 DCHECK(src.IsRegisterPair());
3196 operand_high = HighRegisterFrom(src);
3197 operand_low = LowRegisterFrom(src);
3198 }
3199
3200 __ it(cond.first);
3201 __ mov(cond.first, LowRegisterFrom(out), operand_low);
3202 __ it(cond.first);
3203 __ mov(cond.first, HighRegisterFrom(out), operand_high);
3204 }
3205
3206 return;
3207 }
3208 }
3209
3210 vixl32::Label* false_target = nullptr;
3211 vixl32::Label* true_target = nullptr;
3212 vixl32::Label select_end;
3213 vixl32::Label other_case;
3214 vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
3215
3216 if (out.Equals(second)) {
3217 true_target = target;
3218 src = first;
3219 } else {
3220 false_target = target;
3221 src = second;
3222
3223 if (!out.Equals(first)) {
3224 if (output_overlaps_with_condition_inputs) {
3225 false_target = &other_case;
3226 } else {
3227 codegen_->MoveLocation(out, first, type);
3228 }
3229 }
3230 }
3231
3232 GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false);
3233 codegen_->MoveLocation(out, src, type);
3234 if (output_overlaps_with_condition_inputs) {
3235 __ B(target);
3236 __ Bind(&other_case);
3237 codegen_->MoveLocation(out, first, type);
3238 }
3239
3240 if (select_end.IsReferenced()) {
3241 __ Bind(&select_end);
3242 }
3243 }
3244
3245 void LocationsBuilderARMVIXL::VisitNop(HNop* nop) {
3246 new (GetGraph()->GetAllocator()) LocationSummary(nop);
3247 }
3248
3249 void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) {
3250 // The environment recording already happened in CodeGenerator::Compile.
3251 }
3252
3253 void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
3254 __ Claim(adjustment);
3255 GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3256 }
3257
3258 void CodeGeneratorARMVIXL::DecreaseFrame(size_t adjustment) {
3259 __ Drop(adjustment);
3260 GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3261 }
3262
3263 void CodeGeneratorARMVIXL::GenerateNop() {
3264 __ Nop();
3265 }
3266
3267 // `temp` is an extra temporary register that is used for some conditions;
3268 // callers may not specify it, in which case the method will use a scratch
3269 // register instead.
3270 void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
3271 vixl32::Register out,
3272 vixl32::Register in,
3273 vixl32::Register temp) {
3274 switch (condition) {
3275 case kCondEQ:
3276 // x <= 0 iff x == 0 when the comparison is unsigned.
3277 case kCondBE:
3278 if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
3279 temp = out;
3280 }
3281
3282 // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
3283 // different as well.
3284 if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
3285 // temp = - in; only 0 sets the carry flag.
3286 __ Rsbs(temp, in, 0);
3287
3288 if (out.Is(in)) {
3289 std::swap(in, temp);
3290 }
3291
3292 // out = - in + in + carry = carry
3293 __ Adc(out, temp, in);
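// For example, in = 5: temp = -5 with the carry clear, so out = -5 + 5 + 0 = 0;
// in = 0: temp = 0 with the carry set, so out = 0 + 0 + 1 = 1.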
3294 } else {
3295 // If `in` is 0, then it has 32 leading zeros, and less than that otherwise.
3296 __ Clz(out, in);
3297 // Any number less than 32 logically shifted right by 5 bits results in 0;
3298 // the same operation on 32 yields 1.
3299 __ Lsr(out, out, 5);
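// For example, in = 0 gives clz = 32 and 32 >> 5 = 1, while any non-zero input gives
// clz <= 31 and therefore 0.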
3300 }
3301
3302 break;
3303 case kCondNE:
3304 // x > 0 iff x != 0 when the comparison is unsigned.
3305 case kCondA: {
3306 UseScratchRegisterScope temps(GetVIXLAssembler());
3307
3308 if (out.Is(in)) {
3309 if (!temp.IsValid() || in.Is(temp)) {
3310 temp = temps.Acquire();
3311 }
3312 } else if (!temp.IsValid() || !temp.IsLow()) {
3313 temp = out;
3314 }
3315
3316 // temp = in - 1; only 0 does not set the carry flag.
3317 __ Subs(temp, in, 1);
3318 // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
3319 __ Sbc(out, in, temp);
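// For example, in = 7: temp = 6 with the carry set, so out = 7 - 6 - 1 + 1 = 1;
// in = 0: temp = 0xFFFFFFFF with the carry clear, so out = 0 - (-1) - 1 + 0 = 0.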
3320 break;
3321 }
3322 case kCondGE:
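// x >= 0 iff the sign bit of ~x is set, so invert and fall through to the sign-bit
// extraction below.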
3323 __ Mvn(out, in);
3324 in = out;
3325 FALLTHROUGH_INTENDED;
3326 case kCondLT:
3327 // We only care about the sign bit.
3328 __ Lsr(out, in, 31);
3329 break;
3330 case kCondAE:
3331 // Trivially true.
3332 __ Mov(out, 1);
3333 break;
3334 case kCondB:
3335 // Trivially false.
3336 __ Mov(out, 0);
3337 break;
3338 default:
3339 LOG(FATAL) << "Unexpected condition " << condition;
3340 UNREACHABLE();
3341 }
3342 }
3343
3344 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
3345 LocationSummary* locations =
3346 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
3347 const DataType::Type type = cond->InputAt(0)->GetType();
3348 if (DataType::IsFloatingPointType(type)) {
3349 locations->SetInAt(0, Location::RequiresFpuRegister());
3350 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
3351 } else {
3352 locations->SetInAt(0, Location::RequiresRegister());
3353 locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
3354 }
3355 if (!cond->IsEmittedAtUseSite()) {
3356 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3357 }
3358 }
3359
3360 void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
3361 if (cond->IsEmittedAtUseSite()) {
3362 return;
3363 }
3364
3365 const DataType::Type type = cond->GetLeft()->GetType();
3366
3367 if (DataType::IsFloatingPointType(type)) {
3368 GenerateConditionGeneric(cond, codegen_);
3369 return;
3370 }
3371
3372 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
3373
3374 const IfCondition condition = cond->GetCondition();
3375
3376 // A condition with only one boolean input, or with two boolean inputs but an operation other
3377 // than equality or inequality, results from transformations done by the instruction simplifier
3378 // and is handled as a regular condition with integral inputs.
3379 if (type == DataType::Type::kBool &&
3380 cond->GetRight()->GetType() == DataType::Type::kBool &&
3381 (condition == kCondEQ || condition == kCondNE)) {
3382 vixl32::Register left = InputRegisterAt(cond, 0);
3383 const vixl32::Register out = OutputRegister(cond);
3384 const Location right_loc = cond->GetLocations()->InAt(1);
3385
3386 // The constant case is handled by the instruction simplifier.
3387 DCHECK(!right_loc.IsConstant());
3388
3389 vixl32::Register right = RegisterFrom(right_loc);
3390
3391 // Avoid 32-bit instructions if possible.
3392 if (out.Is(right)) {
3393 std::swap(left, right);
3394 }
3395
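// For boolean inputs, left ^ right is 1 exactly when the two values differ (kCondNE);
// for kCondEQ the result is flipped below by XOR-ing with 1.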
3396 __ Eor(out, left, right);
3397
3398 if (condition == kCondEQ) {
3399 __ Eor(out, out, 1);
3400 }
3401
3402 return;
3403 }
3404
3405 GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
3406 }
3407
3408 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
3409 HandleCondition(comp);
3410 }
3411
3412 void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) {
3413 HandleCondition(comp);
3414 }
3415
3416 void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) {
3417 HandleCondition(comp);
3418 }
3419
3420 void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) {
3421 HandleCondition(comp);
3422 }
3423
3424 void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) {
3425 HandleCondition(comp);
3426 }
3427
3428 void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) {
3429 HandleCondition(comp);
3430 }
3431
3432 void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3433 HandleCondition(comp);
3434 }
3435
3436 void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3437 HandleCondition(comp);
3438 }
3439
3440 void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3441 HandleCondition(comp);
3442 }
3443
3444 void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3445 HandleCondition(comp);
3446 }
3447
3448 void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3449 HandleCondition(comp);
3450 }
3451
3452 void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3453 HandleCondition(comp);
3454 }
3455
3456 void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) {
3457 HandleCondition(comp);
3458 }
3459
3460 void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) {
3461 HandleCondition(comp);
3462 }
3463
3464 void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3465 HandleCondition(comp);
3466 }
3467
3468 void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3469 HandleCondition(comp);
3470 }
3471
3472 void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) {
3473 HandleCondition(comp);
3474 }
3475
3476 void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) {
3477 HandleCondition(comp);
3478 }
3479
3480 void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3481 HandleCondition(comp);
3482 }
3483
3484 void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3485 HandleCondition(comp);
3486 }
3487
3488 void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
3489 LocationSummary* locations =
3490 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3491 locations->SetOut(Location::ConstantLocation(constant));
3492 }
3493
3494 void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
3495 // Will be generated at use site.
3496 }
3497
3498 void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
3499 LocationSummary* locations =
3500 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3501 locations->SetOut(Location::ConstantLocation(constant));
3502 }
3503
3504 void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
3505 // Will be generated at use site.
3506 }
3507
3508 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
3509 LocationSummary* locations =
3510 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3511 locations->SetOut(Location::ConstantLocation(constant));
3512 }
3513
3514 void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
3515 // Will be generated at use site.
3516 }
3517
3518 void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
3519 LocationSummary* locations =
3520 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3521 locations->SetOut(Location::ConstantLocation(constant));
3522 }
3523
3524 void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
3525 [[maybe_unused]] HFloatConstant* constant) {
3526 // Will be generated at use site.
3527 }
3528
3529 void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
3530 LocationSummary* locations =
3531 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3532 locations->SetOut(Location::ConstantLocation(constant));
3533 }
3534
3535 void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
3536 [[maybe_unused]] HDoubleConstant* constant) {
3537 // Will be generated at use site.
3538 }
3539
3540 void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
3541 constructor_fence->SetLocations(nullptr);
3542 }
3543
3544 void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
3545 [[maybe_unused]] HConstructorFence* constructor_fence) {
3546 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3547 }
3548
3549 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3550 memory_barrier->SetLocations(nullptr);
3551 }
3552
3553 void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3554 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
3555 }
3556
3557 void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
3558 ret->SetLocations(nullptr);
3559 }
3560
3561 void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
3562 codegen_->GenerateFrameExit();
3563 }
3564
3565 void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) {
3566 LocationSummary* locations =
3567 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
3568 locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
3569 }
3570
3571 void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret) {
3572 if (GetGraph()->IsCompilingOsr()) {
3573 // To simplify callers of an OSR method, we put the return value in both
3574 // floating point and core registers.
3575 switch (ret->InputAt(0)->GetType()) {
3576 case DataType::Type::kFloat32:
3577 __ Vmov(r0, s0);
3578 break;
3579 case DataType::Type::kFloat64:
3580 __ Vmov(r0, r1, d0);
3581 break;
3582 default:
3583 break;
3584 }
3585 }
3586 codegen_->GenerateFrameExit();
3587 }
3588
3589 void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3590 // The trampoline uses the same calling convention as the dex calling convention,
3591 // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
3592 // the method_idx.
3593 HandleInvoke(invoke);
3594 }
3595
3596 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3597 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3598 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3);
3599 }
3600
3601 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3602 // Explicit clinit checks triggered by static invokes must have been pruned by
3603 // art::PrepareForRegisterAllocation.
3604 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3605
3606 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3607 if (intrinsic.TryDispatch(invoke)) {
3608 return;
3609 }
3610
3611 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3612 CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
3613 /*for_register_allocation=*/ true);
3614 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3615 } else {
3616 HandleInvoke(invoke);
3617 }
3618 }
3619
3620 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3621 if (invoke->GetLocations()->Intrinsified()) {
3622 IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
3623 intrinsic.Dispatch(invoke);
3624 return true;
3625 }
3626 return false;
3627 }
3628
3629 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3630 // Explicit clinit checks triggered by static invokes must have been pruned by
3631 // art::PrepareForRegisterAllocation.
3632 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3633
3634 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3635 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4);
3636 return;
3637 }
3638
3639 LocationSummary* locations = invoke->GetLocations();
3640 codegen_->GenerateStaticOrDirectCall(
3641 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3642
3643 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5);
3644 }
3645
3646 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
3647 InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
3648 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3649 }
3650
3651 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3652 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3653 if (intrinsic.TryDispatch(invoke)) {
3654 return;
3655 }
3656
3657 HandleInvoke(invoke);
3658 }
3659
3660 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3661 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3662 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6);
3663 return;
3664 }
3665
3666 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3667 DCHECK(!codegen_->IsLeafMethod());
3668
3669 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7);
3670 }
3671
3672 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3673 HandleInvoke(invoke);
3674 // Add the hidden argument.
3675 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3676 // We cannot request r12 as it's blocked by the register allocator.
3677 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
3678 }
3679 }
3680
3681 void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3682 vixl32::Register klass) {
3683 DCHECK_EQ(r0.GetCode(), klass.GetCode());
3684 if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3685 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3686 InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3687 info, GetCompilerOptions(), instruction->AsInvoke());
3688 if (cache != nullptr) {
3689 uint32_t address = reinterpret_cast32<uint32_t>(cache);
3690 vixl32::Label done;
3691 UseScratchRegisterScope temps(GetVIXLAssembler());
3692 temps.Exclude(ip);
3693 __ Mov(r4, address);
3694 __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
3695 // Fast path for a monomorphic cache.
3696 __ Cmp(klass, ip);
3697 __ B(eq, &done, /* is_far_target= */ false);
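// Not a hit on the first cache entry: call the runtime to update the inline cache with
// the class seen at this call site.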
3698 InvokeRuntime(kQuickUpdateInlineCache, instruction);
3699 __ Bind(&done);
3700 } else {
3701 // This is unexpected, but we don't guarantee stable compilation across
3702 // JIT runs so just warn about it.
3703 ScopedObjectAccess soa(Thread::Current());
3704 LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3705 }
3706 }
3707 }
3708
3709 void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3710 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3711 LocationSummary* locations = invoke->GetLocations();
3712 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3713 Location receiver = locations->InAt(0);
3714 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3715
3716 DCHECK(!receiver.IsStackSlot());
3717
3718 // Ensure the pc position is recorded immediately after the `ldr` instruction.
3719 {
3720 ExactAssemblyScope aas(GetVIXLAssembler(),
3721 vixl32::kMaxInstructionSizeInBytes,
3722 CodeBufferCheckScope::kMaximumSize);
3723 // /* HeapReference<Class> */ temp = receiver->klass_
3724 __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset));
3725 codegen_->MaybeRecordImplicitNullCheck(invoke);
3726 }
3727 // Instead of simply (possibly) unpoisoning `temp` here, we should
3728 // emit a read barrier for the previous class reference load.
3729 // However this is not required in practice, as this is an
3730 // intermediate/temporary reference and because the current
3731 // concurrent copying collector keeps the from-space memory
3732 // intact/accessible until the end of the marking phase (future
3733 // concurrent copying collectors may not).
3734 GetAssembler()->MaybeUnpoisonHeapReference(temp);
3735
3736 // If we're compiling baseline, update the inline cache.
3737 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3738
3739 GetAssembler()->LoadFromOffset(kLoadWord,
3740 temp,
3741 temp,
3742 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
3743
3744 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3745 invoke->GetImtIndex(), kArmPointerSize));
3746 // temp = temp->GetImtEntryAt(method_offset);
3747 GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
3748 uint32_t entry_point =
3749 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
3750 // LR = temp->GetEntryPoint();
3751 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
3752
3753 {
3754 // Set the hidden (in r12) argument. It is done here, right before the BLX, to prevent other
3755 // instructions from clobbering it, as they might use r12 as a scratch register.
3756 Location hidden_reg = Location::RegisterLocation(r12.GetCode());
3757 // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
3758 // so it checks if the application is using them (by passing them to the macro assembler
3759 // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
3760 // what is available, and is the opposite of the standard usage: Instead of requesting a
3761 // temporary location, it imposes an external constraint (i.e. a specific register is reserved
3762 // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
3763 // (to materialize the constant), since the destination register becomes available for such use
3764 // internally for the duration of the macro instruction.
3765 UseScratchRegisterScope temps(GetVIXLAssembler());
3766 temps.Exclude(RegisterFrom(hidden_reg));
3767 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3768 Location current_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
3769 if (current_method.IsStackSlot()) {
3770 GetAssembler()->LoadFromOffset(
3771 kLoadWord, RegisterFrom(hidden_reg), sp, current_method.GetStackIndex());
3772 } else {
3773 __ Mov(RegisterFrom(hidden_reg), RegisterFrom(current_method));
3774 }
3775 } else if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3776 // We pass the method from the IMT in case of a conflict. This will ensure
3777 // we go into the runtime to resolve the actual method.
3778 CHECK_NE(temp.GetCode(), lr.GetCode());
3779 __ Mov(RegisterFrom(hidden_reg), temp);
3780 } else {
3781 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3782 }
3783 }
3784 {
3785 // Ensure the pc position is recorded immediately after the `blx` instruction.
3786 // In T32, blx has only a 16-bit encoding, which is why a stricter check for the scope is used.
3787 ExactAssemblyScope aas(GetVIXLAssembler(),
3788 vixl32::k16BitT32InstructionSizeInBytes,
3789 CodeBufferCheckScope::kExactSize);
3790 // LR();
3791 __ blx(lr);
3792 codegen_->RecordPcInfo(invoke);
3793 DCHECK(!codegen_->IsLeafMethod());
3794 }
3795
3796 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8);
3797 }
3798
3799 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3800 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3801 if (intrinsic.TryDispatch(invoke)) {
3802 return;
3803 }
3804 HandleInvoke(invoke);
3805 }
3806
3807 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3808 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3809 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9);
3810 return;
3811 }
3812 codegen_->GenerateInvokePolymorphicCall(invoke);
3813 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10);
3814 }
3815
3816 void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3817 HandleInvoke(invoke);
3818 }
3819
3820 void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3821 codegen_->GenerateInvokeCustomCall(invoke);
3822 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11);
3823 }
3824
3825 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
3826 LocationSummary* locations =
3827 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3828 switch (neg->GetResultType()) {
3829 case DataType::Type::kInt32: {
3830 locations->SetInAt(0, Location::RequiresRegister());
3831 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3832 break;
3833 }
3834 case DataType::Type::kInt64: {
3835 locations->SetInAt(0, Location::RequiresRegister());
3836 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3837 break;
3838 }
3839
3840 case DataType::Type::kFloat32:
3841 case DataType::Type::kFloat64:
3842 locations->SetInAt(0, Location::RequiresFpuRegister());
3843 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3844 break;
3845
3846 default:
3847 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3848 }
3849 }
3850
3851 void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
3852 LocationSummary* locations = neg->GetLocations();
3853 Location out = locations->Out();
3854 Location in = locations->InAt(0);
3855 switch (neg->GetResultType()) {
3856 case DataType::Type::kInt32:
3857 __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
3858 break;
3859
3860 case DataType::Type::kInt64:
3861 // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
3862 __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
3863 // We cannot emit an RSC (Reverse Subtract with Carry)
3864 // instruction here, as it does not exist in the Thumb-2
3865 // instruction set. We use the following approach
3866 // using SBC and SUB instead.
3867 //
3868 // out.hi = -C
3869 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
3870 // out.hi = out.hi - in.hi
3871 __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
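// Together these give out.hi = 0 - in.hi - borrow, completing the 64-bit negation.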
3872 break;
3873
3874 case DataType::Type::kFloat32:
3875 case DataType::Type::kFloat64:
3876 __ Vneg(OutputVRegister(neg), InputVRegister(neg));
3877 break;
3878
3879 default:
3880 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3881 }
3882 }
3883
3884 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3885 DataType::Type result_type = conversion->GetResultType();
3886 DataType::Type input_type = conversion->GetInputType();
3887 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3888 << input_type << " -> " << result_type;
3889
3890 // The float-to-long, double-to-long and long-to-float type conversions
3891 // rely on a call to the runtime.
3892 LocationSummary::CallKind call_kind =
3893 (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3894 && result_type == DataType::Type::kInt64)
3895 || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32))
3896 ? LocationSummary::kCallOnMainOnly
3897 : LocationSummary::kNoCall;
3898 LocationSummary* locations =
3899 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3900
3901 switch (result_type) {
3902 case DataType::Type::kUint8:
3903 case DataType::Type::kInt8:
3904 case DataType::Type::kUint16:
3905 case DataType::Type::kInt16:
3906 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3907 locations->SetInAt(0, Location::RequiresRegister());
3908 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3909 break;
3910
3911 case DataType::Type::kInt32:
3912 switch (input_type) {
3913 case DataType::Type::kInt64:
3914 locations->SetInAt(0, Location::Any());
3915 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3916 break;
3917
3918 case DataType::Type::kFloat32:
3919 locations->SetInAt(0, Location::RequiresFpuRegister());
3920 locations->SetOut(Location::RequiresRegister());
3921 locations->AddTemp(Location::RequiresFpuRegister());
3922 break;
3923
3924 case DataType::Type::kFloat64:
3925 locations->SetInAt(0, Location::RequiresFpuRegister());
3926 locations->SetOut(Location::RequiresRegister());
3927 locations->AddTemp(Location::RequiresFpuRegister());
3928 break;
3929
3930 default:
3931 LOG(FATAL) << "Unexpected type conversion from " << input_type
3932 << " to " << result_type;
3933 }
3934 break;
3935
3936 case DataType::Type::kInt64:
3937 switch (input_type) {
3938 case DataType::Type::kBool:
3939 case DataType::Type::kUint8:
3940 case DataType::Type::kInt8:
3941 case DataType::Type::kUint16:
3942 case DataType::Type::kInt16:
3943 case DataType::Type::kInt32:
3944 locations->SetInAt(0, Location::RequiresRegister());
3945 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3946 break;
3947
3948 case DataType::Type::kFloat32: {
3949 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3950 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3951 locations->SetOut(LocationFrom(r0, r1));
3952 break;
3953 }
3954
3955 case DataType::Type::kFloat64: {
3956 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3957 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
3958 calling_convention.GetFpuRegisterAt(1)));
3959 locations->SetOut(LocationFrom(r0, r1));
3960 break;
3961 }
3962
3963 default:
3964 LOG(FATAL) << "Unexpected type conversion from " << input_type
3965 << " to " << result_type;
3966 }
3967 break;
3968
3969 case DataType::Type::kFloat32:
3970 switch (input_type) {
3971 case DataType::Type::kBool:
3972 case DataType::Type::kUint8:
3973 case DataType::Type::kInt8:
3974 case DataType::Type::kUint16:
3975 case DataType::Type::kInt16:
3976 case DataType::Type::kInt32:
3977 locations->SetInAt(0, Location::RequiresRegister());
3978 locations->SetOut(Location::RequiresFpuRegister());
3979 break;
3980
3981 case DataType::Type::kInt64: {
3982 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3983 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
3984 calling_convention.GetRegisterAt(1)));
3985 locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3986 break;
3987 }
3988
3989 case DataType::Type::kFloat64:
3990 locations->SetInAt(0, Location::RequiresFpuRegister());
3991 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3992 break;
3993
3994 default:
3995 LOG(FATAL) << "Unexpected type conversion from " << input_type
3996 << " to " << result_type;
3997 }
3998 break;
3999
4000 case DataType::Type::kFloat64:
4001 switch (input_type) {
4002 case DataType::Type::kBool:
4003 case DataType::Type::kUint8:
4004 case DataType::Type::kInt8:
4005 case DataType::Type::kUint16:
4006 case DataType::Type::kInt16:
4007 case DataType::Type::kInt32:
4008 locations->SetInAt(0, Location::RequiresRegister());
4009 locations->SetOut(Location::RequiresFpuRegister());
4010 break;
4011
4012 case DataType::Type::kInt64:
4013 locations->SetInAt(0, Location::RequiresRegister());
4014 locations->SetOut(Location::RequiresFpuRegister());
4015 locations->AddTemp(Location::RequiresFpuRegister());
4016 locations->AddTemp(Location::RequiresFpuRegister());
4017 break;
4018
4019 case DataType::Type::kFloat32:
4020 locations->SetInAt(0, Location::RequiresFpuRegister());
4021 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4022 break;
4023
4024 default:
4025 LOG(FATAL) << "Unexpected type conversion from " << input_type
4026 << " to " << result_type;
4027 }
4028 break;
4029
4030 default:
4031 LOG(FATAL) << "Unexpected type conversion from " << input_type
4032 << " to " << result_type;
4033 }
4034 }
4035
4036 void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
4037 LocationSummary* locations = conversion->GetLocations();
4038 Location out = locations->Out();
4039 Location in = locations->InAt(0);
4040 DataType::Type result_type = conversion->GetResultType();
4041 DataType::Type input_type = conversion->GetInputType();
4042 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
4043 << input_type << " -> " << result_type;
4044 switch (result_type) {
4045 case DataType::Type::kUint8:
4046 switch (input_type) {
4047 case DataType::Type::kInt8:
4048 case DataType::Type::kUint16:
4049 case DataType::Type::kInt16:
4050 case DataType::Type::kInt32:
4051 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4052 break;
4053 case DataType::Type::kInt64:
4054 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4055 break;
4056
4057 default:
4058 LOG(FATAL) << "Unexpected type conversion from " << input_type
4059 << " to " << result_type;
4060 }
4061 break;
4062
4063 case DataType::Type::kInt8:
4064 switch (input_type) {
4065 case DataType::Type::kUint8:
4066 case DataType::Type::kUint16:
4067 case DataType::Type::kInt16:
4068 case DataType::Type::kInt32:
4069 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4070 break;
4071 case DataType::Type::kInt64:
4072 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4073 break;
4074
4075 default:
4076 LOG(FATAL) << "Unexpected type conversion from " << input_type
4077 << " to " << result_type;
4078 }
4079 break;
4080
4081 case DataType::Type::kUint16:
4082 switch (input_type) {
4083 case DataType::Type::kInt8:
4084 case DataType::Type::kInt16:
4085 case DataType::Type::kInt32:
4086 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4087 break;
4088 case DataType::Type::kInt64:
4089 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4090 break;
4091
4092 default:
4093 LOG(FATAL) << "Unexpected type conversion from " << input_type
4094 << " to " << result_type;
4095 }
4096 break;
4097
4098 case DataType::Type::kInt16:
4099 switch (input_type) {
4100 case DataType::Type::kUint16:
4101 case DataType::Type::kInt32:
4102 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4103 break;
4104 case DataType::Type::kInt64:
4105 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4106 break;
4107
4108 default:
4109 LOG(FATAL) << "Unexpected type conversion from " << input_type
4110 << " to " << result_type;
4111 }
4112 break;
4113
4114 case DataType::Type::kInt32:
4115 switch (input_type) {
4116 case DataType::Type::kInt64:
4117 DCHECK(out.IsRegister());
4118 if (in.IsRegisterPair()) {
4119 __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
4120 } else if (in.IsDoubleStackSlot()) {
4121 GetAssembler()->LoadFromOffset(kLoadWord,
4122 OutputRegister(conversion),
4123 sp,
4124 in.GetStackIndex());
4125 } else {
4126 DCHECK(in.IsConstant());
4127 DCHECK(in.GetConstant()->IsLongConstant());
4128 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
4129 __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
4130 }
4131 break;
4132
4133 case DataType::Type::kFloat32: {
4134 vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
4135 __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
4136 __ Vmov(OutputRegister(conversion), temp);
4137 break;
4138 }
4139
4140 case DataType::Type::kFloat64: {
4141 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4142 __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
4143 __ Vmov(OutputRegister(conversion), temp_s);
4144 break;
4145 }
4146
4147 default:
4148 LOG(FATAL) << "Unexpected type conversion from " << input_type
4149 << " to " << result_type;
4150 }
4151 break;
4152
4153 case DataType::Type::kInt64:
4154 switch (input_type) {
4155 case DataType::Type::kBool:
4156 case DataType::Type::kUint8:
4157 case DataType::Type::kInt8:
4158 case DataType::Type::kUint16:
4159 case DataType::Type::kInt16:
4160 case DataType::Type::kInt32:
4161 DCHECK(out.IsRegisterPair());
4162 DCHECK(in.IsRegister());
4163 __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
4164 // Sign extension.
4165 __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
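// Asr #31 replicates the sign bit of the low word across the entire high word.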
4166 break;
4167
4168 case DataType::Type::kFloat32:
4169 codegen_->InvokeRuntime(kQuickF2l, conversion);
4170 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
4171 break;
4172
4173 case DataType::Type::kFloat64:
4174 codegen_->InvokeRuntime(kQuickD2l, conversion);
4175 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
4176 break;
4177
4178 default:
4179 LOG(FATAL) << "Unexpected type conversion from " << input_type
4180 << " to " << result_type;
4181 }
4182 break;
4183
4184 case DataType::Type::kFloat32:
4185 switch (input_type) {
4186 case DataType::Type::kBool:
4187 case DataType::Type::kUint8:
4188 case DataType::Type::kInt8:
4189 case DataType::Type::kUint16:
4190 case DataType::Type::kInt16:
4191 case DataType::Type::kInt32:
4192 __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
4193 __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
4194 break;
4195
4196 case DataType::Type::kInt64:
4197 codegen_->InvokeRuntime(kQuickL2f, conversion);
4198 CheckEntrypointTypes<kQuickL2f, float, int64_t>();
4199 break;
4200
4201 case DataType::Type::kFloat64:
4202 __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
4203 break;
4204
4205 default:
4206 LOG(FATAL) << "Unexpected type conversion from " << input_type
4207 << " to " << result_type;
4208 }
4209 break;
4210
4211 case DataType::Type::kFloat64:
4212 switch (input_type) {
4213 case DataType::Type::kBool:
4214 case DataType::Type::kUint8:
4215 case DataType::Type::kInt8:
4216 case DataType::Type::kUint16:
4217 case DataType::Type::kInt16:
4218 case DataType::Type::kInt32:
4219 __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
4220 __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
4221 break;
4222
4223 case DataType::Type::kInt64: {
4224 vixl32::Register low = LowRegisterFrom(in);
4225 vixl32::Register high = HighRegisterFrom(in);
4226 vixl32::SRegister out_s = LowSRegisterFrom(out);
4227 vixl32::DRegister out_d = DRegisterFrom(out);
4228 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4229 vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
4230 vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
4231
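// The sequence below computes out_d = double(high) * 2^32 + unsigned-to-double(low).
// Illustrative example: for the input 0x0000000180000000 (= 2^32 + 2^31) this is
// 4294967296.0 + 2147483648.0 = 6442450944.0.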
4232 // temp_d = int-to-double(high)
4233 __ Vmov(temp_s, high);
4234 __ Vcvt(F64, S32, temp_d, temp_s);
4235 // constant_d = k2Pow32EncodingForDouble
4236 __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
4237 // out_d = unsigned-to-double(low)
4238 __ Vmov(out_s, low);
4239 __ Vcvt(F64, U32, out_d, out_s);
4240 // out_d += temp_d * constant_d
4241 __ Vmla(F64, out_d, temp_d, constant_d);
4242 break;
4243 }
4244
4245 case DataType::Type::kFloat32:
4246 __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
4247 break;
4248
4249 default:
4250 LOG(FATAL) << "Unexpected type conversion from " << input_type
4251 << " to " << result_type;
4252 }
4253 break;
4254
4255 default:
4256 LOG(FATAL) << "Unexpected type conversion from " << input_type
4257 << " to " << result_type;
4258 }
4259 }
4260
4261 void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) {
4262 LocationSummary* locations =
4263 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
4264 switch (add->GetResultType()) {
4265 case DataType::Type::kInt32: {
4266 locations->SetInAt(0, Location::RequiresRegister());
4267 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
4268 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4269 break;
4270 }
4271
4272 case DataType::Type::kInt64: {
4273 locations->SetInAt(0, Location::RequiresRegister());
4274 locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
4275 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4276 break;
4277 }
4278
4279 case DataType::Type::kFloat32:
4280 case DataType::Type::kFloat64: {
4281 locations->SetInAt(0, Location::RequiresFpuRegister());
4282 locations->SetInAt(1, Location::RequiresFpuRegister());
4283 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4284 break;
4285 }
4286
4287 default:
4288 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4289 }
4290 }
4291
4292 void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) {
4293 LocationSummary* locations = add->GetLocations();
4294 Location out = locations->Out();
4295 Location first = locations->InAt(0);
4296 Location second = locations->InAt(1);
4297
4298 switch (add->GetResultType()) {
4299 case DataType::Type::kInt32: {
4300 __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1));
4301 }
4302 break;
4303
4304 case DataType::Type::kInt64: {
4305 if (second.IsConstant()) {
4306 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4307 GenerateAddLongConst(out, first, value);
4308 } else {
4309 DCHECK(second.IsRegisterPair());
4310 __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4311 __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4312 }
4313 break;
4314 }
4315
4316 case DataType::Type::kFloat32:
4317 case DataType::Type::kFloat64:
4318 __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
4319 break;
4320
4321 default:
4322 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4323 }
4324 }
4325
4326 void LocationsBuilderARMVIXL::VisitSub(HSub* sub) {
4327 LocationSummary* locations =
4328 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
4329 switch (sub->GetResultType()) {
4330 case DataType::Type::kInt32: {
4331 locations->SetInAt(0, Location::RequiresRegister());
4332 locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
4333 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4334 break;
4335 }
4336
4337 case DataType::Type::kInt64: {
4338 locations->SetInAt(0, Location::RequiresRegister());
4339 locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
4340 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4341 break;
4342 }
4343 case DataType::Type::kFloat32:
4344 case DataType::Type::kFloat64: {
4345 locations->SetInAt(0, Location::RequiresFpuRegister());
4346 locations->SetInAt(1, Location::RequiresFpuRegister());
4347 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4348 break;
4349 }
4350 default:
4351 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4352 }
4353 }
4354
4355 void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) {
4356 LocationSummary* locations = sub->GetLocations();
4357 Location out = locations->Out();
4358 Location first = locations->InAt(0);
4359 Location second = locations->InAt(1);
4360 switch (sub->GetResultType()) {
4361 case DataType::Type::kInt32: {
4362 __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
4363 break;
4364 }
4365
4366 case DataType::Type::kInt64: {
4367 if (second.IsConstant()) {
4368 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4369 GenerateAddLongConst(out, first, -value);
4370 } else {
4371 DCHECK(second.IsRegisterPair());
4372 __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4373 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4374 }
4375 break;
4376 }
4377
4378 case DataType::Type::kFloat32:
4379 case DataType::Type::kFloat64:
4380 __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
4381 break;
4382
4383 default:
4384 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4385 }
4386 }
4387
4388 void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
4389 LocationSummary* locations =
4390 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4391 switch (mul->GetResultType()) {
4392 case DataType::Type::kInt32:
4393 case DataType::Type::kInt64: {
4394 locations->SetInAt(0, Location::RequiresRegister());
4395 locations->SetInAt(1, Location::RequiresRegister());
4396 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4397 break;
4398 }
4399
4400 case DataType::Type::kFloat32:
4401 case DataType::Type::kFloat64: {
4402 locations->SetInAt(0, Location::RequiresFpuRegister());
4403 locations->SetInAt(1, Location::RequiresFpuRegister());
4404 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4405 break;
4406 }
4407
4408 default:
4409 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4410 }
4411 }
4412
4413 void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
4414 LocationSummary* locations = mul->GetLocations();
4415 Location out = locations->Out();
4416 Location first = locations->InAt(0);
4417 Location second = locations->InAt(1);
4418 switch (mul->GetResultType()) {
4419 case DataType::Type::kInt32: {
4420 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4421 break;
4422 }
4423 case DataType::Type::kInt64: {
4424 vixl32::Register out_hi = HighRegisterFrom(out);
4425 vixl32::Register out_lo = LowRegisterFrom(out);
4426 vixl32::Register in1_hi = HighRegisterFrom(first);
4427 vixl32::Register in1_lo = LowRegisterFrom(first);
4428 vixl32::Register in2_hi = HighRegisterFrom(second);
4429 vixl32::Register in2_lo = LowRegisterFrom(second);
4430
4431 // Extra checks needed because of the existence of the R1_R2 register pair.
4432 // The algorithm is wrong if out.hi is either in1.lo or in2.lo
4433 // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2).
4434 DCHECK(!out_hi.Is(in1_lo));
4435 DCHECK(!out_hi.Is(in2_lo));
4436
4437 // input: in1 - 64 bits, in2 - 64 bits
4438 // output: out
4439 // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
4440 // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
4441 // parts: out.lo = (in1.lo * in2.lo)[31:0]
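// Illustrative example: in1 = 0x0000000100000002, in2 = 0x0000000300000004.
// in1.lo * in2.hi = 6, in1.hi * in2.lo = 4, (in1.lo * in2.lo)[63:32] = 0, so
// out.hi = 10 and out.lo = 8, i.e. out = 0x0000000A00000008 = (in1 * in2) mod 2^64.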
4442
4443 UseScratchRegisterScope temps(GetVIXLAssembler());
4444 vixl32::Register temp = temps.Acquire();
4445 // temp <- in1.lo * in2.hi
4446 __ Mul(temp, in1_lo, in2_hi);
4447 // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo
4448 __ Mla(out_hi, in1_hi, in2_lo, temp);
4449 // out.lo <- (in1.lo * in2.lo)[31:0];
4450 __ Umull(out_lo, temp, in1_lo, in2_lo);
4451 // out.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
4452 __ Add(out_hi, out_hi, temp);
4453 break;
4454 }
4455
4456 case DataType::Type::kFloat32:
4457 case DataType::Type::kFloat64:
4458 __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
4459 break;
4460
4461 default:
4462 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4463 }
4464 }
4465
4466 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4467 DCHECK(instruction->IsDiv() || instruction->IsRem());
4468 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4469
4470 Location second = instruction->GetLocations()->InAt(1);
4471 DCHECK(second.IsConstant());
4472
4473 vixl32::Register out = OutputRegister(instruction);
4474 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4475 int32_t imm = Int32ConstantFrom(second);
4476 DCHECK(imm == 1 || imm == -1);
4477
4478 if (instruction->IsRem()) {
4479 __ Mov(out, 0);
4480 } else {
4481 if (imm == 1) {
4482 __ Mov(out, dividend);
4483 } else {
4484 __ Rsb(out, dividend, 0);
4485 }
4486 }
4487 }
4488
4489 void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
4490 DCHECK(instruction->IsDiv() || instruction->IsRem());
4491 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4492
4493 LocationSummary* locations = instruction->GetLocations();
4494 Location second = locations->InAt(1);
4495 DCHECK(second.IsConstant());
4496
4497 vixl32::Register out = OutputRegister(instruction);
4498 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4499 int32_t imm = Int32ConstantFrom(second);
4500 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4501 int ctz_imm = CTZ(abs_imm);
4502
4503 auto generate_div_code = [this, imm, ctz_imm](vixl32::Register out, vixl32::Register in) {
4504 __ Asr(out, in, ctz_imm);
4505 if (imm < 0) {
4506 __ Rsb(out, out, 0);
4507 }
4508 };
4509
4510 if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
4511 // No need to adjust the result for non-negative dividends or the INT32_MIN dividend.
4512 // NOTE: The generated code for HDiv/HRem works correctly for the INT32_MIN dividend:
4513 // imm == 2
4514 // HDiv
4515 // add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
4516 // asr out, out(0x80000001), #1 => out = 0xc0000000
4517 // This is the same as 'asr out, dividend(0x80000000), #1'
4518 //
4519 // imm > 2
4520 // HDiv
4521 // asr out, dividend(0x80000000), #31 => out = -1
4522 // add out, dividend(0x80000000), out(-1), lsr #(32 - ctz_imm) => out = 0b10..01..1,
4523 // where the number of the rightmost 1s is ctz_imm.
4524 // asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
4525 // leftmost 1s is ctz_imm + 1.
4526 // This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
4527 //
4528 // imm == INT32_MIN
4529 // HDiv
4530 // asr out, dividend(0x80000000), #31 => out = -1
4531 // add out, dividend(0x80000000), out(-1), lsr #1 => out = 0xc0000000
4532 // asr out, out(0xc0000000), #31 => out = -1
4533 // rsb out, out(-1), #0 => out = 1
4534 // This is the same as
4535 // asr out, dividend(0x80000000), #31
4536 // rsb out, out, #0
4537 //
4538 //
4539 // INT_MIN % imm must be 0 for any imm that is a power of 2. 'and' and 'ubfx' work only with bits
4540 // 0..30 of a dividend. For INT32_MIN those bits are zeros. So 'and' and 'ubfx' always
4541 // produce zero.
4542 if (instruction->IsDiv()) {
4543 generate_div_code(out, dividend);
4544 } else {
4545 if (GetVIXLAssembler()->IsModifiedImmediate(abs_imm - 1)) {
4546 __ And(out, dividend, abs_imm - 1);
4547 } else {
4548 __ Ubfx(out, dividend, 0, ctz_imm);
4549 }
4550 return;
4551 }
4552 } else {
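// Illustrative example: dividend = -7, imm = 4 (ctz_imm = 2).
//   asr out, dividend, #31          => out = -1
//   add out, dividend, out, lsr #30 => out = -7 + 3 = -4 (bias so the shift truncates
//                                      towards zero)
// For HDiv, asr #2 then gives -1 (= -7 / 4); for HRem, bfc clears bits 0..1 leaving -4,
// and dividend - (-4) = -3 (= -7 % 4).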
4553 vixl32::Register add_right_input = dividend;
4554 if (ctz_imm > 1) {
4555 __ Asr(out, dividend, 31);
4556 add_right_input = out;
4557 }
4558 __ Add(out, dividend, Operand(add_right_input, vixl32::LSR, 32 - ctz_imm));
4559
4560 if (instruction->IsDiv()) {
4561 generate_div_code(out, out);
4562 } else {
4563 __ Bfc(out, 0, ctz_imm);
4564 __ Sub(out, dividend, out);
4565 }
4566 }
4567 }
4568
4569 void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4570 DCHECK(instruction->IsDiv() || instruction->IsRem());
4571 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4572
4573 LocationSummary* locations = instruction->GetLocations();
4574 Location second = locations->InAt(1);
4575 DCHECK(second.IsConstant());
4576
4577 vixl32::Register out = OutputRegister(instruction);
4578 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4579 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
4580 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
4581 int32_t imm = Int32ConstantFrom(second);
4582
4583 int64_t magic;
4584 int shift;
4585 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4586
4587 auto generate_unsigned_div_code =[this, magic, shift](vixl32::Register out,
4588 vixl32::Register dividend,
4589 vixl32::Register temp1,
4590 vixl32::Register temp2) {
4591 // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4592 __ Mov(temp1, static_cast<int32_t>(magic));
4593 if (magic > 0 && shift == 0) {
4594 __ Smull(temp2, out, dividend, temp1);
4595 } else {
4596 __ Smull(temp2, temp1, dividend, temp1);
4597 if (magic < 0) {
4598 // The negative magic M = static_cast<int>(m) means that the multiplier m is greater
4599 // than INT32_MAX. In such a case shift is never 0.
4600 // Proof:
4601 // m = (2^p + d - 2^p % d) / d, where p = 32 + shift, d > 2
4602 //
4603 // If shift == 0, m = (2^32 + d - 2^32 % d) / d =
4604 // = (2^32 + d - (2^32 - (2^32 / d) * d)) / d =
4605 // = (d + (2^32 / d) * d) / d = 1 + (2^32 / d), here '/' is the integer division.
4606 //
4607 // 1 + (2^32 / d) is decreasing when d is increasing.
4608 // The maximum is 1 431 655 766, when d == 3. This value is less than INT32_MAX.
4609 // The minimum is 3, when d = 2^31 - 1.
4610 // So for all values of d in [3, INT32_MAX], m with p == 32 is in [3, INT32_MAX) and
4611 // is never less than 0.
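// Illustrative example, assuming the usual Hacker's Delight constants for imm = 7
// (magic = 0x92492493, i.e. negative as an int32, and shift = 2): for dividend = 100,
// hi32(100 * magic) = -43, -43 + 100 = 57, and 57 >> 2 = 14 = 100 / 7.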
4612 __ Add(temp1, temp1, dividend);
4613 }
4614 DCHECK_NE(shift, 0);
4615 __ Lsr(out, temp1, shift);
4616 }
4617 };
4618
4619 if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
4620 // No need to adjust the result for a non-negative dividend and a positive divisor.
4621 if (instruction->IsDiv()) {
4622 generate_unsigned_div_code(out, dividend, temp1, temp2);
4623 } else {
4624 generate_unsigned_div_code(temp1, dividend, temp1, temp2);
4625 __ Mov(temp2, imm);
4626 __ Mls(out, temp1, temp2, dividend);
4627 }
4628 } else {
4629 // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4630 __ Mov(temp1, static_cast<int32_t>(magic));
4631 __ Smull(temp2, temp1, dividend, temp1);
4632
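// The magic constant holds only the low 32 bits of the real multiplier m: when
// imm > 0 and the constant is negative, Smull effectively used m - 2^32, so the
// dividend is added back to the high word; when imm < 0 and the constant is
// positive, it used m + 2^32, so the dividend is subtracted.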
4633 if (imm > 0 && magic < 0) {
4634 __ Add(temp1, temp1, dividend);
4635 } else if (imm < 0 && magic > 0) {
4636 __ Sub(temp1, temp1, dividend);
4637 }
4638
4639 if (shift != 0) {
4640 __ Asr(temp1, temp1, shift);
4641 }
4642
4643 if (instruction->IsDiv()) {
4644 __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4645 } else {
4646 __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4647 // TODO: Strength reduction for mls.
4648 __ Mov(temp2, imm);
4649 __ Mls(out, temp1, temp2, dividend);
4650 }
4651 }
4652 }
4653
4654 void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
4655 HBinaryOperation* instruction) {
4656 DCHECK(instruction->IsDiv() || instruction->IsRem());
4657 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4658
4659 Location second = instruction->GetLocations()->InAt(1);
4660 DCHECK(second.IsConstant());
4661
4662 int32_t imm = Int32ConstantFrom(second);
4663 if (imm == 0) {
4664 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4665 } else if (imm == 1 || imm == -1) {
4666 DivRemOneOrMinusOne(instruction);
4667 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4668 DivRemByPowerOfTwo(instruction);
4669 } else {
4670 DCHECK(imm <= -2 || imm >= 2);
4671 GenerateDivRemWithAnyConstant(instruction);
4672 }
4673 }
4674
4675 void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
4676 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4677 if (div->GetResultType() == DataType::Type::kInt64) {
4678 // pLdiv runtime call.
4679 call_kind = LocationSummary::kCallOnMainOnly;
4680 } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) {
4681 // sdiv will be replaced by another instruction sequence.
4682 } else if (div->GetResultType() == DataType::Type::kInt32 &&
4683 !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4684 // pIdivmod runtime call.
4685 call_kind = LocationSummary::kCallOnMainOnly;
4686 }
4687
4688 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4689
4690 switch (div->GetResultType()) {
4691 case DataType::Type::kInt32: {
4692 HInstruction* divisor = div->InputAt(1);
4693 if (divisor->IsConstant()) {
4694 locations->SetInAt(0, Location::RequiresRegister());
4695 locations->SetInAt(1, Location::ConstantLocation(divisor));
4696 int32_t value = Int32ConstantFrom(divisor);
4697 Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4698 if (value == 1 || value == 0 || value == -1) {
4699 // No temp register required.
4700 } else if (IsPowerOfTwo(AbsOrMin(value)) &&
4701 value != 2 &&
4702 value != -2 &&
4703 !HasNonNegativeOrMinIntInputAt(div, 0)) {
4704 // The "out" register is used as a temporary, so it overlaps with the inputs.
4705 out_overlaps = Location::kOutputOverlap;
4706 } else {
4707 locations->AddRegisterTemps(2);
4708 }
4709 locations->SetOut(Location::RequiresRegister(), out_overlaps);
4710 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4711 locations->SetInAt(0, Location::RequiresRegister());
4712 locations->SetInAt(1, Location::RequiresRegister());
4713 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4714 } else {
4715 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4716 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4717 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4718 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4719 // we only need the former.
4720 locations->SetOut(LocationFrom(r0));
4721 }
4722 break;
4723 }
4724 case DataType::Type::kInt64: {
4725 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4726 locations->SetInAt(0, LocationFrom(
4727 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4728 locations->SetInAt(1, LocationFrom(
4729 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4730 locations->SetOut(LocationFrom(r0, r1));
4731 break;
4732 }
4733 case DataType::Type::kFloat32:
4734 case DataType::Type::kFloat64: {
4735 locations->SetInAt(0, Location::RequiresFpuRegister());
4736 locations->SetInAt(1, Location::RequiresFpuRegister());
4737 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4738 break;
4739 }
4740
4741 default:
4742 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4743 }
4744 }
4745
4746 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
4747 Location lhs = div->GetLocations()->InAt(0);
4748 Location rhs = div->GetLocations()->InAt(1);
4749
4750 switch (div->GetResultType()) {
4751 case DataType::Type::kInt32: {
4752 if (rhs.IsConstant()) {
4753 GenerateDivRemConstantIntegral(div);
4754 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4755 __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
4756 } else {
4757 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4758 DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
4759 DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
4760 DCHECK(r0.Is(OutputRegister(div)));
4761
4762 codegen_->InvokeRuntime(kQuickIdivmod, div);
4763 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4764 }
4765 break;
4766 }
4767
4768 case DataType::Type::kInt64: {
4769 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4770 DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
4771 DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
4772 DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
4773 DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
4774 DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
4775 DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
4776
4777 codegen_->InvokeRuntime(kQuickLdiv, div);
4778 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4779 break;
4780 }
4781
4782 case DataType::Type::kFloat32:
4783 case DataType::Type::kFloat64:
4784 __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
4785 break;
4786
4787 default:
4788 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4789 }
4790 }
4791
4792 void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
4793 DataType::Type type = rem->GetResultType();
4794
4795 // Most remainders are implemented in the runtime.
4796 LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
4797 if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) {
4798 // sdiv will be replaced by another instruction sequence.
4799 call_kind = LocationSummary::kNoCall;
4800 } else if ((rem->GetResultType() == DataType::Type::kInt32)
4801 && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4802 // We have a hardware divide instruction for int; do it with three instructions.
4803 call_kind = LocationSummary::kNoCall;
4804 }
4805
4806 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4807
4808 switch (type) {
4809 case DataType::Type::kInt32: {
4810 HInstruction* divisor = rem->InputAt(1);
4811 if (divisor->IsConstant()) {
4812 locations->SetInAt(0, Location::RequiresRegister());
4813 locations->SetInAt(1, Location::ConstantLocation(divisor));
4814 int32_t value = Int32ConstantFrom(divisor);
4815 Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4816 if (value == 1 || value == 0 || value == -1) {
4817 // No temp register required.
4818 } else if (IsPowerOfTwo(AbsOrMin(value)) && !HasNonNegativeOrMinIntInputAt(rem, 0)) {
4819 // The "out" register is used as a temporary, so it overlaps with the inputs.
4820 out_overlaps = Location::kOutputOverlap;
4821 } else {
4822 locations->AddRegisterTemps(2);
4823 }
4824 locations->SetOut(Location::RequiresRegister(), out_overlaps);
4825 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4826 locations->SetInAt(0, Location::RequiresRegister());
4827 locations->SetInAt(1, Location::RequiresRegister());
4828 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4829 locations->AddTemp(Location::RequiresRegister());
4830 } else {
4831 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4832 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4833 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4834 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4835 // we only need the latter.
4836 locations->SetOut(LocationFrom(r1));
4837 }
4838 break;
4839 }
4840 case DataType::Type::kInt64: {
4841 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4842 locations->SetInAt(0, LocationFrom(
4843 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4844 locations->SetInAt(1, LocationFrom(
4845 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4846 // The runtime helper puts the output in R2,R3.
4847 locations->SetOut(LocationFrom(r2, r3));
4848 break;
4849 }
4850 case DataType::Type::kFloat32: {
4851 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4852 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4853 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
4854 locations->SetOut(LocationFrom(s0));
4855 break;
4856 }
4857
4858 case DataType::Type::kFloat64: {
4859 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4860 locations->SetInAt(0, LocationFrom(
4861 calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
4862 locations->SetInAt(1, LocationFrom(
4863 calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
4864 locations->SetOut(LocationFrom(s0, s1));
4865 break;
4866 }
4867
4868 default:
4869 LOG(FATAL) << "Unexpected rem type " << type;
4870 }
4871 }
4872
4873 void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
4874 LocationSummary* locations = rem->GetLocations();
4875 Location second = locations->InAt(1);
4876
4877 DataType::Type type = rem->GetResultType();
4878 switch (type) {
4879 case DataType::Type::kInt32: {
4880 vixl32::Register reg1 = InputRegisterAt(rem, 0);
4881 vixl32::Register out_reg = OutputRegister(rem);
4882 if (second.IsConstant()) {
4883 GenerateDivRemConstantIntegral(rem);
4884 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4885 vixl32::Register reg2 = RegisterFrom(second);
4886 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
4887
4888 // temp = reg1 / reg2 (integer division)
4889 // dest = reg1 - temp * reg2
4890 __ Sdiv(temp, reg1, reg2);
4891 __ Mls(out_reg, temp, reg2, reg1);
4892 } else {
4893 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4894 DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
4895 DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
4896 DCHECK(out_reg.Is(r1));
4897
4898 codegen_->InvokeRuntime(kQuickIdivmod, rem);
4899 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4900 }
4901 break;
4902 }
4903
4904 case DataType::Type::kInt64: {
4905 codegen_->InvokeRuntime(kQuickLmod, rem);
4906 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4907 break;
4908 }
4909
4910 case DataType::Type::kFloat32: {
4911 codegen_->InvokeRuntime(kQuickFmodf, rem);
4912 CheckEntrypointTypes<kQuickFmodf, float, float, float>();
4913 break;
4914 }
4915
4916 case DataType::Type::kFloat64: {
4917 codegen_->InvokeRuntime(kQuickFmod, rem);
4918 CheckEntrypointTypes<kQuickFmod, double, double, double>();
4919 break;
4920 }
4921
4922 default:
4923 LOG(FATAL) << "Unexpected rem type " << type;
4924 }
4925 }
4926
4927 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4928 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4929 switch (minmax->GetResultType()) {
4930 case DataType::Type::kInt32:
4931 locations->SetInAt(0, Location::RequiresRegister());
4932 locations->SetInAt(1, Location::RequiresRegister());
4933 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4934 break;
4935 case DataType::Type::kInt64:
4936 locations->SetInAt(0, Location::RequiresRegister());
4937 locations->SetInAt(1, Location::RequiresRegister());
4938 locations->SetOut(Location::SameAsFirstInput());
4939 break;
4940 case DataType::Type::kFloat32:
4941 locations->SetInAt(0, Location::RequiresFpuRegister());
4942 locations->SetInAt(1, Location::RequiresFpuRegister());
4943 locations->SetOut(Location::SameAsFirstInput());
4944 locations->AddTemp(Location::RequiresRegister());
4945 break;
4946 case DataType::Type::kFloat64:
4947 locations->SetInAt(0, Location::RequiresFpuRegister());
4948 locations->SetInAt(1, Location::RequiresFpuRegister());
4949 locations->SetOut(Location::SameAsFirstInput());
4950 break;
4951 default:
4952 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4953 }
4954 }
4955
4956 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
4957 Location op1_loc = locations->InAt(0);
4958 Location op2_loc = locations->InAt(1);
4959 Location out_loc = locations->Out();
4960
4961 vixl32::Register op1 = RegisterFrom(op1_loc);
4962 vixl32::Register op2 = RegisterFrom(op2_loc);
4963 vixl32::Register out = RegisterFrom(out_loc);
4964
4965 __ Cmp(op1, op2);
4966
4967 {
4968 ExactAssemblyScope aas(GetVIXLAssembler(),
4969 3 * kMaxInstructionSizeInBytes,
4970 CodeBufferCheckScope::kMaximumSize);
4971
4972 __ ite(is_min ? lt : gt);
4973 __ mov(is_min ? lt : gt, out, op1);
4974 __ mov(is_min ? ge : le, out, op2);
4975 }
4976 }
4977
4978 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
4979 Location op1_loc = locations->InAt(0);
4980 Location op2_loc = locations->InAt(1);
4981 Location out_loc = locations->Out();
4982
4983 // Optimization: don't generate any code if inputs are the same.
4984 if (op1_loc.Equals(op2_loc)) {
4985 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in the location builder.
4986 return;
4987 }
4988
4989 vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
4990 vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
4991 vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
4992 vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
4993 vixl32::Register out_lo = LowRegisterFrom(out_loc);
4994 vixl32::Register out_hi = HighRegisterFrom(out_loc);
4995 UseScratchRegisterScope temps(GetVIXLAssembler());
4996 const vixl32::Register temp = temps.Acquire();
4997
4998 DCHECK(op1_lo.Is(out_lo));
4999 DCHECK(op1_hi.Is(out_hi));
5000
5001 // Compare op1 >= op2, or op1 < op2: subtract the low words setting the flags, then
5001 // subtract the high words with borrow, keeping only the flags.
5002 __ Cmp(out_lo, op2_lo);
5003 __ Sbcs(temp, out_hi, op2_hi);
5004
5005 // Now GE/LT condition code is correct for the long comparison.
5006 {
5007 vixl32::ConditionType cond = is_min ? ge : lt;
5008 ExactAssemblyScope it_scope(GetVIXLAssembler(),
5009 3 * kMaxInstructionSizeInBytes,
5010 CodeBufferCheckScope::kMaximumSize);
5011 __ itt(cond);
5012 __ mov(cond, out_lo, op2_lo);
5013 __ mov(cond, out_hi, op2_hi);
5014 }
5015 }
5016
5017 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
5018 LocationSummary* locations = minmax->GetLocations();
5019 Location op1_loc = locations->InAt(0);
5020 Location op2_loc = locations->InAt(1);
5021 Location out_loc = locations->Out();
5022
5023 // Optimization: don't generate any code if inputs are the same.
5024 if (op1_loc.Equals(op2_loc)) {
5025 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in the location builder.
5026 return;
5027 }
5028
5029 vixl32::SRegister op1 = SRegisterFrom(op1_loc);
5030 vixl32::SRegister op2 = SRegisterFrom(op2_loc);
5031 vixl32::SRegister out = SRegisterFrom(out_loc);
5032
5033 UseScratchRegisterScope temps(GetVIXLAssembler());
5034 const vixl32::Register temp1 = temps.Acquire();
5035 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
5036 vixl32::Label nan, done;
5037 vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5038
5039 DCHECK(op1.Is(out));
5040
5041 __ Vcmp(op1, op2);
5042 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5043 __ B(vs, &nan, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
5044
5045 // op1 <> op2
5046 vixl32::ConditionType cond = is_min ? gt : lt;
5047 {
5048 ExactAssemblyScope it_scope(GetVIXLAssembler(),
5049 2 * kMaxInstructionSizeInBytes,
5050 CodeBufferCheckScope::kMaximumSize);
5051 __ it(cond);
5052 __ vmov(cond, F32, out, op2);
5053 }
5054 // For <> (not equal), the min/max calculation is done.
5055 __ B(ne, final_label, /* is_far_target= */ false);
5056
5057 // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
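// Orr-ing the raw bit patterns keeps a set sign bit, so min(+0.0, -0.0) == -0.0;
// And-ing clears it, so max(+0.0, -0.0) == +0.0.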
5058 __ Vmov(temp1, op1);
5059 __ Vmov(temp2, op2);
5060 if (is_min) {
5061 __ Orr(temp1, temp1, temp2);
5062 } else {
5063 __ And(temp1, temp1, temp2);
5064 }
5065 __ Vmov(out, temp1);
5066 __ B(final_label);
5067
5068 // handle NaN input.
5069 __ Bind(&nan);
5070 __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
5071 __ Vmov(out, temp1);
5072
5073 if (done.IsReferenced()) {
5074 __ Bind(&done);
5075 }
5076 }
5077
5078 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
5079 LocationSummary* locations = minmax->GetLocations();
5080 Location op1_loc = locations->InAt(0);
5081 Location op2_loc = locations->InAt(1);
5082 Location out_loc = locations->Out();
5083
5084 // Optimization: don't generate any code if inputs are the same.
5085 if (op1_loc.Equals(op2_loc)) {
5086 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in the location builder.
5087 return;
5088 }
5089
5090 vixl32::DRegister op1 = DRegisterFrom(op1_loc);
5091 vixl32::DRegister op2 = DRegisterFrom(op2_loc);
5092 vixl32::DRegister out = DRegisterFrom(out_loc);
5093 vixl32::Label handle_nan_eq, done;
5094 vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5095
5096 DCHECK(op1.Is(out));
5097
5098 __ Vcmp(op1, op2);
5099 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5100 __ B(vs, &handle_nan_eq, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
5101
5102 // op1 <> op2
5103 vixl32::ConditionType cond = is_min ? gt : lt;
5104 {
5105 ExactAssemblyScope it_scope(GetVIXLAssembler(),
5106 2 * kMaxInstructionSizeInBytes,
5107 CodeBufferCheckScope::kMaximumSize);
5108 __ it(cond);
5109 __ vmov(cond, F64, out, op2);
5110 }
5111 // For <> (not equal), the min/max calculation is done.
5112 __ B(ne, final_label, /* is_far_target= */ false);
5113
5114 // handle op1 == op2, max(+0.0,-0.0).
5115 if (!is_min) {
5116 __ Vand(F64, out, op1, op2);
5117 __ B(final_label);
5118 }
5119
5120 // handle op1 == op2, min(+0.0,-0.0), NaN input.
5121 __ Bind(&handle_nan_eq);
5122 __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
5123
5124 if (done.IsReferenced()) {
5125 __ Bind(&done);
5126 }
5127 }
5128
5129 void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
5130 DataType::Type type = minmax->GetResultType();
5131 switch (type) {
5132 case DataType::Type::kInt32:
5133 GenerateMinMaxInt(minmax->GetLocations(), is_min);
5134 break;
5135 case DataType::Type::kInt64:
5136 GenerateMinMaxLong(minmax->GetLocations(), is_min);
5137 break;
5138 case DataType::Type::kFloat32:
5139 GenerateMinMaxFloat(minmax, is_min);
5140 break;
5141 case DataType::Type::kFloat64:
5142 GenerateMinMaxDouble(minmax, is_min);
5143 break;
5144 default:
5145 LOG(FATAL) << "Unexpected type for HMinMax " << type;
5146 }
5147 }
5148
5149 void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
5150 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
5151 }
5152
5153 void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
5154 GenerateMinMax(min, /*is_min*/ true);
5155 }
5156
5157 void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
5158 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
5159 }
5160
5161 void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
5162 GenerateMinMax(max, /*is_min*/ false);
5163 }
5164
5165 void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
5166 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5167 switch (abs->GetResultType()) {
5168 case DataType::Type::kInt32:
5169 case DataType::Type::kInt64:
5170 locations->SetInAt(0, Location::RequiresRegister());
5171 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5172 locations->AddTemp(Location::RequiresRegister());
5173 break;
5174 case DataType::Type::kFloat32:
5175 case DataType::Type::kFloat64:
5176 locations->SetInAt(0, Location::RequiresFpuRegister());
5177 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5178 break;
5179 default:
5180 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5181 }
5182 }
5183
5184 void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
5185 LocationSummary* locations = abs->GetLocations();
5186 switch (abs->GetResultType()) {
5187 case DataType::Type::kInt32: {
5188 vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
5189 vixl32::Register out_reg = RegisterFrom(locations->Out());
5190 vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
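// Branchless abs: mask is 0 for non-negative inputs and -1 for negative ones;
// (in + mask) ^ mask leaves non-negative values unchanged and negates negative ones.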
5191 __ Asr(mask, in_reg, 31);
5192 __ Add(out_reg, in_reg, mask);
5193 __ Eor(out_reg, out_reg, mask);
5194 break;
5195 }
5196 case DataType::Type::kInt64: {
5197 Location in = locations->InAt(0);
5198 vixl32::Register in_reg_lo = LowRegisterFrom(in);
5199 vixl32::Register in_reg_hi = HighRegisterFrom(in);
5200 Location output = locations->Out();
5201 vixl32::Register out_reg_lo = LowRegisterFrom(output);
5202 vixl32::Register out_reg_hi = HighRegisterFrom(output);
5203 DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
5204 vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
5205 __ Asr(mask, in_reg_hi, 31);
5206 __ Adds(out_reg_lo, in_reg_lo, mask);
5207 __ Adc(out_reg_hi, in_reg_hi, mask);
5208 __ Eor(out_reg_lo, out_reg_lo, mask);
5209 __ Eor(out_reg_hi, out_reg_hi, mask);
5210 break;
5211 }
5212 case DataType::Type::kFloat32:
5213 case DataType::Type::kFloat64:
5214 __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
5215 break;
5216 default:
5217 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5218 }
5219 }
5220
5221 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5222 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5223 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5224 }
5225
5226 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5227 DivZeroCheckSlowPathARMVIXL* slow_path =
5228 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction);
5229 codegen_->AddSlowPath(slow_path);
5230
5231 LocationSummary* locations = instruction->GetLocations();
5232 Location value = locations->InAt(0);
5233
5234 switch (instruction->GetType()) {
5235 case DataType::Type::kBool:
5236 case DataType::Type::kUint8:
5237 case DataType::Type::kInt8:
5238 case DataType::Type::kUint16:
5239 case DataType::Type::kInt16:
5240 case DataType::Type::kInt32: {
5241 if (value.IsRegister()) {
5242 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
5243 } else {
5244 DCHECK(value.IsConstant()) << value;
5245 if (Int32ConstantFrom(value) == 0) {
5246 __ B(slow_path->GetEntryLabel());
5247 }
5248 }
5249 break;
5250 }
5251 case DataType::Type::kInt64: {
5252 if (value.IsRegisterPair()) {
5253 UseScratchRegisterScope temps(GetVIXLAssembler());
5254 vixl32::Register temp = temps.Acquire();
5255 __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
5256 __ B(eq, slow_path->GetEntryLabel());
5257 } else {
5258 DCHECK(value.IsConstant()) << value;
5259 if (Int64ConstantFrom(value) == 0) {
5260 __ B(slow_path->GetEntryLabel());
5261 }
5262 }
5263 break;
5264 }
5265 default:
5266 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
5267 }
5268 }
5269
5270 void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HBinaryOperation* rotate) {
5271 LocationSummary* locations = rotate->GetLocations();
5272 vixl32::Register in = InputRegisterAt(rotate, 0);
5273 Location rhs = locations->InAt(1);
5274 vixl32::Register out = OutputRegister(rotate);
5275
5276 if (rhs.IsConstant()) {
5277 // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
5278 // so map all rotations to a positive equivalent in that range
5279 // (e.g. left *or* right by -2 bits == 30 bits in the same direction).
5280 uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
5281 if (rotate->IsRol()) {
5282 rot = -rot;
5283 }
5284 rot &= 0x1f;
5285
5286 if (rot) {
5287 // Rotate, mapping left rotations to right equivalents if necessary.
5288 // (e.g. left by 2 bits == right by 30.)
5289 __ Ror(out, in, rot);
5290 } else if (!out.Is(in)) {
5291 __ Mov(out, in);
5292 }
5293 } else {
5294 if (rotate->IsRol()) {
5295 UseScratchRegisterScope temps(GetVIXLAssembler());
5296
5297 vixl32::Register negated = temps.Acquire();
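// Rotating left by n is equivalent to rotating right by 32 - n, so negate the
// amount; Ror only uses the rotation amount modulo 32 for the result.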
5298 __ Rsb(negated, RegisterFrom(rhs), 0);
5299 __ Ror(out, in, negated);
5300 } else {
5301 DCHECK(rotate->IsRor());
5302 __ Ror(out, in, RegisterFrom(rhs));
5303 }
5304 }
5305 }
5306
5307 // Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
5308 // rotates by swapping input regs (effectively rotating by the first 32 bits of
5309 // a larger rotation) or flipping direction (thus treating larger right/left
5310 // rotations as sub-word sized rotations in the other direction) as appropriate.
5311 void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HBinaryOperation* rotate) {
5312 LocationSummary* locations = rotate->GetLocations();
5313 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
5314 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
5315 Location rhs = locations->InAt(1);
5316 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
5317 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
5318
5319 if (rhs.IsConstant()) {
5320 uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
5321
5322 if (rotate->IsRol()) {
5323 rot = -rot;
5324 }
5325
5326 // Map all rotations to positive equivalents on the interval [0,63].
5327 rot &= kMaxLongShiftDistance;
5328 // For rotates over a word in size, 'pre-rotate' by 32 bits to reduce the rotate
5329 // logic below to a simple pair of binary orr
5330 // (e.g. 34 bits == in_reg swap + 2 bits right).
5331 if (rot >= kArmBitsPerWord) {
5332 rot -= kArmBitsPerWord;
5333 std::swap(in_reg_hi, in_reg_lo);
5334 }
5335 // Rotate, or mov to out for zero or word size rotations.
5336 if (rot != 0u) {
5337 __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot));
5338 __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
5339 __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot));
5340 __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
5341 } else {
5342 __ Mov(out_reg_lo, in_reg_lo);
5343 __ Mov(out_reg_hi, in_reg_hi);
5344 }
5345 } else {
5346 vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
5347 vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
5348 vixl32::Label end;
5349 vixl32::Label shift_by_32_plus_shift_right;
5350 vixl32::Label* final_label = codegen_->GetFinalLabel(rotate, &end);
5351
5352 // Negate rhs, taken from VisitNeg.
5353 if (rotate->IsRol()) {
5354 Location negated = locations->GetTemp(2);
5355 Location in = rhs;
5356
5357 __ Rsb(RegisterFrom(negated), RegisterFrom(in), 0);
5358
5359 rhs = negated;
5360 }
5361
5362 __ And(shift_right, RegisterFrom(rhs), 0x1F);
5363 __ Lsrs(shift_left, RegisterFrom(rhs), 6);
5364 __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
5365 __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false);
5366
5367 // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
5368 // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
5369 __ Lsl(out_reg_hi, in_reg_hi, shift_left);
5370 __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5371 __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5372 __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5373 __ Lsr(shift_left, in_reg_hi, shift_right);
5374 __ Add(out_reg_lo, out_reg_lo, shift_left);
5375 __ B(final_label);
5376
5377 __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
5378 // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
5379 // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
5380 __ Lsr(out_reg_hi, in_reg_hi, shift_right);
5381 __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5382 __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5383 __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5384 __ Lsl(shift_right, in_reg_hi, shift_left);
5385 __ Add(out_reg_lo, out_reg_lo, shift_right);
5386
5387 if (end.IsReferenced()) {
5388 __ Bind(&end);
5389 }
5390 }
5391 }
5392
5393 void LocationsBuilderARMVIXL::HandleRotate(HBinaryOperation* rotate) {
5394 LocationSummary* locations =
5395 new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5396 HInstruction* shift = rotate->InputAt(1);
5397 switch (rotate->GetResultType()) {
5398 case DataType::Type::kInt32: {
5399 locations->SetInAt(0, Location::RequiresRegister());
5400 locations->SetInAt(1, Location::RegisterOrConstant(shift));
5401 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5402 break;
5403 }
5404 case DataType::Type::kInt64: {
5405 locations->SetInAt(0, Location::RequiresRegister());
5406 if (shift->IsConstant()) {
5407 locations->SetInAt(1, Location::ConstantLocation(shift));
5408 } else {
5409 locations->SetInAt(1, Location::RequiresRegister());
5410
5411 if (rotate->IsRor()) {
5412 locations->AddRegisterTemps(2);
5413 } else {
5414 DCHECK(rotate->IsRol());
5415 locations->AddRegisterTemps(3);
5416 }
5417 }
5418 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5419 break;
5420 }
5421 default:
5422 LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5423 }
5424 }
5425
5426 void LocationsBuilderARMVIXL::VisitRol(HRol* rol) {
5427 HandleRotate(rol);
5428 }
5429
5430 void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
5431 HandleRotate(ror);
5432 }
5433
5434 void InstructionCodeGeneratorARMVIXL::HandleRotate(HBinaryOperation* rotate) {
5435 DataType::Type type = rotate->GetResultType();
5436 switch (type) {
5437 case DataType::Type::kInt32: {
5438 HandleIntegerRotate(rotate);
5439 break;
5440 }
5441 case DataType::Type::kInt64: {
5442 HandleLongRotate(rotate);
5443 break;
5444 }
5445 default:
5446 LOG(FATAL) << "Unexpected operation type " << type;
5447 UNREACHABLE();
5448 }
5449 }
5450
5451 void InstructionCodeGeneratorARMVIXL::VisitRol(HRol* rol) {
5452 HandleRotate(rol);
5453 }
5454
5455 void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
5456 HandleRotate(ror);
5457 }
5458
5459 void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
5460 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5461
5462 LocationSummary* locations =
5463 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
5464
5465 HInstruction* shift = op->InputAt(1);
5466 switch (op->GetResultType()) {
5467 case DataType::Type::kInt32: {
5468 locations->SetInAt(0, Location::RequiresRegister());
5469 if (shift->IsConstant()) {
5470 locations->SetInAt(1, Location::ConstantLocation(shift));
5471 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5472 } else {
5473 locations->SetInAt(1, Location::RequiresRegister());
5474 // Make the output overlap, as it will be used to hold the masked
5475 // second input.
5476 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5477 }
5478 break;
5479 }
5480 case DataType::Type::kInt64: {
5481 locations->SetInAt(0, Location::RequiresRegister());
5482 if (shift->IsConstant()) {
5483 locations->SetInAt(1, Location::ConstantLocation(shift));
5484 // For simplicity, use kOutputOverlap even though we only require that low registers
5485 // don't clash with high registers, which the register allocator currently guarantees.
5486 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5487 } else {
5488 locations->SetInAt(1, Location::RequiresRegister());
5489 locations->AddTemp(Location::RequiresRegister());
5490 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5491 }
5492 break;
5493 }
5494 default:
5495 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5496 }
5497 }
5498
5499 void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
5500 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5501
5502 LocationSummary* locations = op->GetLocations();
5503 Location out = locations->Out();
5504 Location first = locations->InAt(0);
5505 Location second = locations->InAt(1);
5506
5507 DataType::Type type = op->GetResultType();
5508 switch (type) {
5509 case DataType::Type::kInt32: {
5510 vixl32::Register out_reg = OutputRegister(op);
5511 vixl32::Register first_reg = InputRegisterAt(op, 0);
5512 if (second.IsRegister()) {
5513 vixl32::Register second_reg = RegisterFrom(second);
5514 // ARM doesn't mask the shift count, so we need to do it ourselves.
5515 __ And(out_reg, second_reg, kMaxIntShiftDistance);
5516 if (op->IsShl()) {
5517 __ Lsl(out_reg, first_reg, out_reg);
5518 } else if (op->IsShr()) {
5519 __ Asr(out_reg, first_reg, out_reg);
5520 } else {
5521 __ Lsr(out_reg, first_reg, out_reg);
5522 }
5523 } else {
5524 int32_t cst = Int32ConstantFrom(second);
5525 uint32_t shift_value = cst & kMaxIntShiftDistance;
5526 if (shift_value == 0) { // ARM does not support shifting with 0 immediate.
5527 __ Mov(out_reg, first_reg);
5528 } else if (op->IsShl()) {
5529 __ Lsl(out_reg, first_reg, shift_value);
5530 } else if (op->IsShr()) {
5531 __ Asr(out_reg, first_reg, shift_value);
5532 } else {
5533 __ Lsr(out_reg, first_reg, shift_value);
5534 }
5535 }
5536 break;
5537 }
5538 case DataType::Type::kInt64: {
5539 vixl32::Register o_h = HighRegisterFrom(out);
5540 vixl32::Register o_l = LowRegisterFrom(out);
5541
5542 vixl32::Register high = HighRegisterFrom(first);
5543 vixl32::Register low = LowRegisterFrom(first);
5544
5545 if (second.IsRegister()) {
5546 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5547
5548 vixl32::Register second_reg = RegisterFrom(second);
5549
5550 if (op->IsShl()) {
5551 __ And(o_l, second_reg, kMaxLongShiftDistance);
5552 // Shift the high part
5553 __ Lsl(o_h, high, o_l);
5554 // Shift the low part and `or` what overflew on the high part
5555 __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord));
5556 __ Lsr(temp, low, temp);
5557 __ Orr(o_h, o_h, temp);
5558 // If the shift is > 32 bits, override the high part
5559 __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
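// temp = shift - 32; if that is non-negative (pl), the high word is simply
// low << (shift - 32), and the final Lsl below yields 0 for the low word because
// register-specified shifts of 32 or more produce 0.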
5560 {
5561 ExactAssemblyScope guard(GetVIXLAssembler(),
5562 2 * vixl32::kMaxInstructionSizeInBytes,
5563 CodeBufferCheckScope::kMaximumSize);
5564 __ it(pl);
5565 __ lsl(pl, o_h, low, temp);
5566 }
5567 // Shift the low part
5568 __ Lsl(o_l, low, o_l);
5569 } else if (op->IsShr()) {
5570 __ And(o_h, second_reg, kMaxLongShiftDistance);
5571 // Shift the low part
5572 __ Lsr(o_l, low, o_h);
5573 // Shift the high part and `or` what underflew on the low part
5574 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5575 __ Lsl(temp, high, temp);
5576 __ Orr(o_l, o_l, temp);
5577 // If the shift is > 32 bits, override the low part
5578 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5579 {
5580 ExactAssemblyScope guard(GetVIXLAssembler(),
5581 2 * vixl32::kMaxInstructionSizeInBytes,
5582 CodeBufferCheckScope::kMaximumSize);
5583 __ it(pl);
5584 __ asr(pl, o_l, high, temp);
5585 }
5586 // Shift the high part
5587 __ Asr(o_h, high, o_h);
5588 } else {
5589 __ And(o_h, second_reg, kMaxLongShiftDistance);
5590 // Same as Shr, except we use `Lsr`s and not `Asr`s.
5591 __ Lsr(o_l, low, o_h);
5592 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5593 __ Lsl(temp, high, temp);
5594 __ Orr(o_l, o_l, temp);
5595 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5596 {
5597 ExactAssemblyScope guard(GetVIXLAssembler(),
5598 2 * vixl32::kMaxInstructionSizeInBytes,
5599 CodeBufferCheckScope::kMaximumSize);
5600 __ it(pl);
5601 __ lsr(pl, o_l, high, temp);
5602 }
5603 __ Lsr(o_h, high, o_h);
5604 }
5605 } else {
5606 // The register allocator doesn't create partial overlaps.
5607 DCHECK(!o_l.Is(high));
5608 DCHECK(!o_h.Is(low));
5609 int32_t cst = Int32ConstantFrom(second);
5610 uint32_t shift_value = cst & kMaxLongShiftDistance;
5611 if (shift_value > 32) {
5612 if (op->IsShl()) {
5613 __ Lsl(o_h, low, shift_value - 32);
5614 __ Mov(o_l, 0);
5615 } else if (op->IsShr()) {
5616 __ Asr(o_l, high, shift_value - 32);
5617 __ Asr(o_h, high, 31);
5618 } else {
5619 __ Lsr(o_l, high, shift_value - 32);
5620 __ Mov(o_h, 0);
5621 }
5622 } else if (shift_value == 32) {
5623 if (op->IsShl()) {
5624 __ Mov(o_h, low);
5625 __ Mov(o_l, 0);
5626 } else if (op->IsShr()) {
5627 __ Mov(o_l, high);
5628 __ Asr(o_h, high, 31);
5629 } else {
5630 __ Mov(o_l, high);
5631 __ Mov(o_h, 0);
5632 }
5633 } else if (shift_value == 1) {
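// A shift by one can use the carry flag: Lsls/Asrs/Lsrs set C to the bit shifted
// out, Adc doubles the high word and adds that carry, and Rrx shifts the low word
// right by one, inserting the carry as the new bit 31.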
5634 if (op->IsShl()) {
5635 __ Lsls(o_l, low, 1);
5636 __ Adc(o_h, high, high);
5637 } else if (op->IsShr()) {
5638 __ Asrs(o_h, high, 1);
5639 __ Rrx(o_l, low);
5640 } else {
5641 __ Lsrs(o_h, high, 1);
5642 __ Rrx(o_l, low);
5643 }
5644 } else if (shift_value == 0) {
5645 __ Mov(o_l, low);
5646 __ Mov(o_h, high);
5647 } else {
5648 DCHECK(0 < shift_value && shift_value < 32) << shift_value;
5649 if (op->IsShl()) {
5650 __ Lsl(o_h, high, shift_value);
5651 __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
5652 __ Lsl(o_l, low, shift_value);
5653 } else if (op->IsShr()) {
5654 __ Lsr(o_l, low, shift_value);
5655 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5656 __ Asr(o_h, high, shift_value);
5657 } else {
5658 __ Lsr(o_l, low, shift_value);
5659 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5660 __ Lsr(o_h, high, shift_value);
5661 }
5662 }
5663 }
5664 break;
5665 }
5666 default:
5667 LOG(FATAL) << "Unexpected operation type " << type;
5668 UNREACHABLE();
5669 }
5670 }
5671
5672 void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
5673 HandleShift(shl);
5674 }
5675
5676 void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
5677 HandleShift(shl);
5678 }
5679
5680 void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
5681 HandleShift(shr);
5682 }
5683
5684 void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
5685 HandleShift(shr);
5686 }
5687
5688 void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
5689 HandleShift(ushr);
5690 }
5691
5692 void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
5693 HandleShift(ushr);
5694 }
5695
5696 void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5697 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5698 instruction, LocationSummary::kCallOnMainOnly);
5699 InvokeRuntimeCallingConventionARMVIXL calling_convention;
5700 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5701 locations->SetOut(LocationFrom(r0));
5702 }
5703
5704 void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5705 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction);
5706 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5707 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12);
5708 }
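// Object allocation is a plain runtime call: the class is passed in the first runtime calling
// convention register, the entrypoint stored on the HNewInstance node (checked here against
// kQuickAllocObjectWithChecks) does the work, and the new reference comes back in r0, which is
// why the locations above pin the input and output to those exact registers.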
5709
5710 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
5711 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5712 instruction, LocationSummary::kCallOnMainOnly);
5713 InvokeRuntimeCallingConventionARMVIXL calling_convention;
5714 locations->SetOut(LocationFrom(r0));
5715 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5716 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5717 }
5718
5719 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
5720 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5721 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5722 codegen_->InvokeRuntime(entrypoint, instruction);
5723 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5724 DCHECK(!codegen_->IsLeafMethod());
5725 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13);
5726 }
5727
5728 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
5729 LocationSummary* locations =
5730 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5731 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5732 if (location.IsStackSlot()) {
5733 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5734 } else if (location.IsDoubleStackSlot()) {
5735 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5736 }
5737 locations->SetOut(location);
5738 }
5739
5740 void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
5741 [[maybe_unused]] HParameterValue* instruction) {
5742 // Nothing to do, the parameter is already at its location.
5743 }
5744
5745 void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
5746 LocationSummary* locations =
5747 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5748 locations->SetOut(LocationFrom(kMethodRegister));
5749 }
5750
5751 void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
5752 [[maybe_unused]] HCurrentMethod* instruction) {
5753 // Nothing to do, the method is already at its location.
5754 }
5755
5756 void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
5757 LocationSummary* locations =
5758 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5759 locations->SetInAt(0, Location::RequiresRegister());
5760 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5761 }
5762
5763 void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
5764 LocationSummary* locations = not_->GetLocations();
5765 Location out = locations->Out();
5766 Location in = locations->InAt(0);
5767 switch (not_->GetResultType()) {
5768 case DataType::Type::kInt32:
5769 __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
5770 break;
5771
5772 case DataType::Type::kInt64:
5773 __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
5774 __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
5775 break;
5776
5777 default:
5778 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5779 }
5780 }
5781
5782 void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5783 LocationSummary* locations =
5784 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5785 locations->SetInAt(0, Location::RequiresRegister());
5786 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5787 }
5788
5789 void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5790 __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);
5791 }
5792
5793 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
5794 LocationSummary* locations =
5795 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5796 switch (compare->GetComparisonType()) {
5797 case DataType::Type::kBool:
5798 case DataType::Type::kUint8:
5799 case DataType::Type::kInt8:
5800 case DataType::Type::kUint16:
5801 case DataType::Type::kInt16:
5802 case DataType::Type::kInt32:
5803 case DataType::Type::kUint32:
5804 case DataType::Type::kInt64:
5805 case DataType::Type::kUint64: {
5806 locations->SetInAt(0, Location::RequiresRegister());
5807 locations->SetInAt(1, Location::RequiresRegister());
5808 // Output overlaps because it is written before doing the low comparison.
5809 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5810 break;
5811 }
5812 case DataType::Type::kFloat32:
5813 case DataType::Type::kFloat64: {
5814 locations->SetInAt(0, Location::RequiresFpuRegister());
5815 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
5816 locations->SetOut(Location::RequiresRegister());
5817 break;
5818 }
5819 default:
5820 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5821 }
5822 }
5823
5824 void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
5825 LocationSummary* locations = compare->GetLocations();
5826 vixl32::Register out = OutputRegister(compare);
5827 Location left = locations->InAt(0);
5828 Location right = locations->InAt(1);
5829
5830 vixl32::Label less, greater, done;
5831 vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
5832 DataType::Type type = compare->GetComparisonType();
5833 vixl32::Condition less_cond = vixl32::ConditionType::lt;
5834 vixl32::Condition greater_cond = vixl32::ConditionType::gt;
5835 switch (type) {
5836 case DataType::Type::kUint32:
5837 less_cond = vixl32::ConditionType::lo;
5838 // greater_cond is not needed below.
5839 FALLTHROUGH_INTENDED;
5840 case DataType::Type::kBool:
5841 case DataType::Type::kUint8:
5842 case DataType::Type::kInt8:
5843 case DataType::Type::kUint16:
5844 case DataType::Type::kInt16:
5845 case DataType::Type::kInt32: {
5846 // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
5847 __ Mov(out, 0);
5848 __ Cmp(RegisterFrom(left), RegisterFrom(right));
5849 break;
5850 }
5851 case DataType::Type::kUint64:
5852 less_cond = vixl32::ConditionType::lo;
5853 greater_cond = vixl32::ConditionType::hi;
5854 FALLTHROUGH_INTENDED;
5855 case DataType::Type::kInt64: {
5856 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // High part compare.
5857 __ B(less_cond, &less, /* is_far_target= */ false);
5858 __ B(greater_cond, &greater, /* is_far_target= */ false);
5859 // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
5860 __ Mov(out, 0);
5861 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare.
5862 less_cond = vixl32::ConditionType::lo;
5863 // greater_cond is not needed below.
5864 break;
5865 }
5866 case DataType::Type::kFloat32:
5867 case DataType::Type::kFloat64: {
5868 __ Mov(out, 0);
5869 GenerateVcmp(compare, codegen_);
5870 // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
5871 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5872 less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
5873 break;
5874 }
5875 default:
5876 LOG(FATAL) << "Unexpected compare type " << type;
5877 UNREACHABLE();
5878 }
5879
5880 __ B(eq, final_label, /* is_far_target= */ false);
5881 __ B(less_cond, &less, /* is_far_target= */ false);
5882
5883 __ Bind(&greater);
5884 __ Mov(out, 1);
5885 __ B(final_label);
5886
5887 __ Bind(&less);
5888 __ Mov(out, -1);
5889
5890 if (done.IsReferenced()) {
5891 __ Bind(&done);
5892 }
5893 }
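// HCompare (roughly the cmp-long / cmpl-float family of dex bytecodes) produces -1, 0 or 1 in
// `out`. The code above seeds `out` with 0 before the final flag-setting compare; the shared tail
// then branches to the `less` / `greater` blocks (writing -1 or 1) or straight to the final label
// on equality. For 64-bit values the high words are compared first, and the low words only when
// the high words are equal (always as an unsigned compare). Floating-point compares transfer the
// FPSCR flags to APSR via VMRS and use gt-bias-aware conditions from ARMFPCondition.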
5894
5895 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
5896 LocationSummary* locations =
5897 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5898 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5899 locations->SetInAt(i, Location::Any());
5900 }
5901 locations->SetOut(Location::Any());
5902 }
5903
5904 void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) {
5905 LOG(FATAL) << "Unreachable";
5906 }
5907
5908 void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
5909 // TODO (ported from quick): revisit ARM barrier kinds.
5910 DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings.
5911 switch (kind) {
5912 case MemBarrierKind::kAnyStore:
5913 case MemBarrierKind::kLoadAny:
5914 case MemBarrierKind::kAnyAny: {
5915 flavor = DmbOptions::ISH;
5916 break;
5917 }
5918 case MemBarrierKind::kStoreStore: {
5919 flavor = DmbOptions::ISHST;
5920 break;
5921 }
5922 default:
5923 LOG(FATAL) << "Unexpected memory barrier " << kind;
5924 }
5925 __ Dmb(flavor);
5926 }
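// As the switch above shows, every barrier kind except kStoreStore is conservatively lowered to a
// full `DMB ISH`; only store-store barriers use the cheaper `DMB ISHST`. A typical use is a
// volatile field store, which HandleFieldSet below brackets roughly as:
//   dmb ish      ; kAnyStore, orders prior accesses before the store
//   str ...      ; the field write
//   dmb ish      ; kAnyAny, orders the store before subsequent accesses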
5927
5928 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
5929 uint32_t offset,
5930 vixl32::Register out_lo,
5931 vixl32::Register out_hi) {
5932 UseScratchRegisterScope temps(GetVIXLAssembler());
5933 if (offset != 0) {
5934 vixl32::Register temp = temps.Acquire();
5935 __ Add(temp, addr, offset);
5936 addr = temp;
5937 }
5938 __ Ldrexd(out_lo, out_hi, MemOperand(addr));
5939 }
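// A single LDREXD is used here because, unlike a plain LDRD on cores without the atomic
// LDRD/STRD feature, an exclusive doubleword load is expected to read both words as one
// single-copy atomic access, which is what a volatile 64-bit read requires.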
5940
5941 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
5942 uint32_t offset,
5943 vixl32::Register value_lo,
5944 vixl32::Register value_hi,
5945 vixl32::Register temp1,
5946 vixl32::Register temp2,
5947 HInstruction* instruction) {
5948 UseScratchRegisterScope temps(GetVIXLAssembler());
5949 vixl32::Label fail;
5950 if (offset != 0) {
5951 vixl32::Register temp = temps.Acquire();
5952 __ Add(temp, addr, offset);
5953 addr = temp;
5954 }
5955 __ Bind(&fail);
5956 {
5957 // Ensure the pc position is recorded immediately after the `ldrexd` instruction.
5958 ExactAssemblyScope aas(GetVIXLAssembler(),
5959 vixl32::kMaxInstructionSizeInBytes,
5960 CodeBufferCheckScope::kMaximumSize);
5961 // We need a load followed by a store. (The address used in a STREX instruction must
5962 // be the same as the address in the most recently executed LDREX instruction.)
5963 __ ldrexd(temp1, temp2, MemOperand(addr));
5964 codegen_->MaybeRecordImplicitNullCheck(instruction);
5965 }
5966 __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
5967 __ CompareAndBranchIfNonZero(temp1, &fail);
5968 }
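// The loop above is the standard exclusive-monitor retry pattern; in C-like pseudocode
// (illustrative only, names are ad hoc):
//   do {
//     ignored = ldrexd(addr);                     // claim the exclusive monitor
//     failed  = strexd(value_lo, value_hi, addr); // 0 on success, 1 if the monitor was lost
//   } while (failed != 0);
// The values loaded by LDREXD are thrown away; the load is only there because STREXD requires a
// matching exclusive load to the same address, as noted above.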
5969
5970 void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction,
5971 const FieldInfo& field_info,
5972 WriteBarrierKind write_barrier_kind) {
5973 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5974
5975 LocationSummary* locations =
5976 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5977 locations->SetInAt(0, Location::RequiresRegister());
5978
5979 DataType::Type field_type = field_info.GetFieldType();
5980 if (DataType::IsFloatingPointType(field_type)) {
5981 locations->SetInAt(1, Location::RequiresFpuRegister());
5982 } else {
5983 locations->SetInAt(1, Location::RequiresRegister());
5984 }
5985
5986 bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64;
5987 bool generate_volatile = field_info.IsVolatile()
5988 && is_wide
5989 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5990 bool needs_write_barrier =
5991 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5992 bool check_gc_card =
5993 codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
5994
5995 // Temporary registers for the write barrier.
5996 // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
5997 if (needs_write_barrier || check_gc_card) {
5998 locations->AddRegisterTemps(2);
5999 } else if (generate_volatile) {
6000 // The ARM encoding has some additional constraints for ldrexd/strexd:
6001 // - registers need to be consecutive
6002 // - the first register should be even but not R14.
6003 // We don't test for ARM yet, and the assertion makes sure that we
6004 // revisit this if we ever enable ARM encoding.
6005 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6006 locations->AddRegisterTemps(2);
6007 if (field_type == DataType::Type::kFloat64) {
6008 // For doubles we need two more registers to copy the value.
6009 locations->AddTemp(LocationFrom(r2));
6010 locations->AddTemp(LocationFrom(r3));
6011 }
6012 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6013 locations->AddTemp(Location::RequiresRegister());
6014 }
6015 }
6016
6017 void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
6018 const FieldInfo& field_info,
6019 bool value_can_be_null,
6020 WriteBarrierKind write_barrier_kind) {
6021 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6022
6023 LocationSummary* locations = instruction->GetLocations();
6024 vixl32::Register base = InputRegisterAt(instruction, 0);
6025 Location value = locations->InAt(1);
6026
6027 bool is_volatile = field_info.IsVolatile();
6028 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6029 DataType::Type field_type = field_info.GetFieldType();
6030 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6031 bool needs_write_barrier =
6032 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6033
6034 if (is_volatile) {
6035 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
6036 }
6037
6038 switch (field_type) {
6039 case DataType::Type::kBool:
6040 case DataType::Type::kUint8:
6041 case DataType::Type::kInt8:
6042 case DataType::Type::kUint16:
6043 case DataType::Type::kInt16:
6044 case DataType::Type::kInt32: {
6045 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6046 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6047 StoreOperandType operand_type = GetStoreOperandType(field_type);
6048 GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset);
6049 codegen_->MaybeRecordImplicitNullCheck(instruction);
6050 break;
6051 }
6052
6053 case DataType::Type::kReference: {
6054 vixl32::Register value_reg = RegisterFrom(value);
6055 if (kPoisonHeapReferences) {
6056 DCHECK_EQ(field_type, DataType::Type::kReference);
6057 value_reg = RegisterFrom(locations->GetTemp(0));
6058 __ Mov(value_reg, RegisterFrom(value));
6059 GetAssembler()->PoisonHeapReference(value_reg);
6060 }
6061 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6062 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6063 GetAssembler()->StoreToOffset(kStoreWord, value_reg, base, offset);
6064 codegen_->MaybeRecordImplicitNullCheck(instruction);
6065 break;
6066 }
6067
6068 case DataType::Type::kInt64: {
6069 if (is_volatile && !atomic_ldrd_strd) {
6070 GenerateWideAtomicStore(base,
6071 offset,
6072 LowRegisterFrom(value),
6073 HighRegisterFrom(value),
6074 RegisterFrom(locations->GetTemp(0)),
6075 RegisterFrom(locations->GetTemp(1)),
6076 instruction);
6077 } else {
6078 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6079 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6080 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
6081 codegen_->MaybeRecordImplicitNullCheck(instruction);
6082 }
6083 break;
6084 }
6085
6086 case DataType::Type::kFloat32: {
6087 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6088 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6089 GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
6090 codegen_->MaybeRecordImplicitNullCheck(instruction);
6091 break;
6092 }
6093
6094 case DataType::Type::kFloat64: {
6095 vixl32::DRegister value_reg = DRegisterFrom(value);
6096 if (is_volatile && !atomic_ldrd_strd) {
6097 vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
6098 vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
6099
6100 __ Vmov(value_reg_lo, value_reg_hi, value_reg);
6101
6102 GenerateWideAtomicStore(base,
6103 offset,
6104 value_reg_lo,
6105 value_reg_hi,
6106 RegisterFrom(locations->GetTemp(2)),
6107 RegisterFrom(locations->GetTemp(3)),
6108 instruction);
6109 } else {
6110 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6111 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6112 GetAssembler()->StoreDToOffset(value_reg, base, offset);
6113 codegen_->MaybeRecordImplicitNullCheck(instruction);
6114 }
6115 break;
6116 }
6117
6118 case DataType::Type::kUint32:
6119 case DataType::Type::kUint64:
6120 case DataType::Type::kVoid:
6121 LOG(FATAL) << "Unreachable type " << field_type;
6122 UNREACHABLE();
6123 }
6124
6125 if (needs_write_barrier) {
6126 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6127 vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6128 codegen_->MaybeMarkGCCard(
6129 temp,
6130 card,
6131 base,
6132 RegisterFrom(value),
6133 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
6134 } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
6135 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6136 vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6137 codegen_->CheckGCCardIsValid(temp, card, base);
6138 }
6139
6140 if (is_volatile) {
6141 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6142 }
6143 }
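// A sketch of the common (non-volatile, reference) store path above:
//   [move/poison the value into a temp when kPoisonHeapReferences]
//   str value, [base, #offset]        ; field write; the PC is recorded right after it so an
//                                     ; access fault can be reported as an implicit null check
//   MaybeMarkGCCard(temp, card, ...)  ; card mark; when the value can be null and the barrier is
//                                     ; not being relied on, a runtime null check skips it
// Wide volatile fields on cores without atomic LDRD/STRD go through GenerateWideAtomicStore
// instead, and volatile stores are bracketed by the kAnyStore / kAnyAny barriers emitted at the
// top and bottom of this method.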
6144
6145 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
6146 const FieldInfo& field_info) {
6147 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6148
6149 bool object_field_get_with_read_barrier =
6150 (field_info.GetFieldType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6151 LocationSummary* locations =
6152 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6153 object_field_get_with_read_barrier
6154 ? LocationSummary::kCallOnSlowPath
6155 : LocationSummary::kNoCall);
6156 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6157 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6158 }
6159 // Input for object receiver.
6160 locations->SetInAt(0, Location::RequiresRegister());
6161
6162 bool volatile_for_double = field_info.IsVolatile()
6163 && (field_info.GetFieldType() == DataType::Type::kFloat64)
6164 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6165 // The output overlaps in the case of a volatile long: we don't want the code generated by
6166 // `GenerateWideAtomicLoad()` to overwrite the object's location. Likewise, in the case
6167 // of an object field get with non-Baker read barriers enabled, we do not want the load
6168 // to overwrite the object's location, as we need it to emit the read barrier.
6169 // Baker read barrier implementation with introspection does not have this restriction.
6170 bool overlap =
6171 (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) ||
6172 (object_field_get_with_read_barrier && !kUseBakerReadBarrier);
6173
6174 if (DataType::IsFloatingPointType(instruction->GetType())) {
6175 locations->SetOut(Location::RequiresFpuRegister());
6176 } else {
6177 locations->SetOut(Location::RequiresRegister(),
6178 (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
6179 }
6180 if (volatile_for_double) {
6181 // The ARM encoding has some additional constraints for ldrexd/strexd:
6182 // - registers need to be consecutive
6183 // - the first register should be even but not R14.
6184 // We don't test for ARM yet, and the assertion makes sure that we
6185 // revisit this if we ever enable ARM encoding.
6186 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6187 locations->AddRegisterTemps(2);
6188 } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6189 // We need a temporary register for the read barrier load in
6190 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6191 // only if the offset is too big.
6192 if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
6193 locations->AddTemp(Location::RequiresRegister());
6194 }
6195 }
6196 }
6197
6198 Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
6199 DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
6200 if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
6201 (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
6202 return Location::ConstantLocation(input);
6203 } else {
6204 return Location::RequiresFpuRegister();
6205 }
6206 }
6207
6208 Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
6209 Opcode opcode) {
6210 DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
6211 if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
6212 return Location::ConstantLocation(constant);
6213 }
6214 return Location::RequiresRegister();
6215 }
6216
6217 static bool CanEncode32BitConstantAsImmediate(
6218 CodeGeneratorARMVIXL* codegen,
6219 uint32_t value,
6220 Opcode opcode,
6221 vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) {
6222 ArmVIXLAssembler* assembler = codegen->GetAssembler();
6223 if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) {
6224 return true;
6225 }
6226 Opcode neg_opcode = kNoOperand;
6227 uint32_t neg_value = 0;
6228 switch (opcode) {
6229 case AND: neg_opcode = BIC; neg_value = ~value; break;
6230 case ORR: neg_opcode = ORN; neg_value = ~value; break;
6231 case ADD: neg_opcode = SUB; neg_value = -value; break;
6232 case ADC: neg_opcode = SBC; neg_value = ~value; break;
6233 case SUB: neg_opcode = ADD; neg_value = -value; break;
6234 case SBC: neg_opcode = ADC; neg_value = ~value; break;
6235 case MOV: neg_opcode = MVN; neg_value = ~value; break;
6236 default:
6237 return false;
6238 }
6239
6240 if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) {
6241 return true;
6242 }
6243
6244 return opcode == AND && IsPowerOfTwo(value + 1);
6245 }
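// The fallback table above relies on each instruction having a complementary form that consumes
// the bitwise or arithmetic negation of the constant. For example (illustrative values):
//   x & 0xFFFFFF00 : 0xFFFFFF00 is not a valid modified immediate, but ~0xFFFFFF00 == 0xFF is,
//                    so the AND can be emitted as BIC with 0xFF.
//   x + 0xFFFFFFFF : can be emitted as SUB with 1.
// The final special case accepts AND with masks of the form 2^n - 1, which do not need a modified
// immediate at all (such masks can be handled differently, e.g. with a bitfield extract, when the
// AND is actually generated).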
6246
6247 bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) {
6248 uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
6249 if (DataType::Is64BitType(input_cst->GetType())) {
6250 Opcode high_opcode = opcode;
6251 vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare;
6252 switch (opcode) {
6253 case SUB:
6254 // Flip the operation to an ADD.
6255 value = -value;
6256 opcode = ADD;
6257 FALLTHROUGH_INTENDED;
6258 case ADD:
6259 if (Low32Bits(value) == 0u) {
6260 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode);
6261 }
6262 high_opcode = ADC;
6263 low_flags_update = vixl32::FlagsUpdate::SetFlags;
6264 break;
6265 default:
6266 break;
6267 }
6268 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) &&
6269 CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update);
6270 } else {
6271 return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode);
6272 }
6273 }
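// For 64-bit constants the test above mirrors how the operation is later split into a low-word
// and a high-word instruction: a long ADD becomes a flag-setting ADDS on the low words followed
// by an ADC on the high words, so the low half must be encodable in the flag-setting form and the
// high half in ADC. A 64-bit SUB is first flipped to an ADD of the negated constant, and an ADD
// whose low 32 bits are zero only needs the high word to be encodable.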
6274
6275 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
6276 const FieldInfo& field_info) {
6277 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6278
6279 LocationSummary* locations = instruction->GetLocations();
6280 uint32_t receiver_input = 0;
6281 vixl32::Register base = InputRegisterAt(instruction, receiver_input);
6282 Location out = locations->Out();
6283 bool is_volatile = field_info.IsVolatile();
6284 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6285 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6286 DataType::Type load_type = instruction->GetType();
6287 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6288
6289 switch (load_type) {
6290 case DataType::Type::kBool:
6291 case DataType::Type::kUint8:
6292 case DataType::Type::kInt8:
6293 case DataType::Type::kUint16:
6294 case DataType::Type::kInt16:
6295 case DataType::Type::kInt32: {
6296 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6297 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6298 LoadOperandType operand_type = GetLoadOperandType(load_type);
6299 GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
6300 codegen_->MaybeRecordImplicitNullCheck(instruction);
6301 break;
6302 }
6303
6304 case DataType::Type::kReference: {
6305 // /* HeapReference<Object> */ out = *(base + offset)
6306 if (codegen_->EmitBakerReadBarrier()) {
6307 Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6308 // Note that a potential implicit null check is handled in this
6309 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
6310 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6311 instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true);
6312 if (is_volatile) {
6313 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6314 }
6315 } else {
6316 {
6317 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6318 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6319 GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
6320 codegen_->MaybeRecordImplicitNullCheck(instruction);
6321 }
6322 if (is_volatile) {
6323 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6324 }
6325 // If read barriers are enabled, emit read barriers other than
6326 // Baker's using a slow path (and also unpoison the loaded
6327 // reference, if heap poisoning is enabled).
6328 codegen_->MaybeGenerateReadBarrierSlow(
6329 instruction, out, out, locations->InAt(receiver_input), offset);
6330 }
6331 break;
6332 }
6333
6334 case DataType::Type::kInt64: {
6335 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6336 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6337 if (is_volatile && !atomic_ldrd_strd) {
6338 GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
6339 } else {
6340 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
6341 }
6342 codegen_->MaybeRecordImplicitNullCheck(instruction);
6343 break;
6344 }
6345
6346 case DataType::Type::kFloat32: {
6347 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6348 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6349 GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
6350 codegen_->MaybeRecordImplicitNullCheck(instruction);
6351 break;
6352 }
6353
6354 case DataType::Type::kFloat64: {
6355 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6356 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6357 vixl32::DRegister out_dreg = DRegisterFrom(out);
6358 if (is_volatile && !atomic_ldrd_strd) {
6359 vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
6360 vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
6361 GenerateWideAtomicLoad(base, offset, lo, hi);
6362 codegen_->MaybeRecordImplicitNullCheck(instruction);
6363 __ Vmov(out_dreg, lo, hi);
6364 } else {
6365 GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
6366 codegen_->MaybeRecordImplicitNullCheck(instruction);
6367 }
6368 break;
6369 }
6370
6371 case DataType::Type::kUint32:
6372 case DataType::Type::kUint64:
6373 case DataType::Type::kVoid:
6374 LOG(FATAL) << "Unreachable type " << load_type;
6375 UNREACHABLE();
6376 }
6377
6378 if (is_volatile) {
6379 if (load_type == DataType::Type::kReference) {
6380 // Memory barriers, in the case of references, are also handled
6381 // in the previous switch statement.
6382 } else {
6383 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6384 }
6385 }
6386 }
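// The load path above mirrors HandleFieldSet: reference fields go through the Baker read barrier
// fast path when it is enabled (or a slow-path read barrier otherwise), wide volatile fields on
// cores without atomic LDRD use GenerateWideAtomicLoad, and every volatile load is followed by a
// kLoadAny barrier (for references the barrier is emitted inside the switch, so that it sits
// between the load and the read barrier handling).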
6387
6388 void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6389 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6390 }
6391
6392 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6393 HandleFieldSet(instruction,
6394 instruction->GetFieldInfo(),
6395 instruction->GetValueCanBeNull(),
6396 instruction->GetWriteBarrierKind());
6397 }
6398
6399 void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6400 HandleFieldGet(instruction, instruction->GetFieldInfo());
6401 }
6402
6403 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6404 HandleFieldGet(instruction, instruction->GetFieldInfo());
6405 }
6406
6407 void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6408 HandleFieldGet(instruction, instruction->GetFieldInfo());
6409 }
6410
6411 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6412 HandleFieldGet(instruction, instruction->GetFieldInfo());
6413 }
6414
6415 void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6416 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6417 }
6418
6419 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6420 HandleFieldSet(instruction,
6421 instruction->GetFieldInfo(),
6422 instruction->GetValueCanBeNull(),
6423 instruction->GetWriteBarrierKind());
6424 }
6425
6426 void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6427 codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0));
6428 }
6429
6430 void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6431 __ Mov(r0, instruction->GetFormat()->GetValue());
6432 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction);
6433 }
6434
6435 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
6436 HUnresolvedInstanceFieldGet* instruction) {
6437 FieldAccessCallingConventionARMVIXL calling_convention;
6438 codegen_->CreateUnresolvedFieldLocationSummary(
6439 instruction, instruction->GetFieldType(), calling_convention);
6440 }
6441
6442 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
6443 HUnresolvedInstanceFieldGet* instruction) {
6444 FieldAccessCallingConventionARMVIXL calling_convention;
6445 codegen_->GenerateUnresolvedFieldAccess(instruction,
6446 instruction->GetFieldType(),
6447 instruction->GetFieldIndex(),
6448 calling_convention);
6449 }
6450
6451 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
6452 HUnresolvedInstanceFieldSet* instruction) {
6453 FieldAccessCallingConventionARMVIXL calling_convention;
6454 codegen_->CreateUnresolvedFieldLocationSummary(
6455 instruction, instruction->GetFieldType(), calling_convention);
6456 }
6457
6458 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
6459 HUnresolvedInstanceFieldSet* instruction) {
6460 FieldAccessCallingConventionARMVIXL calling_convention;
6461 codegen_->GenerateUnresolvedFieldAccess(instruction,
6462 instruction->GetFieldType(),
6463 instruction->GetFieldIndex(),
6464 calling_convention);
6465 }
6466
6467 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
6468 HUnresolvedStaticFieldGet* instruction) {
6469 FieldAccessCallingConventionARMVIXL calling_convention;
6470 codegen_->CreateUnresolvedFieldLocationSummary(
6471 instruction, instruction->GetFieldType(), calling_convention);
6472 }
6473
6474 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
6475 HUnresolvedStaticFieldGet* instruction) {
6476 FieldAccessCallingConventionARMVIXL calling_convention;
6477 codegen_->GenerateUnresolvedFieldAccess(instruction,
6478 instruction->GetFieldType(),
6479 instruction->GetFieldIndex(),
6480 calling_convention);
6481 }
6482
6483 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
6484 HUnresolvedStaticFieldSet* instruction) {
6485 FieldAccessCallingConventionARMVIXL calling_convention;
6486 codegen_->CreateUnresolvedFieldLocationSummary(
6487 instruction, instruction->GetFieldType(), calling_convention);
6488 }
6489
6490 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
6491 HUnresolvedStaticFieldSet* instruction) {
6492 FieldAccessCallingConventionARMVIXL calling_convention;
6493 codegen_->GenerateUnresolvedFieldAccess(instruction,
6494 instruction->GetFieldType(),
6495 instruction->GetFieldIndex(),
6496 calling_convention);
6497 }
6498
6499 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6500 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6501 locations->SetInAt(0, Location::RequiresRegister());
6502 }
6503
6504 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
6505 if (CanMoveNullCheckToUser(instruction)) {
6506 return;
6507 }
6508
6509 UseScratchRegisterScope temps(GetVIXLAssembler());
6510 // Ensure the pc position is recorded immediately after the `ldr` instruction.
6511 ExactAssemblyScope aas(GetVIXLAssembler(),
6512 vixl32::kMaxInstructionSizeInBytes,
6513 CodeBufferCheckScope::kMaximumSize);
6514 __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
6515 RecordPcInfo(instruction);
6516 }
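// The implicit null check is just a dummy load from the object itself: if the receiver is null
// the LDR faults, and the PC recorded here lets the runtime's fault handler report the fault as a
// NullPointerException instead of crashing. When the check can be folded into the user of the
// instruction (CanMoveNullCheckToUser), nothing is emitted at all.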
6517
6518 void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
6519 NullCheckSlowPathARMVIXL* slow_path =
6520 new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction);
6521 AddSlowPath(slow_path);
6522 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
6523 }
6524
6525 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6526 codegen_->GenerateNullCheck(instruction);
6527 }
6528
6529 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type,
6530 Location out_loc,
6531 vixl32::Register base,
6532 vixl32::Register reg_index,
6533 vixl32::Condition cond) {
6534 uint32_t shift_count = DataType::SizeShift(type);
6535 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6536
6537 switch (type) {
6538 case DataType::Type::kBool:
6539 case DataType::Type::kUint8:
6540 __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
6541 break;
6542 case DataType::Type::kInt8:
6543 __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
6544 break;
6545 case DataType::Type::kUint16:
6546 __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
6547 break;
6548 case DataType::Type::kInt16:
6549 __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
6550 break;
6551 case DataType::Type::kReference:
6552 case DataType::Type::kInt32:
6553 __ Ldr(cond, RegisterFrom(out_loc), mem_address);
6554 break;
6555 // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
6556 case DataType::Type::kInt64:
6557 case DataType::Type::kFloat32:
6558 case DataType::Type::kFloat64:
6559 default:
6560 LOG(FATAL) << "Unreachable type " << type;
6561 UNREACHABLE();
6562 }
6563 }
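// This helper (and StoreToShiftedRegOffset below) uses the [base, index, LSL #shift] addressing
// mode, with the shift equal to log2 of the element size, so callers can pass the raw array index
// register directly. T32 only provides this mode for byte/halfword/word accesses, which is why
// the 64-bit and FP cases are unreachable here; for those types the callers scale and add the
// index into a temporary register themselves.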
6564
6565 void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
6566 Location loc,
6567 vixl32::Register base,
6568 vixl32::Register reg_index,
6569 vixl32::Condition cond) {
6570 uint32_t shift_count = DataType::SizeShift(type);
6571 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6572
6573 switch (type) {
6574 case DataType::Type::kBool:
6575 case DataType::Type::kUint8:
6576 case DataType::Type::kInt8:
6577 __ Strb(cond, RegisterFrom(loc), mem_address);
6578 break;
6579 case DataType::Type::kUint16:
6580 case DataType::Type::kInt16:
6581 __ Strh(cond, RegisterFrom(loc), mem_address);
6582 break;
6583 case DataType::Type::kReference:
6584 case DataType::Type::kInt32:
6585 __ Str(cond, RegisterFrom(loc), mem_address);
6586 break;
6587 // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
6588 case DataType::Type::kInt64:
6589 case DataType::Type::kFloat32:
6590 case DataType::Type::kFloat64:
6591 default:
6592 LOG(FATAL) << "Unreachable type " << type;
6593 UNREACHABLE();
6594 }
6595 }
6596
6597 void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6598 bool object_array_get_with_read_barrier =
6599 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6600 LocationSummary* locations =
6601 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6602 object_array_get_with_read_barrier
6603 ? LocationSummary::kCallOnSlowPath
6604 : LocationSummary::kNoCall);
6605 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6606 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6607 }
6608 locations->SetInAt(0, Location::RequiresRegister());
6609 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6610 if (DataType::IsFloatingPointType(instruction->GetType())) {
6611 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6612 } else {
6613 // The output overlaps for an object array get with non-Baker read barriers: we do not want
6614 // the load to overwrite the object's location, as we need it to emit the read barrier.
6615 // Baker read barrier implementation with introspection does not have this restriction.
6616 bool overlap = object_array_get_with_read_barrier && !kUseBakerReadBarrier;
6617 locations->SetOut(Location::RequiresRegister(),
6618 overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
6619 }
6620 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6621 if (instruction->GetIndex()->IsConstant()) {
6622 // Array loads with constant index are treated as field loads.
6623 // We need a temporary register for the read barrier load in
6624 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6625 // only if the offset is too big.
6626 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
6627 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
6628 offset += index << DataType::SizeShift(DataType::Type::kReference);
6629 if (offset >= kReferenceLoadMinFarOffset) {
6630 locations->AddTemp(Location::RequiresRegister());
6631 }
6632 } else {
6633 // We need a non-scratch temporary for the array data pointer in
6634 // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
6635 locations->AddTemp(Location::RequiresRegister());
6636 }
6637 } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6638 // Also need a temporary for String compression feature.
6639 locations->AddTemp(Location::RequiresRegister());
6640 }
6641 }
6642
6643 void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6644 LocationSummary* locations = instruction->GetLocations();
6645 Location obj_loc = locations->InAt(0);
6646 vixl32::Register obj = InputRegisterAt(instruction, 0);
6647 Location index = locations->InAt(1);
6648 Location out_loc = locations->Out();
6649 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6650 DataType::Type type = instruction->GetType();
6651 const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
6652 instruction->IsStringCharAt();
6653 HInstruction* array_instr = instruction->GetArray();
6654 bool has_intermediate_address = array_instr->IsIntermediateAddress();
6655
6656 switch (type) {
6657 case DataType::Type::kBool:
6658 case DataType::Type::kUint8:
6659 case DataType::Type::kInt8:
6660 case DataType::Type::kUint16:
6661 case DataType::Type::kInt16:
6662 case DataType::Type::kInt32: {
6663 vixl32::Register length;
6664 if (maybe_compressed_char_at) {
6665 length = RegisterFrom(locations->GetTemp(0));
6666 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6667 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6668 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6669 GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
6670 codegen_->MaybeRecordImplicitNullCheck(instruction);
6671 }
6672 if (index.IsConstant()) {
6673 int32_t const_index = Int32ConstantFrom(index);
6674 if (maybe_compressed_char_at) {
6675 vixl32::Label uncompressed_load, done;
6676 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6677 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
6678 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6679 "Expecting 0=compressed, 1=uncompressed");
6680 __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6681 GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
6682 RegisterFrom(out_loc),
6683 obj,
6684 data_offset + const_index);
6685 __ B(final_label);
6686 __ Bind(&uncompressed_load);
6687 GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16),
6688 RegisterFrom(out_loc),
6689 obj,
6690 data_offset + (const_index << 1));
6691 if (done.IsReferenced()) {
6692 __ Bind(&done);
6693 }
6694 } else {
6695 uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type));
6696
6697 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6698 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6699 LoadOperandType load_type = GetLoadOperandType(type);
6700 GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
6701 codegen_->MaybeRecordImplicitNullCheck(instruction);
6702 }
6703 } else {
6704 UseScratchRegisterScope temps(GetVIXLAssembler());
6705 vixl32::Register temp = temps.Acquire();
6706
6707 if (has_intermediate_address) {
6708 // We do not need to compute the intermediate address from the array: the
6709 // input instruction has done it already. See the comment in
6710 // `TryExtractArrayAccessAddress()`.
6711 if (kIsDebugBuild) {
6712 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6713 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6714 }
6715 temp = obj;
6716 } else {
6717 __ Add(temp, obj, data_offset);
6718 }
6719 if (maybe_compressed_char_at) {
6720 vixl32::Label uncompressed_load, done;
6721 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6722 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
6723 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6724 "Expecting 0=compressed, 1=uncompressed");
6725 __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6726 __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
6727 __ B(final_label);
6728 __ Bind(&uncompressed_load);
6729 __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
6730 if (done.IsReferenced()) {
6731 __ Bind(&done);
6732 }
6733 } else {
6734 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6735 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6736 codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6737 codegen_->MaybeRecordImplicitNullCheck(instruction);
6738 }
6739 }
6740 break;
6741 }
6742
6743 case DataType::Type::kReference: {
6744 // The read barrier instrumentation of object ArrayGet
6745 // instructions does not support the HIntermediateAddress
6746 // instruction.
6747 DCHECK(!(has_intermediate_address && codegen_->EmitReadBarrier()));
6748
6749 static_assert(
6750 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6751 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6752 // /* HeapReference<Object> */ out =
6753 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6754 if (codegen_->EmitBakerReadBarrier()) {
6755 // Note that a potential implicit null check is handled in this
6756 // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
6757 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
6758 if (index.IsConstant()) {
6759 // Array load with a constant index can be treated as a field load.
6760 Location maybe_temp =
6761 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6762 data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
6763 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6764 out_loc,
6765 obj,
6766 data_offset,
6767 maybe_temp,
6768 /* needs_null_check= */ false);
6769 } else {
6770 Location temp = locations->GetTemp(0);
6771 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6772 out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false);
6773 }
6774 } else {
6775 vixl32::Register out = OutputRegister(instruction);
6776 if (index.IsConstant()) {
6777 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6778 {
6779 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6780 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6781 GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
6782 codegen_->MaybeRecordImplicitNullCheck(instruction);
6783 }
6784 // If read barriers are enabled, emit read barriers other than
6785 // Baker's using a slow path (and also unpoison the loaded
6786 // reference, if heap poisoning is enabled).
6787 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6788 } else {
6789 UseScratchRegisterScope temps(GetVIXLAssembler());
6790 vixl32::Register temp = temps.Acquire();
6791
6792 if (has_intermediate_address) {
6793 // We do not need to compute the intermediate address from the array: the
6794 // input instruction has done it already. See the comment in
6795 // `TryExtractArrayAccessAddress()`.
6796 if (kIsDebugBuild) {
6797 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6798 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6799 }
6800 temp = obj;
6801 } else {
6802 __ Add(temp, obj, data_offset);
6803 }
6804 {
6805 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6806 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6807 codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6808 temps.Close();
6809 codegen_->MaybeRecordImplicitNullCheck(instruction);
6810 }
6811 // If read barriers are enabled, emit read barriers other than
6812 // Baker's using a slow path (and also unpoison the loaded
6813 // reference, if heap poisoning is enabled).
6814 codegen_->MaybeGenerateReadBarrierSlow(
6815 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6816 }
6817 }
6818 break;
6819 }
6820
6821 case DataType::Type::kInt64: {
6822 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6823 // As two macro instructions can be emitted, the max size is doubled.
6824 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6825 if (index.IsConstant()) {
6826 size_t offset =
6827 (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6828 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
6829 } else {
6830 UseScratchRegisterScope temps(GetVIXLAssembler());
6831 vixl32::Register temp = temps.Acquire();
6832 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6833 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
6834 }
6835 codegen_->MaybeRecordImplicitNullCheck(instruction);
6836 break;
6837 }
6838
6839 case DataType::Type::kFloat32: {
6840 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6841 // As two macro instructions can be emitted, the max size is doubled.
6842 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6843 vixl32::SRegister out = SRegisterFrom(out_loc);
6844 if (index.IsConstant()) {
6845 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6846 GetAssembler()->LoadSFromOffset(out, obj, offset);
6847 } else {
6848 UseScratchRegisterScope temps(GetVIXLAssembler());
6849 vixl32::Register temp = temps.Acquire();
6850 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6851 GetAssembler()->LoadSFromOffset(out, temp, data_offset);
6852 }
6853 codegen_->MaybeRecordImplicitNullCheck(instruction);
6854 break;
6855 }
6856
6857 case DataType::Type::kFloat64: {
6858 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6859 // As two macro instructions can be emitted, the max size is doubled.
6860 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6861 if (index.IsConstant()) {
6862 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6863 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
6864 } else {
6865 UseScratchRegisterScope temps(GetVIXLAssembler());
6866 vixl32::Register temp = temps.Acquire();
6867 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6868 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
6869 }
6870 codegen_->MaybeRecordImplicitNullCheck(instruction);
6871 break;
6872 }
6873
6874 case DataType::Type::kUint32:
6875 case DataType::Type::kUint64:
6876 case DataType::Type::kVoid:
6877 LOG(FATAL) << "Unreachable type " << type;
6878 UNREACHABLE();
6879 }
6880 }
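// For String.charAt with string compression enabled, the sequence above first loads the count
// field and does `LSRS length, length, #1`: the shifted-out bit is the compression flag
// (0 = compressed, per the static_assert), so the carry selects between a byte load from
// `data + index` for compressed strings and a halfword load from `data + (index << 1)` for
// uncompressed ones. Reference element loads go through the Baker read barrier helpers when
// enabled, mirroring HandleFieldGet.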
6881
6882 void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
6883 DataType::Type value_type = instruction->GetComponentType();
6884
6885 const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6886 bool needs_write_barrier =
6887 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6888 bool check_gc_card =
6889 codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
6890
6891 bool needs_type_check = instruction->NeedsTypeCheck();
6892
6893 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6894 instruction,
6895 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6896
6897 locations->SetInAt(0, Location::RequiresRegister());
6898 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6899 if (DataType::IsFloatingPointType(value_type)) {
6900 locations->SetInAt(2, Location::RequiresFpuRegister());
6901 } else {
6902 locations->SetInAt(2, Location::RequiresRegister());
6903 }
6904 if (needs_write_barrier || check_gc_card || instruction->NeedsTypeCheck()) {
6905 // Temporary registers for type checking, the write barrier, checking the dirty bit, or heap
6906 // reference poisoning.
6907 locations->AddRegisterTemps(2);
6908 } else if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
6909 locations->AddTemp(Location::RequiresRegister());
6910 }
6911 }
6912
6913 void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
6914 LocationSummary* locations = instruction->GetLocations();
6915 vixl32::Register array = InputRegisterAt(instruction, 0);
6916 Location index = locations->InAt(1);
6917 DataType::Type value_type = instruction->GetComponentType();
6918 bool needs_type_check = instruction->NeedsTypeCheck();
6919 const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6920 bool needs_write_barrier =
6921 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6922 uint32_t data_offset =
6923 mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
6924 Location value_loc = locations->InAt(2);
6925 HInstruction* array_instr = instruction->GetArray();
6926 bool has_intermediate_address = array_instr->IsIntermediateAddress();
6927
6928 switch (value_type) {
6929 case DataType::Type::kBool:
6930 case DataType::Type::kUint8:
6931 case DataType::Type::kInt8:
6932 case DataType::Type::kUint16:
6933 case DataType::Type::kInt16:
6934 case DataType::Type::kInt32: {
6935 if (index.IsConstant()) {
6936 int32_t const_index = Int32ConstantFrom(index);
6937 uint32_t full_offset =
6938 data_offset + (const_index << DataType::SizeShift(value_type));
6939 StoreOperandType store_type = GetStoreOperandType(value_type);
6940 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6941 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6942 GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
6943 codegen_->MaybeRecordImplicitNullCheck(instruction);
6944 } else {
6945 UseScratchRegisterScope temps(GetVIXLAssembler());
6946 vixl32::Register temp = temps.Acquire();
6947
6948 if (has_intermediate_address) {
6949 // We do not need to compute the intermediate address from the array: the
6950 // input instruction has done it already. See the comment in
6951 // `TryExtractArrayAccessAddress()`.
6952 if (kIsDebugBuild) {
6953 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6954 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6955 }
6956 temp = array;
6957 } else {
6958 __ Add(temp, array, data_offset);
6959 }
6960 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6961 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6962 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6963 codegen_->MaybeRecordImplicitNullCheck(instruction);
6964 }
6965 break;
6966 }
6967
6968 case DataType::Type::kReference: {
6969 vixl32::Register value = RegisterFrom(value_loc);
6970 // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
6971 // See the comment in instruction_simplifier_shared.cc.
6972 DCHECK(!has_intermediate_address);
6973
6974 if (instruction->InputAt(2)->IsNullConstant()) {
6975 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6976 // As two macro instructions can be emitted the max size is doubled.
6977 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6978 // Just setting null.
6979 if (index.IsConstant()) {
6980 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6981 GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
6982 } else {
6983 DCHECK(index.IsRegister()) << index;
6984 UseScratchRegisterScope temps(GetVIXLAssembler());
6985 vixl32::Register temp = temps.Acquire();
6986 __ Add(temp, array, data_offset);
6987 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6988 }
6989 codegen_->MaybeRecordImplicitNullCheck(instruction);
6990 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6991 // We need to set a write barrier here even though we are writing null, since this write
6992 // barrier is being relied on.
6993 DCHECK(needs_write_barrier);
6994 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
6995 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
6996 codegen_->MarkGCCard(temp1, temp2, array);
6997 }
6998 DCHECK(!needs_type_check);
6999 break;
7000 }
7001
7002 const bool can_value_be_null = instruction->GetValueCanBeNull();
7003 // The WriteBarrierKind::kEmitNotBeingReliedOn case can skip the write barrier when the
7004 // stored value is null (no extra CompareAndBranchIfZero is needed, since the value has
7005 // already been null-checked for the type check).
7006 const bool skip_marking_gc_card =
7007 can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
7008 vixl32::Label do_store;
7009 vixl32::Label skip_writing_card;
7010 if (can_value_be_null) {
7011 if (skip_marking_gc_card) {
7012 __ CompareAndBranchIfZero(value, &skip_writing_card, /* is_far_target= */ false);
7013 } else {
7014 __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false);
7015 }
7016 }
7017
7018 SlowPathCodeARMVIXL* slow_path = nullptr;
7019 if (needs_type_check) {
7020 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
7021 codegen_->AddSlowPath(slow_path);
7022
7023 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7024 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7025 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7026
7027 // Note that when read barriers are enabled, the type checks
7028 // are performed without read barriers. This is fine, even in
7029 // the case where a class object is in the from-space after
7030 // the flip, as a comparison involving such a type would not
7031 // produce a false positive; it may of course produce a false
7032 // negative, in which case we would take the ArraySet slow
7033 // path.
7034
7035 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7036 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7037
7038 {
7039 // Ensure we record the pc position immediately after the `ldr` instruction.
7040 ExactAssemblyScope aas(GetVIXLAssembler(),
7041 vixl32::kMaxInstructionSizeInBytes,
7042 CodeBufferCheckScope::kMaximumSize);
7043 // /* HeapReference<Class> */ temp1 = array->klass_
7044 __ ldr(temp1, MemOperand(array, class_offset));
7045 codegen_->MaybeRecordImplicitNullCheck(instruction);
7046 }
7047 GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7048
7049 // /* HeapReference<Class> */ temp1 = temp1->component_type_
7050 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
7051 // /* HeapReference<Class> */ temp2 = value->klass_
7052 GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
7053 // If heap poisoning is enabled, no need to unpoison `temp1`
7054 // nor `temp2`, as we are comparing two poisoned references.
7055 __ Cmp(temp1, temp2);
7056
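// If the array is statically typed as Object[], a mismatch between the value's class and
// the array's component type is still acceptable when the component type is exactly
// java.lang.Object, the only class whose super_class_ is null; any other component type
// falls back to the slow path.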
7057 if (instruction->StaticTypeOfArrayIsObjectArray()) {
7058 vixl32::Label do_put;
7059 __ B(eq, &do_put, /* is_far_target= */ false);
7060 // If heap poisoning is enabled, the `temp1` reference has
7061 // not been unpoisoned yet; unpoison it now.
7062 GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7063
7064 // /* HeapReference<Class> */ temp1 = temp1->super_class_
7065 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
7066 // If heap poisoning is enabled, no need to unpoison
7067 // `temp1`, as we are comparing against null below.
7068 __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
7069 __ Bind(&do_put);
7070 } else {
7071 __ B(ne, slow_path->GetEntryLabel());
7072 }
7073 }
7074
7075 if (can_value_be_null && !skip_marking_gc_card) {
7076 DCHECK(do_store.IsReferenced());
7077 __ Bind(&do_store);
7078 }
7079
7080 if (needs_write_barrier) {
7081 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7082 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7083 codegen_->MarkGCCard(temp1, temp2, array);
7084 } else if (codegen_->ShouldCheckGCCard(
7085 value_type, instruction->GetValue(), write_barrier_kind)) {
7086 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7087 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7088 codegen_->CheckGCCardIsValid(temp1, temp2, array);
7089 }
7090
7091 if (skip_marking_gc_card) {
7092 // Note that we don't check whether the GC card is valid here, as the card may legitimately be clean.
7093 DCHECK(skip_writing_card.IsReferenced());
7094 __ Bind(&skip_writing_card);
7095 }
7096
7097 vixl32::Register source = value;
7098 if (kPoisonHeapReferences) {
7099 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7100 DCHECK_EQ(value_type, DataType::Type::kReference);
7101 __ Mov(temp1, value);
7102 GetAssembler()->PoisonHeapReference(temp1);
7103 source = temp1;
7104 }
7105
7106 {
7107 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7108 // As two macro instructions can be emitted the max size is doubled.
7109 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7110 if (index.IsConstant()) {
7111 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7112 GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
7113 } else {
7114 DCHECK(index.IsRegister()) << index;
7115
7116 UseScratchRegisterScope temps(GetVIXLAssembler());
7117 vixl32::Register temp = temps.Acquire();
7118 __ Add(temp, array, data_offset);
7119 codegen_->StoreToShiftedRegOffset(value_type,
7120 LocationFrom(source),
7121 temp,
7122 RegisterFrom(index));
7123 }
7124
7125 if (can_value_be_null || !needs_type_check) {
7126 codegen_->MaybeRecordImplicitNullCheck(instruction);
7127 }
7128 }
7129
7130 if (slow_path != nullptr) {
7131 __ Bind(slow_path->GetExitLabel());
7132 }
7133
7134 break;
7135 }
7136
7137 case DataType::Type::kInt64: {
7138 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7139 // As two macro instructions can be emitted the max size is doubled.
7140 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7141 Location value = locations->InAt(2);
7142 if (index.IsConstant()) {
7143 size_t offset =
7144 (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7145 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
7146 } else {
7147 UseScratchRegisterScope temps(GetVIXLAssembler());
7148 vixl32::Register temp = temps.Acquire();
7149 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7150 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
7151 }
7152 codegen_->MaybeRecordImplicitNullCheck(instruction);
7153 break;
7154 }
7155
7156 case DataType::Type::kFloat32: {
7157 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7158 // As two macro instructions can be emitted the max size is doubled.
7159 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7160 Location value = locations->InAt(2);
7161 DCHECK(value.IsFpuRegister());
7162 if (index.IsConstant()) {
7163 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7164 GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
7165 } else {
7166 UseScratchRegisterScope temps(GetVIXLAssembler());
7167 vixl32::Register temp = temps.Acquire();
7168 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
7169 GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
7170 }
7171 codegen_->MaybeRecordImplicitNullCheck(instruction);
7172 break;
7173 }
7174
7175 case DataType::Type::kFloat64: {
7176 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7177 // As two macro instructions can be emitted the max size is doubled.
7178 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7179 Location value = locations->InAt(2);
7180 DCHECK(value.IsFpuRegisterPair());
7181 if (index.IsConstant()) {
7182 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7183 GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
7184 } else {
7185 UseScratchRegisterScope temps(GetVIXLAssembler());
7186 vixl32::Register temp = temps.Acquire();
7187 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7188 GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
7189 }
7190 codegen_->MaybeRecordImplicitNullCheck(instruction);
7191 break;
7192 }
7193
7194 case DataType::Type::kUint32:
7195 case DataType::Type::kUint64:
7196 case DataType::Type::kVoid:
7197 LOG(FATAL) << "Unreachable type " << value_type;
7198 UNREACHABLE();
7199 }
7200 }
7201
7202 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7203 LocationSummary* locations =
7204 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7205 locations->SetInAt(0, Location::RequiresRegister());
7206 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7207 }
7208
7209 void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7210 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
7211 vixl32::Register obj = InputRegisterAt(instruction, 0);
7212 vixl32::Register out = OutputRegister(instruction);
7213 {
7214 ExactAssemblyScope aas(GetVIXLAssembler(),
7215 vixl32::kMaxInstructionSizeInBytes,
7216 CodeBufferCheckScope::kMaximumSize);
7217 __ ldr(out, MemOperand(obj, offset));
7218 codegen_->MaybeRecordImplicitNullCheck(instruction);
7219 }
7220 // Mask out compression flag from String's array length.
7221 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
7222 __ Lsr(out, out, 1u);
7223 }
7224 }
7225
7226 void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7227 LocationSummary* locations =
7228 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7229
7230 locations->SetInAt(0, Location::RequiresRegister());
7231 locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
7232 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7233 }
7234
7235 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7236 vixl32::Register out = OutputRegister(instruction);
7237 vixl32::Register first = InputRegisterAt(instruction, 0);
7238 Location second = instruction->GetLocations()->InAt(1);
7239
7240 if (second.IsRegister()) {
7241 __ Add(out, first, RegisterFrom(second));
7242 } else {
7243 __ Add(out, first, Int32ConstantFrom(second));
7244 }
7245 }
7246
7247 void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
7248 HIntermediateAddressIndex* instruction) {
7249 LOG(FATAL) << "Unreachable " << instruction->GetId();
7250 }
7251
7252 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
7253 HIntermediateAddressIndex* instruction) {
7254 LOG(FATAL) << "Unreachable " << instruction->GetId();
7255 }
7256
7257 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7258 RegisterSet caller_saves = RegisterSet::Empty();
7259 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7260 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
7261 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
7262 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
7263
7264 HInstruction* index = instruction->InputAt(0);
7265 HInstruction* length = instruction->InputAt(1);
7266 // If both index and length are constants we can statically check the bounds. But if at least one
7267 // of them is not encodable ArmEncodableConstantOrRegister will create
7268 // Location::RequiresRegister() which is not desired to happen. Instead we create constant
7269 // locations.
7270 bool both_const = index->IsConstant() && length->IsConstant();
7271 locations->SetInAt(0, both_const
7272 ? Location::ConstantLocation(index)
7273 : ArmEncodableConstantOrRegister(index, CMP));
7274 locations->SetInAt(1, both_const
7275 ? Location::ConstantLocation(length)
7276 : ArmEncodableConstantOrRegister(length, CMP));
7277 }
7278
7279 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7280 LocationSummary* locations = instruction->GetLocations();
7281 Location index_loc = locations->InAt(0);
7282 Location length_loc = locations->InAt(1);
7283
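// Whenever a comparison is emitted below, it uses an unsigned condition (HS or LS): a
// negative index wraps around to a large unsigned value, so a single branch covers both
// `index < 0` and `index >= length`.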
7284 if (length_loc.IsConstant()) {
7285 int32_t length = Int32ConstantFrom(length_loc);
7286 if (index_loc.IsConstant()) {
7287 // BCE will remove the bounds check if we are guaranteed to pass.
7288 int32_t index = Int32ConstantFrom(index_loc);
7289 if (index < 0 || index >= length) {
7290 SlowPathCodeARMVIXL* slow_path =
7291 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7292 codegen_->AddSlowPath(slow_path);
7293 __ B(slow_path->GetEntryLabel());
7294 } else {
7295 // Some optimization after BCE may have generated this, and we should not
7296 // generate a bounds check if it is a valid range.
7297 }
7298 return;
7299 }
7300
7301 SlowPathCodeARMVIXL* slow_path =
7302 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7303 __ Cmp(RegisterFrom(index_loc), length);
7304 codegen_->AddSlowPath(slow_path);
7305 __ B(hs, slow_path->GetEntryLabel());
7306 } else {
7307 SlowPathCodeARMVIXL* slow_path =
7308 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7309 __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
7310 codegen_->AddSlowPath(slow_path);
7311 __ B(ls, slow_path->GetEntryLabel());
7312 }
7313 }
7314
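// Storing a null reference cannot create a pointer the GC needs to track through the card
// table, so when `emit_null_check` is true the card is only marked for non-null values.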
7315 void CodeGeneratorARMVIXL::MaybeMarkGCCard(vixl32::Register temp,
7316 vixl32::Register card,
7317 vixl32::Register object,
7318 vixl32::Register value,
7319 bool emit_null_check) {
7320 vixl32::Label is_null;
7321 if (emit_null_check) {
7322 __ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false);
7323 }
7324 MarkGCCard(temp, card, object);
7325 if (emit_null_check) {
7326 __ Bind(&is_null);
7327 }
7328 }
7329
7330 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
7331 vixl32::Register card,
7332 vixl32::Register object) {
7333 // Load the address of the card table into `card`.
7334 GetAssembler()->LoadFromOffset(
7335 kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7336 // Calculate the offset (in the card table) of the card corresponding to `object`.
7337 __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7338 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
7339 // `object`'s card.
7340 //
7341 // Register `card` contains the address of the card table. Note that the card
7342 // table's base is biased during its creation so that it always starts at an
7343 // address whose least-significant byte is equal to `kCardDirty` (see
7344 // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
7345 // below writes the `kCardDirty` (byte) value into the `object`'s card
7346 // (located at `card + object >> kCardShift`).
7347 //
7348 // This dual use of the value in register `card` (1. to calculate the location
7349 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
7350 // (no need to explicitly load `kCardDirty` as an immediate value).
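//
// In effect, the store below performs
//
//   card_table[object >> kCardShift] = kCardDirty;
//
// where the byte written is simply the least-significant byte of `card`, which equals
// `kCardDirty` thanks to the biased table base described above.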
7351 __ Strb(card, MemOperand(card, temp));
7352 }
7353
7354 void CodeGeneratorARMVIXL::CheckGCCardIsValid(vixl32::Register temp,
7355 vixl32::Register card,
7356 vixl32::Register object) {
7357 vixl32::Label done;
7358 // Load the address of the card table into `card`.
7359 GetAssembler()->LoadFromOffset(
7360 kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7361 // Calculate the offset (in the card table) of the card corresponding to `object`.
7362 __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7363 // assert (!clean || !self->is_gc_marking)
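// That is: if the card is still clean while the GC is marking (`mr` is non-zero), the
// write barrier this store relies on did not dirty the card as expected, so trap with BKPT.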
7364 __ Ldrb(temp, MemOperand(card, temp));
7365 static_assert(gc::accounting::CardTable::kCardClean == 0);
7366 __ CompareAndBranchIfNonZero(temp, &done, /*is_far_target=*/false);
7367 __ CompareAndBranchIfZero(mr, &done, /*is_far_target=*/false);
7368 __ Bkpt(0);
7369 __ Bind(&done);
7370 }
7371
7372 void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
7373 LOG(FATAL) << "Unreachable";
7374 }
7375
7376 void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
7377 if (instruction->GetNext()->IsSuspendCheck() &&
7378 instruction->GetBlock()->GetLoopInformation() != nullptr) {
7379 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
7380 // The back edge will generate the suspend check.
7381 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
7382 }
7383
7384 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
7385 }
7386
7387 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7388 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7389 instruction, LocationSummary::kCallOnSlowPath);
7390 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7391 }
7392
7393 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7394 HBasicBlock* block = instruction->GetBlock();
7395 if (block->GetLoopInformation() != nullptr) {
7396 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7397 // The back edge will generate the suspend check.
7398 return;
7399 }
7400 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7401 // The goto will generate the suspend check.
7402 return;
7403 }
7404 GenerateSuspendCheck(instruction, nullptr);
7405 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14);
7406 }
7407
7408 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
7409 HBasicBlock* successor) {
7410 SuspendCheckSlowPathARMVIXL* slow_path =
7411 down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
7412 if (slow_path == nullptr) {
7413 slow_path =
7414 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor);
7415 instruction->SetSlowPath(slow_path);
7416 codegen_->AddSlowPath(slow_path);
7417 if (successor != nullptr) {
7418 DCHECK(successor->IsLoopHeader());
7419 }
7420 } else {
7421 DCHECK_EQ(slow_path->GetSuccessor(), successor);
7422 }
7423
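// Fast path: test the thread's flag word for suspend or checkpoint requests. When this
// check guards a back edge (successor != nullptr), the no-request case branches straight
// to the loop header instead of falling through to the slow path's return label.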
7424 UseScratchRegisterScope temps(GetVIXLAssembler());
7425 vixl32::Register temp = temps.Acquire();
7426 GetAssembler()->LoadFromOffset(
7427 kLoadWord, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
7428 __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
7429 if (successor == nullptr) {
7430 __ B(ne, slow_path->GetEntryLabel());
7431 __ Bind(slow_path->GetReturnLabel());
7432 } else {
7433 __ B(eq, codegen_->GetLabelOf(successor));
7434 __ B(slow_path->GetEntryLabel());
7435 }
7436 }
7437
7438 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
7439 return codegen_->GetAssembler();
7440 }
7441
7442 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
7443 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7444 MoveOperands* move = moves_[index];
7445 Location source = move->GetSource();
7446 Location destination = move->GetDestination();
7447
7448 if (source.IsRegister()) {
7449 if (destination.IsRegister()) {
7450 __ Mov(RegisterFrom(destination), RegisterFrom(source));
7451 } else if (destination.IsFpuRegister()) {
7452 __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
7453 } else {
7454 DCHECK(destination.IsStackSlot());
7455 GetAssembler()->StoreToOffset(kStoreWord,
7456 RegisterFrom(source),
7457 sp,
7458 destination.GetStackIndex());
7459 }
7460 } else if (source.IsStackSlot()) {
7461 if (destination.IsRegister()) {
7462 GetAssembler()->LoadFromOffset(kLoadWord,
7463 RegisterFrom(destination),
7464 sp,
7465 source.GetStackIndex());
7466 } else if (destination.IsFpuRegister()) {
7467 GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
7468 } else {
7469 DCHECK(destination.IsStackSlot());
7470 vixl32::Register temp = temps.Acquire();
7471 GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
7472 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7473 }
7474 } else if (source.IsFpuRegister()) {
7475 if (destination.IsRegister()) {
7476 __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
7477 } else if (destination.IsFpuRegister()) {
7478 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
7479 } else {
7480 DCHECK(destination.IsStackSlot());
7481 GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
7482 }
7483 } else if (source.IsDoubleStackSlot()) {
7484 if (destination.IsDoubleStackSlot()) {
7485 vixl32::DRegister temp = temps.AcquireD();
7486 GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
7487 GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
7488 } else if (destination.IsRegisterPair()) {
7489 DCHECK(ExpectedPairLayout(destination));
7490 GetAssembler()->LoadFromOffset(
7491 kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
7492 } else {
7493 DCHECK(destination.IsFpuRegisterPair()) << destination;
7494 GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
7495 }
7496 } else if (source.IsRegisterPair()) {
7497 if (destination.IsRegisterPair()) {
7498 __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
7499 __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
7500 } else if (destination.IsFpuRegisterPair()) {
7501 __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
7502 } else {
7503 DCHECK(destination.IsDoubleStackSlot()) << destination;
7504 DCHECK(ExpectedPairLayout(source));
7505 GetAssembler()->StoreToOffset(kStoreWordPair,
7506 LowRegisterFrom(source),
7507 sp,
7508 destination.GetStackIndex());
7509 }
7510 } else if (source.IsFpuRegisterPair()) {
7511 if (destination.IsRegisterPair()) {
7512 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
7513 } else if (destination.IsFpuRegisterPair()) {
7514 __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
7515 } else {
7516 DCHECK(destination.IsDoubleStackSlot()) << destination;
7517 GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
7518 }
7519 } else {
7520 DCHECK(source.IsConstant()) << source;
7521 HConstant* constant = source.GetConstant();
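// Constants are materialized directly into register destinations with MOV/VMOV; stack
// destinations are staged through a scratch core register, with 64-bit values split into
// their low and high 32-bit halves.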
7522 if (constant->IsIntConstant() || constant->IsNullConstant()) {
7523 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7524 if (destination.IsRegister()) {
7525 __ Mov(RegisterFrom(destination), value);
7526 } else {
7527 DCHECK(destination.IsStackSlot());
7528 vixl32::Register temp = temps.Acquire();
7529 __ Mov(temp, value);
7530 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7531 }
7532 } else if (constant->IsLongConstant()) {
7533 int64_t value = Int64ConstantFrom(source);
7534 if (destination.IsRegisterPair()) {
7535 __ Mov(LowRegisterFrom(destination), Low32Bits(value));
7536 __ Mov(HighRegisterFrom(destination), High32Bits(value));
7537 } else {
7538 DCHECK(destination.IsDoubleStackSlot()) << destination;
7539 vixl32::Register temp = temps.Acquire();
7540 __ Mov(temp, Low32Bits(value));
7541 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7542 __ Mov(temp, High32Bits(value));
7543 GetAssembler()->StoreToOffset(kStoreWord,
7544 temp,
7545 sp,
7546 destination.GetHighStackIndex(kArmWordSize));
7547 }
7548 } else if (constant->IsDoubleConstant()) {
7549 double value = constant->AsDoubleConstant()->GetValue();
7550 if (destination.IsFpuRegisterPair()) {
7551 __ Vmov(DRegisterFrom(destination), value);
7552 } else {
7553 DCHECK(destination.IsDoubleStackSlot()) << destination;
7554 uint64_t int_value = bit_cast<uint64_t, double>(value);
7555 vixl32::Register temp = temps.Acquire();
7556 __ Mov(temp, Low32Bits(int_value));
7557 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7558 __ Mov(temp, High32Bits(int_value));
7559 GetAssembler()->StoreToOffset(kStoreWord,
7560 temp,
7561 sp,
7562 destination.GetHighStackIndex(kArmWordSize));
7563 }
7564 } else {
7565 DCHECK(constant->IsFloatConstant()) << constant->DebugName();
7566 float value = constant->AsFloatConstant()->GetValue();
7567 if (destination.IsFpuRegister()) {
7568 __ Vmov(SRegisterFrom(destination), value);
7569 } else {
7570 DCHECK(destination.IsStackSlot());
7571 vixl32::Register temp = temps.Acquire();
7572 __ Mov(temp, bit_cast<int32_t, float>(value));
7573 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7574 }
7575 }
7576 }
7577 }
7578
7579 void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
7580 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7581 vixl32::Register temp = temps.Acquire();
7582 __ Mov(temp, reg);
7583 GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
7584 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7585 }
7586
7587 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
7588 // TODO(VIXL32): Double check the performance of this implementation.
7589 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7590 vixl32::Register temp1 = temps.Acquire();
7591 ScratchRegisterScope ensure_scratch(
7592 this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
7593 vixl32::Register temp2(ensure_scratch.GetRegister());
7594
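// ScratchRegisterScope may have had to spill a core register (via SpillScratch) to obtain
// the second temporary; in that case every stack offset below is adjusted by one word to
// account for the push.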
7595 int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
7596 GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
7597 GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
7598 GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
7599 GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
7600 }
7601
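// Swaps are resolved through scratch registers: core and FP register swaps go via a
// temporary, register<->stack swaps reuse Exchange(), and the pair/double cases stage one
// side in a D register while the other side is reloaded from or written back to its slot.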
7602 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
7603 MoveOperands* move = moves_[index];
7604 Location source = move->GetSource();
7605 Location destination = move->GetDestination();
7606 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7607
7608 if (source.IsRegister() && destination.IsRegister()) {
7609 vixl32::Register temp = temps.Acquire();
7610 DCHECK(!RegisterFrom(source).Is(temp));
7611 DCHECK(!RegisterFrom(destination).Is(temp));
7612 __ Mov(temp, RegisterFrom(destination));
7613 __ Mov(RegisterFrom(destination), RegisterFrom(source));
7614 __ Mov(RegisterFrom(source), temp);
7615 } else if (source.IsRegister() && destination.IsStackSlot()) {
7616 Exchange(RegisterFrom(source), destination.GetStackIndex());
7617 } else if (source.IsStackSlot() && destination.IsRegister()) {
7618 Exchange(RegisterFrom(destination), source.GetStackIndex());
7619 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7620 Exchange(source.GetStackIndex(), destination.GetStackIndex());
7621 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7622 vixl32::Register temp = temps.Acquire();
7623 __ Vmov(temp, SRegisterFrom(source));
7624 __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
7625 __ Vmov(SRegisterFrom(destination), temp);
7626 } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
7627 vixl32::DRegister temp = temps.AcquireD();
7628 __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
7629 __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
7630 __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
7631 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
7632 } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
7633 vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
7634 int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
7635 DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
7636 vixl32::DRegister temp = temps.AcquireD();
7637 __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
7638 GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
7639 GetAssembler()->StoreDToOffset(temp, sp, mem);
7640 } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
7641 vixl32::DRegister first = DRegisterFrom(source);
7642 vixl32::DRegister second = DRegisterFrom(destination);
7643 vixl32::DRegister temp = temps.AcquireD();
7644 __ Vmov(temp, first);
7645 __ Vmov(first, second);
7646 __ Vmov(second, temp);
7647 } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
7648 vixl32::DRegister reg = source.IsFpuRegisterPair()
7649 ? DRegisterFrom(source)
7650 : DRegisterFrom(destination);
7651 int mem = source.IsFpuRegisterPair()
7652 ? destination.GetStackIndex()
7653 : source.GetStackIndex();
7654 vixl32::DRegister temp = temps.AcquireD();
7655 __ Vmov(temp, reg);
7656 GetAssembler()->LoadDFromOffset(reg, sp, mem);
7657 GetAssembler()->StoreDToOffset(temp, sp, mem);
7658 } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
7659 vixl32::SRegister reg = source.IsFpuRegister()
7660 ? SRegisterFrom(source)
7661 : SRegisterFrom(destination);
7662 int mem = source.IsFpuRegister()
7663 ? destination.GetStackIndex()
7664 : source.GetStackIndex();
7665 vixl32::Register temp = temps.Acquire();
7666 __ Vmov(temp, reg);
7667 GetAssembler()->LoadSFromOffset(reg, sp, mem);
7668 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7669 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
7670 vixl32::DRegister temp1 = temps.AcquireD();
7671 vixl32::DRegister temp2 = temps.AcquireD();
7672 __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
7673 __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
7674 __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
7675 __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
7676 } else {
7677 LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
7678 }
7679 }
7680
7681 void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
7682 __ Push(vixl32::Register(reg));
7683 }
7684
7685 void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
7686 __ Pop(vixl32::Register(reg));
7687 }
7688
7689 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
7690 HLoadClass::LoadKind desired_class_load_kind) {
7691 switch (desired_class_load_kind) {
7692 case HLoadClass::LoadKind::kInvalid:
7693 LOG(FATAL) << "UNREACHABLE";
7694 UNREACHABLE();
7695 case HLoadClass::LoadKind::kReferrersClass:
7696 break;
7697 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7698 case HLoadClass::LoadKind::kBootImageRelRo:
7699 case HLoadClass::LoadKind::kAppImageRelRo:
7700 case HLoadClass::LoadKind::kBssEntry:
7701 case HLoadClass::LoadKind::kBssEntryPublic:
7702 case HLoadClass::LoadKind::kBssEntryPackage:
7703 DCHECK(!GetCompilerOptions().IsJitCompiler());
7704 break;
7705 case HLoadClass::LoadKind::kJitBootImageAddress:
7706 case HLoadClass::LoadKind::kJitTableAddress:
7707 DCHECK(GetCompilerOptions().IsJitCompiler());
7708 break;
7709 case HLoadClass::LoadKind::kRuntimeCall:
7710 break;
7711 }
7712 return desired_class_load_kind;
7713 }
7714
7715 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
7716 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7717 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7718 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7719 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7720 cls,
7721 LocationFrom(calling_convention.GetRegisterAt(0)),
7722 LocationFrom(r0));
7723 DCHECK(calling_convention.GetRegisterAt(0).Is(r0));
7724 return;
7725 }
7726 DCHECK_EQ(cls->NeedsAccessCheck(),
7727 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7728 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7729
7730 const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
7731 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7732 ? LocationSummary::kCallOnSlowPath
7733 : LocationSummary::kNoCall;
7734 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7735 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7736 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7737 }
7738
7739 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
7740 locations->SetInAt(0, Location::RequiresRegister());
7741 }
7742 locations->SetOut(Location::RequiresRegister());
7743 if (load_kind == HLoadClass::LoadKind::kBssEntry ||
7744 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7745 load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
7746 if (codegen_->EmitNonBakerReadBarrier()) {
7747 // For non-Baker read barrier we have a temp-clobbering call.
7748 } else {
7749 // Rely on the type resolution or initialization and marking to save everything we need.
7750 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7751 }
7752 }
7753 }
7754
7755 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7756 // move.
7757 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7758 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7759 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7760 codegen_->GenerateLoadClassRuntimeCall(cls);
7761 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15);
7762 return;
7763 }
7764 DCHECK_EQ(cls->NeedsAccessCheck(),
7765 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7766 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7767
7768 LocationSummary* locations = cls->GetLocations();
7769 Location out_loc = locations->Out();
7770 vixl32::Register out = OutputRegister(cls);
7771
7772 const ReadBarrierOption read_barrier_option =
7773 cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
7774 bool generate_null_check = false;
7775 switch (load_kind) {
7776 case HLoadClass::LoadKind::kReferrersClass: {
7777 DCHECK(!cls->CanCallRuntime());
7778 DCHECK(!cls->MustGenerateClinitCheck());
7779 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7780 vixl32::Register current_method = InputRegisterAt(cls, 0);
7781 codegen_->GenerateGcRootFieldLoad(cls,
7782 out_loc,
7783 current_method,
7784 ArtMethod::DeclaringClassOffset().Int32Value(),
7785 read_barrier_option);
7786 break;
7787 }
7788 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7789 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7790 codegen_->GetCompilerOptions().IsBootImageExtension());
7791 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7792 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7793 codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7794 codegen_->EmitMovwMovtPlaceholder(labels, out);
7795 break;
7796 }
7797 case HLoadClass::LoadKind::kBootImageRelRo: {
7798 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7799 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
7800 codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
7801 break;
7802 }
7803 case HLoadClass::LoadKind::kAppImageRelRo: {
7804 DCHECK(codegen_->GetCompilerOptions().IsAppImage());
7805 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7806 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7807 codegen_->NewAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7808 codegen_->EmitMovwMovtPlaceholder(labels, out);
7809 __ Ldr(out, MemOperand(out, /*offset=*/ 0));
7810 break;
7811 }
7812 case HLoadClass::LoadKind::kBssEntry:
7813 case HLoadClass::LoadKind::kBssEntryPublic:
7814 case HLoadClass::LoadKind::kBssEntryPackage: {
7815 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls);
7816 codegen_->EmitMovwMovtPlaceholder(labels, out);
7817 // All aligned loads are implicitly atomic consume operations on ARM.
7818 codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7819 generate_null_check = true;
7820 break;
7821 }
7822 case HLoadClass::LoadKind::kJitBootImageAddress: {
7823 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7824 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7825 DCHECK_NE(address, 0u);
7826 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7827 break;
7828 }
7829 case HLoadClass::LoadKind::kJitTableAddress: {
7830 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
7831 cls->GetTypeIndex(),
7832 cls->GetClass()));
7833 // /* GcRoot<mirror::Class> */ out = *out
7834 codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7835 break;
7836 }
7837 case HLoadClass::LoadKind::kRuntimeCall:
7838 case HLoadClass::LoadKind::kInvalid:
7839 LOG(FATAL) << "UNREACHABLE";
7840 UNREACHABLE();
7841 }
7842
7843 if (generate_null_check || cls->MustGenerateClinitCheck()) {
7844 DCHECK(cls->CanCallRuntime());
7845 LoadClassSlowPathARMVIXL* slow_path =
7846 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls);
7847 codegen_->AddSlowPath(slow_path);
7848 if (generate_null_check) {
7849 __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7850 }
7851 if (cls->MustGenerateClinitCheck()) {
7852 GenerateClassInitializationCheck(slow_path, out);
7853 } else {
7854 __ Bind(slow_path->GetExitLabel());
7855 }
7856 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16);
7857 }
7858 }
7859
7860 void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7861 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7862 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7863 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7864 }
7865
7866 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7867 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7868 }
7869
7870 void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7871 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7872 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7873 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7874 }
7875
7876 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7877 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7878 }
7879
7880 void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7881 LocationSummary* locations =
7882 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7883 locations->SetInAt(0, Location::RequiresRegister());
7884 if (check->HasUses()) {
7885 locations->SetOut(Location::SameAsFirstInput());
7886 }
7887 // Rely on the type initialization to save everything we need.
7888 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7889 }
7890
7891 void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7892 // We assume the class is not null.
7893 LoadClassSlowPathARMVIXL* slow_path =
7894 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check);
7895 codegen_->AddSlowPath(slow_path);
7896 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
7897 }
7898
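// The byte at kClassStatusByteOffset holds the (shifted) class status; the class is
// visibly initialized when that byte is at least kShiftedVisiblyInitializedValue, and any
// lower value takes the slow path to initialize the class at runtime.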
7899 void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
7900 LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
7901 UseScratchRegisterScope temps(GetVIXLAssembler());
7902 vixl32::Register temp = temps.Acquire();
7903 __ Ldrb(temp, MemOperand(class_reg, kClassStatusByteOffset));
7904 __ Cmp(temp, kShiftedVisiblyInitializedValue);
7905 __ B(lo, slow_path->GetEntryLabel());
7906 __ Bind(slow_path->GetExitLabel());
7907 }
7908
7909 void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
7910 HTypeCheckInstruction* check,
7911 vixl32::Register temp,
7912 vixl32::FlagsUpdate flags_update) {
7913 uint32_t path_to_root = check->GetBitstringPathToRoot();
7914 uint32_t mask = check->GetBitstringMask();
7915 DCHECK(IsPowerOfTwo(mask + 1));
7916 size_t mask_bits = WhichPowerOf2(mask + 1);
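// Conceptually the check is `(class->status_ & mask) == path_to_root`, i.e. the low
// `mask_bits` bits of the status word must equal the bitstring path from the root class.
// The branches below only differ in how that comparison is materialized (LDRH+CMP,
// SUB+shift, or UBFX+CMP) depending on the immediate encodings and registers available.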
7917
7918 // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
7919 // the Z flag for BNE. This is indicated by the `flags_update` parameter.
7920 if (mask_bits == 16u) {
7921 // Load only the bitstring part of the status word.
7922 __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7923 // Check if the bitstring bits are equal to `path_to_root`.
7924 if (flags_update == SetFlags) {
7925 __ Cmp(temp, path_to_root);
7926 } else {
7927 __ Sub(temp, temp, path_to_root);
7928 }
7929 } else {
7930 // /* uint32_t */ temp = temp->status_
7931 __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7932 if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
7933 // Compare the bitstring bits using SUB.
7934 __ Sub(temp, temp, path_to_root);
7935 // Shift out bits that do not contribute to the comparison.
7936 __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7937 } else if (IsUint<16>(path_to_root)) {
7938 if (temp.IsLow()) {
7939 // Note: Optimized for size but contains one more dependent instruction than necessary.
7940 // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
7941 // macro assembler would use the high reg IP for the constant by default.
7942 // Compare the bitstring bits using SUB.
7943 __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2
7944 __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3
7945 // Shift out bits that do not contribute to the comparison.
7946 __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7947 } else {
7948 // Extract the bitstring bits.
7949 __ Ubfx(temp, temp, 0, mask_bits);
7950 // Check if the bitstring bits are equal to `path_to_root`.
7951 if (flags_update == SetFlags) {
7952 __ Cmp(temp, path_to_root);
7953 } else {
7954 __ Sub(temp, temp, path_to_root);
7955 }
7956 }
7957 } else {
7958 // Shift out bits that do not contribute to the comparison.
7959 __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7960 // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
7961 if (flags_update == SetFlags) {
7962 __ Cmp(temp, path_to_root << (32u - mask_bits));
7963 } else {
7964 __ Sub(temp, temp, path_to_root << (32u - mask_bits));
7965 }
7966 }
7967 }
7968 }
7969
7970 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
7971 HLoadString::LoadKind desired_string_load_kind) {
7972 switch (desired_string_load_kind) {
7973 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7974 case HLoadString::LoadKind::kBootImageRelRo:
7975 case HLoadString::LoadKind::kBssEntry:
7976 DCHECK(!GetCompilerOptions().IsJitCompiler());
7977 break;
7978 case HLoadString::LoadKind::kJitBootImageAddress:
7979 case HLoadString::LoadKind::kJitTableAddress:
7980 DCHECK(GetCompilerOptions().IsJitCompiler());
7981 break;
7982 case HLoadString::LoadKind::kRuntimeCall:
7983 break;
7984 }
7985 return desired_string_load_kind;
7986 }
7987
7988 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
7989 LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
7990 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7991 HLoadString::LoadKind load_kind = load->GetLoadKind();
7992 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7993 locations->SetOut(LocationFrom(r0));
7994 } else {
7995 locations->SetOut(Location::RequiresRegister());
7996 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7997 if (codegen_->EmitNonBakerReadBarrier()) {
7998 // For non-Baker read barrier we have a temp-clobbering call.
7999 } else {
8000 // Rely on the pResolveString and marking to save everything we need, including temps.
8001 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
8002 }
8003 }
8004 }
8005 }
8006
8007 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
8008 // move.
8009 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
8010 LocationSummary* locations = load->GetLocations();
8011 Location out_loc = locations->Out();
8012 vixl32::Register out = OutputRegister(load);
8013 HLoadString::LoadKind load_kind = load->GetLoadKind();
8014
8015 switch (load_kind) {
8016 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
8017 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
8018 codegen_->GetCompilerOptions().IsBootImageExtension());
8019 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
8020 codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
8021 codegen_->EmitMovwMovtPlaceholder(labels, out);
8022 return;
8023 }
8024 case HLoadString::LoadKind::kBootImageRelRo: {
8025 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
8026 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
8027 codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
8028 return;
8029 }
8030 case HLoadString::LoadKind::kBssEntry: {
8031 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
8032 codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
8033 codegen_->EmitMovwMovtPlaceholder(labels, out);
8034 // All aligned loads are implicitly atomic consume operations on ARM.
8035 codegen_->GenerateGcRootFieldLoad(
8036 load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
8037 LoadStringSlowPathARMVIXL* slow_path =
8038 new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
8039 codegen_->AddSlowPath(slow_path);
8040 __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
8041 __ Bind(slow_path->GetExitLabel());
8042 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17);
8043 return;
8044 }
8045 case HLoadString::LoadKind::kJitBootImageAddress: {
8046 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
8047 DCHECK_NE(address, 0u);
8048 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
8049 return;
8050 }
8051 case HLoadString::LoadKind::kJitTableAddress: {
8052 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
8053 load->GetStringIndex(),
8054 load->GetString()));
8055 // /* GcRoot<mirror::String> */ out = *out
8056 codegen_->GenerateGcRootFieldLoad(
8057 load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
8058 return;
8059 }
8060 default:
8061 break;
8062 }
8063
8064 DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
8065 InvokeRuntimeCallingConventionARMVIXL calling_convention;
8066 __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
8067 codegen_->InvokeRuntime(kQuickResolveString, load);
8068 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
8069 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18);
8070 }
8071
8072 static int32_t GetExceptionTlsOffset() {
8073 return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
8074 }
8075
8076 void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
8077 LocationSummary* locations =
8078 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
8079 locations->SetOut(Location::RequiresRegister());
8080 }
8081
8082 void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
8083 vixl32::Register out = OutputRegister(load);
8084 GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
8085 }
8086
8087
8088 void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
8089 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
8090 }
8091
8092 void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) {
8093 UseScratchRegisterScope temps(GetVIXLAssembler());
8094 vixl32::Register temp = temps.Acquire();
8095 __ Mov(temp, 0);
8096 GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
8097 }
8098
8099 void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
8100 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8101 instruction, LocationSummary::kCallOnMainOnly);
8102 InvokeRuntimeCallingConventionARMVIXL calling_convention;
8103 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8104 }
8105
8106 void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
8107 codegen_->InvokeRuntime(kQuickDeliverException, instruction);
8108 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
8109 }
8110
8111 // Temp is used for read barrier.
8112 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8113 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8114 return 1;
8115 }
8116 if (emit_read_barrier &&
8117 (kUseBakerReadBarrier ||
8118 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
8119 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
8120 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
8121 return 1;
8122 }
8123 return 0;
8124 }
8125
8126 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
8127 // interface pointer, one for loading the current interface.
8128 // The other checks have one temp for loading the object's class.
8129 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8130 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8131 return 3;
8132 }
8133 return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
8134 }
8135
8136 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8137 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
8138 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8139 bool baker_read_barrier_slow_path = false;
8140 switch (type_check_kind) {
8141 case TypeCheckKind::kExactCheck:
8142 case TypeCheckKind::kAbstractClassCheck:
8143 case TypeCheckKind::kClassHierarchyCheck:
8144 case TypeCheckKind::kArrayObjectCheck:
8145 case TypeCheckKind::kInterfaceCheck: {
8146 bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
8147 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
8148 baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
8149 (type_check_kind != TypeCheckKind::kInterfaceCheck);
8150 break;
8151 }
8152 case TypeCheckKind::kArrayCheck:
8153 case TypeCheckKind::kUnresolvedCheck:
8154 call_kind = LocationSummary::kCallOnSlowPath;
8155 break;
8156 case TypeCheckKind::kBitstringCheck:
8157 break;
8158 }
8159
8160 LocationSummary* locations =
8161 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8162 if (baker_read_barrier_slow_path) {
8163 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
8164 }
8165 locations->SetInAt(0, Location::RequiresRegister());
8166 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8167 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8168 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8169 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8170 } else {
8171 locations->SetInAt(1, Location::RequiresRegister());
8172 }
8173 // The "out" register is used as a temporary, so it overlaps with the inputs.
8174 // Note that TypeCheckSlowPathARM uses this register too.
8175 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
8176 locations->AddRegisterTemps(
8177 NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
8178 }
8179
8180 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8181 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8182 LocationSummary* locations = instruction->GetLocations();
8183 Location obj_loc = locations->InAt(0);
8184 vixl32::Register obj = InputRegisterAt(instruction, 0);
8185 vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8186 ? vixl32::Register()
8187 : InputRegisterAt(instruction, 1);
8188 Location out_loc = locations->Out();
8189 vixl32::Register out = OutputRegister(instruction);
8190 const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
8191 DCHECK_LE(num_temps, 1u);
8192 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
8193 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8194 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8195 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8196 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8197 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8198 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8199 const uint32_t object_array_data_offset =
8200 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8201 vixl32::Label done;
8202 vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
8203 SlowPathCodeARMVIXL* slow_path = nullptr;
8204
8205 // Return 0 if `obj` is null.
8206 // Avoid null check if we know obj is not null.
8207 if (instruction->MustDoNullCheck()) {
8208 DCHECK(!out.Is(obj));
8209 __ Mov(out, 0);
8210 __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8211 }
8212
8213 switch (type_check_kind) {
8214 case TypeCheckKind::kExactCheck: {
8215 ReadBarrierOption read_barrier_option =
8216 codegen_->ReadBarrierOptionForInstanceOf(instruction);
8217 // /* HeapReference<Class> */ out = obj->klass_
8218 GenerateReferenceLoadTwoRegisters(instruction,
8219 out_loc,
8220 obj_loc,
8221 class_offset,
8222 maybe_temp_loc,
8223 read_barrier_option);
8224 // Classes must be equal for the instanceof to succeed.
8225 __ Cmp(out, cls);
8226 // We speculatively set the result to false without changing the condition
8227 // flags, which allows us to avoid some branching later.
8228 __ Mov(LeaveFlags, out, 0);
8229
8230 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8231 // we check that the output is in a low register, so that a 16-bit MOV
8232 // encoding can be used.
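// With a low `out` this emits CMP, a flags-preserving MOV #0, then IT EQ / MOVEQ #1; with a
// high `out` we fall back to a conditional branch over a plain MOV #1.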
8233 if (out.IsLow()) {
8234 // We use the scope because of the IT block that follows.
8235 ExactAssemblyScope guard(GetVIXLAssembler(),
8236 2 * vixl32::k16BitT32InstructionSizeInBytes,
8237 CodeBufferCheckScope::kExactSize);
8238
8239 __ it(eq);
8240 __ mov(eq, out, 1);
8241 } else {
8242 __ B(ne, final_label, /* is_far_target= */ false);
8243 __ Mov(out, 1);
8244 }
8245
8246 break;
8247 }
8248
8249 case TypeCheckKind::kAbstractClassCheck: {
8250 ReadBarrierOption read_barrier_option =
8251 codegen_->ReadBarrierOptionForInstanceOf(instruction);
8252 // /* HeapReference<Class> */ out = obj->klass_
8253 GenerateReferenceLoadTwoRegisters(instruction,
8254 out_loc,
8255 obj_loc,
8256 class_offset,
8257 maybe_temp_loc,
8258 read_barrier_option);
8259 // If the class is abstract, we eagerly fetch the super class of the
8260 // object to avoid doing a comparison we know will fail.
8261 vixl32::Label loop;
8262 __ Bind(&loop);
8263 // /* HeapReference<Class> */ out = out->super_class_
8264 GenerateReferenceLoadOneRegister(instruction,
8265 out_loc,
8266 super_offset,
8267 maybe_temp_loc,
8268 read_barrier_option);
8269 // If `out` is null, we use it for the result, and jump to the final label.
8270 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8271 __ Cmp(out, cls);
8272 __ B(ne, &loop, /* is_far_target= */ false);
8273 __ Mov(out, 1);
8274 break;
8275 }
8276
8277 case TypeCheckKind::kClassHierarchyCheck: {
8278 ReadBarrierOption read_barrier_option =
8279 codegen_->ReadBarrierOptionForInstanceOf(instruction);
8280 // /* HeapReference<Class> */ out = obj->klass_
8281 GenerateReferenceLoadTwoRegisters(instruction,
8282 out_loc,
8283 obj_loc,
8284 class_offset,
8285 maybe_temp_loc,
8286 read_barrier_option);
8287 // Walk over the class hierarchy to find a match.
8288 vixl32::Label loop, success;
8289 __ Bind(&loop);
8290 __ Cmp(out, cls);
8291 __ B(eq, &success, /* is_far_target= */ false);
8292 // /* HeapReference<Class> */ out = out->super_class_
8293 GenerateReferenceLoadOneRegister(instruction,
8294 out_loc,
8295 super_offset,
8296 maybe_temp_loc,
8297 read_barrier_option);
8298 // This is essentially a null check, but it sets the condition flags to the
8299 // proper value for the code that follows the loop, i.e. not `eq`.
8300 __ Cmp(out, 1);
8301 __ B(hs, &loop, /* is_far_target= */ false);
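// `CMP out, #1` sets the carry flag (HS) exactly when `out` is non-null, so the walk continues;
// a null `out` falls through with Z clear, i.e. the flags are not `eq`.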
8302
8303 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8304 // we check that the output is in a low register, so that a 16-bit MOV
8305 // encoding can be used.
8306 if (out.IsLow()) {
8307 // If `out` is null, we use it for the result, and the condition flags
8308 // have already been set to `ne`, so the IT block that comes afterwards
8309 // (and which handles the successful case) turns into a NOP (instead of
8310 // overwriting `out`).
8311 __ Bind(&success);
8312
8313 // We use the scope because of the IT block that follows.
8314 ExactAssemblyScope guard(GetVIXLAssembler(),
8315 2 * vixl32::k16BitT32InstructionSizeInBytes,
8316 CodeBufferCheckScope::kExactSize);
8317
8318 // There is only one branch to the `success` label (which is bound to this
8319 // IT block), and it has the same condition, `eq`, so in that case the MOV
8320 // is executed.
8321 __ it(eq);
8322 __ mov(eq, out, 1);
8323 } else {
8324 // If `out` is null, we use it for the result, and jump to the final label.
8325 __ B(final_label);
8326 __ Bind(&success);
8327 __ Mov(out, 1);
8328 }
8329
8330 break;
8331 }
8332
8333 case TypeCheckKind::kArrayObjectCheck: {
8334 ReadBarrierOption read_barrier_option =
8335 codegen_->ReadBarrierOptionForInstanceOf(instruction);
8336 // /* HeapReference<Class> */ out = obj->klass_
8337 GenerateReferenceLoadTwoRegisters(instruction,
8338 out_loc,
8339 obj_loc,
8340 class_offset,
8341 maybe_temp_loc,
8342 read_barrier_option);
8343 // Do an exact check.
8344 vixl32::Label exact_check;
8345 __ Cmp(out, cls);
8346 __ B(eq, &exact_check, /* is_far_target= */ false);
8347 // Otherwise, we need to check that the object's class is a non-primitive array.
8348 // /* HeapReference<Class> */ out = out->component_type_
8349 GenerateReferenceLoadOneRegister(instruction,
8350 out_loc,
8351 component_offset,
8352 maybe_temp_loc,
8353 read_barrier_option);
8354 // If `out` is null, we use it for the result, and jump to the final label.
8355 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8356 GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
8357 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8358 __ Cmp(out, 0);
8359 // We speculatively set the result to false without changing the condition
8360 // flags, which allows us to avoid some branching later.
8361 __ Mov(LeaveFlags, out, 0);
8362
8363 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8364 // we check that the output is in a low register, so that a 16-bit MOV
8365 // encoding can be used.
8366 if (out.IsLow()) {
8367 __ Bind(&exact_check);
8368
8369 // We use the scope because of the IT block that follows.
8370 ExactAssemblyScope guard(GetVIXLAssembler(),
8371 2 * vixl32::k16BitT32InstructionSizeInBytes,
8372 CodeBufferCheckScope::kExactSize);
8373
8374 __ it(eq);
8375 __ mov(eq, out, 1);
8376 } else {
8377 __ B(ne, final_label, /* is_far_target= */ false);
8378 __ Bind(&exact_check);
8379 __ Mov(out, 1);
8380 }
8381
8382 break;
8383 }
8384
8385 case TypeCheckKind::kArrayCheck: {
8386 // No read barrier since the slow path will retry upon failure.
8387 // /* HeapReference<Class> */ out = obj->klass_
8388 GenerateReferenceLoadTwoRegisters(instruction,
8389 out_loc,
8390 obj_loc,
8391 class_offset,
8392 maybe_temp_loc,
8393 kWithoutReadBarrier);
8394 __ Cmp(out, cls);
8395 DCHECK(locations->OnlyCallsOnSlowPath());
8396 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8397 instruction, /* is_fatal= */ false);
8398 codegen_->AddSlowPath(slow_path);
8399 __ B(ne, slow_path->GetEntryLabel());
8400 __ Mov(out, 1);
8401 break;
8402 }
8403
8404 case TypeCheckKind::kInterfaceCheck: {
8405 if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
8406 DCHECK(locations->OnlyCallsOnSlowPath());
8407 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8408 instruction, /* is_fatal= */ false);
8409 codegen_->AddSlowPath(slow_path);
8410 if (codegen_->EmitNonBakerReadBarrier()) {
8411 __ B(slow_path->GetEntryLabel());
8412 break;
8413 }
8414 // For Baker read barrier, take the slow path while marking.
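// `mr` is the marking register, mirroring Thread::Current()->GetIsGcMarking(), so it is
// non-zero only while the GC is marking.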
8415 __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
8416 }
8417
8418 // Fast-path without read barriers.
8419 UseScratchRegisterScope temps(GetVIXLAssembler());
8420 vixl32::Register temp = RegisterFrom(maybe_temp_loc);
8421 vixl32::Register temp2 = temps.Acquire();
8422 // /* HeapReference<Class> */ temp = obj->klass_
8423 __ Ldr(temp, MemOperand(obj, class_offset));
8424 GetAssembler()->MaybeUnpoisonHeapReference(temp);
8425 // /* HeapReference<Class> */ temp = temp->iftable_
8426 __ Ldr(temp, MemOperand(temp, iftable_offset));
8427 GetAssembler()->MaybeUnpoisonHeapReference(temp);
8428 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8429 __ Ldr(out, MemOperand(temp, array_length_offset));
8430 // Loop through the `IfTable` and check if any class matches.
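// Each `IfTable` entry spans two references (the interface class and its method array), hence
// the stride of two references and the decrement by 2 below.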
8431 vixl32::Label loop;
8432 __ Bind(&loop);
8433 // If taken, the result in `out` is already 0 (false).
8434 __ CompareAndBranchIfZero(out, &done, /* is_far_target= */ false);
8435 __ Ldr(temp2, MemOperand(temp, object_array_data_offset));
8436 GetAssembler()->MaybeUnpoisonHeapReference(temp2);
8437 // Go to next interface.
8438 __ Add(temp, temp, static_cast<uint32_t>(2 * kHeapReferenceSize));
8439 __ Sub(out, out, 2);
8440 // Compare the classes and continue the loop if they do not match.
8441 __ Cmp(cls, temp2);
8442 __ B(ne, &loop);
8443 __ Mov(out, 1);
8444 break;
8445 }
8446
8447 case TypeCheckKind::kUnresolvedCheck: {
8448 // Note that we indeed only call on slow path, but we always go
8449 // into the slow path for the unresolved check case.
8450 //
8451 // We cannot directly call the InstanceofNonTrivial runtime
8452 // entry point without resorting to a type checking slow path
8453 // here (i.e. by calling InvokeRuntime directly), as it would
8454 // require to assign fixed registers for the inputs of this
8455 // HInstanceOf instruction (following the runtime calling
8456 // convention), which might be cluttered by the potential first
8457 // read barrier emission at the beginning of this method.
8458 //
8459 // TODO: Introduce a new runtime entry point taking the object
8460 // to test (instead of its class) as argument, and let it deal
8461 // with the read barrier issues. This will let us refactor this
8462 // case of the `switch` code as it was previously (with a direct
8463 // call to the runtime not using a type checking slow path).
8464 // This should also be beneficial for the other cases above.
8465 DCHECK(locations->OnlyCallsOnSlowPath());
8466 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8467 instruction, /* is_fatal= */ false);
8468 codegen_->AddSlowPath(slow_path);
8469 __ B(slow_path->GetEntryLabel());
8470 break;
8471 }
8472
8473 case TypeCheckKind::kBitstringCheck: {
8474 // /* HeapReference<Class> */ out = obj->klass_
8475 GenerateReferenceLoadTwoRegisters(instruction,
8476 out_loc,
8477 obj_loc,
8478 class_offset,
8479 maybe_temp_loc,
8480 kWithoutReadBarrier);
8481
8482 GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
8483 // If `out` is a low reg and we would have another low reg temp, we could
8484 // optimize this as RSBS+ADC, see GenerateConditionWithZero().
8485 //
8486 // Also, in some cases when `out` is a low reg and we're loading a constant to IP
8487 // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
8488 // would be the same and we would have fewer direct data dependencies.
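// CLZ returns 32 only for a zero input, so shifting right by 5 yields 1 exactly when the
// bitstring comparison above produced zero, i.e. when the type check succeeds.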
8489 codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR
8490 break;
8491 }
8492 }
8493
8494 if (done.IsReferenced()) {
8495 __ Bind(&done);
8496 }
8497
8498 if (slow_path != nullptr) {
8499 __ Bind(slow_path->GetExitLabel());
8500 }
8501 }
8502
8503 void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8504 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8505 LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
8506 LocationSummary* locations =
8507 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8508 locations->SetInAt(0, Location::RequiresRegister());
8509 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8510 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8511 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8512 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8513 } else {
8514 locations->SetInAt(1, Location::RequiresRegister());
8515 }
8516 locations->AddRegisterTemps(
8517 NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
8518 }
8519
8520 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8521 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8522 LocationSummary* locations = instruction->GetLocations();
8523 Location obj_loc = locations->InAt(0);
8524 vixl32::Register obj = InputRegisterAt(instruction, 0);
8525 vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8526 ? vixl32::Register()
8527 : InputRegisterAt(instruction, 1);
8528 Location temp_loc = locations->GetTemp(0);
8529 vixl32::Register temp = RegisterFrom(temp_loc);
8530 const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
8531 DCHECK_LE(num_temps, 3u);
8532 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8533 Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
8534 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8535 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8536 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8537 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8538 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8539 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8540 const uint32_t object_array_data_offset =
8541 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8542
8543 bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
8544 SlowPathCodeARMVIXL* type_check_slow_path =
8545 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8546 instruction, is_type_check_slow_path_fatal);
8547 codegen_->AddSlowPath(type_check_slow_path);
8548
8549 vixl32::Label done;
8550 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
8551 // Avoid null check if we know obj is not null.
8552 if (instruction->MustDoNullCheck()) {
8553 __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8554 }
8555
8556 switch (type_check_kind) {
8557 case TypeCheckKind::kExactCheck:
8558 case TypeCheckKind::kArrayCheck: {
8559 // /* HeapReference<Class> */ temp = obj->klass_
8560 GenerateReferenceLoadTwoRegisters(instruction,
8561 temp_loc,
8562 obj_loc,
8563 class_offset,
8564 maybe_temp2_loc,
8565 kWithoutReadBarrier);
8566
8567 __ Cmp(temp, cls);
8568 // Jump to slow path for throwing the exception or doing a
8569 // more involved array check.
8570 __ B(ne, type_check_slow_path->GetEntryLabel());
8571 break;
8572 }
8573
8574 case TypeCheckKind::kAbstractClassCheck: {
8575 // /* HeapReference<Class> */ temp = obj->klass_
8576 GenerateReferenceLoadTwoRegisters(instruction,
8577 temp_loc,
8578 obj_loc,
8579 class_offset,
8580 maybe_temp2_loc,
8581 kWithoutReadBarrier);
8582
8583 // If the class is abstract, we eagerly fetch the super class of the
8584 // object to avoid doing a comparison we know will fail.
8585 vixl32::Label loop;
8586 __ Bind(&loop);
8587 // /* HeapReference<Class> */ temp = temp->super_class_
8588 GenerateReferenceLoadOneRegister(instruction,
8589 temp_loc,
8590 super_offset,
8591 maybe_temp2_loc,
8592 kWithoutReadBarrier);
8593
8594 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8595 // exception.
8596 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8597
8598 // Otherwise, compare the classes.
8599 __ Cmp(temp, cls);
8600 __ B(ne, &loop, /* is_far_target= */ false);
8601 break;
8602 }
8603
8604 case TypeCheckKind::kClassHierarchyCheck: {
8605 // /* HeapReference<Class> */ temp = obj->klass_
8606 GenerateReferenceLoadTwoRegisters(instruction,
8607 temp_loc,
8608 obj_loc,
8609 class_offset,
8610 maybe_temp2_loc,
8611 kWithoutReadBarrier);
8612
8613 // Walk over the class hierarchy to find a match.
8614 vixl32::Label loop;
8615 __ Bind(&loop);
8616 __ Cmp(temp, cls);
8617 __ B(eq, final_label, /* is_far_target= */ false);
8618
8619 // /* HeapReference<Class> */ temp = temp->super_class_
8620 GenerateReferenceLoadOneRegister(instruction,
8621 temp_loc,
8622 super_offset,
8623 maybe_temp2_loc,
8624 kWithoutReadBarrier);
8625
8626 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8627 // exception.
8628 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8629 // Otherwise, jump to the beginning of the loop.
8630 __ B(&loop);
8631 break;
8632 }
8633
8634 case TypeCheckKind::kArrayObjectCheck: {
8635 // /* HeapReference<Class> */ temp = obj->klass_
8636 GenerateReferenceLoadTwoRegisters(instruction,
8637 temp_loc,
8638 obj_loc,
8639 class_offset,
8640 maybe_temp2_loc,
8641 kWithoutReadBarrier);
8642
8643 // Do an exact check.
8644 __ Cmp(temp, cls);
8645 __ B(eq, final_label, /* is_far_target= */ false);
8646
8647 // Otherwise, we need to check that the object's class is a non-primitive array.
8648 // /* HeapReference<Class> */ temp = temp->component_type_
8649 GenerateReferenceLoadOneRegister(instruction,
8650 temp_loc,
8651 component_offset,
8652 maybe_temp2_loc,
8653 kWithoutReadBarrier);
8654 // If the component type is null, jump to the slow path to throw the exception.
8655 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8656 // Otherwise, the object is indeed an array; further check that its component type is not a
8657 // primitive type.
8658 GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
8659 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8660 __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
8661 break;
8662 }
8663
8664 case TypeCheckKind::kUnresolvedCheck:
8665 // We always go into the type check slow path for the unresolved check case.
8666 // We cannot directly call the CheckCast runtime entry point
8667 // without resorting to a type checking slow path here (i.e. by
8668 // calling InvokeRuntime directly), as it would require to
8669 // assign fixed registers for the inputs of this HInstanceOf
8670 // instruction (following the runtime calling convention), which
8671 // might be cluttered by the potential first read barrier
8672 // emission at the beginning of this method.
8673
8674 __ B(type_check_slow_path->GetEntryLabel());
8675 break;
8676
8677 case TypeCheckKind::kInterfaceCheck: {
8678 // Avoid read barriers to improve performance of the fast path. We cannot get false
8679 // positives by doing this.
8680 // /* HeapReference<Class> */ temp = obj->klass_
8681 GenerateReferenceLoadTwoRegisters(instruction,
8682 temp_loc,
8683 obj_loc,
8684 class_offset,
8685 maybe_temp2_loc,
8686 kWithoutReadBarrier);
8687
8688 // /* HeapReference<Class> */ temp = temp->iftable_
8689 GenerateReferenceLoadOneRegister(instruction,
8690 temp_loc,
8691 iftable_offset,
8692 maybe_temp2_loc,
8693 kWithoutReadBarrier);
8694 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8695 __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
8696 // Loop through the iftable and check if any class matches.
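// As in the InstanceOf fast path, entries are (interface class, method array) pairs, hence the
// stride of two references and the decrement by 2 per iteration.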
8697 vixl32::Label start_loop;
8698 __ Bind(&start_loop);
8699 __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
8700 type_check_slow_path->GetEntryLabel());
8701 __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
8702 GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
8703 // Go to next interface.
8704 __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
8705 __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
8706 // Compare the classes and continue the loop if they do not match.
8707 __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
8708 __ B(ne, &start_loop, /* is_far_target= */ false);
8709 break;
8710 }
8711
8712 case TypeCheckKind::kBitstringCheck: {
8713 // /* HeapReference<Class> */ temp = obj->klass_
8714 GenerateReferenceLoadTwoRegisters(instruction,
8715 temp_loc,
8716 obj_loc,
8717 class_offset,
8718 maybe_temp2_loc,
8719 kWithoutReadBarrier);
8720
8721 GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
8722 __ B(ne, type_check_slow_path->GetEntryLabel());
8723 break;
8724 }
8725 }
8726 if (done.IsReferenced()) {
8727 __ Bind(&done);
8728 }
8729
8730 __ Bind(type_check_slow_path->GetExitLabel());
8731 }
8732
8733 void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8734 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8735 instruction, LocationSummary::kCallOnMainOnly);
8736 InvokeRuntimeCallingConventionARMVIXL calling_convention;
8737 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8738 }
8739
8740 void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8741 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
8742 instruction);
8743 if (instruction->IsEnter()) {
8744 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8745 } else {
8746 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8747 }
8748 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 19);
8749 }
8750
8751 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
8752 HandleBitwiseOperation(instruction, AND);
8753 }
8754
8755 void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
8756 HandleBitwiseOperation(instruction, ORR);
8757 }
8758
8759 void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
8760 HandleBitwiseOperation(instruction, EOR);
8761 }
8762
8763 void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
8764 LocationSummary* locations =
8765 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8766 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8767 || instruction->GetResultType() == DataType::Type::kInt64);
8768 // Note: GVN reorders commutative operations to have the constant on the right hand side.
8769 locations->SetInAt(0, Location::RequiresRegister());
8770 locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
8771 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8772 }
8773
8774 void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
8775 HandleBitwiseOperation(instruction);
8776 }
8777
8778 void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
8779 HandleBitwiseOperation(instruction);
8780 }
8781
8782 void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
8783 HandleBitwiseOperation(instruction);
8784 }
8785
8786 void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8787 LocationSummary* locations =
8788 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8789 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8790 || instruction->GetResultType() == DataType::Type::kInt64);
8791
8792 locations->SetInAt(0, Location::RequiresRegister());
8793 locations->SetInAt(1, Location::RequiresRegister());
8794 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8795 }
8796
8797 void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8798 LocationSummary* locations = instruction->GetLocations();
8799 Location first = locations->InAt(0);
8800 Location second = locations->InAt(1);
8801 Location out = locations->Out();
8802
8803 if (instruction->GetResultType() == DataType::Type::kInt32) {
8804 vixl32::Register first_reg = RegisterFrom(first);
8805 vixl32::Register second_reg = RegisterFrom(second);
8806 vixl32::Register out_reg = RegisterFrom(out);
8807
8808 switch (instruction->GetOpKind()) {
8809 case HInstruction::kAnd:
8810 __ Bic(out_reg, first_reg, second_reg);
8811 break;
8812 case HInstruction::kOr:
8813 __ Orn(out_reg, first_reg, second_reg);
8814 break;
8815 // There is no EON on arm.
8816 case HInstruction::kXor:
8817 default:
8818 LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8819 UNREACHABLE();
8820 }
8821 return;
8822
8823 } else {
8824 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8825 vixl32::Register first_low = LowRegisterFrom(first);
8826 vixl32::Register first_high = HighRegisterFrom(first);
8827 vixl32::Register second_low = LowRegisterFrom(second);
8828 vixl32::Register second_high = HighRegisterFrom(second);
8829 vixl32::Register out_low = LowRegisterFrom(out);
8830 vixl32::Register out_high = HighRegisterFrom(out);
8831
8832 switch (instruction->GetOpKind()) {
8833 case HInstruction::kAnd:
8834 __ Bic(out_low, first_low, second_low);
8835 __ Bic(out_high, first_high, second_high);
8836 break;
8837 case HInstruction::kOr:
8838 __ Orn(out_low, first_low, second_low);
8839 __ Orn(out_high, first_high, second_high);
8840 break;
8841 // There is no EON on arm.
8842 case HInstruction::kXor:
8843 default:
8844 LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8845 UNREACHABLE();
8846 }
8847 }
8848 }
8849
8850 void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
8851 HDataProcWithShifterOp* instruction) {
8852 DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
8853 instruction->GetType() == DataType::Type::kInt64);
8854 LocationSummary* locations =
8855 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8856 const bool overlap = instruction->GetType() == DataType::Type::kInt64 &&
8857 HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
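// For 64-bit extension ops the (sign-extended) input is still needed after the low half of the
// result has been written, so the output must not alias the inputs.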
8858
8859 locations->SetInAt(0, Location::RequiresRegister());
8860 locations->SetInAt(1, Location::RequiresRegister());
8861 locations->SetOut(Location::RequiresRegister(),
8862 overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
8863 }
8864
8865 void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
8866 HDataProcWithShifterOp* instruction) {
8867 const LocationSummary* const locations = instruction->GetLocations();
8868 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
8869 const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
8870
8871 if (instruction->GetType() == DataType::Type::kInt32) {
8872 const vixl32::Register first = InputRegisterAt(instruction, 0);
8873 const vixl32::Register output = OutputRegister(instruction);
8874 const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64
8875 ? LowRegisterFrom(locations->InAt(1))
8876 : InputRegisterAt(instruction, 1);
8877
8878 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8879 DCHECK_EQ(kind, HInstruction::kAdd);
8880
8881 switch (op_kind) {
8882 case HDataProcWithShifterOp::kUXTB:
8883 __ Uxtab(output, first, second);
8884 break;
8885 case HDataProcWithShifterOp::kUXTH:
8886 __ Uxtah(output, first, second);
8887 break;
8888 case HDataProcWithShifterOp::kSXTB:
8889 __ Sxtab(output, first, second);
8890 break;
8891 case HDataProcWithShifterOp::kSXTH:
8892 __ Sxtah(output, first, second);
8893 break;
8894 default:
8895 LOG(FATAL) << "Unexpected operation kind: " << op_kind;
8896 UNREACHABLE();
8897 }
8898 } else {
8899 GenerateDataProcInstruction(kind,
8900 output,
8901 first,
8902 Operand(second,
8903 ShiftFromOpKind(op_kind),
8904 instruction->GetShiftAmount()),
8905 codegen_);
8906 }
8907 } else {
8908 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
8909
8910 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8911 const vixl32::Register second = InputRegisterAt(instruction, 1);
8912
8913 DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
8914 GenerateDataProc(kind,
8915 locations->Out(),
8916 locations->InAt(0),
8917 second,
8918 Operand(second, ShiftType::ASR, 31),
8919 codegen_);
8920 } else {
8921 GenerateLongDataProc(instruction, codegen_);
8922 }
8923 }
8924 }
8925
8926 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8927 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
8928 vixl32::Register first,
8929 uint32_t value) {
8930 // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
8931 if (value == 0xffffffffu) {
8932 if (!out.Is(first)) {
8933 __ Mov(out, first);
8934 }
8935 return;
8936 }
8937 if (value == 0u) {
8938 __ Mov(out, 0);
8939 return;
8940 }
8941 if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
8942 __ And(out, first, value);
8943 } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
8944 __ Bic(out, first, ~value);
8945 } else {
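// Neither AND nor BIC can encode the immediate; the remaining case handled here is a
// contiguous low mask (`value + 1` is a power of two), e.g. 0xffff, which UBFX extracts.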
8946 DCHECK(IsPowerOfTwo(value + 1));
8947 __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
8948 }
8949 }
8950
8951 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8952 void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
8953 vixl32::Register first,
8954 uint32_t value) {
8955 // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
8956 if (value == 0u) {
8957 if (!out.Is(first)) {
8958 __ Mov(out, first);
8959 }
8960 return;
8961 }
8962 if (value == 0xffffffffu) {
8963 __ Mvn(out, 0);
8964 return;
8965 }
8966 if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
8967 __ Orr(out, first, value);
8968 } else {
8969 DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
8970 __ Orn(out, first, ~value);
8971 }
8972 }
8973
8974 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8975 void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
8976 vixl32::Register first,
8977 uint32_t value) {
8978 // Optimize the special case for individual halves of `xor-long` (`xor` is simplified earlier).
8979 if (value == 0u) {
8980 if (!out.Is(first)) {
8981 __ Mov(out, first);
8982 }
8983 return;
8984 }
8985 __ Eor(out, first, value);
8986 }
8987
8988 void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
8989 Location first,
8990 uint64_t value) {
8991 vixl32::Register out_low = LowRegisterFrom(out);
8992 vixl32::Register out_high = HighRegisterFrom(out);
8993 vixl32::Register first_low = LowRegisterFrom(first);
8994 vixl32::Register first_high = HighRegisterFrom(first);
8995 uint32_t value_low = Low32Bits(value);
8996 uint32_t value_high = High32Bits(value);
8997 if (value_low == 0u) {
8998 if (!out_low.Is(first_low)) {
8999 __ Mov(out_low, first_low);
9000 }
9001 __ Add(out_high, first_high, value_high);
9002 return;
9003 }
9004 __ Adds(out_low, first_low, value_low);
9005 if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) {
9006 __ Adc(out_high, first_high, value_high);
9007 } else {
9008 DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high));
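// SBC computes Rn + NOT(operand) + carry, so SBC with ~value_high adds value_high plus the
// carry from the low word, matching the ADC in the other branch.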
9009 __ Sbc(out_high, first_high, ~value_high);
9010 }
9011 }
9012
9013 void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
9014 LocationSummary* locations = instruction->GetLocations();
9015 Location first = locations->InAt(0);
9016 Location second = locations->InAt(1);
9017 Location out = locations->Out();
9018
9019 if (second.IsConstant()) {
9020 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
9021 uint32_t value_low = Low32Bits(value);
9022 if (instruction->GetResultType() == DataType::Type::kInt32) {
9023 vixl32::Register first_reg = InputRegisterAt(instruction, 0);
9024 vixl32::Register out_reg = OutputRegister(instruction);
9025 if (instruction->IsAnd()) {
9026 GenerateAndConst(out_reg, first_reg, value_low);
9027 } else if (instruction->IsOr()) {
9028 GenerateOrrConst(out_reg, first_reg, value_low);
9029 } else {
9030 DCHECK(instruction->IsXor());
9031 GenerateEorConst(out_reg, first_reg, value_low);
9032 }
9033 } else {
9034 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
9035 uint32_t value_high = High32Bits(value);
9036 vixl32::Register first_low = LowRegisterFrom(first);
9037 vixl32::Register first_high = HighRegisterFrom(first);
9038 vixl32::Register out_low = LowRegisterFrom(out);
9039 vixl32::Register out_high = HighRegisterFrom(out);
9040 if (instruction->IsAnd()) {
9041 GenerateAndConst(out_low, first_low, value_low);
9042 GenerateAndConst(out_high, first_high, value_high);
9043 } else if (instruction->IsOr()) {
9044 GenerateOrrConst(out_low, first_low, value_low);
9045 GenerateOrrConst(out_high, first_high, value_high);
9046 } else {
9047 DCHECK(instruction->IsXor());
9048 GenerateEorConst(out_low, first_low, value_low);
9049 GenerateEorConst(out_high, first_high, value_high);
9050 }
9051 }
9052 return;
9053 }
9054
9055 if (instruction->GetResultType() == DataType::Type::kInt32) {
9056 vixl32::Register first_reg = InputRegisterAt(instruction, 0);
9057 vixl32::Register second_reg = InputRegisterAt(instruction, 1);
9058 vixl32::Register out_reg = OutputRegister(instruction);
9059 if (instruction->IsAnd()) {
9060 __ And(out_reg, first_reg, second_reg);
9061 } else if (instruction->IsOr()) {
9062 __ Orr(out_reg, first_reg, second_reg);
9063 } else {
9064 DCHECK(instruction->IsXor());
9065 __ Eor(out_reg, first_reg, second_reg);
9066 }
9067 } else {
9068 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
9069 vixl32::Register first_low = LowRegisterFrom(first);
9070 vixl32::Register first_high = HighRegisterFrom(first);
9071 vixl32::Register second_low = LowRegisterFrom(second);
9072 vixl32::Register second_high = HighRegisterFrom(second);
9073 vixl32::Register out_low = LowRegisterFrom(out);
9074 vixl32::Register out_high = HighRegisterFrom(out);
9075 if (instruction->IsAnd()) {
9076 __ And(out_low, first_low, second_low);
9077 __ And(out_high, first_high, second_high);
9078 } else if (instruction->IsOr()) {
9079 __ Orr(out_low, first_low, second_low);
9080 __ Orr(out_high, first_high, second_high);
9081 } else {
9082 DCHECK(instruction->IsXor());
9083 __ Eor(out_low, first_low, second_low);
9084 __ Eor(out_high, first_high, second_high);
9085 }
9086 }
9087 }
9088
9089 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
9090 HInstruction* instruction,
9091 Location out,
9092 uint32_t offset,
9093 Location maybe_temp,
9094 ReadBarrierOption read_barrier_option) {
9095 vixl32::Register out_reg = RegisterFrom(out);
9096 if (read_barrier_option == kWithReadBarrier) {
9097 DCHECK(codegen_->EmitReadBarrier());
9098 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9099 if (kUseBakerReadBarrier) {
9100 // Load with fast path based Baker's read barrier.
9101 // /* HeapReference<Object> */ out = *(out + offset)
9102 codegen_->GenerateFieldLoadWithBakerReadBarrier(
9103 instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false);
9104 } else {
9105 // Load with slow path based read barrier.
9106 // Save the value of `out` into `maybe_temp` before overwriting it
9107 // in the following move operation, as we will need it for the
9108 // read barrier below.
9109 __ Mov(RegisterFrom(maybe_temp), out_reg);
9110 // /* HeapReference<Object> */ out = *(out + offset)
9111 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9112 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
9113 }
9114 } else {
9115 // Plain load with no read barrier.
9116 // /* HeapReference<Object> */ out = *(out + offset)
9117 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9118 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9119 }
9120 }
9121
9122 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
9123 HInstruction* instruction,
9124 Location out,
9125 Location obj,
9126 uint32_t offset,
9127 Location maybe_temp,
9128 ReadBarrierOption read_barrier_option) {
9129 vixl32::Register out_reg = RegisterFrom(out);
9130 vixl32::Register obj_reg = RegisterFrom(obj);
9131 if (read_barrier_option == kWithReadBarrier) {
9132 DCHECK(codegen_->EmitReadBarrier());
9133 if (kUseBakerReadBarrier) {
9134 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9135 // Load with fast path based Baker's read barrier.
9136 // /* HeapReference<Object> */ out = *(obj + offset)
9137 codegen_->GenerateFieldLoadWithBakerReadBarrier(
9138 instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false);
9139 } else {
9140 // Load with slow path based read barrier.
9141 // /* HeapReference<Object> */ out = *(obj + offset)
9142 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9143 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
9144 }
9145 } else {
9146 // Plain load with no read barrier.
9147 // /* HeapReference<Object> */ out = *(obj + offset)
9148 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9149 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9150 }
9151 }
9152
9153 void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
9154 HInstruction* instruction,
9155 Location root,
9156 vixl32::Register obj,
9157 uint32_t offset,
9158 ReadBarrierOption read_barrier_option) {
9159 vixl32::Register root_reg = RegisterFrom(root);
9160 if (read_barrier_option == kWithReadBarrier) {
9161 DCHECK(EmitReadBarrier());
9162 if (kUseBakerReadBarrier) {
9163 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
9164 // Baker's read barriers are used.
9165
9166 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
9167 // the Marking Register) to decide whether we need to enter
9168 // the slow path to mark the GC root.
9169 //
9170 // We use shared thunks for the slow path; shared within the method
9171 // for JIT, across methods for AOT. That thunk checks the reference
9172 // and jumps to the entrypoint if needed.
9173 //
9174 // lr = &return_address;
9175 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
9176 // if (mr) { // Thread::Current()->GetIsGcMarking()
9177 // goto gc_root_thunk<root_reg>(lr)
9178 // }
9179 // return_address:
9180
9181 UseScratchRegisterScope temps(GetVIXLAssembler());
9182 temps.Exclude(ip);
9183 bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
9184 uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
9185
9186 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
9187 size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
9188 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9189 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9190 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9191 vixl32::Label return_address;
9192 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9193 __ cmp(mr, Operand(0));
9194 // Currently the offset is always within range. If that changes,
9195 // we shall have to split the load the same way as for fields.
9196 DCHECK_LT(offset, kReferenceLoadMinFarOffset);
9197 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9198 __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
9199 EmitBakerReadBarrierBne(custom_data);
9200 __ bind(&return_address);
9201 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9202 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9203 : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
9204 } else {
9205 // GC root loaded through a slow path for read barriers other
9206 // than Baker's.
9207 // /* GcRoot<mirror::Object>* */ root = obj + offset
9208 __ Add(root_reg, obj, offset);
9209 // /* mirror::Object* */ root = root->Read()
9210 GenerateReadBarrierForRootSlow(instruction, root, root);
9211 }
9212 } else {
9213 // Plain GC root load with no read barrier.
9214 // /* GcRoot<mirror::Object> */ root = *(obj + offset)
9215 GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
9216 // Note that GC roots are not affected by heap poisoning, thus we
9217 // do not have to unpoison `root_reg` here.
9218 }
9219 MaybeGenerateMarkingRegisterCheck(/* code= */ 20);
9220 }
9221
9222 void CodeGeneratorARMVIXL::GenerateIntrinsicMoveWithBakerReadBarrier(
9223 vixl::aarch32::Register marked_old_value,
9224 vixl::aarch32::Register old_value) {
9225 DCHECK(EmitBakerReadBarrier());
9226
9227 // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
9228 // For low registers, we can reuse the GC root narrow entrypoint, for high registers
9229 // we use a specialized entrypoint because the register bits are 8-11 instead of 12-15.
9230 bool narrow_mov = marked_old_value.IsLow();
9231 uint32_t custom_data = narrow_mov
9232 ? EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode(), /*narrow=*/ true)
9233 : EncodeBakerReadBarrierIntrinsicCasData(marked_old_value.GetCode());
9234
9235 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* MOV */ (narrow_mov ? 1u : 0u);
9236 size_t wide_instructions = /* ADR+CMP+MOV+BNE */ 4u - narrow_instructions;
9237 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9238 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9239 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9240 vixl32::Label return_address;
9241 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9242 __ cmp(mr, Operand(0));
9243 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9244 __ mov(EncodingSize(narrow_mov ? Narrow : Wide), marked_old_value, old_value);
9245 EmitBakerReadBarrierBne(custom_data);
9246 __ bind(&return_address);
9247 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9248 narrow_mov
9249 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9250 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET);
9251 }
9252
9253 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9254 Location ref,
9255 vixl32::Register obj,
9256 const vixl32::MemOperand& src,
9257 bool needs_null_check) {
9258 DCHECK(EmitBakerReadBarrier());
9259
9260 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9261 // Marking Register) to decide whether we need to enter the slow
9262 // path to mark the reference. Then, in the slow path, check the
9263 // gray bit in the lock word of the reference's holder (`obj`) to
9264 // decide whether to mark `ref` or not.
9265 //
9266 // We use shared thunks for the slow path; shared within the method
9267 // for JIT, across methods for AOT. That thunk checks the holder
9268 // and jumps to the entrypoint if needed. If the holder is not gray,
9269 // it creates a fake dependency and returns to the LDR instruction.
9270 //
9271 // lr = &gray_return_address;
9272 // if (mr) { // Thread::Current()->GetIsGcMarking()
9273 // goto field_thunk<holder_reg, base_reg>(lr)
9274 // }
9275 // not_gray_return_address:
9276 // // Original reference load. If the offset is too large to fit
9277 // // into LDR, we use an adjusted base register here.
9278 // HeapReference<mirror::Object> reference = *(obj+offset);
9279 // gray_return_address:
9280
9281 DCHECK(src.GetAddrMode() == vixl32::Offset);
9282 DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
9283 vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9284 bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
9285
9286 UseScratchRegisterScope temps(GetVIXLAssembler());
9287 temps.Exclude(ip);
9288 uint32_t custom_data =
9289 EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
9290
9291 {
9292 size_t narrow_instructions =
9293 /* CMP */ (mr.IsLow() ? 1u : 0u) +
9294 /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
9295 size_t wide_instructions =
9296 /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9297 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9298 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9299 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9300 vixl32::Label return_address;
9301 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9302 __ cmp(mr, Operand(0));
9303 EmitBakerReadBarrierBne(custom_data);
9304 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9305 __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
9306 if (needs_null_check) {
9307 MaybeRecordImplicitNullCheck(instruction);
9308 }
9309 // Note: We need a specific width for the unpoisoning NEG.
9310 if (kPoisonHeapReferences) {
9311 if (narrow) {
9312 // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
9313 __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
9314 } else {
9315 __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9316 }
9317 }
9318 __ bind(&return_address);
9319 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9320 narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
9321 : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
9322 }
9323 MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip));
9324 }
9325
9326 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9327 Location ref,
9328 vixl32::Register obj,
9329 uint32_t offset,
9330 Location maybe_temp,
9331 bool needs_null_check) {
9332 DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
9333 vixl32::Register base = obj;
9334 if (offset >= kReferenceLoadMinFarOffset) {
9335 base = RegisterFrom(maybe_temp);
9336 static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
9337 __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
9338 offset &= (kReferenceLoadMinFarOffset - 1u);
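// For example, assuming a 4 KiB `kReferenceLoadMinFarOffset`, an offset of 0x1234 would be
// split into an ADD of 0x1000 into `base` and an LDR immediate of 0x234.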
9339 }
9340 GenerateFieldLoadWithBakerReadBarrier(
9341 instruction, ref, obj, MemOperand(base, offset), needs_null_check);
9342 }
9343
9344 void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
9345 vixl32::Register obj,
9346 uint32_t data_offset,
9347 Location index,
9348 Location temp,
9349 bool needs_null_check) {
9350 DCHECK(EmitBakerReadBarrier());
9351
9352 static_assert(
9353 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
9354 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
9355 ScaleFactor scale_factor = TIMES_4;
9356
9357 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9358 // Marking Register) to decide whether we need to enter the slow
9359 // path to mark the reference. Then, in the slow path, check the
9360 // gray bit in the lock word of the reference's holder (`obj`) to
9361 // decide whether to mark `ref` or not.
9362 //
9363 // We use shared thunks for the slow path; shared within the method
9364 // for JIT, across methods for AOT. That thunk checks the holder
9365 // and jumps to the entrypoint if needed. If the holder is not gray,
9366 // it creates a fake dependency and returns to the LDR instruction.
9367 //
9368 // lr = &gray_return_address;
9369 // if (mr) { // Thread::Current()->GetIsGcMarking()
9370 // goto array_thunk<base_reg>(lr)
9371 // }
9372 // not_gray_return_address:
9373 // // Original reference load. If the offset is too large to fit
9374 // // into LDR, we use an adjusted base register here.
9375 // HeapReference<mirror::Object> reference = data[index];
9376 // gray_return_address:
9377
9378 DCHECK(index.IsValid());
9379 vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
9380 vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9381 vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
9382
9383 UseScratchRegisterScope temps(GetVIXLAssembler());
9384 temps.Exclude(ip);
9385 uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
9386
9387 __ Add(data_reg, obj, Operand(data_offset));
9388 {
9389 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
9390 size_t wide_instructions =
9391 /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9392 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9393 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9394 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9395 vixl32::Label return_address;
9396 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9397 __ cmp(mr, Operand(0));
9398 EmitBakerReadBarrierBne(custom_data);
9399 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9400 __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
9401 DCHECK(!needs_null_check); // The thunk cannot handle the null check.
9402 // Note: We need a Wide NEG for the unpoisoning.
9403 if (kPoisonHeapReferences) {
9404 __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9405 }
9406 __ bind(&return_address);
9407 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9408 BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
9409 }
9410 MaybeGenerateMarkingRegisterCheck(/* code= */ 22, /* temp_loc= */ LocationFrom(ip));
9411 }
9412
9413 void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
9414 // The following condition is a compile-time one, so it does not have a run-time cost.
9415 if (kIsDebugBuild && EmitBakerReadBarrier()) {
9416 // The following condition is a run-time one; it is executed after the
9417 // previous compile-time test, to avoid penalizing non-debug builds.
9418 if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
9419 UseScratchRegisterScope temps(GetVIXLAssembler());
9420 vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
9421 GetAssembler()->GenerateMarkingRegisterCheck(temp,
9422 kMarkingRegisterCheckBreakCodeBaseCode + code);
9423 }
9424 }
9425 }
9426
9427 SlowPathCodeARMVIXL* CodeGeneratorARMVIXL::AddReadBarrierSlowPath(HInstruction* instruction,
9428 Location out,
9429 Location ref,
9430 Location obj,
9431 uint32_t offset,
9432 Location index) {
9433 SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator())
9434 ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index);
9435 AddSlowPath(slow_path);
9436 return slow_path;
9437 }
9438
9439 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
9440 Location out,
9441 Location ref,
9442 Location obj,
9443 uint32_t offset,
9444 Location index) {
9445 DCHECK(EmitReadBarrier());
9446
9447 // Insert a slow path based read barrier *after* the reference load.
9448 //
9449 // If heap poisoning is enabled, the unpoisoning of the loaded
9450 // reference will be carried out by the runtime within the slow
9451 // path.
9452 //
9453 // Note that `ref` currently does not get unpoisoned (when heap
9454 // poisoning is enabled), which is alright as the `ref` argument is
9455 // not used by the artReadBarrierSlow entry point.
9456 //
9457 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
9458 SlowPathCodeARMVIXL* slow_path =
9459 AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
9460
9461 __ B(slow_path->GetEntryLabel());
9462 __ Bind(slow_path->GetExitLabel());
9463 }
9464
9465 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
9466 Location out,
9467 Location ref,
9468 Location obj,
9469 uint32_t offset,
9470 Location index) {
9471 if (EmitReadBarrier()) {
9472 // Baker's read barriers shall be handled by the fast path
9473 // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
9474 DCHECK(!kUseBakerReadBarrier);
9475 // If heap poisoning is enabled, unpoisoning will be taken care of
9476 // by the runtime within the slow path.
9477 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
9478 } else if (kPoisonHeapReferences) {
9479 GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
9480 }
9481 }
9482
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)9483 void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
9484 Location out,
9485 Location root) {
9486 DCHECK(EmitReadBarrier());
9487
9488 // Insert a slow path based read barrier *after* the GC root load.
9489 //
9490 // Note that GC roots are not affected by heap poisoning, so we do
9491 // not need to do anything special for this here.
9492 SlowPathCodeARMVIXL* slow_path =
9493 new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root);
9494 AddSlowPath(slow_path);
9495
9496 __ B(slow_path->GetEntryLabel());
9497 __ Bind(slow_path->GetExitLabel());
9498 }
9499
9500 // Check if the desired_dispatch_info is supported. If it is, return it;
9501 // otherwise return a fall-back info that should be used instead.
GetSupportedInvokeStaticOrDirectDispatch(const HInvokeStaticOrDirect::DispatchInfo & desired_dispatch_info,ArtMethod * method)9502 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
9503 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
9504 ArtMethod* method) {
9505 if (method->IsIntrinsic() &&
9506 desired_dispatch_info.code_ptr_location == CodePtrLocation::kCallCriticalNative) {
9507 // As a work-around for the soft-float native ABI interfering with type checks, we
9508 // insert fake calls to Float.floatToRawIntBits() or Double.doubleToRawLongBits()
9509 // when a float or double argument is passed in core registers, but we cannot do that
9510 // for actual intrinsic implementations that expect them in FP registers. Therefore,
9511 // we do not use `kCallCriticalNative` for intrinsics with FP arguments; if they are
9512 // properly intrinsified, the dispatch type does not matter anyway.
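// For example (illustrative only): a native method `static double f(float, long)` has
// the shorty "DFJ"; position 0 is the return type, so the 'F' at position 1 (an FP
// argument) makes us fall back to CodePtrLocation::kCallArtMethod below.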
9513 ScopedObjectAccess soa(Thread::Current());
9514 uint32_t shorty_len;
9515 const char* shorty = method->GetShorty(&shorty_len);
9516 for (uint32_t i = 1; i != shorty_len; ++i) {
9517 if (shorty[i] == 'D' || shorty[i] == 'F') {
9518 HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
9519 dispatch_info.code_ptr_location = CodePtrLocation::kCallArtMethod;
9520 return dispatch_info;
9521 }
9522 }
9523 }
9524 return desired_dispatch_info;
9525 }
9526
9527
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)9528 void CodeGeneratorARMVIXL::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
9529 switch (load_kind) {
9530 case MethodLoadKind::kBootImageLinkTimePcRelative: {
9531 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9532 PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
9533 vixl32::Register temp_reg = RegisterFrom(temp);
9534 EmitMovwMovtPlaceholder(labels, temp_reg);
9535 break;
9536 }
9537 case MethodLoadKind::kBootImageRelRo: {
9538 uint32_t boot_image_offset = GetBootImageOffset(invoke);
9539 LoadBootImageRelRoEntry(RegisterFrom(temp), boot_image_offset);
9540 break;
9541 }
9542 case MethodLoadKind::kAppImageRelRo: {
9543 DCHECK(GetCompilerOptions().IsAppImage());
9544 PcRelativePatchInfo* labels = NewAppImageMethodPatch(invoke->GetResolvedMethodReference());
9545 vixl32::Register temp_reg = RegisterFrom(temp);
9546 EmitMovwMovtPlaceholder(labels, temp_reg);
9547 __ Ldr(temp_reg, MemOperand(temp_reg, /*offset=*/ 0));
9548 break;
9549 }
9550 case MethodLoadKind::kBssEntry: {
9551 PcRelativePatchInfo* labels = NewMethodBssEntryPatch(invoke->GetMethodReference());
9552 vixl32::Register temp_reg = RegisterFrom(temp);
9553 EmitMovwMovtPlaceholder(labels, temp_reg);
9554 // All aligned loads are implicitly atomic consume operations on ARM.
9555 GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9556 break;
9557 }
9558 case MethodLoadKind::kJitDirectAddress: {
9559 __ Mov(RegisterFrom(temp), Operand::From(invoke->GetResolvedMethod()));
9560 break;
9561 }
9562 case MethodLoadKind::kRuntimeCall: {
9563 // Test situation, don't do anything.
9564 break;
9565 }
9566 default: {
9567 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
9568 UNREACHABLE();
9569 }
9570 }
9571 }
9572
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)9573 void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
9574 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
9575 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
9576 switch (invoke->GetMethodLoadKind()) {
9577 case MethodLoadKind::kStringInit: {
9578 uint32_t offset =
9579 GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
9580 // temp = thread->string_init_entrypoint
9581 GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
9582 break;
9583 }
9584 case MethodLoadKind::kRecursive: {
9585 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
9586 break;
9587 }
9588 case MethodLoadKind::kRuntimeCall: {
9589 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
9590 return; // No code pointer retrieval; the runtime performs the call directly.
9591 }
9592 case MethodLoadKind::kBootImageLinkTimePcRelative:
9593 // Note: Unlike arm64, x86 and x86-64, we do not avoid the materialization of method
9594 // pointer for kCallCriticalNative because it would not save us an instruction from
9595 // the current sequence MOVW+MOVT+ADD(pc)+LDR+BL. The ADD(pc) separates the patched
9596 // offset instructions MOVW+MOVT from the entrypoint load, so they cannot be fused.
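// For reference, the full sequence emitted for this load kind followed by the call is
// roughly:
//   movw temp, #placeholder     ; patched at link time
//   movt temp, #placeholder     ; patched at link time
//   add  temp, temp, pc         ; temp = boot image ArtMethod*
//   ldr  lr, [temp, #offset]    ; load the code pointer from the ArtMethod
//   blx  lr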
9597 FALLTHROUGH_INTENDED;
9598 default: {
9599 LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
9600 break;
9601 }
9602 }
9603
9604 auto call_code_pointer_member = [&](MemberOffset offset) {
9605 // LR = callee_method->member;
9606 GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
9607 {
9608 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9609 // The blx instruction has only a 16-bit encoding in T32, which is why the stricter exact-size scope check is used.
9610 ExactAssemblyScope aas(GetVIXLAssembler(),
9611 vixl32::k16BitT32InstructionSizeInBytes,
9612 CodeBufferCheckScope::kExactSize);
9613 // LR()
9614 __ blx(lr);
9615 RecordPcInfo(invoke, slow_path);
9616 }
9617 };
9618 switch (invoke->GetCodePtrLocation()) {
9619 case CodePtrLocation::kCallSelf:
9620 {
9621 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
9622 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9623 ExactAssemblyScope aas(GetVIXLAssembler(),
9624 vixl32::k32BitT32InstructionSizeInBytes,
9625 CodeBufferCheckScope::kMaximumSize);
9626 __ bl(GetFrameEntryLabel());
9627 RecordPcInfo(invoke, slow_path);
9628 }
9629 break;
9630 case CodePtrLocation::kCallCriticalNative: {
9631 size_t out_frame_size =
9632 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
9633 kAapcsStackAlignment,
9634 GetCriticalNativeDirectCallFrameSize>(invoke);
9635 call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
9636 // Move the result when needed due to native and managed ABI mismatch.
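// The soft-float native ABI returns a float in r0 and a double in r0/r1, while the
// managed (hard-float) ABI expects them in s0/d0, hence the VMOVs below.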
9637 switch (invoke->GetType()) {
9638 case DataType::Type::kFloat32:
9639 __ Vmov(s0, r0);
9640 break;
9641 case DataType::Type::kFloat64:
9642 __ Vmov(d0, r0, r1);
9643 break;
9644 case DataType::Type::kBool:
9645 case DataType::Type::kInt8:
9646 case DataType::Type::kUint16:
9647 case DataType::Type::kInt16:
9648 case DataType::Type::kInt32:
9649 case DataType::Type::kInt64:
9650 case DataType::Type::kVoid:
9651 break;
9652 default:
9653 DCHECK(false) << invoke->GetType();
9654 break;
9655 }
9656 if (out_frame_size != 0u) {
9657 DecreaseFrame(out_frame_size);
9658 }
9659 break;
9660 }
9661 case CodePtrLocation::kCallArtMethod:
9662 call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
9663 break;
9664 }
9665
9666 DCHECK(!IsLeafMethod());
9667 }
9668
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_location,SlowPathCode * slow_path)9669 void CodeGeneratorARMVIXL::GenerateVirtualCall(
9670 HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
9671 vixl32::Register temp = RegisterFrom(temp_location);
9672 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9673 invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
9674
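// Ignoring heap poisoning and the baseline inline cache check, the emitted sequence is
// roughly:
//   ldr temp, [receiver, #class_offset]   ; temp = receiver->klass_
//   ldr temp, [temp, #method_offset]      ; temp = vtable entry (ArtMethod*)
//   ldr lr, [temp, #entry_point_offset]
//   blx lr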
9675 // Use the calling convention instead of the location of the receiver, as
9676 // intrinsics may have put the receiver in a different register. In the intrinsics
9677 // slow path, the arguments have been moved to the right place, so here we are
9678 // guaranteed that the receiver is the first register of the calling convention.
9679 InvokeDexCallingConventionARMVIXL calling_convention;
9680 vixl32::Register receiver = calling_convention.GetRegisterAt(0);
9681 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
9682 {
9683 // Make sure the pc is recorded immediately after the `ldr` instruction.
9684 ExactAssemblyScope aas(GetVIXLAssembler(),
9685 vixl32::kMaxInstructionSizeInBytes,
9686 CodeBufferCheckScope::kMaximumSize);
9687 // /* HeapReference<Class> */ temp = receiver->klass_
9688 __ ldr(temp, MemOperand(receiver, class_offset));
9689 MaybeRecordImplicitNullCheck(invoke);
9690 }
9691 // Instead of simply (possibly) unpoisoning `temp` here, we should
9692 // emit a read barrier for the previous class reference load.
9693 // However, this is not required in practice, as this is an
9694 // intermediate/temporary reference and because the current
9695 // concurrent copying collector keeps the from-space memory
9696 // intact/accessible until the end of the marking phase (though it
9697 // may not do so in the future).
9698 GetAssembler()->MaybeUnpoisonHeapReference(temp);
9699
9700 // If we're compiling baseline, update the inline cache.
9701 MaybeGenerateInlineCacheCheck(invoke, temp);
9702
9703 // temp = temp->GetMethodAt(method_offset);
9704 uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
9705 kArmPointerSize).Int32Value();
9706 GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
9707 // LR = temp->GetEntryPoint();
9708 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
9709 {
9710 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9711 // The blx instruction has only a 16-bit encoding in T32, which is why the stricter exact-size scope check is used.
9712 ExactAssemblyScope aas(GetVIXLAssembler(),
9713 vixl32::k16BitT32InstructionSizeInBytes,
9714 CodeBufferCheckScope::kExactSize);
9715 // LR();
9716 __ blx(lr);
9717 RecordPcInfo(invoke, slow_path);
9718 }
9719 }
9720
NewBootImageIntrinsicPatch(uint32_t intrinsic_data)9721 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
9722 uint32_t intrinsic_data) {
9723 return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_other_patches_);
9724 }
9725
NewBootImageRelRoPatch(uint32_t boot_image_offset)9726 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
9727 uint32_t boot_image_offset) {
9728 return NewPcRelativePatch(/* dex_file= */ nullptr,
9729 boot_image_offset,
9730 &boot_image_other_patches_);
9731 }
9732
NewBootImageMethodPatch(MethodReference target_method)9733 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
9734 MethodReference target_method) {
9735 return NewPcRelativePatch(
9736 target_method.dex_file, target_method.index, &boot_image_method_patches_);
9737 }
9738
NewAppImageMethodPatch(MethodReference target_method)9739 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewAppImageMethodPatch(
9740 MethodReference target_method) {
9741 return NewPcRelativePatch(
9742 target_method.dex_file, target_method.index, &app_image_method_patches_);
9743 }
9744
NewMethodBssEntryPatch(MethodReference target_method)9745 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
9746 MethodReference target_method) {
9747 return NewPcRelativePatch(
9748 target_method.dex_file, target_method.index, &method_bss_entry_patches_);
9749 }
9750
NewBootImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index)9751 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch(
9752 const DexFile& dex_file, dex::TypeIndex type_index) {
9753 return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_);
9754 }
9755
NewAppImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index)9756 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewAppImageTypePatch(
9757 const DexFile& dex_file, dex::TypeIndex type_index) {
9758 return NewPcRelativePatch(&dex_file, type_index.index_, &app_image_type_patches_);
9759 }
9760
NewTypeBssEntryPatch(HLoadClass * load_class)9761 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
9762 HLoadClass* load_class) {
9763 const DexFile& dex_file = load_class->GetDexFile();
9764 dex::TypeIndex type_index = load_class->GetTypeIndex();
9765 ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
9766 switch (load_class->GetLoadKind()) {
9767 case HLoadClass::LoadKind::kBssEntry:
9768 patches = &type_bss_entry_patches_;
9769 break;
9770 case HLoadClass::LoadKind::kBssEntryPublic:
9771 patches = &public_type_bss_entry_patches_;
9772 break;
9773 case HLoadClass::LoadKind::kBssEntryPackage:
9774 patches = &package_type_bss_entry_patches_;
9775 break;
9776 default:
9777 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
9778 UNREACHABLE();
9779 }
9780 return NewPcRelativePatch(&dex_file, type_index.index_, patches);
9781 }
9782
NewBootImageStringPatch(const DexFile & dex_file,dex::StringIndex string_index)9783 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch(
9784 const DexFile& dex_file, dex::StringIndex string_index) {
9785 return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_);
9786 }
9787
NewStringBssEntryPatch(const DexFile & dex_file,dex::StringIndex string_index)9788 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
9789 const DexFile& dex_file, dex::StringIndex string_index) {
9790 return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_);
9791 }
9792
NewPcRelativePatch(const DexFile * dex_file,uint32_t offset_or_index,ArenaDeque<PcRelativePatchInfo> * patches)9793 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
9794 const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
9795 patches->emplace_back(dex_file, offset_or_index);
9796 return &patches->back();
9797 }
9798
EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset)9799 void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) {
9800 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
9801 DCHECK(!GetCompilerOptions().IsJitCompiler());
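// The `bl` below initially targets the label bound immediately after it; it is only a
// placeholder whose displacement the linker rewrites to reach the out-of-line
// entrypoint thunk emitted by EmitThunkCode() for kCallEntrypoint patches.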
9802 call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
9803 vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label;
9804 __ bind(bl_label);
9805 vixl32::Label placeholder_label;
9806 __ bl(&placeholder_label); // Placeholder, patched at link-time.
9807 __ bind(&placeholder_label);
9808 }
9809
EmitBakerReadBarrierBne(uint32_t custom_data)9810 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
9811 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
9812 if (GetCompilerOptions().IsJitCompiler()) {
9813 auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
9814 vixl::aarch32::Label* slow_path_entry = &it->second.label;
9815 __ b(ne, EncodingSize(Wide), slow_path_entry);
9816 } else {
9817 baker_read_barrier_patches_.emplace_back(custom_data);
9818 vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
9819 __ bind(patch_label);
9820 vixl32::Label placeholder_label;
9821 __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time.
9822 __ bind(&placeholder_label);
9823 }
9824 }
9825
DeduplicateBootImageAddressLiteral(uint32_t address)9826 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
9827 return DeduplicateUint32Literal(address, &uint32_literals_);
9828 }
9829
DeduplicateJitStringLiteral(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)9830 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
9831 const DexFile& dex_file,
9832 dex::StringIndex string_index,
9833 Handle<mirror::String> handle) {
9834 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
9835 return jit_string_patches_.GetOrCreate(
9836 StringReference(&dex_file, string_index),
9837 [this]() {
9838 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9839 });
9840 }
9841
DeduplicateJitClassLiteral(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)9842 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
9843 dex::TypeIndex type_index,
9844 Handle<mirror::Class> handle) {
9845 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
9846 return jit_class_patches_.GetOrCreate(
9847 TypeReference(&dex_file, type_index),
9848 [this]() {
9849 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9850 });
9851 }
9852
LoadBootImageRelRoEntry(vixl32::Register reg,uint32_t boot_image_offset)9853 void CodeGeneratorARMVIXL::LoadBootImageRelRoEntry(vixl32::Register reg,
9854 uint32_t boot_image_offset) {
9855 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
9856 EmitMovwMovtPlaceholder(labels, reg);
9857 __ Ldr(reg, MemOperand(reg, /*offset=*/ 0));
9858 }
9859
LoadBootImageAddress(vixl32::Register reg,uint32_t boot_image_reference)9860 void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
9861 uint32_t boot_image_reference) {
9862 if (GetCompilerOptions().IsBootImage()) {
9863 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9864 NewBootImageIntrinsicPatch(boot_image_reference);
9865 EmitMovwMovtPlaceholder(labels, reg);
9866 } else if (GetCompilerOptions().GetCompilePic()) {
9867 LoadBootImageRelRoEntry(reg, boot_image_reference);
9868 } else {
9869 DCHECK(GetCompilerOptions().IsJitCompiler());
9870 gc::Heap* heap = Runtime::Current()->GetHeap();
9871 DCHECK(!heap->GetBootImageSpaces().empty());
9872 uintptr_t address =
9873 reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
9874 __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
9875 }
9876 }
9877
LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,TypeReference target_type)9878 void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,
9879 TypeReference target_type) {
9880 // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
9881 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9882 PcRelativePatchInfo* labels =
9883 NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
9884 EmitMovwMovtPlaceholder(labels, reg);
9885 }
9886
LoadIntrinsicDeclaringClass(vixl32::Register reg,HInvoke * invoke)9887 void CodeGeneratorARMVIXL::LoadIntrinsicDeclaringClass(vixl32::Register reg, HInvoke* invoke) {
9888 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
9889 if (GetCompilerOptions().IsBootImage()) {
9890 MethodReference target_method = invoke->GetResolvedMethodReference();
9891 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
9892 LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
9893 } else {
9894 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
9895 LoadBootImageAddress(reg, boot_image_offset);
9896 }
9897 }
9898
LoadClassRootForIntrinsic(vixl::aarch32::Register reg,ClassRoot class_root)9899 void CodeGeneratorARMVIXL::LoadClassRootForIntrinsic(vixl::aarch32::Register reg,
9900 ClassRoot class_root) {
9901 if (GetCompilerOptions().IsBootImage()) {
9902 ScopedObjectAccess soa(Thread::Current());
9903 ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
9904 TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
9905 LoadTypeForBootImageIntrinsic(reg, target_type);
9906 } else {
9907 uint32_t boot_image_offset = GetBootImageOffset(class_root);
9908 LoadBootImageAddress(reg, boot_image_offset);
9909 }
9910 }
9911
9912 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)9913 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
9914 const ArenaDeque<PcRelativePatchInfo>& infos,
9915 ArenaVector<linker::LinkerPatch>* linker_patches) {
9916 for (const PcRelativePatchInfo& info : infos) {
9917 const DexFile* dex_file = info.target_dex_file;
9918 size_t offset_or_index = info.offset_or_index;
9919 DCHECK(info.add_pc_label.IsBound());
9920 uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
9921 // Add MOVW patch.
9922 DCHECK(info.movw_label.IsBound());
9923 uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
9924 linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index));
9925 // Add MOVT patch.
9926 DCHECK(info.movt_label.IsBound());
9927 uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
9928 linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index));
9929 }
9930 }
9931
9932 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)9933 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
9934 const DexFile* target_dex_file,
9935 uint32_t pc_insn_offset,
9936 uint32_t boot_image_offset) {
9937 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
9938 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
9939 }
9940
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)9941 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
9942 DCHECK(linker_patches->empty());
9943 size_t size =
9944 /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() +
9945 /* MOVW+MOVT for each entry */ 2u * app_image_method_patches_.size() +
9946 /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
9947 /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() +
9948 /* MOVW+MOVT for each entry */ 2u * app_image_type_patches_.size() +
9949 /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
9950 /* MOVW+MOVT for each entry */ 2u * public_type_bss_entry_patches_.size() +
9951 /* MOVW+MOVT for each entry */ 2u * package_type_bss_entry_patches_.size() +
9952 /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
9953 /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
9954 /* MOVW+MOVT for each entry */ 2u * boot_image_other_patches_.size() +
9955 call_entrypoint_patches_.size() +
9956 baker_read_barrier_patches_.size();
9957 linker_patches->reserve(size);
9958 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
9959 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
9960 boot_image_method_patches_, linker_patches);
9961 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
9962 boot_image_type_patches_, linker_patches);
9963 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
9964 boot_image_string_patches_, linker_patches);
9965 } else {
9966 DCHECK(boot_image_method_patches_.empty());
9967 DCHECK(boot_image_type_patches_.empty());
9968 DCHECK(boot_image_string_patches_.empty());
9969 }
9970 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
9971 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
9972 if (GetCompilerOptions().IsBootImage()) {
9973 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
9974 boot_image_other_patches_, linker_patches);
9975 } else {
9976 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
9977 boot_image_other_patches_, linker_patches);
9978 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
9979 app_image_method_patches_, linker_patches);
9980 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
9981 app_image_type_patches_, linker_patches);
9982 }
9983 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
9984 method_bss_entry_patches_, linker_patches);
9985 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
9986 type_bss_entry_patches_, linker_patches);
9987 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
9988 public_type_bss_entry_patches_, linker_patches);
9989 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
9990 package_type_bss_entry_patches_, linker_patches);
9991 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
9992 string_bss_entry_patches_, linker_patches);
9993 for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) {
9994 DCHECK(info.target_dex_file == nullptr);
9995 linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
9996 info.label.GetLocation(), info.offset_or_index));
9997 }
9998 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
9999 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
10000 info.label.GetLocation(), info.custom_data));
10001 }
10002 DCHECK_EQ(size, linker_patches->size());
10003 }
10004
NeedsThunkCode(const linker::LinkerPatch & patch) const10005 bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
10006 return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
10007 patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
10008 patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
10009 }
10010
EmitThunkCode(const linker::LinkerPatch & patch,ArenaVector<uint8_t> * code,std::string * debug_name)10011 void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
10012 /*out*/ ArenaVector<uint8_t>* code,
10013 /*out*/ std::string* debug_name) {
10014 arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
10015 switch (patch.GetType()) {
10016 case linker::LinkerPatch::Type::kCallRelative: {
10017 // The thunk just uses the entry point in the ArtMethod. This works even for calls
10018 // to the generic JNI and interpreter trampolines.
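// Note: the managed calling convention passes the callee ArtMethod* in r0, so loading
// the entry point from r0 straight into pc performs a tail call into the callee.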
10019 MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
10020 assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value());
10021 assembler.GetVIXLAssembler()->Bkpt(0);
10022 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10023 *debug_name = "MethodCallThunk";
10024 }
10025 break;
10026 }
10027 case linker::LinkerPatch::Type::kCallEntrypoint: {
10028 assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());
10029 assembler.GetVIXLAssembler()->Bkpt(0);
10030 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10031 *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset());
10032 }
10033 break;
10034 }
10035 case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
10036 DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
10037 CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
10038 break;
10039 }
10040 default:
10041 LOG(FATAL) << "Unexpected patch type " << patch.GetType();
10042 UNREACHABLE();
10043 }
10044
10045 // Ensure we emit the literal pool if any.
10046 assembler.FinalizeCode();
10047 code->resize(assembler.CodeSize());
10048 MemoryRegion code_region(code->data(), code->size());
10049 assembler.CopyInstructions(code_region);
10050 }
10051
DeduplicateUint32Literal(uint32_t value,Uint32ToLiteralMap * map)10052 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
10053 uint32_t value,
10054 Uint32ToLiteralMap* map) {
10055 return map->GetOrCreate(
10056 value,
10057 [this, value]() {
10058 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value);
10059 });
10060 }
10061
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)10062 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10063 LocationSummary* locations =
10064 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
10065 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
10066 Location::RequiresRegister());
10067 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
10068 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
10069 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
10070 }
10071
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)10072 void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10073 vixl32::Register res = OutputRegister(instr);
10074 vixl32::Register accumulator =
10075 InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
10076 vixl32::Register mul_left =
10077 InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
10078 vixl32::Register mul_right =
10079 InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
10080
10081 if (instr->GetOpKind() == HInstruction::kAdd) {
10082 __ Mla(res, mul_left, mul_right, accumulator);
10083 } else {
10084 __ Mls(res, mul_left, mul_right, accumulator);
10085 }
10086 }
10087
VisitBoundType(HBoundType * instruction)10088 void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10089 // Nothing to do, this should be removed during prepare for register allocator.
10090 LOG(FATAL) << "Unreachable";
10091 }
10092
VisitBoundType(HBoundType * instruction)10093 void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10094 // Nothing to do, this should be removed during prepare for register allocator.
10095 LOG(FATAL) << "Unreachable";
10096 }
10097
10098 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)10099 void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10100 LocationSummary* locations =
10101 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
10102 locations->SetInAt(0, Location::RequiresRegister());
10103 if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
10104 codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10105 locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base.
10106 if (switch_instr->GetStartValue() != 0) {
10107 locations->AddTemp(Location::RequiresRegister()); // We need a temp for the bias.
10108 }
10109 }
10110 }
10111
10112 // TODO(VIXL): Investigate and reach parity with the old arm codegen.
VisitPackedSwitch(HPackedSwitch * switch_instr)10113 void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10114 int32_t lower_bound = switch_instr->GetStartValue();
10115 uint32_t num_entries = switch_instr->GetNumEntries();
10116 LocationSummary* locations = switch_instr->GetLocations();
10117 vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
10118 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
10119
10120 if (num_entries <= kPackedSwitchCompareJumpThreshold ||
10121 !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10122 // Create a series of compare/jumps.
10123 UseScratchRegisterScope temps(GetVIXLAssembler());
10124 vixl32::Register temp_reg = temps.Acquire();
10125 // Note: It is fine for the AddConstantSetFlags() below to use the IP register to
10126 // temporarily store the immediate, because IP is used as the destination register. For
10127 // the other AddConstantSetFlags() and GenerateCompareWithImmediate() calls, the immediate
10128 // values are constant, and they can be encoded in the instruction without using IP.
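// For example (illustrative only), with lower_bound == 10 and five entries the emitted
// sequence is roughly:
//   subs temp, value, #10
//   beq  case0
//   subs temp, temp, #2
//   blo  case1          ; value - 10 == 1
//   beq  case2          ; value - 10 == 2
//   subs temp, temp, #2
//   blo  case3
//   beq  case4
//   b    default        ; unless the default block is the fallthrough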
10129 __ Adds(temp_reg, value_reg, -lower_bound);
10130
10131 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
10132 // Jump to successors[0] if value == lower_bound.
10133 __ B(eq, codegen_->GetLabelOf(successors[0]));
10134 int32_t last_index = 0;
10135 for (; num_entries - last_index > 2; last_index += 2) {
10136 __ Adds(temp_reg, temp_reg, -2);
10137 // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
10138 __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
10139 // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
10140 __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
10141 }
10142 if (num_entries - last_index == 2) {
10143 // The last missing case_value.
10144 __ Cmp(temp_reg, 1);
10145 __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
10146 }
10147
10148 // And the default for any other value.
10149 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
10150 __ B(codegen_->GetLabelOf(default_block));
10151 }
10152 } else {
10153 // Create a table lookup.
10154 vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
10155
10156 JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
10157
10158 // Remove the bias.
10159 vixl32::Register key_reg;
10160 if (lower_bound != 0) {
10161 key_reg = RegisterFrom(locations->GetTemp(1));
10162 __ Sub(key_reg, value_reg, lower_bound);
10163 } else {
10164 key_reg = value_reg;
10165 }
10166
10167 // Check whether the value is in the table, jump to default block if not.
10168 __ Cmp(key_reg, num_entries - 1);
10169 __ B(hi, codegen_->GetLabelOf(default_block));
10170
10171 UseScratchRegisterScope temps(GetVIXLAssembler());
10172 vixl32::Register jump_offset = temps.Acquire();
10173
10174 // Load jump offset from the table.
10175 {
10176 const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
10177 ExactAssemblyScope aas(GetVIXLAssembler(),
10178 (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
10179 CodeBufferCheckScope::kMaximumSize);
10180 __ adr(table_base, jump_table->GetTableStartLabel());
10181 __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
10182
10183 // Jump to the target block by branching to table_base (PC-relative) + offset.
10184 vixl32::Register target_address = table_base;
10185 __ add(target_address, table_base, jump_offset);
10186 __ bx(target_address);
10187
10188 jump_table->EmitTable(codegen_);
10189 }
10190 }
10191 }
10192
10193 // Copy the result of a call into the given target.
MoveFromReturnRegister(Location trg,DataType::Type type)10194 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) {
10195 if (!trg.IsValid()) {
10196 DCHECK_EQ(type, DataType::Type::kVoid);
10197 return;
10198 }
10199
10200 DCHECK_NE(type, DataType::Type::kVoid);
10201
10202 Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
10203 if (return_loc.Equals(trg)) {
10204 return;
10205 }
10206
10207 // Let the parallel move resolver take care of all of this.
10208 HParallelMove parallel_move(GetGraph()->GetAllocator());
10209 parallel_move.AddMove(return_loc, trg, type, nullptr);
10210 GetMoveResolver()->EmitNativeCode(&parallel_move);
10211 }
10212
VisitClassTableGet(HClassTableGet * instruction)10213 void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10214 LocationSummary* locations =
10215 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
10216 locations->SetInAt(0, Location::RequiresRegister());
10217 locations->SetOut(Location::RequiresRegister());
10218 }
10219
VisitClassTableGet(HClassTableGet * instruction)10220 void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10221 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
10222 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
10223 instruction->GetIndex(), kArmPointerSize).SizeValue();
10224 GetAssembler()->LoadFromOffset(kLoadWord,
10225 OutputRegister(instruction),
10226 InputRegisterAt(instruction, 0),
10227 method_offset);
10228 } else {
10229 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
10230 instruction->GetIndex(), kArmPointerSize));
10231 GetAssembler()->LoadFromOffset(kLoadWord,
10232 OutputRegister(instruction),
10233 InputRegisterAt(instruction, 0),
10234 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
10235 GetAssembler()->LoadFromOffset(kLoadWord,
10236 OutputRegister(instruction),
10237 OutputRegister(instruction),
10238 method_offset);
10239 }
10240 }
10241
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,VIXLUInt32Literal * literal,uint64_t index_in_table)10242 static void PatchJitRootUse(uint8_t* code,
10243 const uint8_t* roots_data,
10244 VIXLUInt32Literal* literal,
10245 uint64_t index_in_table) {
10246 DCHECK(literal->IsBound());
10247 uint32_t literal_offset = literal->GetLocation();
10248 uintptr_t address =
10249 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
10250 uint8_t* data = code + literal_offset;
10251 reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
10252 }
10253
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)10254 void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
10255 for (const auto& entry : jit_string_patches_) {
10256 const StringReference& string_reference = entry.first;
10257 VIXLUInt32Literal* table_entry_literal = entry.second;
10258 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
10259 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10260 }
10261 for (const auto& entry : jit_class_patches_) {
10262 const TypeReference& type_reference = entry.first;
10263 VIXLUInt32Literal* table_entry_literal = entry.second;
10264 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
10265 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10266 }
10267 }
10268
EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo * labels,vixl32::Register out)10269 void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
10270 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
10271 vixl32::Register out) {
10272 ExactAssemblyScope aas(GetVIXLAssembler(),
10273 3 * vixl32::kMaxInstructionSizeInBytes,
10274 CodeBufferCheckScope::kMaximumSize);
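// The movw/movt immediates below are placeholders; the linker patches them so that,
// after the final `add out, out, pc`, `out` holds the target address (e.g. a boot
// image entry or a .bss slot).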
10275 // TODO(VIXL): Think about using mov instead of movw.
10276 __ bind(&labels->movw_label);
10277 __ movw(out, /* operand= */ 0u);
10278 __ bind(&labels->movt_label);
10279 __ movt(out, /* operand= */ 0u);
10280 __ bind(&labels->add_pc_label);
10281 __ add(out, out, pc);
10282 }
10283
10284 #undef __
10285 #undef QUICK_ENTRY_POINT
10286 #undef TODO_VIXL32
10287
10288 #define __ assembler.GetVIXLAssembler()->
10289
EmitGrayCheckAndFastPath(ArmVIXLAssembler & assembler,vixl32::Register base_reg,vixl32::MemOperand & lock_word,vixl32::Label * slow_path,int32_t raw_ldr_offset,vixl32::Label * throw_npe=nullptr)10290 static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
10291 vixl32::Register base_reg,
10292 vixl32::MemOperand& lock_word,
10293 vixl32::Label* slow_path,
10294 int32_t raw_ldr_offset,
10295 vixl32::Label* throw_npe = nullptr) {
10296 // Load the lock word containing the rb_state.
10297 __ Ldr(ip, lock_word);
10298 // Given the numeric representation, it's enough to check the low bit of the rb_state.
10299 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
10300 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
10301 __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
10302 __ B(ne, slow_path, /* is_far_target= */ false);
10303 // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
10304 if (throw_npe != nullptr) {
10305 __ Bind(throw_npe);
10306 }
10307 __ Add(lr, lr, raw_ldr_offset);
10308 // Introduce a dependency on the lock_word including rb_state,
10309 // to prevent load-load reordering, and without using
10310 // a memory barrier (which would be more expensive).
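// (Shifting ip right by 32 always yields 0, so base_reg is unchanged; the ADD merely
// creates an address dependency on the just-loaded lock word.)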
10311 __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
10312 __ Bx(lr); // And return back to the function.
10313 // Note: The fake dependency is unnecessary for the slow path.
10314 }
10315
10316 // Load the read barrier introspection entrypoint into register `entrypoint`.
LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler & assembler)10317 static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
10318 // The register where the read barrier introspection entrypoint is loaded
10319 // is the marking register. We clobber it here and the entrypoint restores it to 1.
10320 vixl32::Register entrypoint = mr;
10321 // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
10322 DCHECK_EQ(ip.GetCode(), 12u);
10323 const int32_t entry_point_offset =
10324 Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
10325 __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
10326 return entrypoint;
10327 }
10328
CompileBakerReadBarrierThunk(ArmVIXLAssembler & assembler,uint32_t encoded_data,std::string * debug_name)10329 void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
10330 uint32_t encoded_data,
10331 /*out*/ std::string* debug_name) {
10332 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
10333 switch (kind) {
10334 case BakerReadBarrierKind::kField: {
10335 vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10336 CheckValidReg(base_reg.GetCode());
10337 vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
10338 CheckValidReg(holder_reg.GetCode());
10339 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10340 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10341 temps.Exclude(ip);
10342 // In the case of a field load, if `base_reg` differs from
10343 // `holder_reg`, the offset was too large and we must have emitted (during the construction
10344 // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
10345 // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
10346 // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
10347 // not necessarily do that check before going to the thunk.
10348 vixl32::Label throw_npe_label;
10349 vixl32::Label* throw_npe = nullptr;
10350 if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
10351 throw_npe = &throw_npe_label;
10352 __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false);
10353 }
10354 // Check if the holder is gray and, if not, add fake dependency to the base register
10355 // and return to the LDR instruction to load the reference. Otherwise, use introspection
10356 // to load the reference and call the entrypoint that performs further checks on the
10357 // reference and marks it if needed.
10358 vixl32::Label slow_path;
10359 MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
10360 const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
10361 ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
10362 : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
10363 EmitGrayCheckAndFastPath(
10364 assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
10365 __ Bind(&slow_path);
10366 const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10367 raw_ldr_offset;
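// The field offset is recovered by reading back the LDR that performs the reference
// load (it sits at a fixed offset from the return address in LR): for the 32-bit LDR
// (immediate) the second halfword holds "Rt(4) | imm12(12)", and for the 16-bit LDR
// (immediate) the offset is imm5 (bits [10:6]) scaled by 4.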
10368 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10369 if (width == BakerReadBarrierWidth::kWide) {
10370 MemOperand ldr_half_address(lr, ldr_offset + 2);
10371 __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
10372 __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
10373 __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
10374 } else {
10375 MemOperand ldr_address(lr, ldr_offset);
10376 __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
10377 __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
10378 ep_reg, // for narrow LDR.
10379 Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
10380 __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
10381 __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
10382 }
10383 // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
10384 __ Bx(ep_reg); // Jump to the entrypoint.
10385 break;
10386 }
10387 case BakerReadBarrierKind::kArray: {
10388 vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10389 CheckValidReg(base_reg.GetCode());
10390 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10391 BakerReadBarrierSecondRegField::Decode(encoded_data));
10392 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10393 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10394 temps.Exclude(ip);
10395 vixl32::Label slow_path;
10396 int32_t data_offset =
10397 mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
10398 MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
10399 DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
10400 const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
10401 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
10402 __ Bind(&slow_path);
10403 const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10404 raw_ldr_offset;
10405 MemOperand ldr_address(lr, ldr_offset + 2);
10406 __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
10407 // i.e. Rm+32 because the scale in imm2 is 2.
10408 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10409 __ Bfi(ep_reg, ip, 3, 6); // Insert ip into the entrypoint address to create
10410 // a switch case target based on the index register.
10411 __ Mov(ip, base_reg); // Move the base register to ip.
10412 __ Bx(ep_reg); // Jump to the entrypoint's array switch case.
10413 break;
10414 }
10415 case BakerReadBarrierKind::kGcRoot:
10416 case BakerReadBarrierKind::kIntrinsicCas: {
10417 // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
10418 // and it does not have a forwarding address), call the correct introspection entrypoint;
10419 // otherwise return the reference (or the extracted forwarding address).
10420 // There is no gray bit check for GC roots.
10421 vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10422 CheckValidReg(root_reg.GetCode());
10423 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10424 BakerReadBarrierSecondRegField::Decode(encoded_data));
10425 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10426 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10427 temps.Exclude(ip);
10428 vixl32::Label return_label, not_marked, forwarding_address;
10429 __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false);
10430 MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
10431 __ Ldr(ip, lock_word);
10432 __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
10433 __ B(eq, &not_marked);
10434 __ Bind(&return_label);
10435 __ Bx(lr);
10436 __ Bind(&not_marked);
10437 static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
10438 "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
10439 " the highest bits and the 'forwarding address' state to have all bits set");
10440 __ Cmp(ip, Operand(0xc0000000));
10441 __ B(hs, &forwarding_address);
10442 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10443 // Adjust the art_quick_read_barrier_mark_introspection address
10444 // in kBakerCcEntrypointRegister to one of
10445 // art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},intrinsic_cas}.
10446 if (kind == BakerReadBarrierKind::kIntrinsicCas) {
10447 DCHECK(width == BakerReadBarrierWidth::kWide);
10448 DCHECK(!root_reg.IsLow());
10449 }
10450 int32_t entrypoint_offset =
10451 (kind == BakerReadBarrierKind::kGcRoot)
10452 ? (width == BakerReadBarrierWidth::kWide)
10453 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
10454 : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
10455 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET;
10456 __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
10457 __ Mov(ip, root_reg);
10458 __ Bx(ep_reg);
10459 __ Bind(&forwarding_address);
10460 __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
10461 __ Bx(lr);
10462 break;
10463 }
10464 default:
10465 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
10466 UNREACHABLE();
10467 }
10468
10469 // For JIT, the slow path is considered part of the compiled method,
10470 // so JIT should pass null as `debug_name`.
10471 DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
10472 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10473 std::ostringstream oss;
10474 oss << "BakerReadBarrierThunk";
10475 switch (kind) {
10476 case BakerReadBarrierKind::kField:
10477 oss << "Field";
10478 if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10479 oss << "Wide";
10480 }
10481 oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
10482 << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
10483 break;
10484 case BakerReadBarrierKind::kArray:
10485 oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10486 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10487 BakerReadBarrierSecondRegField::Decode(encoded_data));
10488 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10489 break;
10490 case BakerReadBarrierKind::kGcRoot:
10491 oss << "GcRoot";
10492 if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10493 oss << "Wide";
10494 }
10495 oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10496 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10497 BakerReadBarrierSecondRegField::Decode(encoded_data));
10498 break;
10499 case BakerReadBarrierKind::kIntrinsicCas:
10500 oss << "IntrinsicCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10501 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10502 BakerReadBarrierSecondRegField::Decode(encoded_data));
10503 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10504 break;
10505 }
10506 *debug_name = oss.str();
10507 }
10508 }
10509
10510 #undef __
10511
10512 } // namespace arm
10513 } // namespace art
10514